Commit 419b7d7b by Nicolas Capens Committed by Nicolas Capens

Fix unaligned access on depth values

This was tested by using __writeeflags(__readeflags() | 0x40000) to enable alignment checks on x86.

Bug: b/169957911
Change-Id: Ie97b2fda281548fac94b13abe93213a9a1495b0c
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/48929
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
parent 19f01495
...@@ -410,9 +410,7 @@ Bool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int & ...@@ -410,9 +410,7 @@ Bool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable)) if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{ {
// FIXME: Properly optimizes? zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
zValue.xy = *Pointer<Float4>(buffer);
zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
} }
Int4 zTest; Int4 zTest;
...@@ -489,9 +487,8 @@ Bool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x ...@@ -489,9 +487,8 @@ Bool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable)) if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{ {
// FIXME: Properly optimizes? zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0); zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
} }
Int4 zTest; Int4 zTest;
...@@ -559,9 +556,13 @@ Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, ...@@ -559,9 +556,13 @@ Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x,
} }
if(state.depthFormat == VK_FORMAT_D16_UNORM) if(state.depthFormat == VK_FORMAT_D16_UNORM)
{
return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask); return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
}
else else
{
return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask); return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
}
} }
void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha) void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha)
...@@ -603,16 +604,13 @@ void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, co ...@@ -603,16 +604,13 @@ void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, co
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable)) if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{ {
// FIXME: Properly optimizes? zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
zValue.xy = *Pointer<Float4>(buffer);
zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
} }
Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16)); Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16));
zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16)); zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16));
Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue)); Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
// FIXME: Properly optimizes?
*Pointer<Float2>(buffer) = Float2(Z.xy); *Pointer<Float2>(buffer) = Float2(Z.xy);
*Pointer<Float2>(buffer + pitch) = Float2(Z.zw); *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
} }
...@@ -638,20 +636,16 @@ void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, con ...@@ -638,20 +636,16 @@ void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, con
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable)) if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{ {
// FIXME: Properly optimizes? zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0); zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
} }
Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8); Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8);
zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8); zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8);
Z = Z | zValue; Z = Z | zValue;
// FIXME: Properly optimizes? *Pointer<Int>(buffer) = Extract(As<Int2>(Z), 0);
*Pointer<Short>(buffer) = Extract(Z, 0); *Pointer<Int>(buffer + pitch) = Extract(As<Int2>(Z), 1);
*Pointer<Short>(buffer + 2) = Extract(Z, 1);
*Pointer<Short>(buffer + pitch) = Extract(Z, 2);
*Pointer<Short>(buffer + pitch + 2) = Extract(Z, 3);
} }
void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask) void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
...@@ -662,9 +656,13 @@ void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const ...@@ -662,9 +656,13 @@ void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const
} }
if(state.depthFormat == VK_FORMAT_D16_UNORM) if(state.depthFormat == VK_FORMAT_D16_UNORM)
{
writeDepth16(zBuffer, q, x, z, zMask); writeDepth16(zBuffer, q, x, z, zMask);
}
else else
{
writeDepth32F(zBuffer, q, x, z, zMask); writeDepth32F(zBuffer, q, x, z, zMask);
}
} }
void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, const Int &x, const Int &sMask, const Int &zMask, const Int &cMask) void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, const Int &x, const Int &sMask, const Int &zMask, const Int &cMask)
......
...@@ -4083,6 +4083,15 @@ Float4::Float4(const Reference<Float> &rhs) ...@@ -4083,6 +4083,15 @@ Float4::Float4(const Reference<Float> &rhs)
*this = RValue<Float>(rhs.loadValue()); *this = RValue<Float>(rhs.loadValue());
} }
// Assembles a Float4 out of two Float2 values. The depth-buffer call sites in
// this change use it in place of separate .xy / .zw assignments, so `lo`
// supplies the x,y lanes and `hi` supplies the z,w lanes.
Float4::Float4(RValue<Float2> lo, RValue<Float2> hi)
	: XYZW(this)
{
	// Selector for the two-operand shuffle: 0 and 1 pick the first two lanes of
	// `lo`; 4 and 5 presumably pick the first two lanes of `hi`, which implies
	// each operand is really four lanes wide (the original note calls the real
	// type v4i32 -- TODO confirm that is accurate for Float2).
	int laneSelect[4] = { 0, 1, 4, 5 };

	storeValue(Nucleus::createShuffleVector(lo.value(), hi.value(), laneSelect));
}
RValue<Float4> Float4::operator=(float x) RValue<Float4> Float4::operator=(float x)
{ {
return *this = Float4(x, x, x, x); return *this = Float4(x, x, x, x);
......
...@@ -2254,6 +2254,7 @@ public: ...@@ -2254,6 +2254,7 @@ public:
Float4(const Swizzle2<Float4, X> &x, const SwizzleMask2<Float4, Y> &y); Float4(const Swizzle2<Float4, X> &x, const SwizzleMask2<Float4, Y> &y);
template<int X, int Y> template<int X, int Y>
Float4(const SwizzleMask2<Float4, X> &x, const SwizzleMask2<Float4, Y> &y); Float4(const SwizzleMask2<Float4, X> &x, const SwizzleMask2<Float4, Y> &y);
Float4(RValue<Float2> lo, RValue<Float2> hi);
RValue<Float4> operator=(float replicate); RValue<Float4> operator=(float replicate);
RValue<Float4> operator=(RValue<Float4> rhs); RValue<Float4> operator=(RValue<Float4> rhs);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment