Commit eeb81843 by Nicolas Capens Committed by Nicolas Capens

Fix back-face culling for vertices near w=0

When the homogeneous w coordinate is near 0, the projected x or y coordinates (after viewport and subpixel scaling) may be larger than what's representable by 32-bit signed integers. On x86 this produces the "indefinite" integer value 0x80000000. This negative value is returned even for positive inputs. This causes the back-face culling calculations to reject triangles which are in fact visible. This change addresses that by introducing the rr::RoundIntClamped() function, which guarantees that very large positive floating-point values are converted to a positive integer value near INT_MAX. Note that the Vulkan spec states that "[the determination whether the triangle is back-facing or front-facing] is made based on the sign of the (clipped or unclipped) polygon’s area computed in framebuffer coordinates." Thus the alternative of performing culling using the unprojected coordinates would have to take negative viewport dimensions into account, and may not produce the same results as sub-pixel snapped coordinates. Adjust back-face culling of wireframe triangles to use the same calculations as solid triangles. Bug: b/177382194 Change-Id: Ice8493d4cfd164638f091bf5a39b5396d65458e2 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/51648Tested-by: 's avatarNicolas Capens <nicolascapens@google.com> Reviewed-by: 's avatarAlexis Hétu <sugoi@google.com>
parent 3549479d
...@@ -712,11 +712,18 @@ int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives ...@@ -712,11 +712,18 @@ int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives
const Vertex &v1 = triangles[i].v1; const Vertex &v1 = triangles[i].v1;
const Vertex &v2 = triangles[i].v2; const Vertex &v2 = triangles[i].v2;
float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x +
(v0.x * v2.y - v0.y * v2.x) * v1.w + ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x +
(v2.x * v1.y - v1.x * v2.y) * v0.w; ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x; // Area
int w0w1w2 = bit_cast<int>(v0.w) ^
bit_cast<int>(v1.w) ^
bit_cast<int>(v2.w);
A = w0w1w2 < 0 ? -A : A;
bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
if(state.cullMode & VK_CULL_MODE_FRONT_BIT) if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
{ {
if(frontFacing) continue; if(frontFacing) continue;
......
...@@ -81,18 +81,13 @@ void SetupRoutine::generate() ...@@ -81,18 +81,13 @@ void SetupRoutine::generate()
Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2; // Area Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2; // Area
If(A == 0.0f)
{
Return(0);
}
Int w0w1w2 = *Pointer<Int>(v0 + OFFSET(Vertex, w)) ^ Int w0w1w2 = *Pointer<Int>(v0 + OFFSET(Vertex, w)) ^
*Pointer<Int>(v1 + OFFSET(Vertex, w)) ^ *Pointer<Int>(v1 + OFFSET(Vertex, w)) ^
*Pointer<Int>(v2 + OFFSET(Vertex, w)); *Pointer<Int>(v2 + OFFSET(Vertex, w));
A = IfThenElse(w0w1w2 < 0, -A, A); A = IfThenElse(w0w1w2 < 0, -A, A);
Bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? A > 0.0f : A < 0.0f; Bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
if(state.cullMode & VK_CULL_MODE_FRONT_BIT) if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
{ {
......
...@@ -546,8 +546,8 @@ void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCac ...@@ -546,8 +546,8 @@ void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCac
Float4 rhw = Float4(1.0f) / w; Float4 rhw = Float4(1.0f) / w;
Vector4f proj; Vector4f proj;
proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF)))); proj.x = As<Float4>(RoundIntClamped(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF))));
proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData, HxF)))); proj.y = As<Float4>(RoundIntClamped(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData, HxF))));
proj.z = pos.z * rhw; proj.z = pos.z * rhw;
proj.w = rhw; proj.w = rhw;
......
...@@ -2674,6 +2674,21 @@ RValue<Int4> RoundInt(RValue<Float4> cast) ...@@ -2674,6 +2674,21 @@ RValue<Int4> RoundInt(RValue<Float4> cast)
#endif #endif
} }
RValue<Int4> RoundIntClamped(RValue<Float4> cast)
{
RR_DEBUG_INFO_UPDATE_LOC();
#if defined(__i386__) || defined(__x86_64__)
// cvtps2dq produces 0x80000000, a negative value, for input larger than
// 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
// saturate to 0x80000000.
return x86::cvtps2dq(Min(cast, Float4(0x7FFFFF80)));
#else
// ARM saturates to the largest positive or negative integer. Unit tests
// verify that lowerRoundInt() behaves as desired.
return As<Int4>(V(lowerRoundInt(V(cast.value()), T(Int4::type()))));
#endif
}
RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y) RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
{ {
RR_DEBUG_INFO_UPDATE_LOC(); RR_DEBUG_INFO_UPDATE_LOC();
......
...@@ -2007,7 +2007,14 @@ inline RValue<Int4> CmpGE(RValue<Int4> x, RValue<Int4> y) ...@@ -2007,7 +2007,14 @@ inline RValue<Int4> CmpGE(RValue<Int4> x, RValue<Int4> y)
} }
RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y); RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y); RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y);
// Convert to nearest integer. If a converted value is outside of the integer
// range, the returned result is undefined.
RValue<Int4> RoundInt(RValue<Float4> cast); RValue<Int4> RoundInt(RValue<Float4> cast);
// Rounds to the nearest integer, but clamps very large values to an
// implementation-dependent range.
// Specifically, on x86, values larger than 2147483583.0 are converted to
// 2147483583 (0x7FFFFFBF) instead of producing 0x80000000.
RValue<Int4> RoundIntClamped(RValue<Float4> cast);
RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y); RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y);
RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y); RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y);
RValue<Int> Extract(RValue<Int4> val, int i); RValue<Int> Extract(RValue<Int4> val, int i);
......
...@@ -3589,6 +3589,33 @@ RValue<Int4> RoundInt(RValue<Float4> cast) ...@@ -3589,6 +3589,33 @@ RValue<Int4> RoundInt(RValue<Float4> cast)
} }
} }
RValue<Int4> RoundIntClamped(RValue<Float4> cast)
{
RR_DEBUG_INFO_UPDATE_LOC();
// cvtps2dq produces 0x80000000, a negative value, for input larger than
// 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
// saturate to 0x80000000.
RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
if(emulateIntrinsics || CPUID::ARM)
{
// Push the fractional part off the mantissa. Accurate up to +/-2^22.
return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
}
else
{
Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
auto target = ::context->getConstantUndef(Ice::IceType_i32);
auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
nearbyint->addArg(clamped.value());
::basicBlock->appendInst(nearbyint);
return RValue<Int4>(V(result));
}
}
RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y) RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
{ {
RR_DEBUG_INFO_UPDATE_LOC(); RR_DEBUG_INFO_UPDATE_LOC();
......
...@@ -780,6 +780,40 @@ TEST(ReactorUnitTests, NotNeg) ...@@ -780,6 +780,40 @@ TEST(ReactorUnitTests, NotNeg)
EXPECT_EQ(out[8][3], 0x00000000u); EXPECT_EQ(out[8][3], 0x00000000u);
} }
TEST(ReactorUnitTests, RoundInt)
{
FunctionT<int(void *)> function;
{
Pointer<Byte> out = function.Arg<0>();
*Pointer<Int4>(out + 0) = RoundInt(Float4(3.1f, 3.6f, -3.1f, -3.6f));
*Pointer<Int4>(out + 16) = RoundIntClamped(Float4(2147483648.0f, -2147483648.0f, 2147483520, -2147483520));
Return(0);
}
auto routine = function(testName().c_str());
int out[2][4];
memset(&out, 0, sizeof(out));
routine(&out);
EXPECT_EQ(out[0][0], 3);
EXPECT_EQ(out[0][1], 4);
EXPECT_EQ(out[0][2], -3);
EXPECT_EQ(out[0][3], -4);
// x86 returns 0x80000000 for values which cannot be represented in a 32-bit
// integer, but RoundIntClamped() clamps to ensure a positive value for
// positive input. ARM saturates to the largest representable integers.
EXPECT_GE(out[1][0], 2147483520);
EXPECT_LT(out[1][1], -2147483647);
EXPECT_EQ(out[1][2], 2147483520);
EXPECT_EQ(out[1][3], -2147483520);
}
TEST(ReactorUnitTests, FPtoUI) TEST(ReactorUnitTests, FPtoUI)
{ {
FunctionT<int(void *)> function; FunctionT<int(void *)> function;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment