Commit 9ad035b8 by Ben Clayton

Vulkan: Implement shaderClipDistance and shaderCullDistance

The clip distance is used to perform per-fragment clipping - any fragments with a negative linear-interpolated distance are discarded.

The cull distance is used to perform per-primitive culling - any primitives with all vertices with a negative distance are discarded.

Bug: b/139207336
Tests: dEQP-VK.clipping.*
Change-Id: Ia6680601b27599152f68410df47aaaa726d0b349
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34915
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent 2b9e22ac
......@@ -50,7 +50,8 @@ namespace sw
OUTLINE_RESOLUTION = 8192, // Maximum vertical resolution of the render target
MIPMAP_LEVELS = 14,
MAX_UNIFORM_BLOCK_SIZE = 16384,
MAX_CLIP_PLANES = 6,
MAX_CLIP_DISTANCES = 8,
MAX_CULL_DISTANCES = 8,
MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS = 64,
MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS = 64,
MIN_TEXEL_OFFSET = -8,
......
......@@ -65,6 +65,9 @@ namespace sw
PlaneEquation w;
PlaneEquation V[MAX_INTERFACE_COMPONENTS];
PlaneEquation clipDistance[MAX_CLIP_DISTANCES];
PlaneEquation cullDistance[MAX_CULL_DISTANCES];
// Masks for two-sided stencil
int64_t clockwiseMask;
int64_t invClockwiseMask;
......
......@@ -157,6 +157,18 @@ namespace sw
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, V[interpolant].B), 16);
}
}
for (int i = 0; i < MAX_CLIP_DISTANCES; i++)
{
DclipDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].B), 16);
}
for (int i = 0; i < MAX_CULL_DISTANCES; i++)
{
DcullDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].B), 16);
}
}
Short4 xLeft[4];
......
......@@ -37,6 +37,8 @@ namespace sw
Float4 Dw;
Float4 Dv[MAX_INTERFACE_COMPONENTS];
Float4 Df;
Float4 DclipDistance[MAX_CLIP_DISTANCES];
Float4 DcullDistance[MAX_CULL_DISTANCES];
UInt occlusion;
......
......@@ -624,25 +624,32 @@ namespace sw
Vertex &v1 = triangles->v1;
Vertex &v2 = triangles->v2;
if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
Polygon polygon(&v0.position, &v1.position, &v2.position);
if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
{
Polygon polygon(&v0.position, &v1.position, &v2.position);
continue;
}
int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
{
continue;
}
if(clipFlagsOr != Clipper::CLIP_FINITE)
int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
if(clipFlagsOr != Clipper::CLIP_FINITE)
{
if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
{
if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
{
continue;
}
continue;
}
}
if(drawCall->setupRoutine(primitives, triangles, &polygon, data))
{
primitives += ms;
visible++;
}
if(drawCall->setupRoutine(primitives, triangles, &polygon, data))
{
primitives += ms;
visible++;
}
}
......@@ -793,6 +800,11 @@ namespace sw
Vertex &v0 = triangle.v0;
Vertex &v1 = triangle.v1;
if((v0.cullMask | v1.cullMask) == 0)
{
return false;
}
const float4 &P0 = v0.position;
const float4 &P1 = v1.position;
......@@ -1082,6 +1094,11 @@ namespace sw
Vertex &v = triangle.v0;
if(v.cullMask == 0)
{
return false;
}
float pSize = v.pointSize;
pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
......
......@@ -39,6 +39,9 @@ namespace sw
float pointSize;
int clipFlags;
int cullMask;
float clipDistance[MAX_CLIP_DISTANCES];
float cullDistance[MAX_CLIP_DISTANCES];
alignas(16) struct
{
......
......@@ -162,6 +162,50 @@ namespace sw
}
setBuiltins(x, y, z, w, cMask);
for (uint32_t i = 0; i < MAX_CLIP_DISTANCES; i++)
{
auto distance = interpolate(xxxx, DclipDistance[i], rhw,
primitive + OFFSET(Primitive, clipDistance[i]),
false, true, false);
auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0)));
for (auto ms = 0u; ms < state.multiSample; ms++)
{
// TODO: Fragments discarded by clipping do not exist at
// all -- they should not be counted in queries or have
// their Z/S effects performed when early fragment tests
// are enabled.
cMask[ms] &= clipMask;
}
if (spirvShader->getUsedCapabilities().ClipDistance)
{
auto it = spirvShader->inputBuiltins.find(spv::BuiltInClipDistance);
if(it != spirvShader->inputBuiltins.end())
{
if (i < it->second.SizeInComponents)
{
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = distance;
}
}
}
}
if (spirvShader->getUsedCapabilities().CullDistance)
{
auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->inputBuiltins.end())
{
for (uint32_t i = 0; i < it->second.SizeInComponents; i++)
{
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
interpolate(xxxx, DcullDistance[i], rhw,
primitive + OFFSET(Primitive, cullDistance[i]),
false, true, false);
}
}
}
}
Bool alphaPass = true;
......
......@@ -452,17 +452,33 @@ namespace sw
OFFSET(Vertex, v[interpolant]),
OFFSET(Primitive, V[interpolant]),
state.gradient[interpolant].Flat,
!state.gradient[interpolant].NoPerspective, 0);
!state.gradient[interpolant].NoPerspective);
}
}
for (int i = 0; i < MAX_CLIP_DISTANCES; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, clipDistance[i]),
OFFSET(Primitive, clipDistance[i]),
false, true);
}
for (int i = 0; i < MAX_CULL_DISTANCES; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, cullDistance[i]),
OFFSET(Primitive, cullDistance[i]),
false, true);
}
Return(1);
}
routine = function("SetupRoutine");
}
void SetupRoutine::setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flat, bool perspective, int component)
void SetupRoutine::setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flat, bool perspective)
{
if(!flat)
{
......
......@@ -33,7 +33,7 @@ namespace sw
SetupFunction::RoutineType getRoutine();
private:
void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool perspective, int component);
void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool perspective);
void edge(Pointer<Byte> &primitive, Pointer<Byte> &data, const Int &Xa, const Int &Ya, const Int &Xb, const Int &Yb, Int &q);
void conditionalRotate1(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2);
void conditionalRotate2(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2);
......
......@@ -351,21 +351,23 @@ namespace sw
{
case spv::CapabilityMatrix: capabilities.Matrix = true; break;
case spv::CapabilityShader: capabilities.Shader = true; break;
case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break;
case spv::CapabilityCullDistance: capabilities.CullDistance = true; break;
case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
case spv::CapabilityImage1D: capabilities.Image1D = true; break;
case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
case spv::CapabilityMultiView: capabilities.MultiView = true; break;
case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
case spv::CapabilityMultiView: capabilities.MultiView = true; break;
default:
UNSUPPORTED("Unsupported capability %u", insn.word(1));
}
......
......@@ -484,21 +484,23 @@ namespace sw
{
bool Matrix : 1;
bool Shader : 1;
bool ClipDistance : 1;
bool CullDistance : 1;
bool InputAttachment : 1;
bool Sampled1D : 1;
bool Image1D : 1;
bool SampledBuffer : 1;
bool ImageBuffer : 1;
bool StorageImageExtendedFormats : 1;
bool ImageQuery : 1;
bool DerivativeControl : 1;
bool GroupNonUniform : 1;
bool MultiView : 1;
bool DeviceGroup : 1;
bool GroupNonUniformVote : 1;
bool GroupNonUniformBallot : 1;
bool GroupNonUniformShuffle : 1;
bool GroupNonUniformShuffleRelative : 1;
bool StorageImageExtendedFormats : 1;
bool DeviceGroup : 1;
bool MultiView : 1;
};
Capabilities const &getUsedCapabilities() const
......
......@@ -62,6 +62,7 @@ namespace sw
readInput(batch);
program(batch, vertexCount);
computeClipFlags();
computeCullMask();
writeCache(vertexCache, tagCache, batch);
}
......@@ -143,6 +144,26 @@ namespace sw
clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
}
// Builds the per-batch cull mask: one bit per vertex lane (bits 0..3).
// A lane's bit stays set only while every cull distance the shader outputs
// for that lane compares >= 0. When the shader does not declare the
// CullDistance capability, or has no CullDistance output builtin, all four
// bits remain set.
void VertexRoutine::computeCullMask()
{
	// Start with all four lane bits set.
	cullMask = Int(15);

	if (!spirvShader->getUsedCapabilities().CullDistance)
	{
		return;
	}

	auto builtin = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
	if (builtin == spirvShader->outputBuiltins.end())
	{
		return;
	}

	auto &cullDistances = routine.getVariable(builtin->second.Id);
	for (uint32_t component = 0; component < builtin->second.SizeInComponents; component++)
	{
		// Clear the bit of any lane whose distance fails the >= 0 comparison.
		auto const &laneDistances = cullDistances[builtin->second.FirstComponent + component];
		cullMask &= SignMask(CmpGE(laneDistances, SIMD::Float(0)));
	}
}
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
{
......@@ -154,9 +175,9 @@ namespace sw
// bytes of memory past the end of the buffer, up to the end of the bound range)."
UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride);
Pointer<Byte> source0 = buffer + offsets.x;
Pointer<Byte> source1 = buffer + offsets.y;
Pointer<Byte> source2 = buffer + offsets.z;
Pointer<Byte> source0 = buffer + offsets.x;
Pointer<Byte> source1 = buffer + offsets.y;
Pointer<Byte> source2 = buffer + offsets.z;
Pointer<Byte> source3 = buffer + offsets.w;
UInt4 zero(0);
......@@ -564,7 +585,7 @@ namespace sw
it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
if(it != spirvShader->outputBuiltins.end())
{
assert(it->second.SizeInComponents == 1);
ASSERT(it->second.SizeInComponents == 1);
auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3);
......@@ -573,11 +594,66 @@ namespace sw
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
}
uint32_t clipIndex = 0;
if (spirvShader->getUsedCapabilities().ClipDistance)
{
it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
if(it != spirvShader->outputBuiltins.end())
{
ASSERT(it->second.SizeInComponents <= MAX_CLIP_DISTANCES);
for(; clipIndex < it->second.SizeInComponents; clipIndex++)
{
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + clipIndex];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 0);
}
}
}
for(; clipIndex < MAX_CLIP_DISTANCES; clipIndex++)
{
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
}
uint32_t cullIndex = 0;
if (spirvShader->getUsedCapabilities().CullDistance)
{
it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->outputBuiltins.end())
{
ASSERT(it->second.SizeInComponents <= MAX_CULL_DISTANCES);
for(; cullIndex < it->second.SizeInComponents; cullIndex++)
{
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + cullIndex];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 0);
}
}
}
for(; cullIndex < MAX_CULL_DISTANCES; cullIndex++)
{
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
}
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullMask)) = -((cullMask >> 3) & 1);
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullMask)) = -((cullMask >> 2) & 1);
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullMask)) = -((cullMask >> 1) & 1);
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullMask)) = -((cullMask >> 0) & 1);
transpose4x4(proj.x, proj.y, proj.z, proj.w);
*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w;
......@@ -614,6 +690,7 @@ namespace sw
*Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize));
*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags));
*Pointer<Int>(vertex + OFFSET(Vertex,cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,cullMask));
*Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected));
for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
......@@ -623,5 +700,13 @@ namespace sw
*Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
}
}
for(int i = 0; i < MAX_CLIP_DISTANCES; i++)
{
*Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4);
}
for(int i = 0; i < MAX_CULL_DISTANCES; i++)
{
*Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4);
}
}
}
......@@ -55,6 +55,7 @@ namespace sw
Pointer<Byte> constants;
Int clipFlags;
Int cullMask;
SpirvRoutine routine;
......@@ -70,6 +71,7 @@ namespace sw
bool robustBufferAccess, UInt& robustnessSize, Int baseVertex);
void readInput(Pointer<UInt> &batch);
void computeClipFlags();
void computeCullMask();
void writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch);
void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry);
};
......
......@@ -84,8 +84,8 @@ const VkPhysicalDeviceFeatures& PhysicalDevice::getFeatures() const
VK_FALSE, // shaderSampledImageArrayDynamicIndexing
VK_FALSE, // shaderStorageBufferArrayDynamicIndexing
VK_FALSE, // shaderStorageImageArrayDynamicIndexing
VK_FALSE, // shaderClipDistance
VK_FALSE, // shaderCullDistance
VK_TRUE, // shaderClipDistance
VK_TRUE, // shaderCullDistance
VK_FALSE, // shaderFloat64
VK_FALSE, // shaderInt64
VK_FALSE, // shaderInt16
......@@ -269,9 +269,9 @@ const VkPhysicalDeviceLimits& PhysicalDevice::getLimits() const
1, // maxSampleMaskWords
VK_FALSE, // timestampComputeAndGraphics
60, // timestampPeriod
8, // maxClipDistances
8, // maxCullDistances
8, // maxCombinedClipAndCullDistances
sw::MAX_CLIP_DISTANCES, // maxClipDistances
sw::MAX_CULL_DISTANCES, // maxCullDistances
sw::MAX_CLIP_DISTANCES + sw::MAX_CULL_DISTANCES, // maxCombinedClipAndCullDistances
2, // discreteQueuePriorities
{ 1.0, vk::MAX_POINT_SIZE }, // pointSizeRange[2]
{ 1.0, 1.0 }, // lineWidthRange[2] (unsupported)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment