Commit f6a128b6 by Ben Clayton

Vulkan: Optimize shaderClipDistance and shaderCullDistance

Only process clip/cull distances if they are actually output by the vertex shader.

There is still the overhead of having these distances stored in each vertex / primitive, but the structure sizes should be optimized as part of a larger set of changes.

Bug: b/139207336
Tests: dEQP-VK.clipping.*
Change-Id: I8f04b1c3ea823bb1a8cf62f18c987e01cd0c979a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35032
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 9ad035b8
...@@ -154,6 +154,9 @@ namespace sw ...@@ -154,6 +154,9 @@ namespace sw
{ {
State state; State state;
state.numClipDistances = context->vertexShader->getNumOutputClipDistances();
state.numCullDistances = context->vertexShader->getNumOutputCullDistances();
if(context->pixelShader) if(context->pixelShader)
{ {
state.shaderID = context->pixelShader->getSerialID(); state.shaderID = context->pixelShader->getSerialID();
......
...@@ -63,6 +63,9 @@ namespace sw ...@@ -63,6 +63,9 @@ namespace sw
uint64_t shaderID; uint64_t shaderID;
unsigned int numClipDistances;
unsigned int numCullDistances;
VkCompareOp depthCompareMode; VkCompareOp depthCompareMode;
bool depthWriteEnable; bool depthWriteEnable;
......
...@@ -158,13 +158,13 @@ namespace sw ...@@ -158,13 +158,13 @@ namespace sw
} }
} }
for (int i = 0; i < MAX_CLIP_DISTANCES; i++) for (unsigned int i = 0; i < state.numClipDistances; i++)
{ {
DclipDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].C), 16) + DclipDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].B), 16); yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].B), 16);
} }
for (int i = 0; i < MAX_CULL_DISTANCES; i++) for (unsigned int i = 0; i < state.numCullDistances; i++)
{ {
DcullDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].C), 16) + DcullDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].B), 16); yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].B), 16);
......
...@@ -81,6 +81,9 @@ namespace sw ...@@ -81,6 +81,9 @@ namespace sw
state.multiSample = context->sampleCount; state.multiSample = context->sampleCount;
state.rasterizerDiscard = context->rasterizerDiscard; state.rasterizerDiscard = context->rasterizerDiscard;
state.numClipDistances = context->vertexShader->getNumOutputClipDistances();
state.numCullDistances = context->vertexShader->getNumOutputCullDistances();
if (context->pixelShader) if (context->pixelShader)
{ {
for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++) for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
......
...@@ -51,6 +51,8 @@ namespace sw ...@@ -51,6 +51,8 @@ namespace sw
VkCullModeFlags cullMode : BITS(VK_CULL_MODE_FLAG_BITS_MAX_ENUM); VkCullModeFlags cullMode : BITS(VK_CULL_MODE_FLAG_BITS_MAX_ENUM);
unsigned int multiSample : 3; // 1, 2 or 4 unsigned int multiSample : 3; // 1, 2 or 4
bool rasterizerDiscard : 1; bool rasterizerDiscard : 1;
unsigned int numClipDistances : 4; // [0 - 8]
unsigned int numCullDistances : 4; // [0 - 8]
SpirvShader::InterfaceComponent gradient[MAX_INTERFACE_COMPONENTS]; SpirvShader::InterfaceComponent gradient[MAX_INTERFACE_COMPONENTS];
}; };
......
...@@ -163,7 +163,7 @@ namespace sw ...@@ -163,7 +163,7 @@ namespace sw
setBuiltins(x, y, z, w, cMask); setBuiltins(x, y, z, w, cMask);
for (uint32_t i = 0; i < MAX_CLIP_DISTANCES; i++) for (uint32_t i = 0; i < state.numClipDistances; i++)
{ {
auto distance = interpolate(xxxx, DclipDistance[i], rhw, auto distance = interpolate(xxxx, DclipDistance[i], rhw,
primitive + OFFSET(Primitive, clipDistance[i]), primitive + OFFSET(Primitive, clipDistance[i]),
...@@ -197,7 +197,9 @@ namespace sw ...@@ -197,7 +197,9 @@ namespace sw
auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance); auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->inputBuiltins.end()) if(it != spirvShader->inputBuiltins.end())
{ {
for (uint32_t i = 0; i < it->second.SizeInComponents; i++) for (uint32_t i = 0; i < state.numCullDistances; i++)
{
if (i < it->second.SizeInComponents)
{ {
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
interpolate(xxxx, DcullDistance[i], rhw, interpolate(xxxx, DcullDistance[i], rhw,
...@@ -207,6 +209,7 @@ namespace sw ...@@ -207,6 +209,7 @@ namespace sw
} }
} }
} }
}
Bool alphaPass = true; Bool alphaPass = true;
......
...@@ -456,7 +456,7 @@ namespace sw ...@@ -456,7 +456,7 @@ namespace sw
} }
} }
for (int i = 0; i < MAX_CLIP_DISTANCES; i++) for (unsigned int i = 0; i < state.numClipDistances; i++)
{ {
setupGradient(primitive, tri, w012, M, v0, v1, v2, setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, clipDistance[i]), OFFSET(Vertex, clipDistance[i]),
...@@ -464,7 +464,7 @@ namespace sw ...@@ -464,7 +464,7 @@ namespace sw
false, true); false, true);
} }
for (int i = 0; i < MAX_CULL_DISTANCES; i++) for (unsigned int i = 0; i < state.numCullDistances; i++)
{ {
setupGradient(primitive, tri, w012, M, v0, v1, v2, setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, cullDistance[i]), OFFSET(Vertex, cullDistance[i]),
......
...@@ -508,6 +508,36 @@ namespace sw ...@@ -508,6 +508,36 @@ namespace sw
return capabilities; return capabilities;
} }
// Reports how many ClipDistance components this shader writes as an
// output builtin. Yields 0 when the ClipDistance capability is not used,
// or when no ClipDistance entry exists in outputBuiltins.
unsigned int getNumOutputClipDistances() const
{
	// Without the capability there is nothing to look up.
	if (!getUsedCapabilities().ClipDistance)
	{
		return 0;
	}

	auto builtin = outputBuiltins.find(spv::BuiltInClipDistance);
	if (builtin == outputBuiltins.end())
	{
		return 0;
	}

	return builtin->second.SizeInComponents;
}
// Reports how many CullDistance components this shader writes as an
// output builtin. Yields 0 when the CullDistance capability is not used,
// or when no CullDistance entry exists in outputBuiltins.
unsigned int getNumOutputCullDistances() const
{
	// Without the capability there is nothing to look up.
	if (!getUsedCapabilities().CullDistance)
	{
		return 0;
	}

	auto builtin = outputBuiltins.find(spv::BuiltInCullDistance);
	if (builtin == outputBuiltins.end())
	{
		return 0;
	}

	return builtin->second.SizeInComponents;
}
enum AttribType : unsigned char enum AttribType : unsigned char
{ {
ATTRIBTYPE_FLOAT, ATTRIBTYPE_FLOAT,
......
...@@ -148,21 +148,18 @@ namespace sw ...@@ -148,21 +148,18 @@ namespace sw
{ {
cullMask = Int(15); cullMask = Int(15);
if (spirvShader->getUsedCapabilities().CullDistance)
{
auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance); auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if (it != spirvShader->outputBuiltins.end()) if (it != spirvShader->outputBuiltins.end())
{ {
auto &var = routine.getVariable(it->second.Id); auto count = spirvShader->getNumOutputCullDistances();
for (uint32_t i = 0; i < it->second.SizeInComponents; i++) for (uint32_t i = 0; i < count; i++)
{ {
auto const &distance = var[it->second.FirstComponent + i]; auto const &distance = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
auto mask = SignMask(CmpGE(distance, SIMD::Float(0))); auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
cullMask &= mask; cullMask &= mask;
} }
} }
} }
}
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch, Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
bool robustBufferAccess, UInt & robustnessSize, Int baseVertex) bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
...@@ -594,55 +591,33 @@ namespace sw ...@@ -594,55 +591,33 @@ namespace sw
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
} }
uint32_t clipIndex = 0;
if (spirvShader->getUsedCapabilities().ClipDistance)
{
it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance); it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
if(it != spirvShader->outputBuiltins.end()) if(it != spirvShader->outputBuiltins.end())
{ {
ASSERT(it->second.SizeInComponents <= MAX_CLIP_DISTANCES); auto count = spirvShader->getNumOutputClipDistances();
for(; clipIndex < it->second.SizeInComponents; clipIndex++) for(unsigned int i = 0; i < count; i++)
{ {
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + clipIndex]; auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 3); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 2); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 1); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 0); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 0);
}
}
} }
for(; clipIndex < MAX_CLIP_DISTANCES; clipIndex++)
{
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
} }
uint32_t cullIndex = 0;
if (spirvShader->getUsedCapabilities().CullDistance)
{
it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance); it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->outputBuiltins.end()) if(it != spirvShader->outputBuiltins.end())
{ {
ASSERT(it->second.SizeInComponents <= MAX_CULL_DISTANCES); auto count = spirvShader->getNumOutputCullDistances();
for(; cullIndex < it->second.SizeInComponents; cullIndex++) for(unsigned int i = 0; i < count; i++)
{ {
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + cullIndex]; auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 3); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 2); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 1); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 0); *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 0);
}
} }
} }
for(; cullIndex < MAX_CULL_DISTANCES; cullIndex++)
{
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
}
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF; *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF; *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
...@@ -700,11 +675,11 @@ namespace sw ...@@ -700,11 +675,11 @@ namespace sw
*Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4); *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
} }
} }
for(int i = 0; i < MAX_CLIP_DISTANCES; i++) for(unsigned int i = 0; i < spirvShader->getNumOutputClipDistances(); i++)
{ {
*Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4); *Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4);
} }
for(int i = 0; i < MAX_CULL_DISTANCES; i++) for(unsigned int i = 0; i < spirvShader->getNumOutputCullDistances(); i++)
{ {
*Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4); *Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment