Commit f6a128b6 by Ben Clayton

Vulkan: Optimize shaderClipDistance and shaderCullDistance

Only process clip/cull distances if they are actually output by the vertex shader. There is still the overhead of having these distances stored in each vertex / primitive, but the structure sizes should be optimized as part of a larger set of changes.

Bug: b/139207336
Tests: dEQP-VK.clipping.*
Change-Id: I8f04b1c3ea823bb1a8cf62f18c987e01cd0c979a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35032
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 9ad035b8
......@@ -154,6 +154,9 @@ namespace sw
{
State state;
state.numClipDistances = context->vertexShader->getNumOutputClipDistances();
state.numCullDistances = context->vertexShader->getNumOutputCullDistances();
if(context->pixelShader)
{
state.shaderID = context->pixelShader->getSerialID();
......
......@@ -63,6 +63,9 @@ namespace sw
uint64_t shaderID;
unsigned int numClipDistances;
unsigned int numCullDistances;
VkCompareOp depthCompareMode;
bool depthWriteEnable;
......
......@@ -158,13 +158,13 @@ namespace sw
}
}
for (int i = 0; i < MAX_CLIP_DISTANCES; i++)
for (unsigned int i = 0; i < state.numClipDistances; i++)
{
DclipDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].B), 16);
}
for (int i = 0; i < MAX_CULL_DISTANCES; i++)
for (unsigned int i = 0; i < state.numCullDistances; i++)
{
DcullDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].B), 16);
......
......@@ -81,6 +81,9 @@ namespace sw
state.multiSample = context->sampleCount;
state.rasterizerDiscard = context->rasterizerDiscard;
state.numClipDistances = context->vertexShader->getNumOutputClipDistances();
state.numCullDistances = context->vertexShader->getNumOutputCullDistances();
if (context->pixelShader)
{
for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
......
......@@ -51,6 +51,8 @@ namespace sw
VkCullModeFlags cullMode : BITS(VK_CULL_MODE_FLAG_BITS_MAX_ENUM);
unsigned int multiSample : 3; // 1, 2 or 4
bool rasterizerDiscard : 1;
unsigned int numClipDistances : 4; // [0 - 8]
unsigned int numCullDistances : 4; // [0 - 8]
SpirvShader::InterfaceComponent gradient[MAX_INTERFACE_COMPONENTS];
};
......
......@@ -163,7 +163,7 @@ namespace sw
setBuiltins(x, y, z, w, cMask);
for (uint32_t i = 0; i < MAX_CLIP_DISTANCES; i++)
for (uint32_t i = 0; i < state.numClipDistances; i++)
{
auto distance = interpolate(xxxx, DclipDistance[i], rhw,
primitive + OFFSET(Primitive, clipDistance[i]),
......@@ -197,12 +197,15 @@ namespace sw
auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->inputBuiltins.end())
{
for (uint32_t i = 0; i < it->second.SizeInComponents; i++)
for (uint32_t i = 0; i < state.numCullDistances; i++)
{
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
interpolate(xxxx, DcullDistance[i], rhw,
primitive + OFFSET(Primitive, cullDistance[i]),
false, true, false);
if (i < it->second.SizeInComponents)
{
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
interpolate(xxxx, DcullDistance[i], rhw,
primitive + OFFSET(Primitive, cullDistance[i]),
false, true, false);
}
}
}
}
......
......@@ -456,7 +456,7 @@ namespace sw
}
}
for (int i = 0; i < MAX_CLIP_DISTANCES; i++)
for (unsigned int i = 0; i < state.numClipDistances; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, clipDistance[i]),
......@@ -464,7 +464,7 @@ namespace sw
false, true);
}
for (int i = 0; i < MAX_CULL_DISTANCES; i++)
for (unsigned int i = 0; i < state.numCullDistances; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, cullDistance[i]),
......
......@@ -508,6 +508,36 @@ namespace sw
return capabilities;
}
// getNumOutputClipDistances() reports how many ClipDistance components this
// shader writes as outputs. The result is 0 when the ClipDistance capability
// is not used, or when no ClipDistance entry exists in outputBuiltins.
unsigned int getNumOutputClipDistances() const
{
	// Without the capability there is nothing to look up.
	if (!getUsedCapabilities().ClipDistance)
	{
		return 0;
	}

	const auto entry = outputBuiltins.find(spv::BuiltInClipDistance);
	if (entry == outputBuiltins.end())
	{
		return 0;
	}

	return entry->second.SizeInComponents;
}
// getNumOutputCullDistances() reports how many CullDistance components this
// shader writes as outputs. The result is 0 when the CullDistance capability
// is not used, or when no CullDistance entry exists in outputBuiltins.
unsigned int getNumOutputCullDistances() const
{
	// Without the capability there is nothing to look up.
	if (!getUsedCapabilities().CullDistance)
	{
		return 0;
	}

	const auto entry = outputBuiltins.find(spv::BuiltInCullDistance);
	if (entry == outputBuiltins.end())
	{
		return 0;
	}

	return entry->second.SizeInComponents;
}
enum AttribType : unsigned char
{
ATTRIBTYPE_FLOAT,
......
......@@ -148,18 +148,15 @@ namespace sw
{
cullMask = Int(15);
if (spirvShader->getUsedCapabilities().CullDistance)
auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if (it != spirvShader->outputBuiltins.end())
{
auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if (it != spirvShader->outputBuiltins.end())
auto count = spirvShader->getNumOutputCullDistances();
for (uint32_t i = 0; i < count; i++)
{
auto &var = routine.getVariable(it->second.Id);
for (uint32_t i = 0; i < it->second.SizeInComponents; i++)
{
auto const &distance = var[it->second.FirstComponent + i];
auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
cullMask &= mask;
}
auto const &distance = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
cullMask &= mask;
}
}
}
......@@ -594,55 +591,33 @@ namespace sw
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
}
uint32_t clipIndex = 0;
if (spirvShader->getUsedCapabilities().ClipDistance)
it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
if(it != spirvShader->outputBuiltins.end())
{
it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
if(it != spirvShader->outputBuiltins.end())
auto count = spirvShader->getNumOutputClipDistances();
for(unsigned int i = 0; i < count; i++)
{
ASSERT(it->second.SizeInComponents <= MAX_CLIP_DISTANCES);
for(; clipIndex < it->second.SizeInComponents; clipIndex++)
{
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + clipIndex];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 0);
}
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 0);
}
}
for(; clipIndex < MAX_CLIP_DISTANCES; clipIndex++)
{
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
}
uint32_t cullIndex = 0;
if (spirvShader->getUsedCapabilities().CullDistance)
it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->outputBuiltins.end())
{
it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->outputBuiltins.end())
auto count = spirvShader->getNumOutputCullDistances();
for(unsigned int i = 0; i < count; i++)
{
ASSERT(it->second.SizeInComponents <= MAX_CULL_DISTANCES);
for(; cullIndex < it->second.SizeInComponents; cullIndex++)
{
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + cullIndex];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 0);
}
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 3);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 2);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 1);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 0);
}
}
for(; cullIndex < MAX_CULL_DISTANCES; cullIndex++)
{
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
}
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
......@@ -700,11 +675,11 @@ namespace sw
*Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
}
}
for(int i = 0; i < MAX_CLIP_DISTANCES; i++)
for(unsigned int i = 0; i < spirvShader->getNumOutputClipDistances(); i++)
{
*Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4);
}
for(int i = 0; i < MAX_CULL_DISTANCES; i++)
for(unsigned int i = 0; i < spirvShader->getNumOutputCullDistances(); i++)
{
*Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment