Per sample shading

This CL introduces per-sample shading in the fragment shader. Rather than calling the fragment shader multiple times, once per sample, this CL adds a potential loop in the fragment shader where each sample is processed in one of the loop's iterations.
- Each multisample-related loop now processes either all samples, like before, or only the current sample, if per-sample shading is enabled
- A new per-sample PixelProgram::maskAny() function was added
- emitEpilog() no longer clears phis; that step was moved to a separate clearPhis() function so that phis can be cleared only on the last sample
- The routine's fragCoord values are set per sample, with the proper sample offsets
- Similarly, the xxxx and yyyy values used for interpolation are now offset with the proper sample offsets when per-sample shading is enabled

Bug: b/171415086
Change-Id: Ibd0c1bad23e2d81f7fa97240ebb50f88f1fee36e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/51733
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
parent 3e9b79ff
...@@ -57,6 +57,7 @@ void ComputeProgram::generate() ...@@ -57,6 +57,7 @@ void ComputeProgram::generate()
shader->emitProlog(&routine); shader->emitProlog(&routine);
emit(&routine); emit(&routine);
shader->emitEpilog(&routine); shader->emitEpilog(&routine);
shader->clearPhis(&routine);
} }
void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3]) void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3])
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "PixelProgram.hpp" #include "PixelProgram.hpp"
#include "Constants.hpp"
#include "SamplerCore.hpp" #include "SamplerCore.hpp"
#include "Device/Primitive.hpp" #include "Device/Primitive.hpp"
...@@ -56,14 +57,37 @@ Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const ...@@ -56,14 +57,37 @@ Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const
return mask; return mask;
} }
void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]) Int4 PixelProgram::maskAny(Int cMask, Int sMask, Int zMask) const
{
Int maskUnion = cMask & sMask & zMask;
// Convert to 4 booleans
Int4 laneBits = Int4(1, 2, 4, 8);
Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
Int4 mask(maskUnion);
mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
return mask;
}
void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], int sampleId)
{ {
routine.setImmutableInputBuiltins(spirvShader); routine.setImmutableInputBuiltins(spirvShader);
// TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff // TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
// they are ever going to be read. // they are ever going to be read.
routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f); float x0 = 0.5f;
routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f); float y0 = 0.5f;
float x1 = 1.5f;
float y1 = 1.5f;
if((state.multiSampleCount > 1) && (sampleId >= 0))
{
x0 = Constants::VkSampleLocations4[sampleId][0];
y0 = Constants::VkSampleLocations4[sampleId][1];
x1 = 1.0f + x0;
y1 = 1.0f + y0;
}
routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(x0, x1, x0, x1);
routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(y0, y0, y1, y1);
routine.fragCoord[2] = z[0]; // sample 0 routine.fragCoord[2] = z[0]; // sample 0
routine.fragCoord[3] = w; routine.fragCoord[3] = w;
...@@ -109,8 +133,11 @@ void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cM ...@@ -109,8 +133,11 @@ void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cM
}); });
} }
void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4], int sampleId)
{ {
unsigned int sampleLoopInit = (sampleId >= 0) ? sampleId : 0;
unsigned int sampleLoopEnd = (sampleId >= 0) ? sampleId + 1 : state.multiSampleCount;
routine.descriptorSets = data + OFFSET(DrawData, descriptorSets); routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets); routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
routine.pushConstants = data + OFFSET(DrawData, pushConstants); routine.pushConstants = data + OFFSET(DrawData, pushConstants);
...@@ -130,8 +157,8 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) ...@@ -130,8 +157,8 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
static_assert(SIMD::Width == 4, "Expects SIMD width to be 4"); static_assert(SIMD::Width == 4, "Expects SIMD width to be 4");
Int4 laneBits = Int4(1, 2, 4, 8); Int4 laneBits = Int4(1, 2, 4, 8);
Int4 inputSampleMask = Int4(1) & CmpNEQ(Int4(cMask[0]) & laneBits, Int4(0)); Int4 inputSampleMask = 0;
for(auto i = 1u; i < state.multiSampleCount; i++) for(auto i = sampleLoopInit; i < sampleLoopEnd; i++)
{ {
inputSampleMask |= Int4(1 << i) & CmpNEQ(Int4(cMask[i]) & laneBits, Int4(0)); inputSampleMask |= Int4(1 << i) & CmpNEQ(Int4(cMask[i]) & laneBits, Int4(0));
} }
...@@ -146,11 +173,15 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) ...@@ -146,11 +173,15 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
// handled separately, through the cMask. // handled separately, through the cMask.
auto activeLaneMask = SIMD::Int(0xFFFFFFFF); auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask); auto storesAndAtomicsMask = (sampleId >= 0) ? maskAny(cMask[sampleId], sMask[sampleId], zMask[sampleId]) : maskAny(cMask, sMask, zMask);
routine.killMask = 0; routine.killMask = 0;
spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets); spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
spirvShader->emitEpilog(&routine); spirvShader->emitEpilog(&routine);
if((sampleId < 0) || (sampleId == static_cast<int>(state.multiSampleCount - 1)))
{
spirvShader->clearPhis(&routine);
}
for(int i = 0; i < RENDERTARGETS; i++) for(int i = 0; i < RENDERTARGETS; i++)
{ {
...@@ -168,7 +199,7 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) ...@@ -168,7 +199,7 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
if(spirvShader->getModes().ContainsKill) if(spirvShader->getModes().ContainsKill)
{ {
for(auto i = 0u; i < state.multiSampleCount; i++) for(auto i = sampleLoopInit; i < sampleLoopEnd; i++)
{ {
cMask[i] &= ~routine.killMask; cMask[i] &= ~routine.killMask;
} }
...@@ -179,7 +210,7 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) ...@@ -179,7 +210,7 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
{ {
auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]); auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
for(auto i = 0u; i < state.multiSampleCount; i++) for(auto i = sampleLoopInit; i < sampleLoopEnd; i++)
{ {
cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << i), SIMD::Int(0))); cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << i), SIMD::Int(0)));
} }
...@@ -192,14 +223,19 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) ...@@ -192,14 +223,19 @@ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
} }
} }
Bool PixelProgram::alphaTest(Int cMask[4]) Bool PixelProgram::alphaTest(Int cMask[4], int sampleId)
{ {
if(!state.alphaToCoverage) if(!state.alphaToCoverage)
{ {
return true; return true;
} }
alphaToCoverage(cMask, c[0].w); alphaToCoverage(cMask, c[0].w, sampleId);
if(sampleId >= 0)
{
return cMask[sampleId] != 0x0;
}
Int pass = cMask[0]; Int pass = cMask[0];
...@@ -211,8 +247,11 @@ Bool PixelProgram::alphaTest(Int cMask[4]) ...@@ -211,8 +247,11 @@ Bool PixelProgram::alphaTest(Int cMask[4])
return pass != 0x0; return pass != 0x0;
} }
void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], int sampleId)
{ {
unsigned int sampleLoopInit = (sampleId >= 0) ? sampleId : 0;
unsigned int sampleLoopEnd = (sampleId >= 0) ? sampleId + 1 : state.multiSampleCount;
for(int index = 0; index < RENDERTARGETS; index++) for(int index = 0; index < RENDERTARGETS; index++)
{ {
if(!state.colorWriteActive(index)) if(!state.colorWriteActive(index))
...@@ -237,7 +276,7 @@ void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4 ...@@ -237,7 +276,7 @@ void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4
case VK_FORMAT_A8B8G8R8_SRGB_PACK32: case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
case VK_FORMAT_A2B10G10R10_UNORM_PACK32: case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
case VK_FORMAT_A2R10G10B10_UNORM_PACK32: case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
for(unsigned int q = 0; q < state.multiSampleCount; q++) for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{ {
if(state.multiSampleMask & (1 << q)) if(state.multiSampleMask & (1 << q))
{ {
...@@ -283,7 +322,7 @@ void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4 ...@@ -283,7 +322,7 @@ void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4
case VK_FORMAT_A8B8G8R8_SINT_PACK32: case VK_FORMAT_A8B8G8R8_SINT_PACK32:
case VK_FORMAT_A2B10G10R10_UINT_PACK32: case VK_FORMAT_A2B10G10R10_UINT_PACK32:
case VK_FORMAT_A2R10G10B10_UINT_PACK32: case VK_FORMAT_A2R10G10B10_UINT_PACK32:
for(unsigned int q = 0; q < state.multiSampleCount; q++) for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{ {
if(state.multiSampleMask & (1 << q)) if(state.multiSampleMask & (1 << q))
{ {
......
...@@ -34,10 +34,10 @@ public: ...@@ -34,10 +34,10 @@ public:
virtual ~PixelProgram() {} virtual ~PixelProgram() {}
protected: protected:
virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]); virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], int sampleId);
virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]); virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4], int sampleId);
virtual Bool alphaTest(Int cMask[4]); virtual Bool alphaTest(Int cMask[4], int sampleId);
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]); virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], int sampleId);
private: private:
// Color outputs // Color outputs
...@@ -48,6 +48,7 @@ private: ...@@ -48,6 +48,7 @@ private:
Int4 maskAny(Int cMask[4]) const; Int4 maskAny(Int cMask[4]) const;
Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const; Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const;
Int4 maskAny(Int cMask, Int sMask, Int zMask) const;
}; };
} // namespace sw } // namespace sw
......
...@@ -62,222 +62,246 @@ void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBu ...@@ -62,222 +62,246 @@ void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBu
Int zMask[4]; // Depth mask Int zMask[4]; // Depth mask
Int sMask[4]; // Stencil mask Int sMask[4]; // Stencil mask
for(unsigned int q = 0; q < state.multiSampleCount; q++) bool perSampleShading = (state.sampleShadingEnabled && (state.minSampleShading > 0.0f)) ||
{ (spirvShader && spirvShader->getModes().ContainsSampleQualifier);
zMask[q] = cMask[q]; unsigned int numSampleRenders = perSampleShading ? state.multiSampleCount : 1;
sMask[q] = cMask[q];
}
for(unsigned int q = 0; q < state.multiSampleCount; q++) for(unsigned int i = 0; i < numSampleRenders; ++i)
{ {
stencilTest(sBuffer, q, x, sMask[q], cMask[q]); int sampleId = perSampleShading ? i : -1;
} unsigned int sampleLoopInit = perSampleShading ? sampleId : 0;
unsigned int sampleLoopEnd = perSampleShading ? sampleId + 1 : state.multiSampleCount;
Float4 f; for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
Float4 rhwCentroid; {
zMask[q] = cMask[q];
Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16); sMask[q] = cMask[q];
}
if(interpolateZ()) for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{
for(unsigned int q = 0; q < state.multiSampleCount; q++)
{ {
Float4 x = xxxx; stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
}
if(state.enableMultiSampling) Float4 f;
{ Float4 rhwCentroid;
x += *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4));
}
z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false); Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
if(state.depthBias) if(interpolateZ())
{
for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{ {
z[q] += *Pointer<Float4>(primitive + OFFSET(Primitive, zBias), 16); Float4 x = xxxx;
}
if(state.depthClamp) if(state.enableMultiSampling)
{ {
z[q] = Min(Max(z[q], Float4(0.0f)), Float4(1.0f)); x -= *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4));
} }
}
}
Bool depthPass = false; z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false);
if(earlyDepthTest) if(state.depthBias)
{ {
for(unsigned int q = 0; q < state.multiSampleCount; q++) z[q] += *Pointer<Float4>(primitive + OFFSET(Primitive, zBias), 16);
{ }
depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
}
}
If(depthPass || Bool(!earlyDepthTest)) if(state.depthClamp)
{ {
Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16); z[q] = Min(Max(z[q], Float4(0.0f)), Float4(1.0f));
}
}
}
// Centroid locations Bool depthPass = false;
Float4 XXXX = Float4(0.0f);
Float4 YYYY = Float4(0.0f);
if(state.centroid) if(earlyDepthTest)
{ {
Float4 WWWW(1.0e-9f); for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
for(unsigned int q = 0; q < state.multiSampleCount; q++)
{ {
XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]); depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
} }
WWWW = Rcp(WWWW, Precision::Relaxed);
XXXX *= WWWW;
YYYY *= WWWW;
XXXX += xxxx;
YYYY += yyyy;
} }
if(interpolateW()) If(depthPass || Bool(!earlyDepthTest))
{ {
w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false); Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
rhw = reciprocal(w, false, false, true);
// Centroid locations
Float4 XXXX = Float4(0.0f);
Float4 YYYY = Float4(0.0f);
if(state.centroid) if(state.centroid)
{ {
rhwCentroid = reciprocal(SpirvRoutine::interpolateAtXY(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false)); Float4 WWWW(1.0e-9f);
for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{
XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]);
YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
}
WWWW = Rcp(WWWW, Precision::Relaxed);
XXXX *= WWWW;
YYYY *= WWWW;
XXXX += xxxx;
YYYY += yyyy;
} }
}
if(spirvShader) if(interpolateW())
{
for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
{ {
auto const &input = spirvShader->inputs[interpolant]; w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false);
if(input.Type != SpirvShader::ATTRIBTYPE_UNUSED) rhw = reciprocal(w, false, false, true);
if(state.centroid)
{ {
if(input.Centroid && state.enableMultiSampling) rhwCentroid = reciprocal(SpirvRoutine::interpolateAtXY(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false));
{
routine.inputs[interpolant] =
SpirvRoutine::interpolateAtXY(XXXX, YYYY, rhwCentroid,
primitive + OFFSET(Primitive, V[interpolant]),
input.Flat, !input.NoPerspective);
}
else
{
routine.inputs[interpolant] =
interpolate(xxxx, Dv[interpolant], rhw,
primitive + OFFSET(Primitive, V[interpolant]),
input.Flat, !input.NoPerspective);
}
} }
} }
setBuiltins(x, y, z, w, cMask); if(spirvShader)
for(uint32_t i = 0; i < state.numClipDistances; i++)
{ {
auto distance = interpolate(xxxx, DclipDistance[i], rhw, if(perSampleShading && (state.multiSampleCount > 1))
primitive + OFFSET(Primitive, clipDistance[i]), {
false, true); xxxx += Float4(Constants::SampleLocationsX[sampleId]);
yyyy += Float4(Constants::SampleLocationsY[sampleId]);
}
auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0))); for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
for(auto ms = 0u; ms < state.multiSampleCount; ms++)
{ {
// FIXME(b/148105887): Fragments discarded by clipping do not exist at auto const &input = spirvShader->inputs[interpolant];
// all -- they should not be counted in queries or have their Z/S effects if(input.Type != SpirvShader::ATTRIBTYPE_UNUSED)
// performed when early fragment tests are enabled. {
cMask[ms] &= clipMask; if(input.Centroid && state.enableMultiSampling)
{
routine.inputs[interpolant] =
SpirvRoutine::interpolateAtXY(XXXX, YYYY, rhwCentroid,
primitive + OFFSET(Primitive, V[interpolant]),
input.Flat, !input.NoPerspective);
}
else if(perSampleShading)
{
routine.inputs[interpolant] =
SpirvRoutine::interpolateAtXY(xxxx, yyyy, rhw,
primitive + OFFSET(Primitive, V[interpolant]),
input.Flat, !input.NoPerspective);
}
else
{
routine.inputs[interpolant] =
interpolate(xxxx, Dv[interpolant], rhw,
primitive + OFFSET(Primitive, V[interpolant]),
input.Flat, !input.NoPerspective);
}
}
} }
if(spirvShader->getUsedCapabilities().ClipDistance) setBuiltins(x, y, z, w, cMask, sampleId);
for(uint32_t i = 0; i < state.numClipDistances; i++)
{ {
auto it = spirvShader->inputBuiltins.find(spv::BuiltInClipDistance); auto distance = interpolate(xxxx, DclipDistance[i], rhw,
if(it != spirvShader->inputBuiltins.end()) primitive + OFFSET(Primitive, clipDistance[i]),
false, true);
auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0)));
for(auto ms = sampleLoopInit; ms < sampleLoopEnd; ms++)
{
// FIXME(b/148105887): Fragments discarded by clipping do not exist at
// all -- they should not be counted in queries or have their Z/S effects
// performed when early fragment tests are enabled.
cMask[ms] &= clipMask;
}
if(spirvShader->getUsedCapabilities().ClipDistance)
{ {
if(i < it->second.SizeInComponents) auto it = spirvShader->inputBuiltins.find(spv::BuiltInClipDistance);
if(it != spirvShader->inputBuiltins.end())
{ {
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = distance; if(i < it->second.SizeInComponents)
{
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = distance;
}
} }
} }
} }
}
if(spirvShader->getUsedCapabilities().CullDistance) if(spirvShader->getUsedCapabilities().CullDistance)
{
auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->inputBuiltins.end())
{ {
for(uint32_t i = 0; i < state.numCullDistances; i++) auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->inputBuiltins.end())
{ {
if(i < it->second.SizeInComponents) for(uint32_t i = 0; i < state.numCullDistances; i++)
{ {
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = if(i < it->second.SizeInComponents)
interpolate(xxxx, DcullDistance[i], rhw, {
primitive + OFFSET(Primitive, cullDistance[i]), routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
false, true); interpolate(xxxx, DcullDistance[i], rhw,
primitive + OFFSET(Primitive, cullDistance[i]),
false, true);
}
} }
} }
} }
} }
}
Bool alphaPass = true;
if(spirvShader) Bool alphaPass = true;
{
bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests);
applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask);
}
alphaPass = alphaTest(cMask);
if((spirvShader && spirvShader->getModes().ContainsKill) || state.alphaToCoverage) if(spirvShader)
{
for(unsigned int q = 0; q < state.multiSampleCount; q++)
{ {
zMask[q] &= cMask[q]; bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests);
sMask[q] &= cMask[q]; applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask, sampleId);
} }
}
If(alphaPass) alphaPass = alphaTest(cMask, sampleId);
{
if(!earlyDepthTest) if((spirvShader && spirvShader->getModes().ContainsKill) || state.alphaToCoverage)
{ {
for(unsigned int q = 0; q < state.multiSampleCount; q++) for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{ {
depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); zMask[q] &= cMask[q];
sMask[q] &= cMask[q];
} }
} }
If(depthPass || Bool(earlyDepthTest)) If(alphaPass)
{ {
for(unsigned int q = 0; q < state.multiSampleCount; q++) if(!earlyDepthTest)
{ {
if(state.multiSampleMask & (1 << q)) for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{ {
writeDepth(zBuffer, q, x, z[q], zMask[q]); depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
}
}
if(state.occlusionEnabled) If(depthPass || Bool(earlyDepthTest))
{
for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{
if(state.multiSampleMask & (1 << q))
{ {
occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q])); writeDepth(zBuffer, q, x, z[q], zMask[q]);
if(state.occlusionEnabled)
{
occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q]));
}
} }
} }
}
rasterOperation(cBuffer, x, sMask, zMask, cMask); rasterOperation(cBuffer, x, sMask, zMask, cMask, sampleId);
}
} }
} }
}
for(unsigned int q = 0; q < state.multiSampleCount; q++) for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
{
if(state.multiSampleMask & (1 << q))
{ {
writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); if(state.multiSampleMask & (1 << q))
{
writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
}
} }
} }
} }
...@@ -546,22 +570,24 @@ Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, ...@@ -546,22 +570,24 @@ Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x,
} }
} }
void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha) void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha, int sampleId)
{ {
Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c0))); static const int a2c[4] = {
Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c1))); OFFSET(DrawData, a2c0),
Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c2))); OFFSET(DrawData, a2c1),
Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c3))); OFFSET(DrawData, a2c2),
OFFSET(DrawData, a2c3),
Int aMask0 = SignMask(coverage0); };
Int aMask1 = SignMask(coverage1);
Int aMask2 = SignMask(coverage2); unsigned int sampleLoopInit = (sampleId >= 0) ? sampleId : 0;
Int aMask3 = SignMask(coverage3); unsigned int sampleLoopEnd = (sampleId >= 0) ? sampleId + 1 : state.multiSampleCount;
cMask[0] &= aMask0; for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
cMask[1] &= aMask1; {
cMask[2] &= aMask2; Int4 coverage = CmpNLT(alpha, *Pointer<Float4>(data + a2c[q]));
cMask[3] &= aMask3; Int aMask = SignMask(coverage);
cMask[q] &= aMask;
}
} }
void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask) void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
......
...@@ -45,15 +45,15 @@ protected: ...@@ -45,15 +45,15 @@ protected:
// Depth output // Depth output
Float4 oDepth; Float4 oDepth;
virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]) = 0; virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], int sampleId) = 0;
virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0; virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4], int sampleId) = 0;
virtual Bool alphaTest(Int cMask[4]) = 0; virtual Bool alphaTest(Int cMask[4], int sampleId) = 0;
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0; virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], int sampleId) = 0;
void quad(Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) override; void quad(Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) override;
void alphaTest(Int &aMask, const Short4 &alpha); void alphaTest(Int &aMask, const Short4 &alpha);
void alphaToCoverage(Int cMask[4], const Float4 &alpha); void alphaToCoverage(Int cMask[4], const Float4 &alpha, int sampleId);
// Raster operations // Raster operations
void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x); void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x);
......
...@@ -2471,7 +2471,10 @@ void SpirvShader::emitEpilog(SpirvRoutine *routine) const ...@@ -2471,7 +2471,10 @@ void SpirvShader::emitEpilog(SpirvRoutine *routine) const
break; break;
} }
} }
}
void SpirvShader::clearPhis(SpirvRoutine *routine) const
{
// Clear phis that are no longer used. This serves two purposes: // Clear phis that are no longer used. This serves two purposes:
// (1) The phi rr::Variables are destructed, preventing pointless // (1) The phi rr::Variables are destructed, preventing pointless
// materialization. // materialization.
......
...@@ -784,6 +784,7 @@ public: ...@@ -784,6 +784,7 @@ public:
void emitProlog(SpirvRoutine *routine) const; void emitProlog(SpirvRoutine *routine) const;
void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const; void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
void emitEpilog(SpirvRoutine *routine) const; void emitEpilog(SpirvRoutine *routine) const;
void clearPhis(SpirvRoutine *routine) const;
bool containsImageWrite() const { return imageWriteEmitted; } bool containsImageWrite() const { return imageWriteEmitted; }
......
...@@ -83,6 +83,7 @@ void VertexProgram::program(Pointer<UInt> &batch, UInt &vertexCount) ...@@ -83,6 +83,7 @@ void VertexProgram::program(Pointer<UInt> &batch, UInt &vertexCount)
spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets); spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
spirvShader->emitEpilog(&routine); spirvShader->emitEpilog(&routine);
spirvShader->clearPhis(&routine);
} }
} // namespace sw } // namespace sw
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment