Commit 920100cd by Alexis Hétu, committed by Alexis Hétu

Enable fragmentStoresAndAtomics

A few changes were made to make this work:
- activeLaneMask now contains cMask. Since it's only used in loads, stores and atomics, it should be fine to include it.
- Added a storesAndAtomicsMask, which also contains sMask (stencil) and zMask (depth) for early fragment tests. The mask affects all atomic operations and store operations into storage buffers and images.
- Support for spv::BuiltInHelperInvocation was added.

Bug: b/140294254
Test: dEQP-VK.*
Change-Id: I42b97a766ddfe331bb2767d80d4360104a221482
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34114
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent fe23c66f
...@@ -188,7 +188,7 @@ namespace sw ...@@ -188,7 +188,7 @@ namespace sw
setSubgroupBuiltins(data, routine, workgroupID, localInvocationIndex, subgroupIndex); setSubgroupBuiltins(data, routine, workgroupID, localInvocationIndex, subgroupIndex);
shader->emit(routine, activeLaneMask, descriptorSets); shader->emit(routine, activeLaneMask, activeLaneMask, descriptorSets);
} }
} }
......
...@@ -20,7 +20,43 @@ ...@@ -20,7 +20,43 @@
namespace sw namespace sw
{ {
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) // Union all cMask and return it as 4 booleans
// OR together the per-sample coverage masks and expand the result into
// one all-ones/all-zeros boolean per SIMD lane.
Int4 PixelProgram::maskAny(Int cMask[4]) const
{
// Combine coverage across every active sample.
Int combined = cMask[0];
for(auto sample = 1u; sample < state.multiSample; sample++)
{
combined |= cMask[sample];
}

// Broadcast the combined mask, isolate each lane's coverage bit,
// then shift it to the sign bit and arithmetic-shift back so each
// lane becomes either 0 or ~0.
Int4 perLane(combined);
Int4 laneBit = Int4(1, 2, 4, 8);
Int4 shiftToSign = Int4(31, 30, 29, 28);
perLane = ((perLane & laneBit) << shiftToSign) >> Int4(31);
return perLane;
}
// AND each sample's cMask/sMask/zMask (coverage, stencil, depth), OR the
// results across samples, and expand into one boolean per SIMD lane.
Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const
{
// A sample contributes only if it passed coverage, stencil and depth.
Int combined = cMask[0] & sMask[0] & zMask[0];
for(auto sample = 1u; sample < state.multiSample; sample++)
{
combined |= (cMask[sample] & sMask[sample] & zMask[sample]);
}

// Broadcast the combined mask, isolate each lane's bit, then move it
// to the sign position and arithmetic-shift back so every lane is
// either 0 or ~0.
Int4 perLane(combined);
Int4 laneBit = Int4(1, 2, 4, 8);
Int4 shiftToSign = Int4(31, 30, 29, 28);
perLane = ((perLane & laneBit) << shiftToSign) >> Int4(31);
return perLane;
}
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
{ {
routine.setImmutableInputBuiltins(spirvShader); routine.setImmutableInputBuiltins(spirvShader);
...@@ -54,12 +90,18 @@ namespace sw ...@@ -54,12 +90,18 @@ namespace sw
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width)); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
}); });
routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask));
});
routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1); routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1);
routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1); routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1);
routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID)); routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
} }
void PixelProgram::applyShader(Int cMask[4]) void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
{ {
routine.descriptorSets = data + OFFSET(DrawData, descriptorSets); routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets); routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
...@@ -96,9 +138,10 @@ namespace sw ...@@ -96,9 +138,10 @@ namespace sw
// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
// handled separately, through the cMask. // handled separately, through the cMask.
auto activeLaneMask = SIMD::Int(0xFFFFFFFF); auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask);
routine.killMask = 0; routine.killMask = 0;
spirvShader->emit(&routine, activeLaneMask, descriptorSets); spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
spirvShader->emitEpilog(&routine); spirvShader->emitEpilog(&routine);
for(int i = 0; i < RENDERTARGETS; i++) for(int i = 0; i < RENDERTARGETS; i++)
......
...@@ -34,8 +34,8 @@ namespace sw ...@@ -34,8 +34,8 @@ namespace sw
virtual ~PixelProgram() {} virtual ~PixelProgram() {}
protected: protected:
virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w); virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]);
virtual void applyShader(Int cMask[4]); virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]);
virtual Bool alphaTest(Int cMask[4]); virtual Bool alphaTest(Int cMask[4]);
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]); virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
...@@ -46,6 +46,8 @@ namespace sw ...@@ -46,6 +46,8 @@ namespace sw
// Raster operations // Raster operations
void clampColor(Vector4f oC[RENDERTARGETS]); void clampColor(Vector4f oC[RENDERTARGETS]);
Int4 maskAny(Int cMask[4]) const;
Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const;
Float4 linearToSRGB(const Float4 &x); Float4 linearToSRGB(const Float4 &x);
}; };
} }
......
...@@ -53,7 +53,7 @@ namespace sw ...@@ -53,7 +53,7 @@ namespace sw
void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
{ {
// TODO: consider shader which modifies sample mask in general // TODO: consider shader which modifies sample mask in general
const bool earlyDepthTest = !spirvShader || (!spirvShader->getModes().DepthReplacing && !state.alphaToCoverage); const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
Int zMask[4]; // Depth mask Int zMask[4]; // Depth mask
Int sMask[4]; // Stencil mask Int sMask[4]; // Stencil mask
...@@ -161,14 +161,15 @@ namespace sw ...@@ -161,14 +161,15 @@ namespace sw
} }
} }
setBuiltins(x, y, z, w); setBuiltins(x, y, z, w, cMask);
} }
Bool alphaPass = true; Bool alphaPass = true;
if (spirvShader) if (spirvShader)
{ {
applyShader(cMask); bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests);
applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask);
} }
alphaPass = alphaTest(cMask); alphaPass = alphaTest(cMask);
......
...@@ -43,8 +43,8 @@ namespace sw ...@@ -43,8 +43,8 @@ namespace sw
// Depth output // Depth output
Float4 oDepth; Float4 oDepth;
virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) = 0; virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) = 0;
virtual void applyShader(Int cMask[4]) = 0; virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0;
virtual Bool alphaTest(Int cMask[4]) = 0; virtual Bool alphaTest(Int cMask[4]) = 0;
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0; virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0;
......
...@@ -1367,6 +1367,19 @@ namespace sw ...@@ -1367,6 +1367,19 @@ namespace sw
} }
} }
// Returns whether stores to the given storage class may proceed in helper
// invocations. Buffer and image memory (Uniform, StorageBuffer, Image) is
// externally visible, so writes to it must be suppressed for helper lanes;
// every other storage class is private to the invocation and safe to write.
bool SpirvShader::StoresInHelperInvocation(spv::StorageClass storageClass)
{
return storageClass != spv::StorageClassUniform &&
       storageClass != spv::StorageClassStorageBuffer &&
       storageClass != spv::StorageClassImage;
}
bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass) bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass)
{ {
switch (storageClass) switch (storageClass)
...@@ -2136,9 +2149,9 @@ namespace sw ...@@ -2136,9 +2149,9 @@ namespace sw
} }
} }
void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const
{ {
EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel); EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, executionModel);
// Emit everything up to the first label // Emit everything up to the first label
// TODO: Separate out dispatch of block from non-block instructions? // TODO: Separate out dispatch of block from non-block instructions?
...@@ -2975,6 +2988,12 @@ namespace sw ...@@ -2975,6 +2988,12 @@ namespace sw
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass); auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
SIMD::Int mask = state->activeLaneMask();
if (!StoresInHelperInvocation(pointerTy.storageClass))
{
mask = mask & state->storesAndAtomicsMask();
}
if (object.kind == Object::Kind::Constant) if (object.kind == Object::Kind::Constant)
{ {
// Constant source data. // Constant source data.
...@@ -2983,7 +3002,7 @@ namespace sw ...@@ -2983,7 +3002,7 @@ namespace sw
{ {
auto p = ptr + offset; auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); } if (interleavedByLane) { p = interleaveByLane(p); }
SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder); SIMD::Store(p, SIMD::Float(src[i]), robustness, mask, atomic, memoryOrder);
}); });
} }
else else
...@@ -2994,7 +3013,7 @@ namespace sw ...@@ -2994,7 +3013,7 @@ namespace sw
{ {
auto p = ptr + offset; auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); } if (interleavedByLane) { p = interleaveByLane(p); }
SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder); SIMD::Store(p, src.Float(i), robustness, mask, atomic, memoryOrder);
}); });
} }
...@@ -5843,10 +5862,11 @@ namespace sw ...@@ -5843,10 +5862,11 @@ namespace sw
auto ptr = state->getPointer(insn.word(3)); auto ptr = state->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets(); auto ptrOffsets = ptr.offsets();
SIMD::UInt x; SIMD::UInt x(0);
auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
If(Extract(state->activeLaneMask(), j) != 0) If(Extract(mask, j) != 0)
{ {
auto offset = Extract(ptrOffsets, j); auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value, j); auto laneValue = Extract(value, j);
...@@ -5914,10 +5934,11 @@ namespace sw ...@@ -5914,10 +5934,11 @@ namespace sw
auto ptr = state->getPointer(insn.word(3)); auto ptr = state->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets(); auto ptrOffsets = ptr.offsets();
SIMD::UInt x; SIMD::UInt x(0);
auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
If(Extract(state->activeLaneMask(), j) != 0) If(Extract(mask, j) != 0)
{ {
auto offset = Extract(ptrOffsets, j); auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value.UInt(0), j); auto laneValue = Extract(value.UInt(0), j);
......
...@@ -889,7 +889,7 @@ namespace sw ...@@ -889,7 +889,7 @@ namespace sw
std::vector<InterfaceComponent> outputs; std::vector<InterfaceComponent> outputs;
void emitProlog(SpirvRoutine *routine) const; void emitProlog(SpirvRoutine *routine) const;
void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const; void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
void emitEpilog(SpirvRoutine *routine) const; void emitEpilog(SpirvRoutine *routine) const;
using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>; using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
...@@ -968,6 +968,9 @@ namespace sw ...@@ -968,6 +968,9 @@ namespace sw
static bool IsStorageInterleavedByLane(spv::StorageClass storageClass); static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
static bool IsExplicitLayout(spv::StorageClass storageClass); static bool IsExplicitLayout(spv::StorageClass storageClass);
// Output storage buffers and images should not be affected by helper invocations
static bool StoresInHelperInvocation(spv::StorageClass storageClass);
template<typename F> template<typename F>
int VisitInterfaceInner(Type::ID id, Decorations d, F f) const; int VisitInterfaceInner(Type::ID id, Decorations d, F f) const;
...@@ -991,12 +994,14 @@ namespace sw ...@@ -991,12 +994,14 @@ namespace sw
EmitState(SpirvRoutine *routine, EmitState(SpirvRoutine *routine,
Function::ID function, Function::ID function,
RValue<SIMD::Int> activeLaneMask, RValue<SIMD::Int> activeLaneMask,
RValue<SIMD::Int> storesAndAtomicsMask,
const vk::DescriptorSet::Bindings &descriptorSets, const vk::DescriptorSet::Bindings &descriptorSets,
bool robustBufferAccess, bool robustBufferAccess,
spv::ExecutionModel executionModel) spv::ExecutionModel executionModel)
: routine(routine), : routine(routine),
function(function), function(function),
activeLaneMaskValue(activeLaneMask.value), activeLaneMaskValue(activeLaneMask.value),
storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
descriptorSets(descriptorSets), descriptorSets(descriptorSets),
robustBufferAccess(robustBufferAccess), robustBufferAccess(robustBufferAccess),
executionModel(executionModel) executionModel(executionModel)
...@@ -1010,6 +1015,12 @@ namespace sw ...@@ -1010,6 +1015,12 @@ namespace sw
return RValue<SIMD::Int>(activeLaneMaskValue); return RValue<SIMD::Int>(activeLaneMaskValue);
} }
// Per-lane mask applied (in addition to activeLaneMask) to store and
// atomic operations. For fragment shaders this is built from the
// cMask/sMask/zMask union, so lanes that failed coverage or early
// depth/stencil tests do not perform externally visible writes; other
// stages pass the active lane mask here.
RValue<SIMD::Int> storesAndAtomicsMask() const
{
ASSERT(storesAndAtomicsMaskValue != nullptr);
return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
}
void setActiveLaneMask(RValue<SIMD::Int> mask) void setActiveLaneMask(RValue<SIMD::Int> mask)
{ {
activeLaneMaskValue = mask.value; activeLaneMaskValue = mask.value;
...@@ -1030,6 +1041,7 @@ namespace sw ...@@ -1030,6 +1041,7 @@ namespace sw
Function::ID function; // The current function being built. Function::ID function; // The current function being built.
Block::ID block; // The current block being built. Block::ID block; // The current block being built.
rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask. rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask.
Block::Set visited; // Blocks already built. Block::Set visited; // Blocks already built.
std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks; std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
std::deque<Block::ID> *pending; std::deque<Block::ID> *pending;
......
...@@ -80,7 +80,7 @@ namespace sw ...@@ -80,7 +80,7 @@ namespace sw
} }
auto activeLaneMask = SIMD::Int(0xFFFFFFFF); auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
spirvShader->emit(&routine, activeLaneMask, descriptorSets); spirvShader->emit(&routine, activeLaneMask, activeLaneMask, descriptorSets);
spirvShader->emitEpilog(&routine); spirvShader->emitEpilog(&routine);
} }
......
...@@ -57,7 +57,7 @@ const VkPhysicalDeviceFeatures& PhysicalDevice::getFeatures() const ...@@ -57,7 +57,7 @@ const VkPhysicalDeviceFeatures& PhysicalDevice::getFeatures() const
VK_FALSE, // occlusionQueryPrecise VK_FALSE, // occlusionQueryPrecise
VK_FALSE, // pipelineStatisticsQuery VK_FALSE, // pipelineStatisticsQuery
VK_FALSE, // vertexPipelineStoresAndAtomics VK_FALSE, // vertexPipelineStoresAndAtomics
VK_FALSE, // fragmentStoresAndAtomics VK_TRUE, // fragmentStoresAndAtomics
VK_FALSE, // shaderTessellationAndGeometryPointSize VK_FALSE, // shaderTessellationAndGeometryPointSize
VK_FALSE, // shaderImageGatherExtended VK_FALSE, // shaderImageGatherExtended
VK_FALSE, // shaderStorageImageExtendedFormats VK_FALSE, // shaderStorageImageExtendedFormats
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment