Commit 920100cd by Alexis Hétu, committed by Alexis Hétu

Enable fragmentStoresAndAtomics

A few changes were made to make this work:
- activeLaneMask now contains cMask. Since it's only used in loads, stores and atomics, it should be fine to include it.
- Added a storesAndAtomicsMask, which also contains sMask (stencil) and zMask (depth) for early fragment tests. The mask affects all atomic operations and store operations into storage buffers and images.
- Support for spv::BuiltInHelperInvocation was added.

Bug: b/140294254
Test: dEQP-VK.*
Change-Id: I42b97a766ddfe331bb2767d80d4360104a221482
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34114
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent fe23c66f
......@@ -188,7 +188,7 @@ namespace sw
setSubgroupBuiltins(data, routine, workgroupID, localInvocationIndex, subgroupIndex);
shader->emit(routine, activeLaneMask, descriptorSets);
shader->emit(routine, activeLaneMask, activeLaneMask, descriptorSets);
}
}
......
......@@ -20,7 +20,43 @@
namespace sw
{
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
// Union all cMask and return it as 4 booleans
// Combines the coverage masks (cMask) of all active samples and expands the
// result into four per-lane booleans: ~0 for a lane with at least one covered
// sample, 0 otherwise.
Int4 PixelProgram::maskAny(Int cMask[4]) const
{
	// OR together the coverage bits of every sample in use.
	Int combined = cMask[0];
	for(auto sample = 1u; sample < state.multiSample; sample++)
	{
		combined |= cMask[sample];
	}

	// Broadcast the combined mask, isolate each lane's bit (1, 2, 4, 8), then
	// shift it up to the sign bit and arithmetically back down so each lane
	// becomes all ones or all zeros.
	Int4 expanded = ((Int4(combined) & Int4(1, 2, 4, 8)) << Int4(31, 30, 29, 28)) >> Int4(31);
	return expanded;
}
// Union all cMask/sMask/zMask and return it as 4 booleans
// Combines coverage (cMask), stencil (sMask) and depth (zMask) masks across
// all active samples and expands the result into four per-lane booleans:
// ~0 for a lane where at least one sample passes all three tests, 0 otherwise.
Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const
{
	// A sample contributes only if it passes coverage, stencil and depth
	// together; OR the per-sample results across all samples in use.
	Int combined = cMask[0] & sMask[0] & zMask[0];
	for(auto sample = 1u; sample < state.multiSample; sample++)
	{
		combined |= (cMask[sample] & sMask[sample] & zMask[sample]);
	}

	// Broadcast the combined mask, isolate each lane's bit (1, 2, 4, 8), then
	// shift it up to the sign bit and arithmetically back down so each lane
	// becomes all ones or all zeros.
	Int4 expanded = ((Int4(combined) & Int4(1, 2, 4, 8)) << Int4(31, 30, 29, 28)) >> Int4(31);
	return expanded;
}
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
{
routine.setImmutableInputBuiltins(spirvShader);
......@@ -54,12 +90,18 @@ namespace sw
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
});
routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask));
});
routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1);
routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1);
routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
}
void PixelProgram::applyShader(Int cMask[4])
void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
{
routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
......@@ -96,9 +138,10 @@ namespace sw
// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
// handled separately, through the cMask.
auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask);
routine.killMask = 0;
spirvShader->emit(&routine, activeLaneMask, descriptorSets);
spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
spirvShader->emitEpilog(&routine);
for(int i = 0; i < RENDERTARGETS; i++)
......
......@@ -34,8 +34,8 @@ namespace sw
virtual ~PixelProgram() {}
protected:
virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w);
virtual void applyShader(Int cMask[4]);
virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]);
virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]);
virtual Bool alphaTest(Int cMask[4]);
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
......@@ -46,6 +46,8 @@ namespace sw
// Raster operations
void clampColor(Vector4f oC[RENDERTARGETS]);
Int4 maskAny(Int cMask[4]) const;
Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const;
Float4 linearToSRGB(const Float4 &x);
};
}
......
......@@ -53,7 +53,7 @@ namespace sw
void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
{
// TODO: consider shader which modifies sample mask in general
const bool earlyDepthTest = !spirvShader || (!spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
Int zMask[4]; // Depth mask
Int sMask[4]; // Stencil mask
......@@ -161,14 +161,15 @@ namespace sw
}
}
setBuiltins(x, y, z, w);
setBuiltins(x, y, z, w, cMask);
}
Bool alphaPass = true;
if (spirvShader)
{
applyShader(cMask);
bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests);
applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask);
}
alphaPass = alphaTest(cMask);
......
......@@ -43,8 +43,8 @@ namespace sw
// Depth output
Float4 oDepth;
virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) = 0;
virtual void applyShader(Int cMask[4]) = 0;
virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) = 0;
virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0;
virtual Bool alphaTest(Int cMask[4]) = 0;
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0;
......
......@@ -1367,6 +1367,19 @@ namespace sw
}
}
// Returns whether stores to the given storage class may proceed in helper
// invocations. Writes to externally visible memory (uniform, storage buffers
// and images) must be suppressed for helper invocations; all other storage
// classes (function-local, private, etc.) may be written freely.
bool SpirvShader::StoresInHelperInvocation(spv::StorageClass storageClass)
{
	const bool externallyVisible = (storageClass == spv::StorageClassUniform) ||
	                               (storageClass == spv::StorageClassStorageBuffer) ||
	                               (storageClass == spv::StorageClassImage);

	return !externallyVisible;
}
bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass)
{
switch (storageClass)
......@@ -2136,9 +2149,9 @@ namespace sw
}
}
void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const
{
EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel);
EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, executionModel);
// Emit everything up to the first label
// TODO: Separate out dispatch of block from non-block instructions?
......@@ -2975,6 +2988,12 @@ namespace sw
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
SIMD::Int mask = state->activeLaneMask();
if (!StoresInHelperInvocation(pointerTy.storageClass))
{
mask = mask & state->storesAndAtomicsMask();
}
if (object.kind == Object::Kind::Constant)
{
// Constant source data.
......@@ -2983,7 +3002,7 @@ namespace sw
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder);
SIMD::Store(p, SIMD::Float(src[i]), robustness, mask, atomic, memoryOrder);
});
}
else
......@@ -2994,7 +3013,7 @@ namespace sw
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder);
SIMD::Store(p, src.Float(i), robustness, mask, atomic, memoryOrder);
});
}
......@@ -5843,10 +5862,11 @@ namespace sw
auto ptr = state->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets();
SIMD::UInt x;
SIMD::UInt x(0);
auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
for (int j = 0; j < SIMD::Width; j++)
{
If(Extract(state->activeLaneMask(), j) != 0)
If(Extract(mask, j) != 0)
{
auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value, j);
......@@ -5914,10 +5934,11 @@ namespace sw
auto ptr = state->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets();
SIMD::UInt x;
SIMD::UInt x(0);
auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
for (int j = 0; j < SIMD::Width; j++)
{
If(Extract(state->activeLaneMask(), j) != 0)
If(Extract(mask, j) != 0)
{
auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value.UInt(0), j);
......
......@@ -889,7 +889,7 @@ namespace sw
std::vector<InterfaceComponent> outputs;
void emitProlog(SpirvRoutine *routine) const;
void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
void emitEpilog(SpirvRoutine *routine) const;
using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
......@@ -967,6 +967,9 @@ namespace sw
//
static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
static bool IsExplicitLayout(spv::StorageClass storageClass);
// Output storage buffers and images should not be affected by helper invocations
static bool StoresInHelperInvocation(spv::StorageClass storageClass);
template<typename F>
int VisitInterfaceInner(Type::ID id, Decorations d, F f) const;
......@@ -991,12 +994,14 @@ namespace sw
EmitState(SpirvRoutine *routine,
Function::ID function,
RValue<SIMD::Int> activeLaneMask,
RValue<SIMD::Int> storesAndAtomicsMask,
const vk::DescriptorSet::Bindings &descriptorSets,
bool robustBufferAccess,
spv::ExecutionModel executionModel)
: routine(routine),
function(function),
activeLaneMaskValue(activeLaneMask.value),
storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
descriptorSets(descriptorSets),
robustBufferAccess(robustBufferAccess),
executionModel(executionModel)
......@@ -1010,6 +1015,12 @@ namespace sw
return RValue<SIMD::Int>(activeLaneMaskValue);
}
// Returns the per-lane mask gating store and atomic operations.
// Must only be called after the mask has been initialized (non-null);
// it is wrapped as an RValue for use in emitted Reactor code.
RValue<SIMD::Int> storesAndAtomicsMask() const
{
	ASSERT(storesAndAtomicsMaskValue != nullptr);
	auto mask = RValue<SIMD::Int>(storesAndAtomicsMaskValue);
	return mask;
}
void setActiveLaneMask(RValue<SIMD::Int> mask)
{
activeLaneMaskValue = mask.value;
......@@ -1030,6 +1041,7 @@ namespace sw
Function::ID function; // The current function being built.
Block::ID block; // The current block being built.
rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask.
Block::Set visited; // Blocks already built.
std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
std::deque<Block::ID> *pending;
......
......@@ -80,7 +80,7 @@ namespace sw
}
auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
spirvShader->emit(&routine, activeLaneMask, descriptorSets);
spirvShader->emit(&routine, activeLaneMask, activeLaneMask, descriptorSets);
spirvShader->emitEpilog(&routine);
}
......
......@@ -57,7 +57,7 @@ const VkPhysicalDeviceFeatures& PhysicalDevice::getFeatures() const
VK_FALSE, // occlusionQueryPrecise
VK_FALSE, // pipelineStatisticsQuery
VK_FALSE, // vertexPipelineStoresAndAtomics
VK_FALSE, // fragmentStoresAndAtomics
VK_TRUE, // fragmentStoresAndAtomics
VK_FALSE, // shaderTessellationAndGeometryPointSize
VK_FALSE, // shaderImageGatherExtended
VK_FALSE, // shaderStorageImageExtendedFormats
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment