Commit 49d8158b by Ben Clayton

SpirvRoutine: Add activeLaneMask to conditionally load / store per lane.

Use this to disable reads and writes on compute shader lanes that are not part of the subgroup.

Bug: b/126871859
Bug: b/128527271
Change-Id: Idd7ad240a8f09e6e47db34b6ed5b0ec7ba959d39
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27009
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
parent 35e90e22
...@@ -80,6 +80,9 @@ namespace sw ...@@ -80,6 +80,9 @@ namespace sw
{ {
Int4 localInvocationIndex = Int4(invocationIndex) + Int4(0, 1, 2, 3); Int4 localInvocationIndex = Int4(invocationIndex) + Int4(0, 1, 2, 3);
// Disable lanes where (invocationIDs >= numInvocations)
routine.activeLaneMask = CmpLT(localInvocationIndex, Int4(numInvocations));
Int4 localInvocationID[3]; Int4 localInvocationID[3];
{ {
Int4 idx = localInvocationIndex; Int4 idx = localInvocationIndex;
...@@ -113,13 +116,10 @@ namespace sw ...@@ -113,13 +116,10 @@ namespace sw
Int4(Extract(workgroupSize, component)) + Int4(Extract(workgroupSize, component)) +
localInvocationID[component]; localInvocationID[component];
value[builtin.FirstComponent + component] = As<Float4>(globalInvocationID); value[builtin.FirstComponent + component] = As<Float4>(globalInvocationID);
// RR_WATCH(component, globalInvocationID); // RR_WATCH(component, globalInvocationID, routine.activeLaneMask);
} }
}); });
// TODO(bclayton): Disable lanes where (invocationIDs >= numInvocations)
// Int4 enabledLanes = invocationIDs < Int4(numInvocations);
// Process numLanes of the workgroup. // Process numLanes of the workgroup.
shader->emit(&routine); shader->emit(&routine);
} }
......
...@@ -1269,43 +1269,57 @@ namespace sw ...@@ -1269,43 +1269,57 @@ namespace sw
} }
bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass); bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
auto anyInactiveLanes = SignMask(~routine->activeLaneMask) != 0;
auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents); auto load = SpirvRoutine::Value(objectTy.sizeInComponents);
if (pointer.kind == Object::Kind::Value) If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
{ {
// Divergent offsets. // Divergent offsets or masked lanes.
auto offsets = routine->getIntermediate(pointerId).Int(0); auto offsets = pointer.kind == Object::Kind::Value ?
As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
RValue<SIMD::Int>(SIMD::Int(0));
for (auto i = 0u; i < objectTy.sizeInComponents; i++) for (auto i = 0u; i < objectTy.sizeInComponents; i++)
{ {
// i wish i had a Float,Float,Float,Float constructor here.. // i wish i had a Float,Float,Float,Float constructor here..
SIMD::Float v;
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
Int offset = Int(i) + Extract(offsets, j); If(Extract(routine->activeLaneMask, j) != 0)
if (interleavedByLane) { offset = offset * SIMD::Width + j; } {
v = Insert(v, ptrBase[offset], j); Int offset = Int(i) + Extract(offsets, j);
if (interleavedByLane) { offset = offset * SIMD::Width + j; }
load[i] = Insert(load[i], ptrBase[offset], j);
}
} }
dst.emplace(i, v);
} }
} }
else if (interleavedByLane) Else
{ {
// Lane-interleaved data. No divergent offsets. // No divergent offsets or masked lanes.
Pointer<SIMD::Float> src = ptrBase; if (interleavedByLane)
for (auto i = 0u; i < objectTy.sizeInComponents; i++)
{ {
dst.emplace(i, src[i]); // Lane-interleaved data.
Pointer<SIMD::Float> src = ptrBase;
for (auto i = 0u; i < objectTy.sizeInComponents; i++)
{
load[i] = src[i];
}
} }
} else
else
{
// Non-interleaved data. No divergent offsets.
for (auto i = 0u; i < objectTy.sizeInComponents; i++)
{ {
dst.emplace(i, RValue<SIMD::Float>(ptrBase[i])); // Non-interleaved data.
for (auto i = 0u; i < objectTy.sizeInComponents; i++)
{
load[i] = RValue<SIMD::Float>(ptrBase[i]);
}
} }
} }
auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents);
for (auto i = 0u; i < objectTy.sizeInComponents; i++)
{
dst.emplace(i, load[i]);
}
} }
void SpirvShader::EmitAccessChain(InsnIterator insn, SpirvRoutine *routine) const void SpirvShader::EmitAccessChain(InsnIterator insn, SpirvRoutine *routine) const
...@@ -1348,28 +1362,35 @@ namespace sw ...@@ -1348,28 +1362,35 @@ namespace sw
} }
bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass); bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
auto anyInactiveLanes = SignMask(~routine->activeLaneMask) != 0;
if (object.kind == Object::Kind::Constant) if (object.kind == Object::Kind::Constant)
{ {
// Constant source data.
auto src = reinterpret_cast<float *>(object.constantValue.get()); auto src = reinterpret_cast<float *>(object.constantValue.get());
If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
if (pointer.kind == Object::Kind::Value)
{ {
// Constant source data. Divergent offsets. // Divergent offsets or masked lanes.
auto offsets = routine->getIntermediate(pointerId).Int(0); auto offsets = pointer.kind == Object::Kind::Value ?
As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
RValue<SIMD::Int>(SIMD::Int(0));
for (auto i = 0u; i < elementTy.sizeInComponents; i++) for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{ {
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
Int offset = Int(i) + Extract(offsets, j); If(Extract(routine->activeLaneMask, j) != 0)
if (interleavedByLane) { offset = offset * SIMD::Width + j; } {
ptrBase[offset] = RValue<Float>(src[i]); Int offset = Int(i) + Extract(offsets, j);
if (interleavedByLane) { offset = offset * SIMD::Width + j; }
ptrBase[offset] = RValue<Float>(src[i]);
}
} }
} }
} }
else Else
{ {
// Constant source data. No divergent offsets. // Constant source data.
// No divergent offsets or masked lanes.
Pointer<SIMD::Float> dst = ptrBase; Pointer<SIMD::Float> dst = ptrBase;
for (auto i = 0u; i < elementTy.sizeInComponents; i++) for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{ {
...@@ -1379,38 +1400,47 @@ namespace sw ...@@ -1379,38 +1400,47 @@ namespace sw
} }
else else
{ {
// Intermediate source data.
auto &src = routine->getIntermediate(objectId); auto &src = routine->getIntermediate(objectId);
If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
if (pointer.kind == Object::Kind::Value)
{ {
// Intermediate source data. Divergent offsets. // Divergent offsets or masked lanes.
auto offsets = routine->getIntermediate(pointerId).Int(0); auto offsets = pointer.kind == Object::Kind::Value ?
As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
RValue<SIMD::Int>(SIMD::Int(0));
for (auto i = 0u; i < elementTy.sizeInComponents; i++) for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{ {
for (int j = 0; j < SIMD::Width; j++) for (int j = 0; j < SIMD::Width; j++)
{ {
Int offset = Int(i) + Extract(offsets, j); If(Extract(routine->activeLaneMask, j) != 0)
if (interleavedByLane) { offset = offset * SIMD::Width + j; } {
ptrBase[offset] = Extract(src.Float(i), j); Int offset = Int(i) + Extract(offsets, j);
if (interleavedByLane) { offset = offset * SIMD::Width + j; }
ptrBase[offset] = Extract(src.Float(i), j);
}
} }
} }
} }
else if (interleavedByLane) Else
{ {
// Intermediate source data. Lane-interleaved data. No divergent offsets. // No divergent offsets or masked lanes.
Pointer<SIMD::Float> dst = ptrBase; if (interleavedByLane)
for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{ {
dst[i] = src.Float(i); // Lane-interleaved data.
Pointer<SIMD::Float> dst = ptrBase;
for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{
dst[i] = src.Float(i);
}
} }
} else
else
{
// Intermediate source data. Non-interleaved data. No divergent offsets.
Pointer<SIMD::Float> dst = ptrBase;
for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{ {
dst[i] = SIMD::Float(src.Float(i)); // Intermediate source data. Non-interleaved data.
Pointer<SIMD::Float> dst = ptrBase;
for (auto i = 0u; i < elementTy.sizeInComponents; i++)
{
dst[i] = SIMD::Float(src.Float(i));
}
} }
} }
} }
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <array> #include <array>
#include <cstring> #include <cstring>
#include <functional>
#include <string> #include <string>
#include <vector> #include <vector>
#include <unordered_map> #include <unordered_map>
...@@ -518,6 +519,8 @@ namespace sw ...@@ -518,6 +519,8 @@ namespace sw
Value inputs = Value{MAX_INTERFACE_COMPONENTS}; Value inputs = Value{MAX_INTERFACE_COMPONENTS};
Value outputs = Value{MAX_INTERFACE_COMPONENTS}; Value outputs = Value{MAX_INTERFACE_COMPONENTS};
SIMD::Int activeLaneMask = SIMD::Int(0xFFFFFFFF);
std::array<Pointer<Byte>, vk::MAX_BOUND_DESCRIPTOR_SETS> descriptorSets; std::array<Pointer<Byte>, vk::MAX_BOUND_DESCRIPTOR_SETS> descriptorSets;
void createLvalue(SpirvShader::Object::ID id, uint32_t size) void createLvalue(SpirvShader::Object::ID id, uint32_t size)
......
...@@ -405,6 +405,65 @@ TEST_P(SwiftShaderVulkanBufferToBufferComputeTest, Memcpy) ...@@ -405,6 +405,65 @@ TEST_P(SwiftShaderVulkanBufferToBufferComputeTest, Memcpy)
test(src.str(), [](uint32_t i) { return i; }, [](uint32_t i) { return i; }); test(src.str(), [](uint32_t i) { return i; }, [](uint32_t i) { return i; });
} }
// Builds a SPIR-V compute shader whose workgroup size comes from the test
// parameter (GetParam().localSizeX/Y/Z). For each invocation the shader loads
// one int32 from the buffer %6, adds gl_GlobalInvocationId.y and
// gl_GlobalInvocationId.z to it, and stores the sum into the buffer %5, both
// indexed by gl_GlobalInvocationId.x. The assembled source is then handed to
// test() together with two identity lambdas.
// NOTE(review): presumably the first lambda generates the input element and the
// second the expected output element for index i -- confirm against test().
TEST_P(SwiftShaderVulkanBufferToBufferComputeTest, GlobalInvocationId)
{
std::stringstream src;
src <<
"OpCapability Shader\n"
"OpMemoryModel Logical GLSL450\n"
"OpEntryPoint GLCompute %1 \"main\" %2\n"
// The local workgroup size is the only parameterized part of the shader.
"OpExecutionMode %1 LocalSize " <<
GetParam().localSizeX << " " <<
GetParam().localSizeY << " " <<
GetParam().localSizeZ << "\n" <<
"OpDecorate %3 ArrayStride 4\n"
"OpMemberDecorate %4 0 Offset 0\n"
"OpDecorate %4 BufferBlock\n"
// %5 (the buffer the shader stores to) is bound at set 0, binding 1.
"OpDecorate %5 DescriptorSet 0\n"
"OpDecorate %5 Binding 1\n"
"OpDecorate %2 BuiltIn GlobalInvocationId\n"
// %6 (the buffer the shader loads from) is bound at set 0, binding 0.
"OpDecorate %6 DescriptorSet 0\n"
"OpDecorate %6 Binding 0\n"
"%7 = OpTypeVoid\n"
"%8 = OpTypeFunction %7\n" // void()
"%9 = OpTypeInt 32 1\n" // int32
"%10 = OpTypeInt 32 0\n" // uint32
"%3 = OpTypeRuntimeArray %9\n" // int32[]
"%4 = OpTypeStruct %3\n" // struct{ int32[] }
"%11 = OpTypePointer Uniform %4\n" // struct{ int32[] }*
"%5 = OpVariable %11 Uniform\n" // struct{ int32[] }* out (target of the OpStore below)
"%12 = OpConstant %9 0\n" // int32(0)
"%13 = OpConstant %9 1\n" // int32(1)
"%14 = OpConstant %10 0\n" // uint32(0)
"%15 = OpConstant %10 1\n" // uint32(1)
"%16 = OpConstant %10 2\n" // uint32(2)
"%17 = OpTypeVector %10 3\n" // vec3<uint32>
"%18 = OpTypePointer Input %17\n" // vec3<uint32>*
"%2 = OpVariable %18 Input\n" // gl_GlobalInvocationId
"%19 = OpTypePointer Input %10\n" // uint32*
"%6 = OpVariable %11 Uniform\n" // struct{ int32[] }* in (source of the OpLoad below)
"%20 = OpTypePointer Uniform %9\n" // int32*
"%1 = OpFunction %7 None %8\n" // -- Function begin --
"%21 = OpLabel\n"
"%22 = OpAccessChain %19 %2 %14\n" // &gl_GlobalInvocationId.x
"%23 = OpAccessChain %19 %2 %15\n" // &gl_GlobalInvocationId.y
"%24 = OpAccessChain %19 %2 %16\n" // &gl_GlobalInvocationId.z
"%25 = OpLoad %10 %22\n" // gl_GlobalInvocationId.x
"%26 = OpLoad %10 %23\n" // gl_GlobalInvocationId.y
"%27 = OpLoad %10 %24\n" // gl_GlobalInvocationId.z
"%28 = OpAccessChain %20 %6 %12 %25\n" // &in.arr[gl_GlobalInvocationId.x]
"%29 = OpLoad %9 %28\n" // in.arr[gl_GlobalInvocationId.x]
"%30 = OpIAdd %9 %29 %26\n" // in.arr[gl_GlobalInvocationId.x] + gl_GlobalInvocationId.y
"%31 = OpIAdd %9 %30 %27\n" // in.arr[gl_GlobalInvocationId.x] + gl_GlobalInvocationId.y + gl_GlobalInvocationId.z
"%32 = OpAccessChain %20 %5 %12 %25\n" // &out.arr[gl_GlobalInvocationId.x]
"OpStore %32 %31\n" // out.arr[gl_GlobalInvocationId.x] = in.arr[gl_GlobalInvocationId.x] + gl_GlobalInvocationId.y + gl_GlobalInvocationId.z
"OpReturn\n"
"OpFunctionEnd\n";
// gl_GlobalInvocationId.y and gl_GlobalInvocationId.z should both be zero,
// so the value written is expected to equal the value read (hence the
// identity lambdas).
test(src.str(), [](uint32_t i) { return i; }, [](uint32_t i) { return i; });
}
TEST_P(SwiftShaderVulkanBufferToBufferComputeTest, BranchSimple) TEST_P(SwiftShaderVulkanBufferToBufferComputeTest, BranchSimple)
{ {
std::stringstream src; std::stringstream src;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment