Commit 9d654343 by Sean Risser

Implement shaderSubgroupBroadcastDynamicId

OpGroupNonUniformBroadcast can be supplied either a constant value or an intermediate. I've split this Op into a fast path for constants, which uses the original code, and a slow path that handles intermediates.

Tests: dEQP-VK.subgroups.ballot_broadcast.*
Bug: b/169608683
Change-Id: Idc74f3fe7e906315c59cf5b3bf3a450046e37375
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/50671
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 7cbb9808
...@@ -139,12 +139,39 @@ SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, Emit ...@@ -139,12 +139,39 @@ SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, Emit
case spv::OpGroupNonUniformBroadcast: case spv::OpGroupNonUniformBroadcast:
{ {
auto valueId = Object::ID(insn.word(4)); auto valueId = Object::ID(insn.word(4));
auto id = SIMD::Int(GetConstScalarInt(insn.word(5))); auto idId = Object::ID(insn.word(5));
Operand value(this, state, valueId); Operand value(this, state, valueId);
auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
for(auto i = 0u; i < type.componentCount; i++) // Decide between the fast path for constants and the slow path for
// intermediates.
if(getObject(idId).kind == SpirvShader::Object::Kind::Constant)
{
auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
for(auto i = 0u; i < type.componentCount; i++)
{
dst.move(i, OrAll(value.Int(i) & mask));
}
}
else
{ {
dst.move(i, OrAll(value.Int(i) & mask)); Operand id(this, state, idId);
SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask()); // Considers helper invocations active. See b/151137030
SIMD::UInt inactive = ~active;
SIMD::UInt filled = id.UInt(0) & active;
for(int j = 0; j < SIMD::Width - 1; j++)
{
filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value
}
auto mask = CmpEQ(filled, SIMD::UInt(0, 1, 2, 3));
for(uint32_t i = 0u; i < type.componentCount; i++)
{
dst.move(i, OrAll(value.UInt(i) & mask));
}
} }
break; break;
} }
......
...@@ -271,7 +271,7 @@ static void getPhysicalDeviceVulkan12Features(T *features) ...@@ -271,7 +271,7 @@ static void getPhysicalDeviceVulkan12Features(T *features)
features->vulkanMemoryModelAvailabilityVisibilityChains = VK_FALSE; features->vulkanMemoryModelAvailabilityVisibilityChains = VK_FALSE;
features->shaderOutputViewportIndex = VK_FALSE; features->shaderOutputViewportIndex = VK_FALSE;
features->shaderOutputLayer = VK_FALSE; features->shaderOutputLayer = VK_FALSE;
features->subgroupBroadcastDynamicId = VK_FALSE; features->subgroupBroadcastDynamicId = VK_TRUE;
} }
void PhysicalDevice::getFeatures2(VkPhysicalDeviceFeatures2 *features) const void PhysicalDevice::getFeatures2(VkPhysicalDeviceFeatures2 *features) const
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment