Commit 69c37491 by Ben Clayton

SpirvShader: Fixes for phis.

Loops attempted to special-case phis with allocas to deal with their non-SSA nature, but this didn't handle divergent loops correctly.

Rearchitect phis to always use an alloca, and mask the updates with the edge masks. This handles all cases in a cleaner, more unified implementation.

Tests: dEQP-VK.glsl.switch.*
Tests: dEQP-VK.glsl.functions.control_flow.*
Bug: b/128527271
Change-Id: I5b450cd3f5f10b1076d65750d326fedc286abfe5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31269
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 0e976bca
...@@ -1900,6 +1900,13 @@ namespace sw ...@@ -1900,6 +1900,13 @@ namespace sw
} }
break; break;
} }
case spv::OpPhi:
{
auto type = getType(insn.word(1));
Object::ID resultId = insn.word(2);
routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
break;
}
default: default:
// Nothing else produces interface variables, so can all be safely ignored. // Nothing else produces interface variables, so can all be safely ignored.
break; break;
...@@ -2065,88 +2072,44 @@ namespace sw ...@@ -2065,88 +2072,44 @@ namespace sw
return; // Already emitted this loop. return; // Already emitted this loop.
} }
// loopActiveLaneMask is the mask of lanes that are continuing to loop. std::unordered_set<Block::ID> incomingBlocks;
// This is initialized with the incoming active lane masks. std::unordered_set<Block::ID> loopBlocks;
SIMD::Int loopActiveLaneMask = SIMD::Int(0);
for (auto in : block.ins) for (auto in : block.ins)
{ {
if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back-edge
{ {
loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId); incomingBlocks.emplace(in);
}
else
{
loopBlocks.emplace(in);
} }
} }
// mergeActiveLaneMasks contains edge lane masks for the merge block. // Emit the loop phi instructions, and initialize them with a value from
// This is the union of all edge masks across all iterations of the loop. // the incoming blocks.
std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
for (auto in : getBlock(block.mergeBlock).ins)
{
mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
}
// Generate an alloca for each of the loop's phis.
// These will be primed with the incoming, non back edge Phi values
// before the loop, and then updated just before the loop jumps back to
// the block.
struct LoopPhi
{
LoopPhi(Object::ID id, uint32_t size) : phiId(id), storage(size) {}
Object::ID phiId; // The Phi identifier.
Object::ID continueValue; // The source merge value from the loop.
Array<SIMD::Int> storage; // The alloca.
};
std::vector<LoopPhi> phis;
// For each OpPhi between the block start and the merge instruction:
for (auto insn = block.begin(); insn != block.mergeInstruction; insn++) for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
{ {
if (insn.opcode() == spv::OpPhi) if (insn.opcode() == spv::OpPhi)
{ {
auto objectId = Object::ID(insn.word(2)); StorePhi(insn, state, incomingBlocks);
auto &object = getObject(objectId); }
auto &type = getType(object.type); }
LoopPhi phi(insn.word(2), type.sizeInComponents);
// Start with the Phi set to 0.
for (uint32_t i = 0; i < type.sizeInComponents; i++)
{
phi.storage[i] = SIMD::Int(0);
}
// For each Phi source:
for (uint32_t w = 3; w < insn.wordCount(); w += 2)
{
auto varId = Object::ID(insn.word(w + 0));
auto blockId = Block::ID(insn.word(w + 1));
if (block.ins.count(blockId) == 0)
{
continue; // In is unreachable. Ignore.
}
if (existsPath(state->currentBlock, blockId, block.mergeBlock)) // loopActiveLaneMask is the mask of lanes that are continuing to loop.
{ // This is initialized with the incoming active lane masks.
// This source is from a loop back-edge. SIMD::Int loopActiveLaneMask = SIMD::Int(0);
ASSERT(phi.continueValue == 0 || phi.continueValue == varId); for (auto in : incomingBlocks)
phi.continueValue = varId; {
} loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
else }
{
// This source is from a preceding block.
for (uint32_t i = 0; i < type.sizeInComponents; i++)
{
auto in = GenericValue(this, state->routine, varId);
auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
}
}
}
phis.push_back(phi); // mergeActiveLaneMasks contains edge lane masks for the merge block.
} // This is the union of all edge masks across all iterations of the loop.
std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
for (auto in : getBlock(block.mergeBlock).ins)
{
mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
} }
// Create the loop basic blocks // Create the loop basic blocks
...@@ -2157,25 +2120,17 @@ namespace sw ...@@ -2157,25 +2120,17 @@ namespace sw
Nucleus::createBr(headerBasicBlock); Nucleus::createBr(headerBasicBlock);
Nucleus::setInsertBlock(headerBasicBlock); Nucleus::setInsertBlock(headerBasicBlock);
// Load the Phi values from storage.
// This will load at the start of each loop.
for (auto &phi : phis)
{
auto &type = getType(getObject(phi.phiId).type);
auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
for (unsigned int i = 0u; i < type.sizeInComponents; i++)
{
dst.move(i, phi.storage[i]);
}
}
// Load the active lane mask. // Load the active lane mask.
state->setActiveLaneMask(loopActiveLaneMask); state->setActiveLaneMask(loopActiveLaneMask);
// Emit all the non-phi instructions in this loop header block. // Emit the non-phi loop header block's instructions.
for (auto insn = block.begin(); insn != block.end(); insn++) for (auto insn = block.begin(); insn != block.end(); insn++)
{ {
if (insn.opcode() != spv::OpPhi) if (insn.opcode() == spv::OpPhi)
{
LoadPhi(insn, state);
}
else
{ {
EmitInstruction(insn, state); EmitInstruction(insn, state);
} }
...@@ -2211,17 +2166,12 @@ namespace sw ...@@ -2211,17 +2166,12 @@ namespace sw
} }
} }
// Update loop phi values // Update loop phi values.
for (auto &phi : phis) for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
{ {
if (phi.continueValue != 0) if (insn.opcode() == spv::OpPhi)
{ {
auto val = GenericValue(this, state->routine, phi.continueValue); StorePhi(insn, state, loopBlocks);
auto &type = getType(getObject(phi.phiId).type);
for (unsigned int i = 0u; i < type.sizeInComponents; i++)
{
phi.storage[i] = (val.Int(i) & loopActiveLaneMask) | (phi.storage[i] & ~loopActiveLaneMask);
}
} }
} }
...@@ -4604,43 +4554,60 @@ namespace sw ...@@ -4604,43 +4554,60 @@ namespace sw
SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
{ {
auto currentBlock = getBlock(state->currentBlock);
StorePhi(insn, state, currentBlock.ins);
LoadPhi(insn, state);
return EmitResult::Continue;
}
void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
{
auto routine = state->routine;
auto typeId = Type::ID(insn.word(1));
auto type = getType(typeId);
auto objectId = Object::ID(insn.word(2));
auto storageIt = state->routine->phis.find(objectId);
ASSERT(storageIt != state->routine->phis.end());
auto &storage = storageIt->second;
auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
for(uint32_t i = 0; i < type.sizeInComponents; i++)
{
dst.move(i, storage[i]);
}
}
void SpirvShader::StorePhi(InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
{
auto routine = state->routine; auto routine = state->routine;
auto typeId = Type::ID(insn.word(1)); auto typeId = Type::ID(insn.word(1));
auto type = getType(typeId); auto type = getType(typeId);
auto objectId = Object::ID(insn.word(2)); auto objectId = Object::ID(insn.word(2));
auto currentBlock = getBlock(state->currentBlock); auto currentBlock = getBlock(state->currentBlock);
auto tmp = std::unique_ptr<SIMD::Int[]>(new SIMD::Int[type.sizeInComponents]); auto storageIt = state->routine->phis.find(objectId);
ASSERT(storageIt != state->routine->phis.end());
auto &storage = storageIt->second;
bool first = true;
for (uint32_t w = 3; w < insn.wordCount(); w += 2) for (uint32_t w = 3; w < insn.wordCount(); w += 2)
{ {
auto varId = Object::ID(insn.word(w + 0)); auto varId = Object::ID(insn.word(w + 0));
auto blockId = Block::ID(insn.word(w + 1)); auto blockId = Block::ID(insn.word(w + 1));
if (currentBlock.ins.count(blockId) == 0) if (filter.count(blockId) == 0)
{ {
continue; // In is unreachable. Ignore. continue;
} }
auto in = GenericValue(this, routine, varId);
auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock); auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
auto in = GenericValue(this, routine, varId);
for (uint32_t i = 0; i < type.sizeInComponents; i++) for (uint32_t i = 0; i < type.sizeInComponents; i++)
{ {
auto inMasked = in.Int(i) & mask; storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
tmp[i] = first ? inMasked : (tmp[i] | inMasked);
} }
first = false;
}
auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
for(uint32_t i = 0; i < type.sizeInComponents; i++)
{
dst.move(i, tmp[i]);
} }
return EmitResult::Continue;
} }
SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
......
...@@ -960,6 +960,15 @@ namespace sw ...@@ -960,6 +960,15 @@ namespace sw
SIMD::Pointer GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const; SIMD::Pointer GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
uint32_t GetConstScalarInt(Object::ID id) const; uint32_t GetConstScalarInt(Object::ID id) const;
// LoadPhi loads the phi values from the alloca storage and places the
// load values into the intermediate with the phi's result id.
void LoadPhi(InsnIterator insn, EmitState *state) const;
// StorePhi updates the phi's alloca storage value using the incoming
// values from blocks that are both in the OpPhi instruction and in
// filter.
void StorePhi(InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const;
// Emits a rr::Fence for the given MemorySemanticsMask. // Emits a rr::Fence for the given MemorySemanticsMask.
void Fence(spv::MemorySemanticsMask semantics) const; void Fence(spv::MemorySemanticsMask semantics) const;
...@@ -1008,6 +1017,8 @@ namespace sw ...@@ -1008,6 +1017,8 @@ namespace sw
std::unordered_map<SpirvShader::Object::ID, SIMD::Pointer> pointers; std::unordered_map<SpirvShader::Object::ID, SIMD::Pointer> pointers;
std::unordered_map<SpirvShader::Object::ID, Variable> phis;
Variable inputs = Variable{MAX_INTERFACE_COMPONENTS}; Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
Variable outputs = Variable{MAX_INTERFACE_COMPONENTS}; Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment