SpirvShader: Fixes for phis.

Loops attempted to special-case phis with allocas to deal with their non-ssa nature, but this didn't handle divergent loops correctly. Rearchitect phis to always use an alloca, and mask the updates with the edge masks. This handles all cases in a cleaner, more unified implementation. Tests: dEQP-VK.glsl.switch.* Tests: dEQP-VK.glsl.functions.control_flow.* Bug: b/128527271 Change-Id: I5b450cd3f5f10b1076d65750d326fedc286abfe5 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31269 Presubmit-Ready: Ben Clayton <bclayton@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Chris Forbes <chrisforbes@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>

SpirvShader: Fixes for phis.
69c37491 · Ben Clayton · 0e976bca · 69c37491 · 69c37491
Commit 69c37491 authored May 13, 2019 by Ben Clayton
Hide whitespace changes
Inline Side-by-side

Showing with 87 additions and 109 deletions

SpirvShader.cpp src/Pipeline/SpirvShader.cpp +76 -109

SpirvShader.hpp src/Pipeline/SpirvShader.hpp +11 -0

No files found.
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1900,6 +1900,13 @@ namespace sw
 				}
 				break;
 			}
+			case spv::OpPhi:
+			{
+				auto type = getType(insn.word(1));
+				Object::ID resultId = insn.word(2);
+				routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
+				break;
+			}
 			default:
 				// Nothing else produces interface variables, so can all be safely ignored.
 				break;
@@ -2065,88 +2072,44 @@ namespace sw
 			return; // Already emitted this loop.
 		}

-		// loopActiveLaneMask is the mask of lanes that are continuing to loop.
-		// This is initialized with the incoming active lane masks.
-		SIMD::Int loopActiveLaneMask = SIMD::Int(0);
+		std::unordered_set<Block::ID> incomingBlocks;
+		std::unordered_set<Block::ID> loopBlocks;
 		for (auto in : block.ins)
 		{
-			if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
+			if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back-edge
 			{
-				loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
+				incomingBlocks.emplace(in);
+			}
+			else
+			{
+				loopBlocks.emplace(in);
 			}
 		}

-		// mergeActiveLaneMasks contains edge lane masks for the merge block.
-		// This is the union of all edge masks across all iterations of the loop.
-		std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
-		for (auto in : getBlock(block.mergeBlock).ins)
-		{
-			mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
-		}
-
-		// Generate an alloca for each of the loop's phis.
-		// These will be primed with the incoming, non back edge Phi values
-		// before the loop, and then updated just before the loop jumps back to
-		// the block.
-		struct LoopPhi
-		{
-			LoopPhi(Object::ID id, uint32_t size) : phiId(id), storage(size) {}
-
-			Object::ID phiId; // The Phi identifier.
-			Object::ID continueValue; // The source merge value from the loop.
-			Array<SIMD::Int> storage; // The alloca.
-		};
-
-		std::vector<LoopPhi> phis;
-
-		// For each OpPhi between the block start and the merge instruction:
+		// Emit the loop phi instructions, and initialize them with a value from
+		// the incoming blocks.
 		for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
 		{
 			if (insn.opcode() == spv::OpPhi)
 			{
-				auto objectId = Object::ID(insn.word(2));
-				auto &object = getObject(objectId);
-				auto &type = getType(object.type);
-
-				LoopPhi phi(insn.word(2), type.sizeInComponents);
-
-				// Start with the Phi set to 0.
-				for (uint32_t i = 0; i < type.sizeInComponents; i++)
-				{
-					phi.storage[i] = SIMD::Int(0);
-				}
-
-				// For each Phi source:
-				for (uint32_t w = 3; w < insn.wordCount(); w += 2)
-				{
-					auto varId = Object::ID(insn.word(w + 0));
-					auto blockId = Block::ID(insn.word(w + 1));
-
-					if (block.ins.count(blockId) == 0)
-					{
-						continue; // In is unreachable. Ignore.
-					}
+				StorePhi(insn, state, incomingBlocks);
+			}
+		}

-					if (existsPath(state->currentBlock, blockId, block.mergeBlock))
-					{
-						// This source is from a loop back-edge.
-						ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
-						phi.continueValue = varId;
-					}
-					else
-					{
-						// This source is from a preceding block.
-						for (uint32_t i = 0; i < type.sizeInComponents; i++)
-						{
-							auto in = GenericValue(this, state->routine, varId);
-							auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
-							phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
-						}
-					}
-				}
+		// loopActiveLaneMask is the mask of lanes that are continuing to loop.
+		// This is initialized with the incoming active lane masks.
+		SIMD::Int loopActiveLaneMask = SIMD::Int(0);
+		for (auto in : incomingBlocks)
+		{
+			loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
+		}

-				phis.push_back(phi);
-			}
+		// mergeActiveLaneMasks contains edge lane masks for the merge block.
+		// This is the union of all edge masks across all iterations of the loop.
+		std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
+		for (auto in : getBlock(block.mergeBlock).ins)
+		{
+			mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
 		}

 		// Create the loop basic blocks
@@ -2157,25 +2120,17 @@ namespace sw
 		Nucleus::createBr(headerBasicBlock);
 		Nucleus::setInsertBlock(headerBasicBlock);

-		// Load the Phi values from storage.
-		// This will load at the start of each loop.
-		for (auto &phi : phis)
-		{
-			auto &type = getType(getObject(phi.phiId).type);
-			auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
-			for (unsigned int i = 0u; i < type.sizeInComponents; i++)
-			{
-				dst.move(i, phi.storage[i]);
-			}
-		}
-
 		// Load the active lane mask.
 		state->setActiveLaneMask(loopActiveLaneMask);

-		// Emit all the non-phi instructions in this loop header block.
+		// Emit the loop header block's instructions, loading phis from their storage.
 		for (auto insn = block.begin(); insn != block.end(); insn++)
 		{
-			if (insn.opcode() != spv::OpPhi)
+			if (insn.opcode() == spv::OpPhi)
+			{
+				LoadPhi(insn, state);
+			}
+			else
 			{
 				EmitInstruction(insn, state);
 			}
@@ -2211,17 +2166,12 @@ namespace sw
 			}
 		}

-		// Update loop phi values
-		for (auto &phi : phis)
+		// Update loop phi values.
+		for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
 		{
-			if (phi.continueValue != 0)
+			if (insn.opcode() == spv::OpPhi)
 			{
-				auto val = GenericValue(this, state->routine, phi.continueValue);
-				auto &type = getType(getObject(phi.phiId).type);
-				for (unsigned int i = 0u; i < type.sizeInComponents; i++)
-				{
-					phi.storage[i] = (val.Int(i) & loopActiveLaneMask) | (phi.storage[i] & ~loopActiveLaneMask);
-				}
+				StorePhi(insn, state, loopBlocks);
 			}
 		}

@@ -4604,43 +4554,60 @@ namespace sw

 	SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
 	{
+		auto currentBlock = getBlock(state->currentBlock);
+		StorePhi(insn, state, currentBlock.ins);
+		LoadPhi(insn, state);
+		return EmitResult::Continue;
+	}
+
+	void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
+	{
+		auto routine = state->routine;
+		auto typeId = Type::ID(insn.word(1));
+		auto type = getType(typeId);
+		auto objectId = Object::ID(insn.word(2));
+
+		auto storageIt = state->routine->phis.find(objectId);
+		ASSERT(storageIt != state->routine->phis.end());
+		auto &storage = storageIt->second;
+
+		auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
+		for(uint32_t i = 0; i < type.sizeInComponents; i++)
+		{
+			dst.move(i, storage[i]);
+		}
+	}
+
+	void SpirvShader::StorePhi(InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
+	{
 		auto routine = state->routine;
 		auto typeId = Type::ID(insn.word(1));
 		auto type = getType(typeId);
 		auto objectId = Object::ID(insn.word(2));
 		auto currentBlock = getBlock(state->currentBlock);

-		auto tmp = std::unique_ptr<SIMD::Int[]>(new SIMD::Int[type.sizeInComponents]);
+		auto storageIt = state->routine->phis.find(objectId);
+		ASSERT(storageIt != state->routine->phis.end());
+		auto &storage = storageIt->second;

-		bool first = true;
 		for (uint32_t w = 3; w < insn.wordCount(); w += 2)
 		{
 			auto varId = Object::ID(insn.word(w + 0));
 			auto blockId = Block::ID(insn.word(w + 1));

-			if (currentBlock.ins.count(blockId) == 0)
+			if (filter.count(blockId) == 0)
 			{
-				continue; // In is unreachable. Ignore.
+				continue;
 			}

-			auto in = GenericValue(this, routine, varId);
 			auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
+			auto in = GenericValue(this, routine, varId);

 			for (uint32_t i = 0; i < type.sizeInComponents; i++)
 			{
-				auto inMasked = in.Int(i) & mask;
-				tmp[i] = first ? inMasked : (tmp[i] | inMasked);
+				storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
 			}
-			first = false;
-		}
-
-		auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
-		for(uint32_t i = 0; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, tmp[i]);
 		}
-
-		return EmitResult::Continue;
 	}

 	SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const

--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -960,6 +960,15 @@ namespace sw
 		SIMD::Pointer GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
 		uint32_t GetConstScalarInt(Object::ID id) const;

+		// LoadPhi loads the phi values from the alloca storage and places the
+		// loaded values into the intermediate with the phi's result id.
+		void LoadPhi(InsnIterator insn, EmitState *state) const;
+
+		// StorePhi updates the phi's alloca storage value using the incoming
+		// values from blocks that are both in the OpPhi instruction and in
+		// filter.
+		void StorePhi(InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const;
+
 		// Emits a rr::Fence for the given MemorySemanticsMask.
 		void Fence(spv::MemorySemanticsMask semantics) const;

@@ -1008,6 +1017,8 @@ namespace sw

 		std::unordered_map<SpirvShader::Object::ID, SIMD::Pointer> pointers;

+		std::unordered_map<SpirvShader::Object::ID, Variable> phis;
+
 		Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
 		Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};