SpirvShader: Implement OpMemoryBarrier.

Tests: dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.*
Tests: dEQP-VK.subgroups.basic.compute.*
Tests: dEQP-VK.compute.basic.*
Bug: b/132232716
Change-Id: If238f6b4af5c0ff6909a62241e0adb5677cb6c0b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30852
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>

SpirvShader: Implement OpMemoryBarrier.
b16c5867 · Ben Clayton · Chris Forbes · 895df0dd · b16c5867 · b16c5867
Commit b16c5867 authored May 08, 2019 by Ben Clayton Committed by Chris Forbes May 09, 2019
7 changed files
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -880,6 +880,7 @@ namespace sw
 			case spv::OpAtomicStore:
 			case spv::OpImageWrite:
 			case spv::OpCopyMemory:
+			case spv::OpMemoryBarrier:
 				// Don't need to do anything during analysis pass
 				break;

@@ -2461,6 +2462,9 @@ namespace sw
 		case spv::OpCopyMemory:
 			return EmitCopyMemory(insn, state);

+		case spv::OpMemoryBarrier:
+			return EmitMemoryBarrier(insn, state);
+
 		case spv::OpGroupNonUniformElect:
 			return EmitGroupNonUniform(insn, state);

@@ -4316,7 +4320,13 @@ namespace sw

 	std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
 	{
-		switch(memorySemantics)
+		auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
+			spv::MemorySemanticsAcquireMask |
+			spv::MemorySemanticsReleaseMask |
+			spv::MemorySemanticsAcquireReleaseMask |
+			spv::MemorySemanticsSequentiallyConsistentMask
+		);
+		switch (control)
 		{
 		case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
 		case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
@@ -4324,7 +4334,9 @@ namespace sw
 		case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
 		case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
 		default:
-			UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
+			// "it is invalid for more than one of these four bits to be set:
+			// Acquire, Release, AcquireRelease, or SequentiallyConsistent."
+			UNREACHABLE("MemorySemanticsMask: %x", int(control));
 			return std::memory_order_acq_rel;
 		}
 	}
@@ -5456,11 +5468,29 @@ namespace sw
 		return EmitResult::Continue;
 	}

+	// Emits code for OpMemoryBarrier: reads the instruction's memory-semantics
+	// operand and emits the corresponding fence, then lets emission continue.
+	SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
+	{
+		// Word 2 names the constant object that holds the memory-semantics bits.
+		const uint32_t semanticsBits = GetConstScalarInt(insn.word(2));
+
+		// TODO: We probably want to consider the memory scope here. For now,
+		// just always emit the full fence.
+		Fence(static_cast<spv::MemorySemanticsMask>(semanticsBits));
+		return EmitResult::Continue;
+	}
+
+	// Emits a rr::Fence for the given MemorySemanticsMask, or nothing when the
+	// mask requests no ordering.
+	void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
+	{
+		// Decide on the derived ordering rather than on the raw mask: a mask
+		// that only carries storage-class bits (no Acquire / Release /
+		// AcquireRelease / SequentiallyConsistent bit) is non-zero but still
+		// maps to memory_order_relaxed, which rr::Fence() does not accept.
+		auto order = MemoryOrder(semantics);
+		if (order == std::memory_order_relaxed)
+		{
+			return; //no-op
+		}
+		rr::Fence(order);
+	}
+
 	SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
 	{
 		auto &type = getType(Type::ID(insn.word(1)));
 		Object::ID resultId = insn.word(2);
-		auto scope = GetScope(insn.word(3));
+		auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
 		ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");

 		auto &dst = state->routine->createIntermediate(resultId, type.sizeInComponents);
@@ -5485,12 +5515,12 @@ namespace sw
 		return EmitResult::Continue;
 	}

-	spv::Scope SpirvShader::GetScope(Object::ID id) const
+	// GetConstScalarInt returns the first constant word of the object with the
+	// given id. The object is asserted to be a constant whose type has exactly
+	// one component; callers cast the raw value to the enum they expect.
+	uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
 	{
 		auto &scopeObj = getObject(id);
 		ASSERT(scopeObj.kind == Object::Kind::Constant);
 		ASSERT(getType(scopeObj.type).sizeInComponents == 1);
-		return spv::Scope(scopeObj.constantValue[0]);
+		return scopeObj.constantValue[0];
 	}

 	void SpirvShader::emitEpilog(SpirvRoutine *routine) const

--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -934,11 +934,15 @@ namespace sw
 		EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
+		EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;

 		void GetImageDimensions(SpirvRoutine const *routine, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
 		SIMD::Pointer GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
-		spv::Scope GetScope(Object::ID id) const;
+		uint32_t GetConstScalarInt(Object::ID id) const;
+
+		// Emits a rr::Fence for the given MemorySemanticsMask.
+		void Fence(spv::MemorySemanticsMask semantics) const;

 		// OpcodeName() returns the name of the opcode op.
 		// If NDEBUG is defined, then OpcodeName() will only return the numerical code.

--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1455,6 +1455,11 @@ namespace rr
 		::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
 	}

+	// Emits a fence instruction through the global builder, using the ordering
+	// derived from memoryOrder.
+	void Nucleus::createFence(std::memory_order memoryOrder)
+	{
+		// NOTE(review): the leading 'true' presumably marks the operation as
+		// atomic for atomicOrdering() - confirm against its definition.
+		const auto ordering = atomicOrdering(true, memoryOrder);
+		::builder->CreateFence(ordering);
+	}
+
 	Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
 	{
 		RR_DEBUG_INFO_UPDATE_LOC();

--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -124,6 +124,9 @@ namespace rr
 		static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment);
 		static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment);

+		// Barrier instructions
+		static void createFence(std::memory_order memoryOrder);
+
 		// Atomic instructions
 		static Value *createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
 		static Value *createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);

--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "Reactor.hpp"
+#include "Debug.hpp"

 // Define REACTOR_MATERIALIZE_LVALUES_ON_DEFINITION to non-zero to ensure all
 // variables have a stack location obtained throuch alloca().
@@ -4226,4 +4227,14 @@ namespace rr
 		Nucleus::createScatter(base.value, val.value, offsets.value, mask.value, alignment);
 	}

+	// Emits a memory fence with the given ordering by forwarding to
+	// Nucleus::createFence(). Only acquire, release, acq_rel and seq_cst are
+	// accepted; any other ordering trips the assert below.
+	void Fence(std::memory_order memoryOrder)
+	{
+		ASSERT_MSG(memoryOrder == std::memory_order_acquire ||
+			memoryOrder == std::memory_order_release ||
+			memoryOrder == std::memory_order_acq_rel ||
+			memoryOrder == std::memory_order_seq_cst,
+			"Unsupported memoryOrder: %d", int(memoryOrder));
+		Nucleus::createFence(memoryOrder);
+	}
+
 }
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2385,6 +2385,11 @@ namespace rr
 		Store(RValue<T>(value), RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
 	}

+	// Fence adds a memory barrier that enforces ordering constraints on memory
+	// operations. memoryOrder can only be one of:
+	// std::memory_order_acquire, std::memory_order_release,
+	// std::memory_order_acq_rel, or std::memory_order_seq_cst.
+	void Fence(std::memory_order memoryOrder);

 	template<class T, int S = 1>
 	class Array : public LValue<T>

--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3455,6 +3455,7 @@ namespace rr

 	// Below are functions currently unimplemented for the Subzero backend.
 	// They are stubbed to satisfy the linker.
+	void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
 	Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
 	void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
 	RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }