LLVMReactor: Drop codegenMutex, now thread safe.

LLVMReactor used to have a Big Fat Global Mutex over the entire lifetime of the Nucleus object. This was required as LLVMReactor used global variables for storing builder state. Over the past year, there has been significant code cleanup and global state has been reduced to a couple of globals that can now be marked thread_local. With all state now being immutable global or thread local, we are now able to remove the mutex. ASAN and TSAN checks for our unittests are clean. Bug: b/153803432 Change-Id: Ibe4019fb783f86e734387db431539e915369b488 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33484Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>

LLVMReactor: Drop codegenMutex, now thread safe.
20cf5c5c · Ben Clayton · 7ccdeedd · 20cf5c5c · 20cf5c5c · 20cf5c5c
Commit 20cf5c5c authored Jul 01, 2019 by Ben Clayton
6 changed files
--- a/src/Reactor/Coroutine.hpp
+++ b/src/Reactor/Coroutine.hpp
@@ -135,12 +135,10 @@ public:
 	// executable code. After calling, no more reactor functions may be
 	// called without building a new rr::Function or rr::Coroutine.
 	// While automatically called by operator(), finalize() should be called
-	// as early as possible to release the global Reactor mutex lock.
+	// as soon as possible once the coroutine has been fully built.
-	// It must also be called explicitly on the same thread that instantiates
+	// finalize() *must* be called explicitly on the same thread that
-	// the Coroutine instance if operator() is invoked on separate threads.
+	// instantiates the Coroutine instance if operator() is to be invoked on
-	// This is because presently, Reactor backends use a global mutex scoped
+	// different threads.
-	// to the generation of routines, and these must be locked/unlocked on the
-	// same thread.
 	inline void finalize(const Config::Edit &cfg = Config::Edit::None);
 	// Starts execution of the coroutine and returns a unique_ptr to a

--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp
@@ -57,6 +57,7 @@ __pragma(warning(push))
    __pragma(warning(pop))
 #endif
+#include <atomic>
 #include <unordered_map>
 #if defined(_WIN64)
@@ -610,7 +611,7 @@ public:
 		for(size_t i = 0; i < count; i++)
 		{
 			auto func = funcs[i];
-			static size_t numEmittedFunctions = 0;
+			static std::atomic<size_t> numEmittedFunctions = { 0 };
 			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
 			func->setName(name);
 			func->setLinkage(llvm::GlobalValue::ExternalLinkage);

--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -59,10 +59,13 @@ extern "C" void X86CompilationCallback()
 }
 #endif
+#if !LLVM_ENABLE_THREADS
+#	error "LLVM_ENABLE_THREADS needs to be enabled"
+#endif
 namespace {
-std::unique_ptr<rr::JITBuilder> jit;
+thread_local std::unique_ptr<rr::JITBuilder> jit;
-std::mutex codegenMutex;
 // Default configuration settings. Must be accessed under mutex lock.
 std::mutex defaultConfigLock;
@@ -599,8 +602,6 @@ static ::llvm::Function *createFunction(const char *name, ::llvm::Type *retTy, c
 Nucleus::Nucleus()
 {
-	::codegenMutex.lock();  // Reactor and LLVM are currently not thread safe
 	ASSERT(jit == nullptr);
 	jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
 }
@@ -608,7 +609,6 @@ Nucleus::Nucleus()
 Nucleus::~Nucleus()
 {
 	jit.reset();
-	::codegenMutex.unlock();
 }
 void Nucleus::setDefaultConfig(const Config &cfg)

--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -63,7 +63,7 @@ void rr::Config::Edit::apply(const std::vector<std::pair<ListEdit, T>> &edits, s
 }
 // Set of variables that do not have a stack location yet.
-std::unordered_set<Variable *> Variable::unmaterializedVariables;
+thread_local std::unordered_set<Variable *> Variable::unmaterializedVariables;
 Variable::Variable(Type *type, int arraySize)
    : arraySize(arraySize)

--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -133,7 +133,7 @@ private:
 	static void materializeAll();
 	static void killUnmaterialized();
-	static std::unordered_set<Variable *> unmaterializedVariables;
+	static thread_local std::unordered_set<Variable *> unmaterializedVariables;
 	Type *const type;
 	mutable Value *rvalue = nullptr;

--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -2949,6 +2949,114 @@ TEST(ReactorUnitTests, LoadFromConstantData)
 	EXPECT_EQ(result, value);
 }
+TEST(ReactorUnitTests, Multithreaded_Function)
+{
+	constexpr int numThreads = 32;
+	constexpr int numLoops = 64;
+	auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+	auto results = std::unique_ptr<int[]>(new int[numThreads * numLoops]);
+	for(int t = 0; t < numThreads; t++)
+	{
+		auto threadFunc = [&](int t) {
+			for(int l = 0; l < numLoops; l++)
+			{
+				FunctionT<int(int, int)> function;
+				{
+					Int a = function.Arg<0>();
+					Int b = function.Arg<1>();
+					Return((a << 16) | b);
+				}
+				auto f = function("thread%d_loop%d", t, l);
+				results[t * numLoops + l] = f(t, l);
+			}
+		};
+		threads[t] = std::thread(threadFunc, t);
+	}
+	for(int t = 0; t < numThreads; t++)
+	{
+		threads[t].join();
+	}
+	for(int t = 0; t < numThreads; t++)
+	{
+		for(int l = 0; l < numLoops; l++)
+		{
+			auto expect = (t << 16) | l;
+			auto result = results[t * numLoops + l];
+			EXPECT_EQ(result, expect);
+		}
+	}
+}
+TEST(ReactorUnitTests, Multithreaded_Coroutine)
+{
+	if(!rr::Caps.CoroutinesSupported)
+	{
+		SUCCEED() << "Coroutines not supported";
+		return;
+	}
+	constexpr int numThreads = 32;
+	constexpr int numLoops = 64;
+	struct Result
+	{
+		bool yieldReturns[3];
+		int yieldValues[3];
+	};
+	auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+	auto results = std::unique_ptr<Result[]>(new Result[numThreads * numLoops]);
+	for(int t = 0; t < numThreads; t++)
+	{
+		auto threadFunc = [&](int t) {
+			for(int l = 0; l < numLoops; l++)
+			{
+				Coroutine<int(int, int)> function;
+				{
+					Int a = function.Arg<0>();
+					Int b = function.Arg<1>();
+					Yield(a);
+					Yield(b);
+				}
+				auto coroutine = function(t, l);
+				auto &result = results[t * numLoops + l];
+				result = {};
+				result.yieldReturns[0] = coroutine->await(result.yieldValues[0]);
+				result.yieldReturns[1] = coroutine->await(result.yieldValues[1]);
+				result.yieldReturns[2] = coroutine->await(result.yieldValues[2]);
+			}
+		};
+		threads[t] = std::thread(threadFunc, t);
+	}
+	for(int t = 0; t < numThreads; t++)
+	{
+		threads[t].join();
+	}
+	for(int t = 0; t < numThreads; t++)
+	{
+		for(int l = 0; l < numLoops; l++)
+		{
+			auto const &result = results[t * numLoops + l];
+			EXPECT_EQ(result.yieldReturns[0], true);
+			EXPECT_EQ(result.yieldValues[0], t);
+			EXPECT_EQ(result.yieldReturns[1], true);
+			EXPECT_EQ(result.yieldValues[1], l);
+			EXPECT_EQ(result.yieldReturns[2], false);
+			EXPECT_EQ(result.yieldValues[2], 0);
+		}
+	}
+}
 int main(int argc, char **argv)
 {
 	::testing::InitGoogleTest(&argc, argv);