Implement MemorySanitizer instrumentation of Reactor routines

MemorySanitizer checks for uninitialized memory and variable usage. It instruments each instruction to update 'shadow' memory which indicates which bits are 'poisoned', and inserts tests for dereferencing pointers and conditional branching that make use of not fully initialized data. The instrumentation is done by the llvm::MemorySanitizerLegacyPass. Functions must opt in to the instrumentation with the 'SanitizeMemory' attribute. MemorySanitizer relies on several TLS variables for storing the shadow value of function parameters and return values. The JIT makes calls to __emutls_get_address() to obtain the address of these variables, passing it the address of a __emutls_v.* control structure unique to the TLS variable. We replace the former with our own function through the symbol resolver, and the latter are represented by enum values, to allow obtaining the real TLS variable's address in C++ code. This is enabled behind a REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION flag. For now, it is only enabled for CMake builds. Bug: b/155148722 Change-Id: I6d755244589c9b0de19a283f9dff5d8a3bf6f24b Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/49829 Kokoro-Result: kokoro <noreply+kokoro@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>

Implement MemorySanitizer instrumentation of Reactor routines
4804ac87 · Nicolas Capens · Nicolas Capens · b638dfe1 · 4804ac87 · 4804ac87
Commit 4804ac87 authored Nov 02, 2020 by Nicolas Capens Committed by Nicolas Capens Nov 12, 2020
7 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,6 +166,10 @@ set_property(CACHE SWIFTSHADER_LLVM_VERSION PROPERTY STRINGS "10.0")
 set(REACTOR_DEFAULT_OPT_LEVEL "Default" CACHE STRING "Reactor default optimization level")
 set_property(CACHE REACTOR_DEFAULT_OPT_LEVEL PROPERTY STRINGS "None" "Less" "Default" "Aggressive")

+# Enable instrumentation of Reactor routines for MemorySanitizer builds (LLVM backend). Defaults to on; override with -DREACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION=OFF.
+# TODO(b/155148722): Remove when unconditionally instrumenting for all build systems.
+option(REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION "Instrument Reactor routines for MemorySanitizer builds (LLVM backend)" TRUE)
+
 if(NOT DEFINED SWIFTSHADER_LOGGING_LEVEL)
    set(SWIFTSHADER_LOGGING_LEVEL "Info" CACHE STRING "SwiftShader logging level")
    set_property(CACHE SWIFTSHADER_LOGGING_LEVEL PROPERTY STRINGS "Verbose" "Debug" "Info" "Warn" "Error" "Fatal" "Disabled")
@@ -511,7 +515,6 @@ else()

    if(SWIFTSHADER_MSAN)
        if(NOT DEFINED ENV{SWIFTSHADER_MSAN_INSTRUMENTED_LIBCXX_PATH})
-
            message(FATAL_ERROR " \n"
                    " MemorySanitizer usage requires an instrumented build of libc++.\n"
                    " Set the SWIFTSHADER_MSAN_INSTRUMENTED_LIBCXX_PATH environment variable to the\n"

--- a/src/Reactor/CMakeLists.txt
+++ b/src/Reactor/CMakeLists.txt
@@ -64,6 +64,11 @@ if(REACTOR_EMIT_DEBUG_INFO)
    list(APPEND REACTOR_PRIVATE_LINK_LIBRARIES Boost::boost)
 endif(REACTOR_EMIT_DEBUG_INFO)

+# Enable instrumentation of Reactor routines for MemorySanitizer builds (LLVM backend).
+# TODO(b/155148722): Remove when unconditionally instrumenting for all build systems.
+if(REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION)
+    list(APPEND REACTOR_PUBLIC_COMPILE_DEFINITIONS "REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION")
+endif()

 # SubzeroReactor library

@@ -83,6 +88,8 @@ target_include_directories(ReactorSubzero
 )

 target_compile_definitions(ReactorSubzero
+    PUBLIC
+        ${REACTOR_PUBLIC_COMPILE_DEFINITIONS}
    PRIVATE
        ${REACTOR_PRIVATE_COMPILE_DEFINITIONS}
 )
@@ -118,6 +125,8 @@ target_include_directories(ReactorLLVM
 )

 target_compile_definitions(ReactorLLVM
+    PUBLIC
+        ${REACTOR_PUBLIC_COMPILE_DEFINITIONS}
    PRIVATE
        ${REACTOR_PRIVATE_COMPILE_DEFINITIONS}
 )

--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp
@@ -32,6 +32,7 @@ __pragma(warning(push))
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"

@@ -53,6 +54,53 @@ extern "C" signed __aeabi_idivmod();
 #	include "sanitizer/msan_interface.h"  // TODO(b/155148722): Remove when we no longer unpoison all writes.

 #	include <dlfcn.h>  // dlsym()
+
+// MemorySanitizer uses thread-local storage (TLS) data arrays for passing around
+// the 'shadow' values of function arguments and return values. The LLVM JIT can't
+// access TLS directly, but it calls __emutls_get_address() to obtain the address.
+// Typically, it would be passed a pointer to an __emutls_control structure with a
+// name starting with "__emutls_v." that represents the TLS. Both the address of
+// __emutls_get_address and the __emutls_v. structures are provided to the JIT by
+// the symbol resolver, which can be overridden.
+// We take advantage of this by substituting __emutls_get_address() with our own
+// implementation, namely rr::getTLSAddress(), and substituting the __emutls_v
+// variables with rr::MSanTLS enums. getTLSAddress() can then provide the address
+// of the real TLS variable corresponding to the enum, in statically compiled C++.
+
+// Forward declare the real TLS variables used by MemorySanitizer. These are
+// defined in llvm-project/compiler-rt/lib/msan/msan.cpp.
+extern __thread unsigned long long __msan_param_tls[];
+extern __thread unsigned long long __msan_retval_tls[];
+extern __thread unsigned long long __msan_va_arg_tls[];
+extern __thread unsigned long long __msan_va_arg_overflow_size_tls;
+
+namespace rr {
+
+enum class MSanTLS
+{
+	param = 1,            // __msan_param_tls
+	retval,               // __msan_retval_tls
+	va_arg,               // __msan_va_arg_tls
+	va_arg_overflow_size  // __msan_va_arg_overflow_size_tls
+};
+
+static void *getTLSAddress(void *control)  // Replacement for __emutls_get_address(); 'control' carries an MSanTLS value rather than a real pointer.
+{
+	auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
+	switch(tlsIndex)
+	{
+		// Map each MSanTLS value to the address of the corresponding real (__thread) MemorySanitizer variable.
+		case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
+		case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
+		case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
+		case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
+		default:
+			UNSUPPORTED("MemorySanitizer used an unrecognized TLS variable: %d", static_cast<int>(tlsIndex));  // Scoped enums are not promoted to int for "%d"; cast explicitly.
+			return nullptr;
+	}
+}
+
+}  // namespace rr
 #endif

 namespace {
@@ -111,6 +159,10 @@ JITGlobals *JITGlobals::get()
 		jitTargetMachineBuilder.setCPU(llvm::sys::getHostCPUName());
 #endif

+		// Reactor's MemorySanitizer support depends on intercepting __emutls_get_address calls.
+		ASSERT(!__has_feature(memory_sanitizer) || (jitTargetMachineBuilder.getOptions().ExplicitEmulatedTLS &&
+		                                            jitTargetMachineBuilder.getOptions().EmulatedTLS));
+
 		auto dataLayout = jitTargetMachineBuilder.getDefaultDataLayoutForTarget();
 		ASSERT_MSG(dataLayout, "JITTargetMachineBuilder::getDefaultDataLayoutForTarget() failed");

@@ -416,6 +468,8 @@ class ExternalSymbolGenerator : public llvm::orc::JITDylib::DefinitionGenerator
 			functions.try_emplace("coroutine_alloc_frame", reinterpret_cast<void *>(coroutine_alloc_frame));
 			functions.try_emplace("coroutine_free_frame", reinterpret_cast<void *>(coroutine_free_frame));

+			functions.try_emplace("memset", reinterpret_cast<void *>(memset));
+
 #ifdef __APPLE__
 			functions.try_emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
 #elif defined(__linux__)
@@ -446,6 +500,12 @@ class ExternalSymbolGenerator : public llvm::orc::JITDylib::DefinitionGenerator
 #endif
 #if __has_feature(memory_sanitizer)
 			functions.try_emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));  // TODO(b/155148722): Remove when we no longer unpoison all writes.
+
+			functions.try_emplace("emutls_get_address", reinterpret_cast<void *>(rr::getTLSAddress));
+			functions.try_emplace("emutls_v.__msan_retval_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval)));
+			functions.try_emplace("emutls_v.__msan_param_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
+			functions.try_emplace("emutls_v.__msan_va_arg_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg)));
+			functions.try_emplace("emutls_v.__msan_va_arg_overflow_size_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_overflow_size)));
 #endif
 		}
 	};
@@ -664,6 +724,13 @@ void JITBuilder::optimize(const rr::Config &cfg)

 	llvm::legacy::PassManager passManager;

+#if REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION  // Test the value, not definedness: Reactor.hpp defines this macro to 0 when the build system does not set it.
+	if(__has_feature(memory_sanitizer))
+	{
+		passManager.add(llvm::createMemorySanitizerLegacyPassPass());
+	}
+#endif
+
 	for(auto pass : cfg.getOptimization().getPasses())
 	{
 		switch(pass)

--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -490,6 +490,11 @@ static llvm::Function *createFunction(const char *name, llvm::Type *retTy, const
 	auto func = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, name, jit->module.get());
 	func->setDoesNotThrow();
 	func->setCallingConv(llvm::CallingConv::C);
+	if(__has_feature(memory_sanitizer))
+	{
+		func->addFnAttr(llvm::Attribute::SanitizeMemory);
+	}
+
 	return func;
 }


--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -52,6 +52,18 @@ int DebugPrintf(const char *format, ...);
 }
 #endif

+// A Clang extension to determine compiler features.
+// We use it to detect Sanitizer builds (e.g. -fsanitize=memory).
+#ifndef __has_feature
+#	define __has_feature(x) 0
+#endif
+
+// Whether Reactor routine instrumentation is enabled for MSan builds.
+// TODO(b/155148722): Remove when unconditionally instrumenting for all build systems.
+#if !defined REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
+#	define REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION 0
+#endif
+
 namespace rr {

 std::string BackendName();

--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -93,8 +93,24 @@ TEST(ReactorUnitTests, Uninitialized)

 	auto routine = function("one");

-	int result = routine();
-	EXPECT_EQ(result, result);  // Anything is fine, just don't crash
+	if(!__has_feature(memory_sanitizer) || !REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION)
+	{
+		int result = routine();
+		EXPECT_EQ(result, result);  // Anything is fine, just don't crash
+	}
+	else
+	{
+		// Optimizations may turn the conditional If() in the Reactor code
+		// into a conditional move or arithmetic operations, which would not
+		// trigger a MemorySanitizer error. However, in that case the equals
+		// operator below should trigger it before the abort is reached.
+		EXPECT_DEATH(
+		    {
+			    int result = routine();
+			    if(result == 0) abort();
+		    },
+		    "MemorySanitizer: use-of-uninitialized-value");
+	}
 }

 TEST(ReactorUnitTests, Unreachable)

--- a/third_party/llvm-10.0/CMakeLists.txt
+++ b/third_party/llvm-10.0/CMakeLists.txt
@@ -688,6 +688,7 @@ set(LLVM_LIST
    ${LLVM_DIR}/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    ${LLVM_DIR}/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
    ${LLVM_DIR}/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
+    ${LLVM_DIR}/lib/Transforms/Instrumentation/MemorySanitizer.cpp
    ${LLVM_DIR}/lib/Transforms/IPO/ArgumentPromotion.cpp
    ${LLVM_DIR}/lib/Transforms/IPO/Attributor.cpp
    ${LLVM_DIR}/lib/Transforms/IPO/BarrierNoopPass.cpp