Refactor emulated vector type handling for LLVM

- Add clarifying comments.
- Add self-explanatory helper functions.
- Fix typeSize to handle all Reactor types.
- Use uintptr_t base type for emulated type enum. Prevents 0x########00000000 from being interpreted as emulated type.

Bug b/126028338

Change-Id: Ib38cf9b59f58c0f6046f9c84a6152849791bb4ed
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/26648
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>

Refactor emulated vector type handling for LLVM
1a5c3b91 · Nicolas Capens · Nicolas Capens · bb8c8e21 · 1a5c3b91
Commit 1a5c3b91 authored Mar 08, 2019 by Nicolas Capens Committed by Nicolas Capens Mar 11, 2019
Show whitespace changes
Inline Side-by-side

Showing with 66 additions and 48 deletions

LLVMReactor.cpp src/Reactor/LLVMReactor.cpp +66 -48

No files found.
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -710,24 +710,36 @@ namespace rr
 	Optimization optimization[10] = {InstructionCombining, Disabled};
-	enum EmulatedType
+	// The abstract Type* types are implemented as LLVM types, except that
-	{
+	// 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
+	// and VFP in ARM, and eliminate the overhead of converting them to explicit
+	// 128-bit ones. LLVM types are pointers, so we can represent emulated types
+	// as abstract pointers with small enum values.
+	enum InternalType : uintptr_t
+	{
+		// Emulated types:
 		Type_v2i32,
 		Type_v4i16,
 		Type_v2i16,
 		Type_v8i8,
 		Type_v4i8,
 		Type_v2f32,
-		EmulatedTypeCount
+		EmulatedTypeCount,
+		// Returned by asInternalType() to indicate that the abstract Type*
+		// should be interpreted as LLVM type pointer:
+		Type_LLVM
 	};
-	llvm::Type *T(Type *t)
+	inline InternalType asInternalType(Type *type)
 	{
-		uintptr_t type = reinterpret_cast<uintptr_t>(t);
+		InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
-		if(type < EmulatedTypeCount)
+		return (t < EmulatedTypeCount) ? t : Type_LLVM;
+	}
+	llvm::Type *T(Type *t)
 	{
 		// Use 128-bit vectors to implement logically shorter ones.
-			switch(type)
+		switch(asInternalType(t))
 		{
 		case Type_v2i32: return T(Int4::getType());
 		case Type_v4i16: return T(Short8::getType());
@@ -735,19 +747,17 @@ namespace rr
 		case Type_v8i8:  return T(Byte16::getType());
 		case Type_v4i8:  return T(Byte16::getType());
 		case Type_v2f32: return T(Float4::getType());
-			default: assert(false);
+		case Type_LLVM:  return reinterpret_cast<llvm::Type*>(t);
+		default: assert(false); return nullptr;
 		}
 	}
-		return reinterpret_cast<llvm::Type*>(t);
-	}
 	inline Type *T(llvm::Type *t)
 	{
 		return reinterpret_cast<Type*>(t);
 	}
-	Type *T(EmulatedType t)
+	Type *T(InternalType t)
 	{
 		return reinterpret_cast<Type*>(t);
 	}
@@ -779,10 +789,7 @@ namespace rr
 	static size_t typeSize(Type *type)
 	{
-		uintptr_t t = reinterpret_cast<uintptr_t>(type);
+		switch(asInternalType(type))
-		if(t < EmulatedTypeCount)
-		{
-			switch(t)
 		{
 		case Type_v2i32: return 8;
 		case Type_v4i16: return 8;
@@ -790,19 +797,34 @@ namespace rr
 		case Type_v8i8:  return 8;
 		case Type_v4i8:  return 4;
 		case Type_v2f32: return 8;
-			default: assert(false);
+		case Type_LLVM:
-			}
+			{
+				llvm::Type *t = T(type);
+				if(t->isPointerTy())
+				{
+					return sizeof(void*);
 				}
-		return T(type)->getPrimitiveSizeInBits() / 8;
+				// At this point we should only have LLVM 'primitive' types.
+				unsigned int bits = t->getPrimitiveSizeInBits();
+				assert(bits != 0);
+				// TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
+				// but are typically stored as one byte. The DataLayout structure should
+				// be used here and many other places if this assumption fails.
+				return (bits + 7) / 8;
+			}
+			break;
+		default:
+			assert(false);
+			return 0;
+		}
 	}
 	static unsigned int elementCount(Type *type)
 	{
-		uintptr_t t = reinterpret_cast<uintptr_t>(type);
+		switch(asInternalType(type))
-		if(t < EmulatedTypeCount)
-		{
-			switch(t)
 		{
 		case Type_v2i32: return 2;
 		case Type_v4i16: return 4;
@@ -810,13 +832,11 @@ namespace rr
 		case Type_v8i8:  return 8;
 		case Type_v4i8:  return 4;
 		case Type_v2f32: return 2;
-			default: assert(false);
+		case Type_LLVM:  return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
+		default: assert(false); return 0;
 		}
 	}
-		return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
-	}
 	Nucleus::Nucleus()
 	{
 		::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
@@ -1171,10 +1191,7 @@ namespace rr
 	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
 	{
-		uintptr_t t = reinterpret_cast<uintptr_t>(type);
+		switch(asInternalType(type))
-		if(t < EmulatedTypeCount)
-		{
-			switch(t)
 		{
 		case Type_v2i32:
 		case Type_v4i16:
@@ -1196,22 +1213,18 @@ namespace rr
 				Value *v = createInsertElement(u, i, 0);
 				return createBitCast(v, type);
 			}
-				break;
+			// Fallthrough to non-emulated case.
-			default:
+		case Type_LLVM:
-				assert(false);
-			}
-		}
 			assert(V(ptr)->getType()->getContainedType(0) == T(type));
 			return V(::builder->Insert(new llvm::LoadInst(V(ptr), "", isVolatile, alignment)));
+		default:
+			assert(false); return nullptr;
+		}
 	}
 	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
 	{
-		uintptr_t t = reinterpret_cast<uintptr_t>(type);
+		switch(asInternalType(type))
-		if(t < EmulatedTypeCount)
-		{
-			switch(t)
 		{
 		case Type_v2i32:
 		case Type_v4i16:
@@ -1233,15 +1246,14 @@ namespace rr
 					Int::getType(), isVolatile, alignment);
 				return value;
 			}
-				break;
+			// Fallthrough to non-emulated case.
-			default:
+		case Type_LLVM:
-				assert(false);
-			}
-		}
 			assert(V(ptr)->getType()->getContainedType(0) == T(type));
 			::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
 			return value;
+		default:
+			assert(false); return nullptr;
+		}
 	}
 	Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
@@ -1268,15 +1280,21 @@ namespace rr
 				createSExt(index, Long::getType());
 		}
-		if (reinterpret_cast<uintptr_t>(type) >= EmulatedTypeCount)
+		// For non-emulated types we can rely on LLVM's GEP to calculate the
+		// effective address correctly.
+		if(asInternalType(type) == Type_LLVM)
 		{
 			return V(::builder->CreateGEP(V(ptr), V(index)));
 		}
+		// For emulated types we have to multiply the index by the intended
+		// type size ourselves to obtain the byte offset.
 		index = (sizeof(void*) == 8) ?
 			createMul(index, createConstantLong((int64_t)typeSize(type))) :
 			createMul(index, createConstantInt((int)typeSize(type)));
+		// Cast to a byte pointer, apply the byte offset, and cast back to the
+		// original pointer type.
 		return createBitCast(
 			V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
 			T(llvm::PointerType::get(T(type), 0)));