SpirvShader: Replace hand-rolled bitcount with LLVM intrinsic

Moved the hand-rolled implementation to Subzero. As we've started exposing bit intrinsics, we might as well fix the TODOs. Bug: b/126873455 Tests: dEQP-VK.glsl.builtin.function.integer.bitcount.* Change-Id: Ic37dfd5d73187f2b3afa444abfd9e22439c871b1 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/28792 Presubmit-Ready: Ben Clayton <bclayton@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>

SpirvShader: Replace hand-rolled bitcount with LLVM intrinsic
0179e5eb · Ben Clayton · a786c4a2 · 0179e5eb · 0179e5eb · 0179e5eb
Commit 0179e5eb authored Apr 10, 2019 by Ben Clayton
Showing with 19 additions and 15 deletions

SpirvShader.cpp src/Pipeline/SpirvShader.cpp +1 -15

LLVMReactor.cpp src/Reactor/LLVMReactor.cpp +6 -0

Reactor.hpp src/Reactor/Reactor.hpp +1 -0

SubzeroReactor.cpp src/Reactor/SubzeroReactor.cpp +11 -0

No files found.
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -2771,25 +2771,11 @@ namespace sw
 				break;
 			}
 			case spv::OpBitReverse:
-			{
 				dst.move(i, BitReverse(src.UInt(i)));
 				break;
-			}
 			case spv::OpBitCount:
-			{
+				dst.move(i, BitCount(src.UInt(i)));
-				// TODO: Add an intrinsic to reactor. Even if there isn't a
-				// single vector instruction, there may be target-dependent
-				// ways to make this faster.
-				// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-				auto v = src.UInt(i);
-				SIMD::UInt c = v - ((v >> 1) & SIMD::UInt(0x55555555));
-				c = ((c >> 2) & SIMD::UInt(0x33333333)) + (c & SIMD::UInt(0x33333333));
-				c = ((c >> 4) + c) & SIMD::UInt(0x0F0F0F0F);
-				c = ((c >> 8) + c) & SIMD::UInt(0x00FF00FF);
-				c = ((c >> 16) + c) & SIMD::UInt(0x0000FFFF);
-				dst.move(i, c);
 				break;
-			}
 			case spv::OpSNegate:
 				dst.move(i, -src.Int(i));
 				break;

--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -3220,6 +3220,12 @@ namespace rr
 		return RValue<UInt4>(V(::builder->CreateCall(func, { V(v.value) })));
 	}
+	RValue<UInt4> BitCount(RValue<UInt4> v) // Population count (number of set bits) of each element of v, via LLVM's ctpop intrinsic.
+	{
+		auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::ctpop, { T(UInt4::getType()) } ); // Get the ctpop declaration overloaded on the UInt4 vector type.
+		return RValue<UInt4>(V(::builder->CreateCall(func, { V(v.value) }))); // Emit the call with v as the only argument and wrap the result.
+	}
 	RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
 	{
 #if REACTOR_LLVM_VERSION < 7

--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2233,6 +2233,7 @@ namespace rr
 	// Bit Manipulation functions.
 	// TODO: Currently unimplemented for Subzero.
 	RValue<UInt4> BitReverse(RValue<UInt4> x);
+	RValue<UInt4> BitCount(RValue<UInt4> x); // Counts the set bits of x. NOTE(review): SubzeroReactor.cpp does implement this one, so the TODO above does not apply to it.
 	// Count leading zeros.
 	// Returns 32 when: isZeroUndef && x == 0.

--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3371,6 +3371,17 @@ namespace rr
 		return v;
 	}
+	RValue<UInt4> BitCount(RValue<UInt4> x) // Software population count of x using shifts, masks and adds (assumes 32-bit elements, as the masks imply).
+	{
+		// Parallel bit count, see https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+		UInt4 v = x - ((x >> 1) & UInt4(0x55555555)); // Each 2-bit field now holds the count of its own two bits.
+		v = ((v >> 2) & UInt4(0x33333333)) + (v & UInt4(0x33333333)); // Add adjacent 2-bit counts into 4-bit fields.
+		v = ((v >> 4) + v) & UInt4(0x0F0F0F0F); // Add adjacent 4-bit counts into bytes.
+		v = ((v >> 8) + v) & UInt4(0x00FF00FF); // Add adjacent byte counts into 16-bit halves.
+		v = ((v >> 16) + v) & UInt4(0x0000FFFF); // Add the two halves; the total is left in the low 16 bits.
+		return v;
+	}
 	Type *Float4::getType()
 	{
 		return T(Ice::IceType_v4f32);