Emulate rounding to the nearest integer.

This implementation works by adding a large value which makes the fractional part no longer fit in the mantissa, and then subtracting it again. It matches nearbyint() for values up to 2^22 in magnitude, positive or negative. The 'magic number' of 0x00C00000 is derived by first observing that the integer values 0x00800000 to 0x00FFFFFF can be represented exactly in single-precision floating-point format but can't have a fractional part because there are 24 mantissa bits (the top one being hidden). So adding 0x00800000 to, for example, 0.6 forces the hardware to round the sum to the nearest representable integer, which is 0x00800001. Subtracting 0x00800000 again gives us 1.0. This works for rounding any value from 0.0 to 0x007FFFFF. However, it doesn't work for negative values, because the intermediate result would be less than 0x00800000 and thus leave some room for fractional bits in the mantissa. The solution is to use 0x00C00000 instead so the range gets split between positive and negative values. Note that values greater than the upper bound will still round to integers, but not the nearest ones, while values less than the lower bound can result in fractional values. Bug b/37495485 Change-Id: I1aed2d831269fcf21b8d3313856a9b9756a532ef Reviewed-on: https://swiftshader-review.googlesource.com/9488 Reviewed-by: Nicolas Capens <capn@google.com> Reviewed-by: Corentin Wallez <cwallez@google.com> Tested-by: Nicolas Capens <capn@google.com>

Emulate rounding to the nearest integer.
f7b75889 · Nicolas Capens · ac6e751f · f7b75889
Commit f7b75889 authored Apr 26, 2017 by Nicolas Capens
Hide whitespace changes
Inline Side-by-side

Showing with 50 additions and 15 deletions

SubzeroReactor.cpp src/Reactor/SubzeroReactor.cpp +50 -15

No files found.
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -77,6 +77,7 @@ namespace
 	class CPUID
 	{
 	public:
+		const static bool ARM;
 		const static bool SSE4_1;

 	private:
@@ -96,6 +97,17 @@ namespace
 			#endif
 		}

+		static bool detectARM()
+		{
+			#if defined(__arm__)
+				return true;
+			#elif defined(__i386__) || defined(__x86_64__)
+				return false;
+			#else
+				#error "Unknown architecture"
+			#endif
+		}
+
 		static bool detectSSE4_1()
 		{
 			#if defined(__i386__) || defined(__x86_64__)
@@ -108,7 +120,9 @@ namespace
 		}
 	};

+	const bool CPUID::ARM = CPUID::detectARM();
 	const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
+	const bool emulateIntrinsics = CPUID::ARM;
 }

 namespace sw
@@ -4146,14 +4160,22 @@ namespace sw

 	RValue<Int> RoundInt(RValue<Float> cast)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
-		nearbyint->addArg(cast.value);
-		::basicBlock->appendInst(nearbyint);
+		if(emulateIntrinsics)
+		{
+			// Push the fractional part off the mantissa. Accurate up to +/-2^22.
+			return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
+		}
+		else
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+			nearbyint->addArg(cast.value);
+			::basicBlock->appendInst(nearbyint);

-		return RValue<Int>(V(result));
+			return RValue<Int>(V(result));
+		}
 	}

 	Type *Int::getType()
@@ -5301,14 +5323,22 @@ namespace sw

 	RValue<Int4> RoundInt(RValue<Float4> cast)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
-		nearbyint->addArg(cast.value);
-		::basicBlock->appendInst(nearbyint);
+		if(emulateIntrinsics)
+		{
+			// Push the fractional part off the mantissa. Accurate up to +/-2^22.
+			return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
+		}
+		else
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+			nearbyint->addArg(cast.value);
+			::basicBlock->appendInst(nearbyint);

-		return RValue<Int4>(V(result));
+			return RValue<Int4>(V(result));
+		}
 	}

 	RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
@@ -6247,7 +6277,12 @@ namespace sw

 	RValue<Float4> Round(RValue<Float4> x)
 	{
-		if(CPUID::SSE4_1)
+		if(emulateIntrinsics)
+		{
+			// Push the fractional part off the mantissa. Accurate up to +/-2^22.
+			return (x + Float4(0x00C00000)) - Float4(0x00C00000);
+		}
+		else if(CPUID::SSE4_1)
 		{
 			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
 			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};