Commit f7b75889 by Nicolas Capens

Emulate rounding to the nearest integer.

This implementation works by adding a large value which makes the fractional part no longer fit in the mantissa, and then subtracting it again. It matches nearbyint() for values of magnitude up to 2^22, positive or negative. The 'magic number' of 0x00C00000 is derived by first observing that the integer values 0x00800000 to 0x00FFFFFF can be represented exactly in single-precision floating-point format but can't have a fractional part, because there are only 24 mantissa bits (the top one being hidden). So when adding 0x00800000 to, for example, 0.6, the hardware is forced to round the sum to the nearest representable integer, which is 0x00800001. Subtracting 0x00800000 again gives us 1.0. This works for rounding any value from 0.0 to 0x007FFFFF. However, it doesn't work for negative values, because the intermediate result would be less than 0x00800000 and thus leave some room for fractional bits in the mantissa. The solution is to use 0x00C00000 instead, so that the range gets split between positive and negative values. Note that values greater than the upper bound will still round to integers, but not the nearest ones, while values less than the lower bound can result in fractional values.

Bug b/37495485
Change-Id: I1aed2d831269fcf21b8d3313856a9b9756a532ef
Reviewed-on: https://swiftshader-review.googlesource.com/9488
Reviewed-by: Nicolas Capens <capn@google.com>
Reviewed-by: Corentin Wallez <cwallez@google.com>
Tested-by: Nicolas Capens <capn@google.com>
parent ac6e751f
......@@ -77,6 +77,7 @@ namespace
class CPUID
{
public:
const static bool ARM;
const static bool SSE4_1;
private:
......@@ -96,6 +97,17 @@ namespace
#endif
}
// Reports whether this build targets 32-bit ARM.
// The answer is fixed at compile time from the compiler's predefined
// architecture macros; any architecture other than ARM or x86 is rejected
// with a compile error.
static bool detectARM()
{
#if defined(__arm__)
	const bool targetIsARM = true;
#elif defined(__i386__) || defined(__x86_64__)
	const bool targetIsARM = false;
#else
	#error "Unknown architecture"
#endif

	return targetIsARM;
}
static bool detectSSE4_1()
{
#if defined(__i386__) || defined(__x86_64__)
......@@ -108,7 +120,9 @@ namespace
}
};
// Definitions of the CPUID feature flags, each initialized from its detect function.
const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
// When true, rounding operations are emulated with plain floating-point
// arithmetic instead of emitting intrinsics. Currently this simply copies
// CPUID::ARM, which is defined above it.
const bool emulateIntrinsics = CPUID::ARM;
}
namespace sw
......@@ -4146,14 +4160,22 @@ namespace sw
// Converts a scalar float to an integer, rounding to the nearest integer.
//
// When emulateIntrinsics is set, the rounding is done with plain arithmetic:
// adding and then subtracting 0x00C00000 pushes the fractional part off the
// single-precision mantissa, so the addition itself rounds the value. This
// is accurate for inputs of magnitude up to 2^22; larger inputs are not
// guaranteed to round to the nearest integer.
//
// Otherwise a Nearbyint intrinsic call is emitted. The intrinsic must only be
// emitted on this path: emitting it unconditionally would defeat the
// emulation and append a redundant instruction to the basic block.
RValue<Int> RoundInt(RValue<Float> cast)
{
	if(emulateIntrinsics)
	{
		// Push the fractional part off the mantissa. Accurate up to +/-2^22.
		return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
	}

	Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
	auto target = ::context->getConstantUndef(Ice::IceType_i32);
	auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
	nearbyint->addArg(cast.value);
	::basicBlock->appendInst(nearbyint);

	return RValue<Int>(V(result));
}
Type *Int::getType()
......@@ -5301,14 +5323,22 @@ namespace sw
// Converts each of the four float lanes to an integer, rounding to the
// nearest integer.
//
// When emulateIntrinsics is set, the rounding is done with plain arithmetic:
// adding and then subtracting 0x00C00000 pushes the fractional part off the
// single-precision mantissa, so the addition itself rounds the value. This
// is accurate for inputs of magnitude up to 2^22; larger inputs are not
// guaranteed to round to the nearest integer.
//
// Otherwise a Nearbyint intrinsic call is emitted. The intrinsic must only be
// emitted on this path: emitting it unconditionally would defeat the
// emulation and append a redundant instruction to the basic block.
RValue<Int4> RoundInt(RValue<Float4> cast)
{
	if(emulateIntrinsics)
	{
		// Push the fractional part off the mantissa. Accurate up to +/-2^22.
		return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
	}

	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
	auto target = ::context->getConstantUndef(Ice::IceType_i32);
	auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
	nearbyint->addArg(cast.value);
	::basicBlock->appendInst(nearbyint);

	return RValue<Int4>(V(result));
}
RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
......@@ -6247,7 +6277,12 @@ namespace sw
RValue<Float4> Round(RValue<Float4> x)
{
if(CPUID::SSE4_1)
if(emulateIntrinsics)
{
// Push the fractional part off the mantissa. Accurate up to +/-2^22.
return (x + Float4(0x00C00000)) - Float4(0x00C00000);
}
else if(CPUID::SSE4_1)
{
Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment