Commit 24c49ddd by Alexis Hetu Committed by Alexis Hétu

R11G11B10F support

- Moved the R11G11B10F conversion code from the Blitter to ShaderCore - Used the conversion code in PixelRoutine where appropriate to make R11G11B10F work as a renderable format - Added the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT and VK_FORMAT_FEATURE_BLIT_DST_BIT to the VK_FORMAT_B10G11R11_UFLOAT_PACK32 format. This allows ANGLE to expose GL_EXT_color_buffer_float (and GL_EXT_color_buffer_half_float, which depends on GL_EXT_color_buffer_float in ANGLE, due to a driver issue). Bug: b/146223877 Tests: dEQP-VK.*b10g11r11* Change-Id: I04ad29f7b7d497705186ae290a05868abfc13c42 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39550 Presubmit-Ready: Alexis Hétu <sugoi@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com> Reviewed-by: Chris Forbes <chrisforbes@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Alexis Hétu <sugoi@google.com>
parent fbbfeb70
...@@ -398,19 +398,7 @@ Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state) ...@@ -398,19 +398,7 @@ Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
c.x = Float(*Pointer<Half>(element)); c.x = Float(*Pointer<Half>(element));
break; break;
case VK_FORMAT_B10G11R11_UFLOAT_PACK32: case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
// 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa. c = r11g11b10Unpack(*Pointer<UInt>(element));
// Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
// copy/pasting the bits so that the exponent bits and top mantissa bits are aligned to the half format.
// In this case, we have:
// B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R
// 1st Short: |xxxxxxxxxx---------------------|
// 2nd Short: |xxxx---------------------xxxxxx|
// 3rd Short: |--------------------xxxxxxxxxxxx|
// These memory reads overlap, but each of them contains an entire channel, so we can read this without
// any int -> short conversion.
c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4)));
c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1)));
c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1)));
break; break;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
// This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B. // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
...@@ -621,16 +609,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) ...@@ -621,16 +609,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
break; break;
case VK_FORMAT_B10G11R11_UFLOAT_PACK32: case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
{ {
// 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa. UInt rgb = r11g11b10Pack(c);
// Since the 16-bit half-precision float format also has a 5 bit exponent, we can extract these minifloats from them.
// FIXME(b/138944025): Handle negative values, Inf, and NaN.
// FIXME(b/138944025): Perform rounding before truncating the mantissa.
UInt r = (UInt(As<UShort>(Half(c.x))) & 0x00007FF0) >> 4;
UInt g = (UInt(As<UShort>(Half(c.y))) & 0x00007FF0) << 7;
UInt b = (UInt(As<UShort>(Half(c.z))) & 0x00007FE0) << 17;
UInt rgb = r | g | b;
UInt old = *Pointer<UInt>(element); UInt old = *Pointer<UInt>(element);
......
...@@ -244,6 +244,7 @@ void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4 ...@@ -244,6 +244,7 @@ void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4
case VK_FORMAT_R16_SFLOAT: case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SFLOAT: case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT:
...@@ -329,6 +330,7 @@ void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) ...@@ -329,6 +330,7 @@ void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
case VK_FORMAT_R16_SFLOAT: case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SFLOAT: case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
......
...@@ -546,27 +546,72 @@ void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) ...@@ -546,27 +546,72 @@ void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
} }
} }
// Converts half-precision bit patterns (sign 0x8000, exponent 0x7C00, mantissa 0x03FF)
// into single-precision bit patterns, one per lane.
// Each row of this hunk shows the old (UInt4) and the new (SIMD::UInt) spelling side by side.
UInt4 halfToFloatBits(UInt4 halfBits) SIMD::UInt halfToFloatBits(SIMD::UInt halfBits)
{ {
// 126 << 23 is the bit pattern of 0.5f; it is used below to rebuild denormals.
auto magic = UInt4(126 << 23); auto magic = SIMD::UInt(126 << 23);
auto sign16 = halfBits & UInt4(0x8000); auto sign16 = halfBits & SIMD::UInt(0x03FF);
SIMD::UInt floatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits)
{
static const uint32_t mask_sign = 0x80000000u;
static const uint32_t mask_round = ~0xfffu;
static const uint32_t c_f32infty = 255 << 23;
static const uint32_t c_magic = 15 << 23;
static const uint32_t c_nanbit = 0x200;
static const uint32_t c_infty_as_fp16 = 0x7c00;
static const uint32_t c_clamp = (31 << 23) - 0x1000;
SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits;
SIMD::UInt absf = floatBits ^ justsign;
SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf);
// Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
// instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) &
((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) | SIMD::UInt(c_infty_as_fp16)));
return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
}
sw::SIMD::Float r11g11b10Unpack(UInt r11g11b10bits)
{
// 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
// Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
// copy/pasting the bits so the the exponent bits and top mantissa bits are aligned to the half format.
// In this case, we have:
// MSB | B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R | LSB
SIMD::UInt halfBits;
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x000007FFu)) << 4, 0);
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x003FF800u)) >> 7, 1);
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0xFFC00000u)) >> 17, 2);
halfBits = Insert(halfBits, UInt(0x00003C00u), 3);
return As<sw::SIMD::Float>(halfToFloatBits(halfBits));
}
UInt r11g11b10Pack(sw::SIMD::Float &value)
{
SIMD::UInt halfBits = floatToHalfBits(As<SIMD::UInt>(value), true) &
SIMD::UInt(0x7FF00000, 0x7FF00000, 0x7FE00000, 0);
return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1);
}
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints) rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
{ {
......
...@@ -190,7 +190,10 @@ void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); ...@@ -190,7 +190,10 @@ void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
UInt4 halfToFloatBits(UInt4 halfBits); sw::SIMD::UInt halfToFloatBits(sw::SIMD::UInt halfBits);
sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits);
sw::SIMD::Float r11g11b10Unpack(UInt r11g11b10bits);
UInt r11g11b10Pack(sw::SIMD::Float &value);
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints); rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints);
......
...@@ -1087,8 +1087,6 @@ private: ...@@ -1087,8 +1087,6 @@ private:
// Helper as we often need to take dot products as part of doing other things. // Helper as we often need to take dot products as part of doing other things.
SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const; SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const;
// Splits x into a floating-point significand in the range [0.5, 1.0) // Splits x into a floating-point significand in the range [0.5, 1.0)
// and an integral exponent of two, such that: // and an integral exponent of two, such that:
// x = significand * 2^exponent // x = significand * 2^exponent
......
...@@ -544,30 +544,6 @@ SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, Gen ...@@ -544,30 +544,6 @@ SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, Gen
return d; return d;
} }
SIMD::UInt SpirvShader::FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const
{
static const uint32_t mask_sign = 0x80000000u;
static const uint32_t mask_round = ~0xfffu;
static const uint32_t c_f32infty = 255 << 23;
static const uint32_t c_magic = 15 << 23;
static const uint32_t c_nanbit = 0x200;
static const uint32_t c_infty_as_fp16 = 0x7c00;
static const uint32_t c_clamp = (31 << 23) - 0x1000;
SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits;
SIMD::UInt absf = floatBits ^ justsign;
SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf);
// Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
// instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) |
SIMD::UInt(c_infty_as_fp16)));
return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
}
std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
{ {
// Assumes IEEE 754 // Assumes IEEE 754
......
...@@ -431,7 +431,7 @@ SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, ...@@ -431,7 +431,7 @@ SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn,
case GLSLstd450PackHalf2x16: case GLSLstd450PackHalf2x16:
{ {
auto val = GenericValue(this, state, insn.word(5)); auto val = GenericValue(this, state, insn.word(5));
dst.move(0, FloatToHalfBits(val.UInt(0), false) | FloatToHalfBits(val.UInt(1), true)); dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
break; break;
} }
case GLSLstd450UnpackSnorm4x8: case GLSLstd450UnpackSnorm4x8:
......
...@@ -900,8 +900,8 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState ...@@ -900,8 +900,8 @@ SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState
break; break;
case spv::ImageFormatRgba16f: case spv::ImageFormatRgba16f:
texelSize = 8; texelSize = 8;
packed[0] = FloatToHalfBits(texel.UInt(0), false) | FloatToHalfBits(texel.UInt(1), true); packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
packed[1] = FloatToHalfBits(texel.UInt(2), false) | FloatToHalfBits(texel.UInt(3), true); packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
numPackedElements = 2; numPackedElements = 2;
break; break;
case spv::ImageFormatRgba16i: case spv::ImageFormatRgba16i:
......
...@@ -630,6 +630,7 @@ void PhysicalDevice::getFormatProperties(Format format, VkFormatProperties* pFor ...@@ -630,6 +630,7 @@ void PhysicalDevice::getFormatProperties(Format format, VkFormatProperties* pFor
case VK_FORMAT_R32G32B32A32_UINT: case VK_FORMAT_R32G32B32A32_UINT:
case VK_FORMAT_R32G32B32A32_SINT: case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
pFormatProperties->optimalTilingFeatures |= pFormatProperties->optimalTilingFeatures |=
VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT; VK_FORMAT_FEATURE_BLIT_DST_BIT;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment