Commit d354695d by Chris Forbes

Implement fp16 texture formats

TODO: figure out what is happening with linear filtering. Possibly pre-existing breakage -- if we enable filtering support for R32_SFLOAT, that fails in the same way.

Test: dEQP-VK.texture.*
Test: dEQP-VK.image.*
Test: dEQP-VK.pipeline.*
Change-Id: Ia461418d772eb5aceb101b84eaa239b0c0bce2c0
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30288
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent b5a4546c
...@@ -195,11 +195,14 @@ namespace sw ...@@ -195,11 +195,14 @@ namespace sw
if(componentCount < 4) c.w = Float4(1.0f); if(componentCount < 4) c.w = Float4(1.0f);
break; break;
case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R16_SFLOAT:
c.y = Float4(0.0f); c.y = Float4(0.0f);
case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R16G16_SFLOAT:
c.z = Float4(0.0f); c.z = Float4(0.0f);
c.w = Float4(1.0f); c.w = Float4(1.0f);
case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
break; break;
default: default:
ASSERT(false); ASSERT(false);
...@@ -1830,8 +1833,60 @@ namespace sw ...@@ -1830,8 +1833,60 @@ namespace sw
int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0; int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
if (has16bitTextureComponents())
{
switch (textureComponentCount())
{
case 4:
{
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1));
c.z = As<Float4>(halfToFloatBits(t2));
c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w);
break;
}
case 2:
{
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
// FIXME: shuffles
c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1));
c.z = As<Float4>(halfToFloatBits(t2));
c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w);
break;
}
case 1:
{
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
break;
}
default:
UNIMPLEMENTED("fp16 sampling %d components", textureComponentCount());
}
}
else
{
// Read texels // Read texels
switch(textureComponentCount()) switch (textureComponentCount())
{ {
case 4: case 4:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
...@@ -1867,6 +1922,7 @@ namespace sw ...@@ -1867,6 +1922,7 @@ namespace sw
default: default:
ASSERT(false); ASSERT(false);
} }
}
if(state.compare != COMPARE_BYPASS) if(state.compare != COMPARE_BYPASS)
{ {
......
...@@ -559,4 +559,17 @@ namespace sw ...@@ -559,4 +559,17 @@ namespace sw
case 4: transpose4x4(row0, row1, row2, row3); break; case 4: transpose4x4(row0, row1, row2, row3); break;
} }
} }
// Expands four half-precision bit patterns (one per lane) into the bit
// patterns of the corresponding single-precision values.
//
// halfBits: per-lane input bits. The low 15 bits are treated as the
//           exponent/mantissa field; whatever remains after removing them is
//           shifted up by 16 into the result.
//           NOTE(review): the callers visible in this change pass values whose
//           upper 16 bits are already zero -- confirm for any new caller.
// Returns the per-lane 32-bit float bit patterns as a UInt4.
UInt4 halfToFloatBits(UInt4 halfBits)
{
	static const uint32_t kExpMantMask = 0x7FFF;              // low 15 bits: exponent + mantissa
	static const uint32_t kRebiasScale = (254 - 15) << 23;    // float bit pattern used as the exponent-adjust multiplier
	static const uint32_t kLargestFinite = 0x7BFF;            // anything above this was Inf/NaN in the input
	static const uint32_t kFloatExpAllOnes = 255 << 23;       // float exponent field fully set

	UInt4 magnitude = halfBits & UInt4(kExpMantMask);

	// Move exponent/mantissa into float position, then fix up the exponent
	// bias with a floating-point multiply.
	UInt4 rebiased = As<UInt4>(As<Float4>(magnitude << 13) * As<Float4>(UInt4(kRebiasScale)));

	// Everything that is not exponent/mantissa (i.e. the sign) goes to the top.
	UInt4 signBits = (halfBits ^ UInt4(magnitude)) << 16;

	// Lanes that held Inf/NaN get the float exponent forced to all ones.
	UInt4 infNanBits = CmpNLE(As<UInt4>(magnitude), UInt4(kLargestFinite)) & UInt4(kFloatExpAllOnes);

	return rebiased | signBits | infNanBits;
}
} }
...@@ -90,6 +90,8 @@ namespace sw ...@@ -90,6 +90,8 @@ namespace sw
void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
UInt4 halfToFloatBits(UInt4 halfBits);
} }
#endif // sw_ShaderCore_hpp #endif // sw_ShaderCore_hpp
...@@ -3818,8 +3818,8 @@ namespace sw ...@@ -3818,8 +3818,8 @@ namespace sw
case GLSLstd450UnpackHalf2x16: case GLSLstd450UnpackHalf2x16:
{ {
auto val = GenericValue(this, routine, insn.word(5)); auto val = GenericValue(this, routine, insn.word(5));
dst.move(0, HalfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF))); dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
dst.move(1, HalfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16)); dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
break; break;
} }
case GLSLstd450Fma: case GLSLstd450Fma:
...@@ -4325,19 +4325,6 @@ namespace sw ...@@ -4325,19 +4325,6 @@ namespace sw
return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16); return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
} }
// Converts per-lane half-precision bit patterns into single-precision bit
// patterns. The low 15 bits of each lane are used as exponent/mantissa; the
// remaining bits of the lane are shifted up by 16 into the result.
SIMD::UInt SpirvShader::HalfToFloatBits(SIMD::UInt halfBits) const
{
	static const uint32_t expMantMask = 0x7FFF;
	static const uint32_t rebiasFactor = (254 - 15) << 23;
	static const uint32_t maxFiniteBits = 0x7BFF;
	static const uint32_t expAllOnes = 255 << 23;

	SIMD::UInt absBits = halfBits & SIMD::UInt(expMantMask);

	// Exponent/mantissa shifted into float position and scaled to adjust the exponent.
	SIMD::UInt scaled = As<SIMD::UInt>(As<SIMD::Float>(absBits << 13) * As<SIMD::Float>(SIMD::UInt(rebiasFactor)));

	// Bits outside the exponent/mantissa field, moved to the upper half.
	SIMD::UInt upper = (halfBits ^ SIMD::UInt(absBits)) << 16;

	// Force the float exponent to all ones where the input exceeded the largest finite pattern.
	SIMD::UInt special = CmpNLE(As<SIMD::UInt>(absBits), SIMD::UInt(maxFiniteBits)) & SIMD::UInt(expAllOnes);

	return scaled | upper | special;
}
std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
{ {
// Assumes IEEE 754 // Assumes IEEE 754
...@@ -4868,10 +4855,10 @@ namespace sw ...@@ -4868,10 +4855,10 @@ namespace sw
dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff)); dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
break; break;
case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT:
dst.move(0, HalfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, HalfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16)); dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, HalfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF))); dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
dst.move(3, HalfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16)); dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
break; break;
case VK_FORMAT_R8G8B8A8_SNORM: case VK_FORMAT_R8G8B8A8_SNORM:
dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
...@@ -4956,7 +4943,7 @@ namespace sw ...@@ -4956,7 +4943,7 @@ namespace sw
dst.move(3, SIMD::Int(1)); dst.move(3, SIMD::Int(1));
break; break;
case VK_FORMAT_R16_SFLOAT: case VK_FORMAT_R16_SFLOAT:
dst.move(0, HalfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, SIMD::Float(0)); dst.move(1, SIMD::Float(0));
dst.move(2, SIMD::Float(0)); dst.move(2, SIMD::Float(0));
dst.move(3, SIMD::Float(1)); dst.move(3, SIMD::Float(1));
...@@ -4974,8 +4961,8 @@ namespace sw ...@@ -4974,8 +4961,8 @@ namespace sw
dst.move(3, SIMD::Int(1)); dst.move(3, SIMD::Int(1));
break; break;
case VK_FORMAT_R16G16_SFLOAT: case VK_FORMAT_R16G16_SFLOAT:
dst.move(0, HalfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, HalfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16)); dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, SIMD::Float(0)); dst.move(2, SIMD::Float(0));
dst.move(3, SIMD::Float(1)); dst.move(3, SIMD::Float(1));
break; break;
......
...@@ -918,7 +918,6 @@ namespace sw ...@@ -918,7 +918,6 @@ namespace sw
SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const; SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const; SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const;
SIMD::UInt HalfToFloatBits(SIMD::UInt halfBits) const;
// Splits x into a floating-point significand in the range [0.5, 1.0) // Splits x into a floating-point significand in the range [0.5, 1.0)
// and an integral exponent of two, such that: // and an integral exponent of two, such that:
......
...@@ -1727,10 +1727,13 @@ bool Format::has16bitTextureFormat() const ...@@ -1727,10 +1727,13 @@ bool Format::has16bitTextureFormat() const
case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
return false; return false;
default: default:
...@@ -1773,10 +1776,13 @@ bool Format::has8bitTextureComponents() const ...@@ -1773,10 +1776,13 @@ bool Format::has8bitTextureComponents() const
case VK_FORMAT_R32G32B32A32_UINT: case VK_FORMAT_R32G32B32A32_UINT:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
return false; return false;
default: default:
...@@ -1820,10 +1826,13 @@ bool Format::has16bitTextureComponents() const ...@@ -1820,10 +1826,13 @@ bool Format::has16bitTextureComponents() const
case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
return true; return true;
default: default:
UNIMPLEMENTED("Format: %d", int(format)); UNIMPLEMENTED("Format: %d", int(format));
...@@ -1854,10 +1863,13 @@ bool Format::has32bitIntegerTextureComponents() const ...@@ -1854,10 +1863,13 @@ bool Format::has32bitIntegerTextureComponents() const
case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT:
...@@ -1912,10 +1924,13 @@ bool Format::hasYuvFormat() const ...@@ -1912,10 +1924,13 @@ bool Format::hasYuvFormat() const
case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
return false; return false;
default: default:
UNIMPLEMENTED("Format: %d", int(format)); UNIMPLEMENTED("Format: %d", int(format));
...@@ -1934,6 +1949,7 @@ bool Format::isRGBComponent(int component) const ...@@ -1934,6 +1949,7 @@ bool Format::isRGBComponent(int component) const
case VK_FORMAT_R8_UINT: case VK_FORMAT_R8_UINT:
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R32_SINT: case VK_FORMAT_R32_SINT:
case VK_FORMAT_R32_UINT: case VK_FORMAT_R32_UINT:
case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32_SFLOAT:
...@@ -1945,6 +1961,7 @@ bool Format::isRGBComponent(int component) const ...@@ -1945,6 +1961,7 @@ bool Format::isRGBComponent(int component) const
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_UNORM: case VK_FORMAT_R16G16_UNORM:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R32G32_SINT: case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT: case VK_FORMAT_R32G32_UINT:
case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32_SFLOAT:
...@@ -1959,6 +1976,7 @@ bool Format::isRGBComponent(int component) const ...@@ -1959,6 +1976,7 @@ bool Format::isRGBComponent(int component) const
case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT: case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT: case VK_FORMAT_R32G32B32A32_UINT:
case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment