Commit d354695d by Chris Forbes

Implement fp16 texture formats

TODO: figure out what is happening with linear filtering. Possibly pre-existing breakage -- if we enable filtering support for R32_SFLOAT, that fails in the same way.

Test: dEQP-VK.texture.*
Test: dEQP-VK.image.*
Test: dEQP-VK.pipeline.*
Change-Id: Ia461418d772eb5aceb101b84eaa239b0c0bce2c0
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30288
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent b5a4546c
......@@ -195,11 +195,14 @@ namespace sw
if(componentCount < 4) c.w = Float4(1.0f);
break;
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R16_SFLOAT:
c.y = Float4(0.0f);
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R16G16_SFLOAT:
c.z = Float4(0.0f);
c.w = Float4(1.0f);
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
break;
default:
ASSERT(false);
......@@ -1830,42 +1833,95 @@ namespace sw
int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
// Read texels
switch(textureComponentCount())
if (has16bitTextureComponents())
{
case 4:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x4(c.x, c.y, c.z, c.w);
break;
case 3:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w);
break;
case 2:
// FIXME: Optimal shuffling?
c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
c.y = c.x;
c.x = Float4(c.x.xz, c.z.xz);
c.y = Float4(c.y.yw, c.z.yw);
break;
case 1:
// FIXME: Optimal shuffling?
c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
break;
default:
ASSERT(false);
switch (textureComponentCount())
{
case 4:
{
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1));
c.z = As<Float4>(halfToFloatBits(t2));
c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w);
break;
}
case 2:
{
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
// FIXME: shuffles
c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1));
c.z = As<Float4>(halfToFloatBits(t2));
c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w);
break;
}
case 1:
{
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
break;
}
default:
UNIMPLEMENTED("fp16 sampling %d components", textureComponentCount());
}
}
else
{
// Read texels
switch (textureComponentCount())
{
case 4:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x4(c.x, c.y, c.z, c.w);
break;
case 3:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w);
break;
case 2:
// FIXME: Optimal shuffling?
c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
c.y = c.x;
c.x = Float4(c.x.xz, c.z.xz);
c.y = Float4(c.y.yw, c.z.yw);
break;
case 1:
// FIXME: Optimal shuffling?
c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
break;
default:
ASSERT(false);
}
}
if(state.compare != COMPARE_BYPASS)
......
......@@ -559,4 +559,17 @@ namespace sw
case 4: transpose4x4(row0, row1, row2, row3); break;
}
}
// Converts four half-precision values, one per lane in the low 16 bits of
// halfBits, into the bit patterns of the corresponding single-precision
// values.
//
// The sign term below shifts every bit of halfBits above bit 14 left by 16,
// so callers are expected to pass lanes whose upper 16 bits are zero.
UInt4 halfToFloatBits(UInt4 halfBits)
{
	// Low 15 bits of a half: 5-bit exponent followed by 10-bit mantissa.
	constexpr uint32_t magnitudeMask = 0x7FFF;
	// Bit pattern of the float 2^112 (biased exponent 254 - 15 = 239);
	// 112 is the difference between the float and half exponent biases.
	constexpr uint32_t rebiasScaleBits = (254 - 15) << 23;
	// Largest finite half magnitude; anything above it is Inf or NaN.
	constexpr uint32_t largestFiniteHalf = 0x7BFF;
	// Float exponent field with all bits set, as used by Inf and NaN.
	constexpr uint32_t floatExponentAllOnes = 255 << 23;

	UInt4 magnitude = halfBits & UInt4(magnitudeMask);

	// Shifting by 13 lines the half exponent/mantissa up with the float
	// fields; the floating-point multiply then corrects the exponent bias.
	// NOTE(review): correct results for half denormals presumably rely on
	// float denormals not being flushed to zero here -- confirm.
	UInt4 rebiased = As<UInt4>(As<Float4>(magnitude << 13) * As<Float4>(UInt4(rebiasScaleBits)));

	// XOR with the magnitude leaves only the sign bit (bit 15), which is
	// moved up to bit 31.
	UInt4 sign = (halfBits ^ UInt4(magnitude)) << 16;

	// Lanes holding Inf/NaN get the full float exponent forced on so they
	// remain Inf/NaN after the conversion.
	UInt4 infNanExponent = CmpNLE(As<UInt4>(magnitude), UInt4(largestFiniteHalf)) & UInt4(floatExponentAllOnes);

	return rebiased | sign | infNanExponent;
}
}
......@@ -90,6 +90,8 @@ namespace sw
void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
UInt4 halfToFloatBits(UInt4 halfBits);
}
#endif // sw_ShaderCore_hpp
......@@ -3818,8 +3818,8 @@ namespace sw
case GLSLstd450UnpackHalf2x16:
{
auto val = GenericValue(this, routine, insn.word(5));
dst.move(0, HalfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
dst.move(1, HalfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
break;
}
case GLSLstd450Fma:
......@@ -4325,19 +4325,6 @@ namespace sw
return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
}
// Expands half-precision bit patterns, held in the low 16 bits of each lane
// of halfBits, into the equivalent single-precision bit patterns.
//
// Every bit of halfBits above bit 14 is shifted left by 16 in the sign term,
// so the upper 16 bits of each lane are expected to be zero on entry.
SIMD::UInt SpirvShader::HalfToFloatBits(SIMD::UInt halfBits) const
{
	// Exponent (5 bits) and mantissa (10 bits) of a half, without the sign.
	constexpr uint32_t magnitudeMask = 0x7FFF;
	// Float with biased exponent 254 - 15 = 239, i.e. 2^112; multiplying by
	// it converts from the half exponent bias (15) to the float bias (127).
	constexpr uint32_t rebiasScaleBits = (254 - 15) << 23;
	// Magnitudes above the largest finite half encode Inf or NaN.
	constexpr uint32_t largestFiniteHalf = 0x7BFF;
	// All-ones float exponent field.
	constexpr uint32_t floatExponentAllOnes = 255 << 23;

	SIMD::UInt magnitude = halfBits & SIMD::UInt(magnitudeMask);

	// Align the half fields with the float fields (shift by 13), then fix
	// the exponent bias with a floating-point multiply.
	SIMD::UInt rebiased = As<SIMD::UInt>(As<SIMD::Float>(magnitude << 13) * As<SIMD::Float>(SIMD::UInt(rebiasScaleBits)));

	// Isolate the sign bit and move it from bit 15 to bit 31.
	SIMD::UInt sign = (halfBits ^ SIMD::UInt(magnitude)) << 16;

	// Force the float exponent to all ones in lanes that held Inf/NaN.
	SIMD::UInt infNanExponent = CmpNLE(As<SIMD::UInt>(magnitude), SIMD::UInt(largestFiniteHalf)) & SIMD::UInt(floatExponentAllOnes);

	return rebiased | sign | infNanExponent;
}
std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
{
// Assumes IEEE 754
......@@ -4868,10 +4855,10 @@ namespace sw
dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
break;
case VK_FORMAT_R16G16B16A16_SFLOAT:
dst.move(0, HalfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, HalfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, HalfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
dst.move(3, HalfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
break;
case VK_FORMAT_R8G8B8A8_SNORM:
dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
......@@ -4956,7 +4943,7 @@ namespace sw
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R16_SFLOAT:
dst.move(0, HalfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, SIMD::Float(0));
dst.move(2, SIMD::Float(0));
dst.move(3, SIMD::Float(1));
......@@ -4974,8 +4961,8 @@ namespace sw
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R16G16_SFLOAT:
dst.move(0, HalfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, HalfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, SIMD::Float(0));
dst.move(3, SIMD::Float(1));
break;
......
......@@ -918,7 +918,6 @@ namespace sw
SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const;
SIMD::UInt HalfToFloatBits(SIMD::UInt halfBits) const;
// Splits x into a floating-point significand in the range [0.5, 1.0)
// and an integral exponent of two, such that:
......
......@@ -1727,10 +1727,13 @@ bool Format::has16bitTextureFormat() const
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
return false;
default:
......@@ -1773,10 +1776,13 @@ bool Format::has8bitTextureComponents() const
case VK_FORMAT_R32G32B32A32_UINT:
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
return false;
default:
......@@ -1820,10 +1826,13 @@ bool Format::has16bitTextureComponents() const
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
return true;
default:
UNIMPLEMENTED("Format: %d", int(format));
......@@ -1854,10 +1863,13 @@ bool Format::has32bitIntegerTextureComponents() const
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
......@@ -1912,10 +1924,13 @@ bool Format::hasYuvFormat() const
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
return false;
default:
UNIMPLEMENTED("Format: %d", int(format));
......@@ -1934,6 +1949,7 @@ bool Format::isRGBComponent(int component) const
case VK_FORMAT_R8_UINT:
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R32_SINT:
case VK_FORMAT_R32_UINT:
case VK_FORMAT_R32_SFLOAT:
......@@ -1945,6 +1961,7 @@ bool Format::isRGBComponent(int component) const
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_UNORM:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT:
case VK_FORMAT_R32G32_SFLOAT:
......@@ -1959,6 +1976,7 @@ bool Format::isRGBComponent(int component) const
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment