Commit 8d7f233a by Chris Forbes

Refactor float sampling path to be more regular

Change-Id: I8e68ecda72c9ac077c81e7a03eb53ec3c43e7812
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30433
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 788ea9f7
...@@ -1835,6 +1835,7 @@ namespace sw ...@@ -1835,6 +1835,7 @@ namespace sw
Vector4f c; Vector4f c;
UInt index[4]; UInt index[4];
UInt4 t0, t1, t2, t3;
computeIndices(index, uuuu, vvvv, wwww, mipmap, function); computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
if(hasFloatTexture() || has32bitIntegerTextureComponents()) if(hasFloatTexture() || has32bitIntegerTextureComponents())
...@@ -1844,76 +1845,56 @@ namespace sw ...@@ -1844,76 +1845,56 @@ namespace sw
int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0; int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
if (has16bitTextureComponents()) switch (state.textureFormat)
{ {
switch (textureComponentCount()) case VK_FORMAT_R16_SFLOAT:
{ t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
case 4: t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
{ t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8)); t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8)); c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8)); c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
break;
case VK_FORMAT_R16G16_SFLOAT:
t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
// FIXME: shuffles
c.x = As<Float4>(halfToFloatBits(t0)); c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1)); c.y = As<Float4>(halfToFloatBits(t1));
c.z = As<Float4>(halfToFloatBits(t2)); c.z = As<Float4>(halfToFloatBits(t2));
c.w = As<Float4>(halfToFloatBits(t3)); c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w); transpose4x4(c.x, c.y, c.z, c.w);
break; break;
} case VK_FORMAT_R16G16B16A16_SFLOAT:
case 2: t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
{ t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4)); t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4)); t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
// FIXME: shuffles
c.x = As<Float4>(halfToFloatBits(t0)); c.x = As<Float4>(halfToFloatBits(t0));
c.y = As<Float4>(halfToFloatBits(t1)); c.y = As<Float4>(halfToFloatBits(t1));
c.z = As<Float4>(halfToFloatBits(t2)); c.z = As<Float4>(halfToFloatBits(t2));
c.w = As<Float4>(halfToFloatBits(t3)); c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w); transpose4x4(c.x, c.y, c.z, c.w);
break; break;
} case VK_FORMAT_R32_SFLOAT:
case 1: case VK_FORMAT_R32_SINT:
{ case VK_FORMAT_R32_UINT:
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2)); // FIXME: Optimal shuffling?
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2)); c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2)); c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2)); c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
break;
}
default:
UNIMPLEMENTED("fp16 sampling %d components", textureComponentCount());
}
}
else
{
// Read texels
switch (textureComponentCount())
{
case 4:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x4(c.x, c.y, c.z, c.w);
break;
case 3:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w);
break; break;
case 2: case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT:
// FIXME: Optimal shuffling? // FIXME: Optimal shuffling?
c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8); c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8); c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
...@@ -1923,16 +1904,27 @@ namespace sw ...@@ -1923,16 +1904,27 @@ namespace sw
c.x = Float4(c.x.xz, c.z.xz); c.x = Float4(c.x.xz, c.z.xz);
c.y = Float4(c.y.yw, c.z.yw); c.y = Float4(c.y.yw, c.z.yw);
break; break;
case 1: case VK_FORMAT_R32G32B32_SFLOAT:
// FIXME: Optimal shuffling? case VK_FORMAT_R32G32B32_SINT:
c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4); case VK_FORMAT_R32G32B32_UINT:
c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4); c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4); c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4); c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w);
break;
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x4(c.x, c.y, c.z, c.w);
break; break;
default: default:
ASSERT(false); UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
}
} }
if(state.compare != COMPARE_BYPASS) if(state.compare != COMPARE_BYPASS)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment