Commit 8d7f233a by Chris Forbes

Refactor float sampling path to be more regular

Change-Id: I8e68ecda72c9ac077c81e7a03eb53ec3c43e7812
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30433
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 788ea9f7
...@@ -1835,6 +1835,7 @@ namespace sw ...@@ -1835,6 +1835,7 @@ namespace sw
Vector4f c; Vector4f c;
UInt index[4]; UInt index[4];
UInt4 t0, t1, t2, t3;
computeIndices(index, uuuu, vvvv, wwww, mipmap, function); computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
if(hasFloatTexture() || has32bitIntegerTextureComponents()) if(hasFloatTexture() || has32bitIntegerTextureComponents())
...@@ -1844,95 +1845,86 @@ namespace sw ...@@ -1844,95 +1845,86 @@ namespace sw
int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0; int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
if (has16bitTextureComponents()) switch (state.textureFormat)
{ {
switch (textureComponentCount()) case VK_FORMAT_R16_SFLOAT:
{ t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
case 4: t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
{ t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8)); t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8)); c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8)); c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
c.x = As<Float4>(halfToFloatBits(t0)); c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
c.y = As<Float4>(halfToFloatBits(t1)); break;
c.z = As<Float4>(halfToFloatBits(t2)); case VK_FORMAT_R16G16_SFLOAT:
c.w = As<Float4>(halfToFloatBits(t3)); t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
transpose4x4(c.x, c.y, c.z, c.w); t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
break; t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
} t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
case 2:
{ // FIXME: shuffles
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4)); c.x = As<Float4>(halfToFloatBits(t0));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4)); c.y = As<Float4>(halfToFloatBits(t1));
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4)); c.z = As<Float4>(halfToFloatBits(t2));
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4)); c.w = As<Float4>(halfToFloatBits(t3));
transpose4x4(c.x, c.y, c.z, c.w);
// FIXME: shuffles break;
c.x = As<Float4>(halfToFloatBits(t0)); case VK_FORMAT_R16G16B16A16_SFLOAT:
c.y = As<Float4>(halfToFloatBits(t1)); t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
c.z = As<Float4>(halfToFloatBits(t2)); t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
c.w = As<Float4>(halfToFloatBits(t3)); t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
transpose4x4(c.x, c.y, c.z, c.w); t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
break;
} c.x = As<Float4>(halfToFloatBits(t0));
case 1: c.y = As<Float4>(halfToFloatBits(t1));
{ c.z = As<Float4>(halfToFloatBits(t2));
UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2)); c.w = As<Float4>(halfToFloatBits(t3));
UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2)); transpose4x4(c.x, c.y, c.z, c.w);
UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2)); break;
UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2)); case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32_SINT:
c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0); case VK_FORMAT_R32_UINT:
c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0); // FIXME: Optimal shuffling?
c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0); c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0); c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
break; c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
} c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
default: break;
UNIMPLEMENTED("fp16 sampling %d components", textureComponentCount()); case VK_FORMAT_R32G32_SFLOAT:
} case VK_FORMAT_R32G32_SINT:
} case VK_FORMAT_R32G32_UINT:
else // FIXME: Optimal shuffling?
{ c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
// Read texels c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
switch (textureComponentCount()) c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
{ c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
case 4: c.y = c.x;
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); c.x = Float4(c.x.xz, c.z.xz);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); c.y = Float4(c.y.yw, c.z.yw);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); break;
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); case VK_FORMAT_R32G32B32_SFLOAT:
transpose4x4(c.x, c.y, c.z, c.w); case VK_FORMAT_R32G32B32_SINT:
break; case VK_FORMAT_R32G32B32_UINT:
case 3: c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); transpose4x3(c.x, c.y, c.z, c.w);
transpose4x3(c.x, c.y, c.z, c.w); break;
break; case VK_FORMAT_R32G32B32A32_SFLOAT:
case 2: case VK_FORMAT_R32G32B32A32_SINT:
// FIXME: Optimal shuffling? case VK_FORMAT_R32G32B32A32_UINT:
c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8); c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8); c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8); c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8); c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
c.y = c.x; transpose4x4(c.x, c.y, c.z, c.w);
c.x = Float4(c.x.xz, c.z.xz); break;
c.y = Float4(c.y.yw, c.z.yw);
break; default:
case 1: UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
// FIXME: Optimal shuffling?
c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
break;
default:
ASSERT(false);
}
} }
if(state.compare != COMPARE_BYPASS) if(state.compare != COMPARE_BYPASS)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment