Refactor float sampling path to be more regular

Change-Id: I8e68ecda72c9ac077c81e7a03eb53ec3c43e7812
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30433
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>

Refactor float sampling path to be more regular
8d7f233a · Chris Forbes · 788ea9f7 · 8d7f233a
Commit 8d7f233a authored May 02, 2019 by Chris Forbes
Hide whitespace changes
Inline Side-by-side

Showing with 80 additions and 88 deletions

SamplerCore.cpp src/Pipeline/SamplerCore.cpp +80 -88

No files found.
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -1835,6 +1835,7 @@ namespace sw
 		Vector4f c;
 		UInt index[4];
+		UInt4 t0, t1, t2, t3;
 		computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
 		if(hasFloatTexture() || has32bitIntegerTextureComponents())
@@ -1844,95 +1845,86 @@ namespace sw
 			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
 			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
-			if (has16bitTextureComponents())
+			switch (state.textureFormat)
 			{
-				switch (textureComponentCount())
+			case VK_FORMAT_R16_SFLOAT:
-				{
+				t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
-				case 4:
+				t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
-				{
+				t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
-					UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
+				t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
-					UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
-					UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
+				c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
-					UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
+				c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
+				c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
-					c.x = As<Float4>(halfToFloatBits(t0));
+				c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
-					c.y = As<Float4>(halfToFloatBits(t1));
+				break;
-					c.z = As<Float4>(halfToFloatBits(t2));
+			case VK_FORMAT_R16G16_SFLOAT:
-					c.w = As<Float4>(halfToFloatBits(t3));
+				t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
-					transpose4x4(c.x, c.y, c.z, c.w);
+				t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
-					break;
+				t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
-				}
+				t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
-				case 2:
-				{
+				// FIXME: shuffles
-					UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
+				c.x = As<Float4>(halfToFloatBits(t0));
-					UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
+				c.y = As<Float4>(halfToFloatBits(t1));
-					UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
+				c.z = As<Float4>(halfToFloatBits(t2));
-					UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
+				c.w = As<Float4>(halfToFloatBits(t3));
+				transpose4x4(c.x, c.y, c.z, c.w);
-					// FIXME: shuffles
+				break;
-					c.x = As<Float4>(halfToFloatBits(t0));
+			case VK_FORMAT_R16G16B16A16_SFLOAT:
-					c.y = As<Float4>(halfToFloatBits(t1));
+				t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
-					c.z = As<Float4>(halfToFloatBits(t2));
+				t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
-					c.w = As<Float4>(halfToFloatBits(t3));
+				t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
-					transpose4x4(c.x, c.y, c.z, c.w);
+				t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
-					break;
-				}
+				c.x = As<Float4>(halfToFloatBits(t0));
-				case 1:
+				c.y = As<Float4>(halfToFloatBits(t1));
-				{
+				c.z = As<Float4>(halfToFloatBits(t2));
-					UInt4 t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
+				c.w = As<Float4>(halfToFloatBits(t3));
-					UInt4 t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
+				transpose4x4(c.x, c.y, c.z, c.w);
-					UInt4 t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
+				break;
-					UInt4 t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
+			case VK_FORMAT_R32_SFLOAT:
+			case VK_FORMAT_R32_SINT:
-					c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
+			case VK_FORMAT_R32_UINT:
-					c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
+				// FIXME: Optimal shuffling?
-					c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
+				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
-					c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
+				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
-					break;
+				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
-				}
+				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
-				default:
+				break;
-					UNIMPLEMENTED("fp16 sampling %d components", textureComponentCount());
+			case VK_FORMAT_R32G32_SFLOAT:
-				}
+			case VK_FORMAT_R32G32_SINT:
-			}
+			case VK_FORMAT_R32G32_UINT:
-			else
+				// FIXME: Optimal shuffling?
-			{
+				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
-				// Read texels
+				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
-				switch (textureComponentCount())
+				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
-				{
+				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
-				case 4:
+				c.y = c.x;
-					c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
+				c.x = Float4(c.x.xz, c.z.xz);
-					c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
+				c.y = Float4(c.y.yw, c.z.yw);
-					c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
+				break;
-					c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+			case VK_FORMAT_R32G32B32_SFLOAT:
-					transpose4x4(c.x, c.y, c.z, c.w);
+			case VK_FORMAT_R32G32B32_SINT:
-					break;
+			case VK_FORMAT_R32G32B32_UINT:
-				case 3:
+				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
-					c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
-					c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
-					c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
-					c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+				transpose4x3(c.x, c.y, c.z, c.w);
-					transpose4x3(c.x, c.y, c.z, c.w);
+				break;
-					break;
+			case VK_FORMAT_R32G32B32A32_SFLOAT:
-				case 2:
+			case VK_FORMAT_R32G32B32A32_SINT:
-					// FIXME: Optimal shuffling?
+			case VK_FORMAT_R32G32B32A32_UINT:
-					c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
+				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
-					c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
+				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
-					c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
+				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
-					c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
+				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
-					c.y = c.x;
+				transpose4x4(c.x, c.y, c.z, c.w);
-					c.x = Float4(c.x.xz, c.z.xz);
+				break;
-					c.y = Float4(c.y.yw, c.z.yw);
-					break;
+			default:
-				case 1:
+				UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
-					// FIXME: Optimal shuffling?
-					c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
-					c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
-					c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
-					c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
-					break;
-				default:
-					ASSERT(false);
-				}
 			}
 			if(state.compare != COMPARE_BYPASS)