Commit e4a88b91 by Nicolas Capens Committed by Nicolas Capens

Support 3-component integer formats natively.

Change-Id: Id48bc7a232c50b753da64cb914e75b5d590ae47d Reviewed-on: https://swiftshader-review.googlesource.com/14369 Reviewed-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com>
parent 52243687
...@@ -3571,9 +3571,11 @@ namespace sw ...@@ -3571,9 +3571,11 @@ namespace sw
case FORMAT_R32UI: case FORMAT_R32UI:
return FORMAT_R32UI; return FORMAT_R32UI;
case FORMAT_X16B16G16R16I: case FORMAT_X16B16G16R16I:
return FORMAT_X16B16G16R16I;
case FORMAT_A16B16G16R16I: case FORMAT_A16B16G16R16I:
return FORMAT_A16B16G16R16I; return FORMAT_A16B16G16R16I;
case FORMAT_X16B16G16R16UI: case FORMAT_X16B16G16R16UI:
return FORMAT_X16B16G16R16UI;
case FORMAT_A16B16G16R16UI: case FORMAT_A16B16G16R16UI:
return FORMAT_A16B16G16R16UI; return FORMAT_A16B16G16R16UI;
case FORMAT_A2R10G10B10: case FORMAT_A2R10G10B10:
...@@ -3581,9 +3583,11 @@ namespace sw ...@@ -3581,9 +3583,11 @@ namespace sw
case FORMAT_A16B16G16R16: case FORMAT_A16B16G16R16:
return FORMAT_A16B16G16R16; return FORMAT_A16B16G16R16;
case FORMAT_X32B32G32R32I: case FORMAT_X32B32G32R32I:
return FORMAT_X32B32G32R32I;
case FORMAT_A32B32G32R32I: case FORMAT_A32B32G32R32I:
return FORMAT_A32B32G32R32I; return FORMAT_A32B32G32R32I;
case FORMAT_X32B32G32R32UI: case FORMAT_X32B32G32R32UI:
return FORMAT_X32B32G32R32UI;
case FORMAT_A32B32G32R32UI: case FORMAT_A32B32G32R32UI:
return FORMAT_A32B32G32R32UI; return FORMAT_A32B32G32R32UI;
case FORMAT_G8R8I: case FORMAT_G8R8I:
......
...@@ -2001,6 +2001,13 @@ namespace sw ...@@ -2001,6 +2001,13 @@ namespace sw
c.w = Pointer<Short4>(buffer[f3])[index[3]]; c.w = Pointer<Short4>(buffer[f3])[index[3]];
transpose4x4(c.x, c.y, c.z, c.w); transpose4x4(c.x, c.y, c.z, c.w);
break; break;
case 3:
c.x = Pointer<Short4>(buffer[f0])[index[0]];
c.y = Pointer<Short4>(buffer[f1])[index[1]];
c.z = Pointer<Short4>(buffer[f2])[index[2]];
c.w = Pointer<Short4>(buffer[f3])[index[3]];
transpose4x3(c.x, c.y, c.z, c.w);
break;
case 2: case 2:
c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]); c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1]))); c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
...@@ -2159,13 +2166,11 @@ namespace sw ...@@ -2159,13 +2166,11 @@ namespace sw
transpose4x4(c.x, c.y, c.z, c.w); transpose4x4(c.x, c.y, c.z, c.w);
break; break;
case 3: case 3:
ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w); transpose4x3(c.x, c.y, c.z, c.w);
c.w = Float4(1.0f);
break; break;
case 2: case 2:
// FIXME: Optimal shuffling? // FIXME: Optimal shuffling?
......
...@@ -490,6 +490,18 @@ namespace sw ...@@ -490,6 +490,18 @@ namespace sw
row3 = UnpackHigh(tmp0, tmp1); row3 = UnpackHigh(tmp0, tmp1);
} }
// Three-output counterpart of the Short4 transpose: row0, row1 and row2 are
// overwritten with the results of the unpack sequence below, while row3 is
// only read as an input and is never written back.
void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
{
	// Pairwise unpack of the inputs. All four rows must be consumed here,
	// before any of them is overwritten further down.
	Int2 low01 = UnpackLow(row0, row1);
	Int2 low23 = UnpackLow(row2, row3);
	Int2 high01 = UnpackHigh(row0, row1);
	Int2 high23 = UnpackHigh(row2, row3);

	// Combine the pairwise results into the three output rows. No fourth
	// row is produced, so the high half of the high pairs goes unused.
	row2 = UnpackLow(high01, high23);
	row1 = UnpackHigh(low01, low23);
	row0 = UnpackLow(low01, low23);
}
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{ {
Float4 tmp0 = UnpackLow(row0, row1); Float4 tmp0 = UnpackLow(row0, row1);
......
...@@ -82,6 +82,7 @@ namespace sw ...@@ -82,6 +82,7 @@ namespace sw
Float4 dot4(const Vector4f &v0, const Vector4f &v1); Float4 dot4(const Vector4f &v0, const Vector4f &v1);
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment