Commit 0405ba06 by Nicolas Capens Committed by Nicolas Capens

Take advantage of return value optimization

Just a refactoring to improve syntax and avoid non-const references. Also don't assume SIMD types are 4-wide. Also add [[fallthrough]] cases found in the process. Bug: b/143351714 Bug: b/142661203 Change-Id: I12a0178338ce4c711bcbf62825d230580f3e92f0 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40288 Presubmit-Ready: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com>
parent d6d0edc2
...@@ -611,10 +611,13 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) ...@@ -611,10 +611,13 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
break; break;
case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT:
if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); } if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
// [[fallthrough]]
case VK_FORMAT_R16G16B16_SFLOAT: case VK_FORMAT_R16G16B16_SFLOAT:
if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); } if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
// [[fallthrough]]
case VK_FORMAT_R16G16_SFLOAT: case VK_FORMAT_R16G16_SFLOAT:
if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); } if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
// [[fallthrough]]
case VK_FORMAT_R16_SFLOAT: case VK_FORMAT_R16_SFLOAT:
if(writeR) { *Pointer<Half>(element) = Half(c.x); } if(writeR) { *Pointer<Half>(element) = Half(c.x); }
break; break;
...@@ -690,14 +693,17 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) ...@@ -690,14 +693,17 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R8G8B8A8_SSCALED: case VK_FORMAT_R8G8B8A8_SSCALED:
case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
// [[fallthrough]]
case VK_FORMAT_R8G8B8_SINT: case VK_FORMAT_R8G8B8_SINT:
case VK_FORMAT_R8G8B8_SNORM: case VK_FORMAT_R8G8B8_SNORM:
case VK_FORMAT_R8G8B8_SSCALED: case VK_FORMAT_R8G8B8_SSCALED:
if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); } if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
// [[fallthrough]]
case VK_FORMAT_R8G8_SINT: case VK_FORMAT_R8G8_SINT:
case VK_FORMAT_R8G8_SNORM: case VK_FORMAT_R8G8_SNORM:
case VK_FORMAT_R8G8_SSCALED: case VK_FORMAT_R8G8_SSCALED:
if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
// [[fallthrough]]
case VK_FORMAT_R8_SINT: case VK_FORMAT_R8_SINT:
case VK_FORMAT_R8_SNORM: case VK_FORMAT_R8_SNORM:
case VK_FORMAT_R8_SSCALED: case VK_FORMAT_R8_SSCALED:
...@@ -708,11 +714,13 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) ...@@ -708,11 +714,13 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R8G8B8_USCALED: case VK_FORMAT_R8G8B8_USCALED:
case VK_FORMAT_R8G8B8_SRGB: case VK_FORMAT_R8G8B8_SRGB:
if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
// [[fallthrough]]
case VK_FORMAT_R8G8_UINT: case VK_FORMAT_R8G8_UINT:
case VK_FORMAT_R8G8_UNORM: case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8_USCALED: case VK_FORMAT_R8G8_USCALED:
case VK_FORMAT_R8G8_SRGB: case VK_FORMAT_R8G8_SRGB:
if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
// [[fallthrough]]
case VK_FORMAT_R8_UINT: case VK_FORMAT_R8_UINT:
case VK_FORMAT_R8_UNORM: case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_USCALED: case VK_FORMAT_R8_USCALED:
...@@ -814,8 +822,10 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) ...@@ -814,8 +822,10 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
break; break;
case VK_FORMAT_R32G32B32_SINT: case VK_FORMAT_R32G32B32_SINT:
if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
// [[fallthrough]]
case VK_FORMAT_R32G32_SINT: case VK_FORMAT_R32G32_SINT:
if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
// [[fallthrough]]
case VK_FORMAT_R32_SINT: case VK_FORMAT_R32_SINT:
if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
break; break;
...@@ -834,8 +844,10 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) ...@@ -834,8 +844,10 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
break; break;
case VK_FORMAT_R32G32B32_UINT: case VK_FORMAT_R32G32B32_UINT:
if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
// [[fallthrough]]
case VK_FORMAT_R32G32_UINT: case VK_FORMAT_R32G32_UINT:
if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
// [[fallthrough]]
case VK_FORMAT_R32_UINT: case VK_FORMAT_R32_UINT:
if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
break; break;
...@@ -970,8 +982,10 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state) ...@@ -970,8 +982,10 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
case VK_FORMAT_R8G8B8A8_SINT: case VK_FORMAT_R8G8B8A8_SINT:
c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3); c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2); c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
// [[fallthrough]]
case VK_FORMAT_R8G8_SINT: case VK_FORMAT_R8G8_SINT:
c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1); c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
// [[fallthrough]]
case VK_FORMAT_R8_SINT: case VK_FORMAT_R8_SINT:
c = Insert(c, Int(*Pointer<SByte>(element)), 0); c = Insert(c, Int(*Pointer<SByte>(element)), 0);
break; break;
...@@ -991,8 +1005,10 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state) ...@@ -991,8 +1005,10 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
case VK_FORMAT_R8G8B8A8_UINT: case VK_FORMAT_R8G8B8A8_UINT:
c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3); c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2); c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
// [[fallthrough]]
case VK_FORMAT_R8G8_UINT: case VK_FORMAT_R8G8_UINT:
c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1); c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
// [[fallthrough]]
case VK_FORMAT_R8_UINT: case VK_FORMAT_R8_UINT:
case VK_FORMAT_S8_UINT: case VK_FORMAT_S8_UINT:
c = Insert(c, Int(*Pointer<Byte>(element)), 0); c = Insert(c, Int(*Pointer<Byte>(element)), 0);
...@@ -1000,16 +1016,20 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state) ...@@ -1000,16 +1016,20 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
c = Insert(c, Int(*Pointer<Short>(element + 6)), 3); c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
c = Insert(c, Int(*Pointer<Short>(element + 4)), 2); c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
// [[fallthrough]]
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
c = Insert(c, Int(*Pointer<Short>(element + 2)), 1); c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
// [[fallthrough]]
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
c = Insert(c, Int(*Pointer<Short>(element)), 0); c = Insert(c, Int(*Pointer<Short>(element)), 0);
break; break;
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3); c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2); c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
// [[fallthrough]]
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1); c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
// [[fallthrough]]
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
c = Insert(c, Int(*Pointer<UShort>(element)), 0); c = Insert(c, Int(*Pointer<UShort>(element)), 0);
break; break;
...@@ -1020,6 +1040,7 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state) ...@@ -1020,6 +1040,7 @@ Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
case VK_FORMAT_R32G32_SINT: case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT: case VK_FORMAT_R32G32_UINT:
c = Insert(c, *Pointer<Int>(element + 4), 1); c = Insert(c, *Pointer<Int>(element + 4), 1);
// [[fallthrough]]
case VK_FORMAT_R32_SINT: case VK_FORMAT_R32_SINT:
case VK_FORMAT_R32_UINT: case VK_FORMAT_R32_UINT:
c = Insert(c, *Pointer<Int>(element), 0); c = Insert(c, *Pointer<Int>(element), 0);
...@@ -1096,6 +1117,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1096,6 +1117,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_B8G8R8A8_SINT: case VK_FORMAT_B8G8R8A8_SINT:
case VK_FORMAT_B8G8R8A8_SSCALED: case VK_FORMAT_B8G8R8A8_SSCALED:
if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
// [[fallthrough]]
case VK_FORMAT_B8G8R8_SINT: case VK_FORMAT_B8G8R8_SINT:
case VK_FORMAT_B8G8R8_SSCALED: case VK_FORMAT_B8G8R8_SSCALED:
if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); } if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
...@@ -1107,12 +1129,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1107,12 +1129,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R8G8B8A8_SSCALED: case VK_FORMAT_R8G8B8A8_SSCALED:
case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
// [[fallthrough]]
case VK_FORMAT_R8G8B8_SINT: case VK_FORMAT_R8G8B8_SINT:
case VK_FORMAT_R8G8B8_SSCALED: case VK_FORMAT_R8G8B8_SSCALED:
if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); } if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
// [[fallthrough]]
case VK_FORMAT_R8G8_SINT: case VK_FORMAT_R8G8_SINT:
case VK_FORMAT_R8G8_SSCALED: case VK_FORMAT_R8G8_SSCALED:
if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
// [[fallthrough]]
case VK_FORMAT_R8_SINT: case VK_FORMAT_R8_SINT:
case VK_FORMAT_R8_SSCALED: case VK_FORMAT_R8_SSCALED:
if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); } if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
...@@ -1158,6 +1183,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1158,6 +1183,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_B8G8R8A8_UINT: case VK_FORMAT_B8G8R8A8_UINT:
case VK_FORMAT_B8G8R8A8_USCALED: case VK_FORMAT_B8G8R8A8_USCALED:
if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
// [[fallthrough]]
case VK_FORMAT_B8G8R8_UINT: case VK_FORMAT_B8G8R8_UINT:
case VK_FORMAT_B8G8R8_USCALED: case VK_FORMAT_B8G8R8_USCALED:
case VK_FORMAT_B8G8R8_SRGB: case VK_FORMAT_B8G8R8_SRGB:
...@@ -1170,12 +1196,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1170,12 +1196,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R8G8B8A8_USCALED: case VK_FORMAT_R8G8B8A8_USCALED:
case VK_FORMAT_A8B8G8R8_USCALED_PACK32: case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
// [[fallthrough]]
case VK_FORMAT_R8G8B8_UINT: case VK_FORMAT_R8G8B8_UINT:
case VK_FORMAT_R8G8B8_USCALED: case VK_FORMAT_R8G8B8_USCALED:
if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); } if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
// [[fallthrough]]
case VK_FORMAT_R8G8_UINT: case VK_FORMAT_R8G8_UINT:
case VK_FORMAT_R8G8_USCALED: case VK_FORMAT_R8G8_USCALED:
if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
// [[fallthrough]]
case VK_FORMAT_R8_UINT: case VK_FORMAT_R8_UINT:
case VK_FORMAT_R8_USCALED: case VK_FORMAT_R8_USCALED:
case VK_FORMAT_S8_UINT: case VK_FORMAT_S8_UINT:
...@@ -1184,12 +1213,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1184,12 +1213,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_SSCALED: case VK_FORMAT_R16G16B16A16_SSCALED:
if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); } if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
// [[fallthrough]]
case VK_FORMAT_R16G16B16_SINT: case VK_FORMAT_R16G16B16_SINT:
case VK_FORMAT_R16G16B16_SSCALED: case VK_FORMAT_R16G16B16_SSCALED:
if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); } if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
// [[fallthrough]]
case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_SSCALED: case VK_FORMAT_R16G16_SSCALED:
if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); } if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
// [[fallthrough]]
case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_SSCALED: case VK_FORMAT_R16_SSCALED:
if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); } if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
...@@ -1197,12 +1229,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1197,12 +1229,15 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_UINT:
case VK_FORMAT_R16G16B16A16_USCALED: case VK_FORMAT_R16G16B16A16_USCALED:
if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); } if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
// [[fallthrough]]
case VK_FORMAT_R16G16B16_UINT: case VK_FORMAT_R16G16B16_UINT:
case VK_FORMAT_R16G16B16_USCALED: case VK_FORMAT_R16G16B16_USCALED:
if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); } if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
// [[fallthrough]]
case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_UINT:
case VK_FORMAT_R16G16_USCALED: case VK_FORMAT_R16G16_USCALED:
if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); } if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
// [[fallthrough]]
case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_UINT:
case VK_FORMAT_R16_USCALED: case VK_FORMAT_R16_USCALED:
if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); } if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
...@@ -1247,8 +1282,10 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) ...@@ -1247,8 +1282,10 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
break; break;
case VK_FORMAT_R32G32B32_UINT: case VK_FORMAT_R32G32B32_UINT:
if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
// [[fallthrough]]
case VK_FORMAT_R32G32_UINT: case VK_FORMAT_R32G32_UINT:
if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
// [[fallthrough]]
case VK_FORMAT_R32_UINT: case VK_FORMAT_R32_UINT:
if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
break; break;
......
...@@ -1060,7 +1060,7 @@ void PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int ...@@ -1060,7 +1060,7 @@ void PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int
v = Insert(v, *Pointer<Int>(buffer + 0), 2); v = Insert(v, *Pointer<Int>(buffer + 0), 2);
v = Insert(v, *Pointer<Int>(buffer + 4), 3); v = Insert(v, *Pointer<Int>(buffer + 4), 3);
a2b10g10r10Unpack(v, pixel); pixel = a2b10g10r10Unpack(v);
} }
break; break;
case VK_FORMAT_A2R10G10B10_UNORM_PACK32: case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
...@@ -1072,7 +1072,7 @@ void PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int ...@@ -1072,7 +1072,7 @@ void PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int
v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2); v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3); v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
a2r10g10b10Unpack(v, pixel); pixel = a2r10g10b10Unpack(v);
} }
break; break;
default: default:
......
...@@ -1699,7 +1699,7 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer) ...@@ -1699,7 +1699,7 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2); cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3); cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
a2b10g10r10Unpack(cc, c); c = a2b10g10r10Unpack(cc);
} }
else if(state.textureFormat == VK_FORMAT_A2R10G10B10_UNORM_PACK32) else if(state.textureFormat == VK_FORMAT_A2R10G10B10_UNORM_PACK32)
{ {
...@@ -1709,7 +1709,7 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer) ...@@ -1709,7 +1709,7 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2); cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3); cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
a2r10g10b10Unpack(cc, c); c = a2r10g10b10Unpack(cc);
} }
else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32) else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
{ {
......
...@@ -601,7 +601,7 @@ Float4 r11g11b10Unpack(UInt r11g11b10bits) ...@@ -601,7 +601,7 @@ Float4 r11g11b10Unpack(UInt r11g11b10bits)
// copy/pasting the bits so that the exponent bits and top mantissa bits are aligned to the half format. // copy/pasting the bits so that the exponent bits and top mantissa bits are aligned to the half format.
// In this case, we have: // In this case, we have:
// MSB | B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R | LSB // MSB | B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R | LSB
SIMD::UInt halfBits; UInt4 halfBits;
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x000007FFu)) << 4, 0); halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x000007FFu)) << 4, 0);
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x003FF800u)) >> 7, 1); halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x003FF800u)) >> 7, 1);
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0xFFC00000u)) >> 17, 2); halfBits = Insert(halfBits, (r11g11b10bits & UInt(0xFFC00000u)) >> 17, 2);
...@@ -609,15 +609,18 @@ Float4 r11g11b10Unpack(UInt r11g11b10bits) ...@@ -609,15 +609,18 @@ Float4 r11g11b10Unpack(UInt r11g11b10bits)
return As<Float4>(halfToFloatBits(halfBits)); return As<Float4>(halfToFloatBits(halfBits));
} }
UInt r11g11b10Pack(sw::SIMD::Float &value) UInt r11g11b10Pack(const Float4 &value)
{ {
SIMD::UInt halfBits = floatToHalfBits(As<SIMD::UInt>(value), true) & auto halfBits = floatToHalfBits(As<UInt4>(value), true);
SIMD::UInt(0x7FF00000, 0x7FF00000, 0x7FE00000, 0); // Truncates instead of rounding. See b/147900455
return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1); UInt4 truncBits = halfBits & UInt4(0x7FF00000, 0x7FF00000, 0x7FE00000, 0);
return (UInt(truncBits.x) >> 20) | (UInt(truncBits.y) >> 9) | (UInt(truncBits.z) << 1);
} }
void a2b10g10r10Unpack(Int4 &value, Vector4s &result) Vector4s a2b10g10r10Unpack(const Int4 &value)
{ {
Vector4s result;
result.x = Short4(value << 6) & Short4(0xFFC0u); result.x = Short4(value << 6) & Short4(0xFFC0u);
result.y = Short4(value >> 4) & Short4(0xFFC0u); result.y = Short4(value >> 4) & Short4(0xFFC0u);
result.z = Short4(value >> 14) & Short4(0xFFC0u); result.z = Short4(value >> 14) & Short4(0xFFC0u);
...@@ -630,10 +633,14 @@ void a2b10g10r10Unpack(Int4 &value, Vector4s &result) ...@@ -630,10 +633,14 @@ void a2b10g10r10Unpack(Int4 &value, Vector4s &result)
result.w |= As<Short4>(As<UShort4>(result.w) >> 2); result.w |= As<Short4>(As<UShort4>(result.w) >> 2);
result.w |= As<Short4>(As<UShort4>(result.w) >> 4); result.w |= As<Short4>(As<UShort4>(result.w) >> 4);
result.w |= As<Short4>(As<UShort4>(result.w) >> 8); result.w |= As<Short4>(As<UShort4>(result.w) >> 8);
return result;
} }
void a2r10g10b10Unpack(Int4 &value, Vector4s &result) Vector4s a2r10g10b10Unpack(const Int4 &value)
{ {
Vector4s result;
result.x = Short4(value >> 14) & Short4(0xFFC0u); result.x = Short4(value >> 14) & Short4(0xFFC0u);
result.y = Short4(value >> 4) & Short4(0xFFC0u); result.y = Short4(value >> 4) & Short4(0xFFC0u);
result.z = Short4(value << 6) & Short4(0xFFC0u); result.z = Short4(value << 6) & Short4(0xFFC0u);
...@@ -646,6 +653,8 @@ void a2r10g10b10Unpack(Int4 &value, Vector4s &result) ...@@ -646,6 +653,8 @@ void a2r10g10b10Unpack(Int4 &value, Vector4s &result)
result.w |= As<Short4>(As<UShort4>(result.w) >> 2); result.w |= As<Short4>(As<UShort4>(result.w) >> 2);
result.w |= As<Short4>(As<UShort4>(result.w) >> 4); result.w |= As<Short4>(As<UShort4>(result.w) >> 4);
result.w |= As<Short4>(As<UShort4>(result.w) >> 8); result.w |= As<Short4>(As<UShort4>(result.w) >> 8);
return result;
} }
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints) rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
......
...@@ -206,10 +206,10 @@ void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) ...@@ -206,10 +206,10 @@ void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
sw::SIMD::UInt halfToFloatBits(sw::SIMD::UInt halfBits); sw::SIMD::UInt halfToFloatBits(sw::SIMD::UInt halfBits);
sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits); sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits);
sw::SIMD::Float r11g11b10Unpack(UInt r11g11b10bits); Float4 r11g11b10Unpack(UInt r11g11b10bits);
UInt r11g11b10Pack(sw::SIMD::Float &value); UInt r11g11b10Pack(const Float4 &value);
void a2b10g10r10Unpack(Int4 &value, Vector4s &result); Vector4s a2b10g10r10Unpack(const Int4 &value);
void a2r10g10b10Unpack(Int4 &value, Vector4s &result); Vector4s a2r10g10b10Unpack(const Int4 &value);
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints); rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment