Commit 3716c206 by Alexis Hetu Committed by Alexis Hétu

Improve 1010102 blend precision

Making sure we extend 1010102 to the full 16 bit range fixes 51 of the 53 failures found in ToT dEQP-VK.*a2b10*. Also added a utility function to OR all elements of an int vector, which allows us to use more vector operations (as opposed to scalar) while improving the readability.
Bug: b/146633956
Change-Id: If8b946c45cf27f5868d7a97166e21dba565ed72f
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39768
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 10a900e5
......@@ -24,6 +24,16 @@
#include <utility>
namespace {
// Collapses the four lanes of |ints| into one scalar integer.
// Lane x is shifted left by shifts[0], y by shifts[1], z by shifts[2] and
// w by shifts[3]; the four shifted values are then OR'ed together.
// No masking is applied here, so any bits that overlap after shifting are
// simply merged by the OR.
rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
{
	rr::RValue<rr::Int> fieldX = rr::Int(ints.x) << shifts[0];
	rr::RValue<rr::Int> fieldY = rr::Int(ints.y) << shifts[1];
	rr::RValue<rr::Int> fieldZ = rr::Int(ints.z) << shifts[2];
	rr::RValue<rr::Int> fieldW = rr::Int(ints.w) << shifts[3];

	return fieldX | fieldY | fieldZ | fieldW;
}
} // namespace
namespace sw {
Blitter::Blitter()
......@@ -825,28 +835,21 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_R5G6B5_UNORM_PACK16:
if(writeR && writeG && writeB)
{
*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
(RoundInt(Float(c.y)) << Int(5)) |
(RoundInt(Float(c.x)) << Int(11)));
*Pointer<UShort>(element) = UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 }));
}
else
{
unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
unsigned short unmask = ~mask;
*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
(UShort(RoundInt(Float(c.z)) |
(RoundInt(Float(c.y)) << Int(5)) |
(RoundInt(Float(c.x)) << Int(11))) &
(UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 })) &
UShort(mask));
}
break;
case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
if(writeRGBA)
{
*Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
(RoundInt(Float(c.z)) << Int(1)) |
(RoundInt(Float(c.y)) << Int(6)) |
(RoundInt(Float(c.x)) << Int(11)));
*Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 }));
}
else
{
......@@ -856,20 +859,14 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
(writeB ? 0x001F : 0x0000);
unsigned short unmask = ~mask;
*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
(UShort(RoundInt(Float(c.w)) |
(RoundInt(Float(c.z)) << Int(1)) |
(RoundInt(Float(c.y)) << Int(6)) |
(RoundInt(Float(c.x)) << Int(11))) &
(UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 })) &
UShort(mask));
}
break;
case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
if(writeRGBA)
{
*Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
(RoundInt(Float(c.x)) << Int(1)) |
(RoundInt(Float(c.y)) << Int(6)) |
(RoundInt(Float(c.z)) << Int(11)));
*Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 }));
}
else
{
......@@ -879,20 +876,14 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
(writeB ? 0x001F : 0x0000);
unsigned short unmask = ~mask;
*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
(UShort(RoundInt(Float(c.w)) |
(RoundInt(Float(c.x)) << Int(1)) |
(RoundInt(Float(c.y)) << Int(6)) |
(RoundInt(Float(c.z)) << Int(11))) &
(UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 })) &
UShort(mask));
}
break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
if(writeRGBA)
{
*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
(RoundInt(Float(c.y)) << Int(5)) |
(RoundInt(Float(c.x)) << Int(10)) |
(RoundInt(Float(c.w)) << Int(15)));
*Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 }));
}
else
{
......@@ -902,10 +893,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
(writeB ? 0x001F : 0x0000);
unsigned short unmask = ~mask;
*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
(UShort(RoundInt(Float(c.z)) |
(RoundInt(Float(c.y)) << Int(5)) |
(RoundInt(Float(c.x)) << Int(10)) |
(RoundInt(Float(c.w)) << Int(15))) &
(UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 })) &
UShort(mask));
}
break;
......@@ -914,10 +902,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
if(writeRGBA)
{
*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
(RoundInt(Float(c.y)) << 10) |
(RoundInt(Float(c.z)) << 20) |
(RoundInt(Float(c.w)) << 30));
*Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 }));
}
else
{
......@@ -927,10 +912,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
(writeR ? 0x000003FF : 0x0000);
unsigned int unmask = ~mask;
*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
(UInt(RoundInt(Float(c.x)) |
(RoundInt(Float(c.y)) << 10) |
(RoundInt(Float(c.z)) << 20) |
(RoundInt(Float(c.w)) << 30)) &
(As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 })) &
UInt(mask));
}
break;
......@@ -939,10 +921,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
if(writeRGBA)
{
*Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) |
(RoundInt(Float(c.y)) << 10) |
(RoundInt(Float(c.x)) << 20) |
(RoundInt(Float(c.w)) << 30));
*Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 }));
}
else
{
......@@ -952,10 +931,7 @@ void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
(writeB ? 0x000003FF : 0x0000);
unsigned int unmask = ~mask;
*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
(UInt(RoundInt(Float(c.z)) |
(RoundInt(Float(c.y)) << 10) |
(RoundInt(Float(c.x)) << 20) |
(RoundInt(Float(c.w)) << 30)) &
(As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 })) &
UInt(mask));
}
break;
......@@ -1133,8 +1109,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
if(writeRGBA)
{
*Pointer<UInt>(element) =
UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30));
*Pointer<UInt>(element) = As<UInt>(PackFields(c, { 0, 10, 20, 30 }));
}
else
{
......@@ -1144,7 +1119,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
(writeR ? 0x000003FF : 0x0000);
unsigned int unmask = ~mask;
*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
(UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
(As<UInt>(PackFields(c, { 0, 10, 20, 30 })) & UInt(mask));
}
break;
case VK_FORMAT_A2R10G10B10_UINT_PACK32:
......@@ -1153,8 +1128,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
if(writeRGBA)
{
*Pointer<UInt>(element) =
UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30));
*Pointer<UInt>(element) = As<UInt>(PackFields(c, { 20, 10, 0, 30 }));
}
else
{
......@@ -1164,7 +1138,7 @@ void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
(writeB ? 0x000003FF : 0x0000);
unsigned int unmask = ~mask;
*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
(UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
(As<UInt>(PackFields(c, { 20, 10, 0, 30 })) & UInt(mask));
}
break;
case VK_FORMAT_B8G8R8A8_UINT:
......
......@@ -1055,10 +1055,7 @@ void PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int
v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
pixel.x = Short4(v << 6) & Short4(0xFFC0u);
pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
pixel.z = Short4(v >> 14) & Short4(0xFFC0u);
pixel.w = Short4(v >> 16) & Short4(0xC000u);
a2b10g10r10Unpack(v, pixel);
}
break;
default:
......
......@@ -1699,16 +1699,7 @@ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
// shift each 10 bit field left 6, and replicate 6 high bits into bottom 6
c.x = Short4(((cc << 6) & Int4(0xFFC0)) | ((cc >> 4) & Int4(0x3F)));
c.y = Short4(((cc >> 4) & Int4(0xFFC0)) | ((cc >> 14) & Int4(0x3F)));
c.z = Short4(((cc >> 14) & Int4(0xFFC0)) | ((cc >> 24) & Int4(0x3F)));
c.w = Short4(((cc >> 16) & Int4(0xC000)));
// replicate 2 bit alpha component all the way down
c.w |= (c.w >> 8) & Short4(0xc0);
c.w |= (c.w >> 4) & Short4(0x0c0c);
c.w |= (c.w >> 2) & Short4(0x3333);
a2b10g10r10Unpack(cc, c);
}
else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
{
......
......@@ -616,6 +616,22 @@ UInt r11g11b10Pack(sw::SIMD::Float &value)
return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1);
}
// Splits each packed 32-bit lane of |value| into four 16-bit channels and
// widens every channel to the full 16-bit range by bit replication.
//
//   bits  0..9  -> result.x
//   bits 10..19 -> result.y
//   bits 20..29 -> result.z
//   bits 30..31 -> result.w
//
// Each field is first placed in the most significant bits of its 16-bit lane
// and then copied downwards with logical (unsigned) right shifts, so that an
// all-ones field becomes 0xFFFF and an all-zeros field stays 0.
void a2b10g10r10Unpack(Int4 &value, Vector4s &result)
{
	// Keeps the upper 10 bits of a 16-bit lane.
	Short4 tenBitMask(0xFFC0u);

	// 10-bit channels: move the field to bits 6..15, then fill bits 0..5
	// with the field's own high bits (shift right by the field width).
	Short4 red = Short4(value << 6) & tenBitMask;
	result.x = red | As<Short4>(As<UShort4>(red) >> 10);

	Short4 green = Short4(value >> 4) & tenBitMask;
	result.y = green | As<Short4>(As<UShort4>(green) >> 10);

	Short4 blue = Short4(value >> 14) & tenBitMask;
	result.z = blue | As<Short4>(As<UShort4>(blue) >> 10);

	// 2-bit channel: starts in bits 14..15 and is doubled three times
	// (2 -> 4 -> 8 -> 16 bits) to cover the whole lane.
	Short4 alpha = Short4(value >> 16) & Short4(0xC000u);
	alpha |= As<Short4>(As<UShort4>(alpha) >> 2);
	alpha |= As<Short4>(As<UShort4>(alpha) >> 4);
	alpha |= As<Short4>(As<UShort4>(alpha) >> 8);
	result.w = alpha;
}
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
{
return rr::SignMask(ints) != 0;
......
......@@ -208,6 +208,7 @@ sw::SIMD::UInt halfToFloatBits(sw::SIMD::UInt halfBits);
sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits);
sw::SIMD::Float r11g11b10Unpack(UInt r11g11b10bits);
UInt r11g11b10Pack(sw::SIMD::Float &value);
void a2b10g10r10Unpack(Int4 &value, Vector4s &result);
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment