Commit 33438a68 by Nicolas Capens Committed by Nicolas Capens

Refactor vector packing.

x86 vector packing instructions always treat the input as having signed integer components, but can perform signed or unsigned saturation on the output. In Reactor the Pack() intrinsic has overloads which differentiate between them based on the signedness of the input, but this is confusing. Also simplify emulation of saturating add/subtract.
Bug b/37496082
Change-Id: I0625fff429ffb40f42baf9600c7760d9858b5d89
Reviewed-on: https://swiftshader-review.googlesource.com/12548
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent e6c3aa23
...@@ -253,10 +253,10 @@ namespace sw ...@@ -253,10 +253,10 @@ namespace sw
case FORMAT_A16B16G16R16: case FORMAT_A16B16G16R16:
For(, x < width - 1, x += 2) For(, x < width - 1, x += 2)
{ {
UShort4 c0 = As<UShort4>(Swizzle(*Pointer<Short4>(s + 0), 0xC6)) >> 8; Short4 c0 = As<UShort4>(Swizzle(*Pointer<Short4>(s + 0), 0xC6)) >> 8;
UShort4 c1 = As<UShort4>(Swizzle(*Pointer<Short4>(s + 8), 0xC6)) >> 8; Short4 c1 = As<UShort4>(Swizzle(*Pointer<Short4>(s + 8), 0xC6)) >> 8;
*Pointer<Int2>(d) = As<Int2>(Pack(c0, c1)); *Pointer<Int2>(d) = As<Int2>(PackUnsigned(c0, c1));
s += 2 * sBytes; s += 2 * sBytes;
d += 2 * dBytes; d += 2 * dBytes;
...@@ -300,9 +300,9 @@ namespace sw ...@@ -300,9 +300,9 @@ namespace sw
break; break;
case FORMAT_A16B16G16R16: case FORMAT_A16B16G16R16:
{ {
UShort4 c = As<UShort4>(Swizzle(*Pointer<Short4>(s), 0xC6)) >> 8; Short4 c = As<UShort4>(Swizzle(*Pointer<Short4>(s), 0xC6)) >> 8;
*Pointer<Int>(d) = Int(As<Int2>(Pack(c, c))); *Pointer<Int>(d) = Int(As<Int2>(PackUnsigned(c, c)));
} }
break; break;
case FORMAT_R5G6B5: case FORMAT_R5G6B5:
...@@ -361,10 +361,10 @@ namespace sw ...@@ -361,10 +361,10 @@ namespace sw
case FORMAT_A16B16G16R16: case FORMAT_A16B16G16R16:
For(, x < width - 1, x += 2) For(, x < width - 1, x += 2)
{ {
UShort4 c0 = *Pointer<UShort4>(s + 0) >> 8; Short4 c0 = *Pointer<UShort4>(s + 0) >> 8;
UShort4 c1 = *Pointer<UShort4>(s + 8) >> 8; Short4 c1 = *Pointer<UShort4>(s + 8) >> 8;
*Pointer<Int2>(d) = As<Int2>(Pack(c0, c1)); *Pointer<Int2>(d) = As<Int2>(PackUnsigned(c0, c1));
s += 2 * sBytes; s += 2 * sBytes;
d += 2 * dBytes; d += 2 * dBytes;
...@@ -408,9 +408,9 @@ namespace sw ...@@ -408,9 +408,9 @@ namespace sw
break; break;
case FORMAT_A16B16G16R16: case FORMAT_A16B16G16R16:
{ {
UShort4 c = *Pointer<UShort4>(s) >> 8; Short4 c = *Pointer<UShort4>(s) >> 8;
*Pointer<Int>(d) = Int(As<Int2>(Pack(c, c))); *Pointer<Int>(d) = Int(As<Int2>(PackUnsigned(c, c)));
} }
break; break;
case FORMAT_R5G6B5: case FORMAT_R5G6B5:
...@@ -503,8 +503,8 @@ namespace sw ...@@ -503,8 +503,8 @@ namespace sw
break; break;
case FORMAT_A16B16G16R16: case FORMAT_A16B16G16R16:
{ {
UShort4 cc = *Pointer<UShort4>(s) >> 8; Short4 cc = *Pointer<UShort4>(s) >> 8;
Int c = Int(As<Int2>(Pack(cc, cc))); Int c = Int(As<Int2>(PackUnsigned(cc, cc)));
*Pointer<Short>(d) = Short((c & 0x00F80000) >> 19 | *Pointer<Short>(d) = Short((c & 0x00F80000) >> 19 |
(c & 0x0000FC00) >> 5 | (c & 0x0000FC00) >> 5 |
...@@ -615,7 +615,7 @@ namespace sw ...@@ -615,7 +615,7 @@ namespace sw
{ {
case FORMAT_X8R8G8B8: case FORMAT_X8R8G8B8:
case FORMAT_A8R8G8B8: case FORMAT_A8R8G8B8:
*Pointer<Byte4>(d) = Byte4(Pack(As<UShort4>(c1), As<UShort4>(c1))); *Pointer<Byte4>(d) = Byte4(PackUnsigned(c1, c1));
break; break;
case FORMAT_X8B8G8R8: case FORMAT_X8B8G8R8:
case FORMAT_A8B8G8R8: case FORMAT_A8B8G8R8:
...@@ -624,12 +624,12 @@ namespace sw ...@@ -624,12 +624,12 @@ namespace sw
{ {
c1 = Swizzle(c1, 0xC6); c1 = Swizzle(c1, 0xC6);
*Pointer<Byte4>(d) = Byte4(Pack(As<UShort4>(c1), As<UShort4>(c1))); *Pointer<Byte4>(d) = Byte4(PackUnsigned(c1, c1));
} }
break; break;
case FORMAT_R8G8B8: case FORMAT_R8G8B8:
{ {
Int c = Int(As<Int2>(Pack(As<UShort4>(c1), As<UShort4>(c1)))); Int c = Int(As<Int2>(PackUnsigned(c1, c1)));
*Pointer<Byte>(d + 0) = Byte(c >> 0); *Pointer<Byte>(d + 0) = Byte(c >> 0);
*Pointer<Byte>(d + 1) = Byte(c >> 8); *Pointer<Byte>(d + 1) = Byte(c >> 8);
...@@ -638,7 +638,7 @@ namespace sw ...@@ -638,7 +638,7 @@ namespace sw
break; break;
case FORMAT_R5G6B5: case FORMAT_R5G6B5:
{ {
Int c = Int(As<Int2>(Pack(As<UShort4>(c1), As<UShort4>(c1)))); Int c = Int(As<Int2>(PackUnsigned(c1, c1)));
*Pointer<Short>(d) = Short((c & 0x00F80000) >> 8 | *Pointer<Short>(d) = Short((c & 0x00F80000) >> 8 |
(c & 0x0000FC00) >> 5 | (c & 0x0000FC00) >> 5 |
......
...@@ -2791,7 +2791,7 @@ namespace sw ...@@ -2791,7 +2791,7 @@ namespace sw
RValue<Short4> RoundShort4(RValue<Float4> cast) RValue<Short4> RoundShort4(RValue<Float4> cast)
{ {
RValue<Int4> int4 = RoundInt(cast); RValue<Int4> int4 = RoundInt(cast);
return As<Short4>(Pack(int4, int4)); return As<Short4>(PackSigned(int4, int4));
} }
RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
...@@ -2824,13 +2824,20 @@ namespace sw ...@@ -2824,13 +2824,20 @@ namespace sw
return x86::pmaddwd(x, y); return x86::pmaddwd(x, y);
} }
RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y) RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
{ {
auto result = x86::packsswb(x, y); auto result = x86::packsswb(x, y);
return As<SByte8>(Swizzle(As<Int4>(result), 0x88)); return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
} }
RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
{
auto result = x86::packuswb(x, y);
return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
}
RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
{ {
int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
...@@ -2899,7 +2906,7 @@ namespace sw ...@@ -2899,7 +2906,7 @@ namespace sw
if(CPUID::supportsSSE4_1()) if(CPUID::supportsSSE4_1())
{ {
Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
*this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4))); *this = As<Short4>(PackUnsigned(int4, int4));
} }
else else
{ {
...@@ -3093,13 +3100,6 @@ namespace sw ...@@ -3093,13 +3100,6 @@ namespace sw
return x86::pavgw(x, y); return x86::pavgw(x, y);
} }
RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
{
auto result = x86::packuswb(x, y);
return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
}
Type *UShort4::getType() Type *UShort4::getType()
{ {
return T(Type_v4i16); return T(Type_v4i16);
...@@ -4846,11 +4846,16 @@ namespace sw ...@@ -4846,11 +4846,16 @@ namespace sw
return x86::cvtps2dq(cast); return x86::cvtps2dq(cast);
} }
RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y) RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
{ {
return x86::packssdw(x, y); return x86::packssdw(x, y);
} }
RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
{
return x86::packusdw(x, y);
}
RValue<Int> Extract(RValue<Int4> x, int i) RValue<Int> Extract(RValue<Int4> x, int i)
{ {
return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i)); return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
...@@ -5180,11 +5185,6 @@ namespace sw ...@@ -5180,11 +5185,6 @@ namespace sw
} }
} }
RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
{
return x86::packusdw(As<Int4>(x), As<Int4>(y));
}
Type *UInt4::getType() Type *UInt4::getType()
{ {
return T(llvm::VectorType::get(T(UInt::getType()), 4)); return T(llvm::VectorType::get(T(UInt::getType()), 4));
...@@ -6205,7 +6205,7 @@ namespace sw ...@@ -6205,7 +6205,7 @@ namespace sw
return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value))); return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
} }
RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y) RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128); llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
......
...@@ -797,7 +797,8 @@ namespace sw ...@@ -797,7 +797,8 @@ namespace sw
RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y); RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y); RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y);
RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y); RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y);
RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y); RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y);
RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y);
RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y); RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y);
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y); RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select); RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select);
...@@ -866,7 +867,6 @@ namespace sw ...@@ -866,7 +867,6 @@ namespace sw
RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y); RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y);
RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y); RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y);
RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y); RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y);
RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y);
class Short8 : public LValue<Short8> class Short8 : public LValue<Short8>
{ {
...@@ -1831,7 +1831,8 @@ namespace sw ...@@ -1831,7 +1831,8 @@ namespace sw
RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y); RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y); RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> RoundInt(RValue<Float4> cast); RValue<Int4> RoundInt(RValue<Float4> cast);
RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y); RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y);
RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y);
RValue<Int> Extract(RValue<Int4> val, int i); RValue<Int> Extract(RValue<Int4> val, int i);
RValue<Int4> Insert(RValue<Int4> val, RValue<Int> element, int i); RValue<Int4> Insert(RValue<Int4> val, RValue<Int> element, int i);
RValue<Int> SignMask(RValue<Int4> x); RValue<Int> SignMask(RValue<Int4> x);
...@@ -1911,7 +1912,6 @@ namespace sw ...@@ -1911,7 +1912,6 @@ namespace sw
RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y); RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y);
RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y); RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y);
// RValue<UInt4> RoundInt(RValue<Float4> cast); // RValue<UInt4> RoundInt(RValue<Float4> cast);
RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y);
class Float : public LValue<Float> class Float : public LValue<Float>
{ {
......
...@@ -66,7 +66,7 @@ namespace sw ...@@ -66,7 +66,7 @@ namespace sw
RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y); RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y);
RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y); RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y);
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y); RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y);
RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y); RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y);
RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y); RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y);
......
...@@ -445,8 +445,8 @@ namespace sw ...@@ -445,8 +445,8 @@ namespace sw
case FORMAT_A8R8G8B8: case FORMAT_A8R8G8B8:
if(writeRGBA) if(writeRGBA)
{ {
UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw)); Short4 c0 = RoundShort4(c.zyxw);
*Pointer<Byte4>(element) = Byte4(Pack(c0, c0)); *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
} }
else else
{ {
...@@ -460,8 +460,8 @@ namespace sw ...@@ -460,8 +460,8 @@ namespace sw
case FORMAT_SRGB8_A8: case FORMAT_SRGB8_A8:
if(writeRGBA) if(writeRGBA)
{ {
UShort4 c0 = As<UShort4>(RoundShort4(c)); Short4 c0 = RoundShort4(c);
*Pointer<Byte4>(element) = Byte4(Pack(c0, c0)); *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
} }
else else
{ {
...@@ -474,8 +474,8 @@ namespace sw ...@@ -474,8 +474,8 @@ namespace sw
case FORMAT_X8R8G8B8: case FORMAT_X8R8G8B8:
if(writeRGBA) if(writeRGBA)
{ {
UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw)) | UShort4(0x0000, 0x0000, 0x0000, 0xFFFFu); Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0xFFFFu);
*Pointer<Byte4>(element) = Byte4(Pack(c0, c0)); *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
} }
else else
{ {
...@@ -489,8 +489,8 @@ namespace sw ...@@ -489,8 +489,8 @@ namespace sw
case FORMAT_SRGB8_X8: case FORMAT_SRGB8_X8:
if(writeRGBA) if(writeRGBA)
{ {
UShort4 c0 = As<UShort4>(RoundShort4(c)) | UShort4(0x0000, 0x0000, 0x0000, 0xFFFFu); Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0xFFFFu);
*Pointer<Byte4>(element) = Byte4(Pack(c0, c0)); *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
} }
else else
{ {
......
...@@ -284,7 +284,7 @@ namespace sw ...@@ -284,7 +284,7 @@ namespace sw
for(unsigned int q = 0; q < state.multiSample; q++) for(unsigned int q = 0; q < state.multiSample; q++)
{ {
Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx); Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx);
cMask[q] = SignMask(Pack(mask, mask)) & 0x0000000F; cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F;
} }
quad(cBuffer, zBuffer, sBuffer, cMask, x, y); quad(cBuffer, zBuffer, sBuffer, cMask, x, y);
......
...@@ -1665,7 +1665,7 @@ namespace sw ...@@ -1665,7 +1665,7 @@ namespace sw
void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src) void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
{ {
Short4 test = src.x | src.y | src.z; Short4 test = src.x | src.y | src.z;
Int kill = SignMask(Pack(test, test)) ^ 0x0000000F; Int kill = SignMask(PackSigned(test, test)) ^ 0x0000000F;
for(unsigned int q = 0; q < state.multiSample; q++) for(unsigned int q = 0; q < state.multiSample; q++)
{ {
......
...@@ -549,29 +549,29 @@ namespace sw ...@@ -549,29 +549,29 @@ namespace sw
break; break;
case ALPHA_EQUAL: case ALPHA_EQUAL:
cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
aMask = SignMask(Pack(cmp, Short4(0x0000))); aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break; break;
case ALPHA_NOTEQUAL: // a != b ~ !(a == b) case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
aMask = SignMask(Pack(cmp, Short4(0x0000))); aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break; break;
case ALPHA_LESS: // a < b ~ b > a case ALPHA_LESS: // a < b ~ b > a
cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha); cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
aMask = SignMask(Pack(cmp, Short4(0x0000))); aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break; break;
case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
cmp |= equal; cmp |= equal;
aMask = SignMask(Pack(cmp, Short4(0x0000))); aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break; break;
case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
aMask = SignMask(Pack(cmp, Short4(0x0000))); aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break; break;
case ALPHA_GREATER: // a > b case ALPHA_GREATER: // a > b
cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
aMask = SignMask(Pack(cmp, Short4(0x0000))); aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break; break;
default: default:
ASSERT(false); ASSERT(false);
...@@ -1452,8 +1452,8 @@ namespace sw ...@@ -1452,8 +1452,8 @@ namespace sw
current.y = As<Short4>(As<UShort4>(current.y) >> 8); current.y = As<Short4>(As<UShort4>(current.y) >> 8);
current.z = As<Short4>(As<UShort4>(current.z) >> 8); current.z = As<Short4>(As<UShort4>(current.z) >> 8);
current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); current.z = As<Short4>(PackUnsigned(current.z, current.x));
current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); current.y = As<Short4>(PackUnsigned(current.y, current.y));
current.x = current.z; current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
...@@ -1469,8 +1469,8 @@ namespace sw ...@@ -1469,8 +1469,8 @@ namespace sw
current.z = As<Short4>(As<UShort4>(current.z) >> 8); current.z = As<Short4>(As<UShort4>(current.z) >> 8);
current.w = As<Short4>(As<UShort4>(current.w) >> 8); current.w = As<Short4>(As<UShort4>(current.w) >> 8);
current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); current.z = As<Short4>(PackUnsigned(current.z, current.x));
current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); current.y = As<Short4>(PackUnsigned(current.y, current.w));
current.x = current.z; current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
...@@ -1490,8 +1490,8 @@ namespace sw ...@@ -1490,8 +1490,8 @@ namespace sw
current.y = As<Short4>(As<UShort4>(current.y) >> 8); current.y = As<Short4>(As<UShort4>(current.y) >> 8);
current.z = As<Short4>(As<UShort4>(current.z) >> 8); current.z = As<Short4>(As<UShort4>(current.z) >> 8);
current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z))); current.z = As<Short4>(PackUnsigned(current.x, current.z));
current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); current.y = As<Short4>(PackUnsigned(current.y, current.y));
current.x = current.z; current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
...@@ -1507,8 +1507,8 @@ namespace sw ...@@ -1507,8 +1507,8 @@ namespace sw
current.z = As<Short4>(As<UShort4>(current.z) >> 8); current.z = As<Short4>(As<UShort4>(current.z) >> 8);
current.w = As<Short4>(As<UShort4>(current.w) >> 8); current.w = As<Short4>(As<UShort4>(current.w) >> 8);
current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z))); current.z = As<Short4>(PackUnsigned(current.x, current.z));
current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); current.y = As<Short4>(PackUnsigned(current.y, current.w));
current.x = current.z; current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
...@@ -1521,17 +1521,17 @@ namespace sw ...@@ -1521,17 +1521,17 @@ namespace sw
case FORMAT_G8R8: case FORMAT_G8R8:
current.x = As<Short4>(As<UShort4>(current.x) >> 8); current.x = As<Short4>(As<UShort4>(current.x) >> 8);
current.y = As<Short4>(As<UShort4>(current.y) >> 8); current.y = As<Short4>(As<UShort4>(current.y) >> 8);
current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x))); current.x = As<Short4>(PackUnsigned(current.x, current.x));
current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); current.y = As<Short4>(PackUnsigned(current.y, current.y));
current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
break; break;
case FORMAT_R8: case FORMAT_R8:
current.x = As<Short4>(As<UShort4>(current.x) >> 8); current.x = As<Short4>(As<UShort4>(current.x) >> 8);
current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x))); current.x = As<Short4>(PackUnsigned(current.x, current.x));
break; break;
case FORMAT_A8: case FORMAT_A8:
current.w = As<Short4>(As<UShort4>(current.w) >> 8); current.w = As<Short4>(As<UShort4>(current.w) >> 8);
current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w))); current.w = As<Short4>(PackUnsigned(current.w, current.w));
break; break;
case FORMAT_G16R16: case FORMAT_G16R16:
current.z = current.x; current.z = current.x;
...@@ -2367,11 +2367,11 @@ namespace sw ...@@ -2367,11 +2367,11 @@ namespace sw
Short4 tmpCol = Short4(As<Int4>(oC.x)); Short4 tmpCol = Short4(As<Int4>(oC.x));
if(state.targetFormat[index] == FORMAT_R8I) if(state.targetFormat[index] == FORMAT_R8I)
{ {
tmpCol = As<Short4>(Pack(tmpCol, tmpCol)); tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
} }
else else
{ {
tmpCol = As<Short4>(Pack(As<UShort4>(tmpCol), As<UShort4>(tmpCol))); tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
} }
packedCol = Extract(As<Int2>(tmpCol), 0); packedCol = Extract(As<Int2>(tmpCol), 0);
...@@ -2466,11 +2466,11 @@ namespace sw ...@@ -2466,11 +2466,11 @@ namespace sw
if(state.targetFormat[index] == FORMAT_G8R8I) if(state.targetFormat[index] == FORMAT_G8R8I)
{ {
packedCol = As<Int2>(Pack(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
} }
else else
{ {
packedCol = As<Int2>(Pack(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)))); packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
} }
UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
...@@ -2604,11 +2604,11 @@ namespace sw ...@@ -2604,11 +2604,11 @@ namespace sw
if(state.targetFormat[index] == FORMAT_A8B8G8R8I) if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
{ {
packedCol = As<UInt2>(Pack(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
} }
else else
{ {
packedCol = As<UInt2>(Pack(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)))); packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
} }
value = *Pointer<UInt2>(buffer, 16); value = *Pointer<UInt2>(buffer, 16);
mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
...@@ -2622,11 +2622,11 @@ namespace sw ...@@ -2622,11 +2622,11 @@ namespace sw
if(state.targetFormat[index] == FORMAT_A8B8G8R8I) if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
{ {
packedCol = As<UInt2>(Pack(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
} }
else else
{ {
packedCol = As<UInt2>(Pack(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)))); packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
} }
value = *Pointer<UInt2>(buffer, 16); value = *Pointer<UInt2>(buffer, 16);
mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
......
...@@ -570,7 +570,7 @@ namespace sw ...@@ -570,7 +570,7 @@ namespace sw
void SamplerCore::border(Short4 &mask, Float4 &coordinates) void SamplerCore::border(Short4 &mask, Float4 &coordinates)
{ {
Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
mask = As<Short4>(Int2(As<Int4>(Pack(border, border)))); mask = As<Short4>(Int2(As<Int4>(PackSigned(border, border))));
} }
void SamplerCore::border(Int4 &mask, Float4 &coordinates) void SamplerCore::border(Int4 &mask, Float4 &coordinates)
...@@ -2271,7 +2271,7 @@ namespace sw ...@@ -2271,7 +2271,7 @@ namespace sw
// Clamp // Clamp
convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000); convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
convert = As<Int4>(Pack(convert, convert)); convert = As<Int4>(PackSigned(convert, convert));
return As<Short4>(Int2(convert)) + Short4(0x8000u); return As<Short4>(Int2(convert)) + Short4(0x8000u);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment