Commit 3e7062b9 by Nicolas Capens

Fix packusdw SSE2 fallback.

Bug swiftshader:20
Change-Id: I81ad267d450713ffe2a5a84e1d7f7f140b515c85
Reviewed-on: https://swiftshader-review.googlesource.com/8454
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
parent a25311ad
...@@ -3017,11 +3017,11 @@ namespace sw ...@@ -3017,11 +3017,11 @@ namespace sw
if(!saturate || !CPUID::supportsSSE4_1()) if(!saturate || !CPUID::supportsSSE4_1())
{ {
*this = Short4(Int4(int4)); *this = Short4(int4);
} }
else else
{ {
*this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4))))); *this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4))));
} }
} }
...@@ -3276,6 +3276,12 @@ namespace sw ...@@ -3276,6 +3276,12 @@ namespace sw
} }
} }
// Single-scalar constructor: fills all eight entries of the constant array
// with the same value c, then stores the resulting constant as this object's value.
Short8::Short8(short c)
{
// Each entry is c converted to int64_t (value-preserving for a signed short).
int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
// The array and this class's getType() are passed to Nucleus::createConstantVector;
// presumably that yields the 8-element constant (helper not visible here).
storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
{ {
int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
...@@ -3354,6 +3360,12 @@ namespace sw ...@@ -3354,6 +3360,12 @@ namespace sw
return T(VectorType::get(Short::getType(), 8)); return T(VectorType::get(Short::getType(), 8));
} }
// Single-scalar constructor: fills all eight entries of the constant array
// with the same value c, then stores the resulting constant as this object's value.
UShort8::UShort8(unsigned short c)
{
// Each entry is c converted to int64_t (value-preserving, so never negative
// for an unsigned short).
int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
// The array and this class's getType() are passed to Nucleus::createConstantVector;
// presumably that yields the 8-element constant (helper not visible here).
storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
{ {
int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
...@@ -5552,7 +5564,7 @@ namespace sw ...@@ -5552,7 +5564,7 @@ namespace sw
RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y) RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
{ {
return x86::packusdw(x, y); // FIXME: Fallback required return x86::packusdw(As<Int4>(x), As<Int4>(y));
} }
Type *UInt4::getType() Type *UInt4::getType()
...@@ -6888,7 +6900,7 @@ namespace sw ...@@ -6888,7 +6900,7 @@ namespace sw
return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value))); return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
} }
RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y) RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
{ {
if(CPUID::supportsSSE4_1()) if(CPUID::supportsSSE4_1())
{ {
...@@ -6898,8 +6910,10 @@ namespace sw ...@@ -6898,8 +6910,10 @@ namespace sw
} }
else else
{ {
// FIXME: Not an exact replacement! RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u)); RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
} }
} }
......
...@@ -870,6 +870,7 @@ namespace sw ...@@ -870,6 +870,7 @@ namespace sw
{ {
public: public:
Short8() = default; Short8() = default;
Short8(short c);
Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7); Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7);
Short8(RValue<Short8> rhs); Short8(RValue<Short8> rhs);
// Short8(const Short8 &rhs); // Short8(const Short8 &rhs);
...@@ -927,6 +928,7 @@ namespace sw ...@@ -927,6 +928,7 @@ namespace sw
{ {
public: public:
UShort8() = default; UShort8() = default;
UShort8(unsigned short c);
UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7); UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7);
UShort8(RValue<UShort8> rhs); UShort8(RValue<UShort8> rhs);
// UShort8(const UShort8 &rhs); // UShort8(const UShort8 &rhs);
......
...@@ -3534,6 +3534,12 @@ namespace sw ...@@ -3534,6 +3534,12 @@ namespace sw
return T(Type_v4i16); return T(Type_v4i16);
} }
// Single-scalar constructor: fills all eight entries of the constant array
// with the same value c, then stores the resulting constant as this object's value.
Short8::Short8(short c)
{
// Each entry is c converted to int64_t (value-preserving for a signed short).
int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
// The array and this class's getType() are passed to Nucleus::createConstantVector;
// presumably that yields the 8-element constant (helper not visible here).
storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
{ {
int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
...@@ -3600,6 +3606,12 @@ namespace sw ...@@ -3600,6 +3606,12 @@ namespace sw
return T(Ice::IceType_v8i16); return T(Ice::IceType_v8i16);
} }
// Single-scalar constructor: fills all eight entries of the constant array
// with the same value c, then stores the resulting constant as this object's value.
UShort8::UShort8(unsigned short c)
{
// Each entry is c converted to int64_t (value-preserving, so never negative
// for an unsigned short).
int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
// The array and this class's getType() are passed to Nucleus::createConstantVector;
// presumably that yields the 8-element constant (helper not visible here).
storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
{ {
int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
......
...@@ -110,7 +110,7 @@ namespace sw ...@@ -110,7 +110,7 @@ namespace sw
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y); RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y);
RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y); RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y);
RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y); RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y);
RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y); RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y);
RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y); RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y);
......
pnacl-subzero @ 4e679e51
Subproject commit 4b1bdae3955769f62e563e826065d10ddfe040ea Subproject commit 4e679e51f645b99940f43c80fec7f146d59eb00f
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment