Commit 9e013d46 by Nicolas Capens Committed by Nicolas Capens

Assume SSE2 support is available.

Chrome and many other products require SSE2 support as a minimum. Note that MMX checks are left in place for now. Dead code paths are removed.

Bug swiftshader:78
Change-Id: Iabd8b1dc2092949d5dba29a78e75d014e808f12c
Reviewed-on: https://swiftshader-review.googlesource.com/11068
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent fbf2bc53
...@@ -2553,44 +2553,28 @@ namespace sw ...@@ -2553,44 +2553,28 @@ namespace sw
{ {
Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
#if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd) Value *packed;
Constant *pack[8];
pack[0] = Nucleus::createConstantInt(0);
pack[1] = Nucleus::createConstantInt(2);
pack[2] = Nucleus::createConstantInt(4);
pack[3] = Nucleus::createConstantInt(6);
Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
#else
Value *packed;
// FIXME: Use Swizzle<Short8> // FIXME: Use Swizzle<Short8>
if(!CPUID::supportsSSSE3()) if(!CPUID::supportsSSSE3())
{ {
int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7}; int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6}; int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw); Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw); Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType()); Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
packed = createSwizzle4(int4, 0x88); packed = createSwizzle4(int4, 0x88);
} }
else else
{ {
int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}; int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType()); Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
packed = Nucleus::createShuffleVector(byte16, byte16, pshufb); packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
} }
#if 0 // FIXME: No optimal instruction selection Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
Value *qword2 = Nucleus::createBitCast(packed, T(llvm::VectorType::get(T(Long::getType()), 2))); Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
Value *element = Nucleus::createExtractElement(qword2, 0);
Value *short4 = Nucleus::createBitCast(element, Short4::getType());
#else // FIXME: Requires SSE
Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
#endif
#endif
storeValue(short4); storeValue(short4);
} }
...@@ -4551,22 +4535,15 @@ namespace sw ...@@ -4551,22 +4535,15 @@ namespace sw
RValue<Int> Extract(RValue<Int2> val, int i) RValue<Int> Extract(RValue<Int2> val, int i)
{ {
if(false) // FIXME: LLVM does not generate optimal code if(i == 0)
{ {
return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i)); return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(llvm::VectorType::get(T(Int::getType()), 2))), Int::getType(), 0));
} }
else else
{ {
if(i == 0) Int2 val2 = As<Int2>(UnpackHigh(val, val));
{
return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(llvm::VectorType::get(T(Int::getType()), 2))), Int::getType(), 0));
}
else
{
Int2 val2 = As<Int2>(UnpackHigh(val, val));
return Extract(val2, 0); return Extract(val2, 0);
}
} }
} }
...@@ -5823,60 +5800,16 @@ namespace sw ...@@ -5823,60 +5800,16 @@ namespace sw
Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this) Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
{ {
#if 0 Value *a = Int4(cast).loadValue();
Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
#elif 0
Value *vector = loadValue();
Value *i8x = Nucleus::createExtractElement(cast.value, 0);
Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
Value *x = Nucleus::createInsertElement(vector, f32x, 0);
Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
#else
Value *a = Int4(cast).loadValue();
Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
#endif
storeValue(xyzw); storeValue(xyzw);
} }
Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this) Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
{ {
#if 0 Value *a = Int4(cast).loadValue();
Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
#elif 0
Value *vector = loadValue();
Value *i8x = Nucleus::createExtractElement(cast.value, 0);
Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
Value *x = Nucleus::createInsertElement(vector, f32x, 0);
Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
#else
Value *a = Int4(cast).loadValue();
Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
#endif
storeValue(xyzw); storeValue(xyzw);
} }
...@@ -6403,19 +6336,9 @@ namespace sw ...@@ -6403,19 +6336,9 @@ namespace sw
RValue<Int4> cvtps2dq(RValue<Float4> val) RValue<Int4> cvtps2dq(RValue<Float4> val)
{ {
if(CPUID::supportsSSE2()) llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
{
llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value))); return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
}
else
{
Int2 lo = x86::cvtps2pi(val);
Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
return Int4(lo, hi);
}
} }
RValue<Float> rcpss(RValue<Float> val) RValue<Float> rcpss(RValue<Float> val)
...@@ -6868,25 +6791,9 @@ namespace sw ...@@ -6868,25 +6791,9 @@ namespace sw
RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y) RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
{ {
if(CPUID::supportsSSE2()) llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
{
llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
}
else
{
Int2 loX = Int2(x);
Int2 hiX = Int2(Swizzle(x, 0xEE));
Int2 loY = Int2(y); return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
Int2 hiY = Int2(Swizzle(y, 0xEE));
Short4 lo = x86::packssdw(loX, hiX);
Short4 hi = x86::packssdw(loY, hiY);
return Short8(lo, hi);
}
} }
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y) RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
...@@ -6971,22 +6878,9 @@ namespace sw ...@@ -6971,22 +6878,9 @@ namespace sw
RValue<Int4> pslld(RValue<Int4> x, unsigned char y) RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
{ {
if(CPUID::supportsSSE2()) llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
{
llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
}
else
{
Int2 lo = Int2(x);
Int2 hi = Int2(Swizzle(x, 0xEE));
lo = x86::pslld(lo, y); return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
hi = x86::pslld(hi, y);
return Int4(lo, hi);
}
} }
RValue<Int2> psrad(RValue<Int2> x, unsigned char y) RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
...@@ -6998,22 +6892,9 @@ namespace sw ...@@ -6998,22 +6892,9 @@ namespace sw
RValue<Int4> psrad(RValue<Int4> x, unsigned char y) RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
{ {
if(CPUID::supportsSSE2()) llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
{
llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
}
else
{
Int2 lo = Int2(x);
Int2 hi = Int2(Swizzle(x, 0xEE));
lo = x86::psrad(lo, y);
hi = x86::psrad(hi, y);
return Int4(lo, hi); return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
}
} }
RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y) RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
...@@ -7025,22 +6906,9 @@ namespace sw ...@@ -7025,22 +6906,9 @@ namespace sw
RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y) RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
{ {
if(CPUID::supportsSSE2()) llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
{
llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
}
else
{
UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
lo = x86::psrld(lo, y); return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
hi = x86::psrld(hi, y);
return UInt4(lo, hi);
}
} }
RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y) RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment