Commit 01a9796a by Nicolas Capens Committed by Nicolas Capens

Emulate 64-bit vectors using 128-bit vectors.

This effectively eliminates all use of MMX instructions, using SSE ones instead. Bug swiftshader:78 Change-Id: I3e519a0c5172688ff711654bf430a9d1dd058d0b Reviewed-on: https://swiftshader-review.googlesource.com/11049Tested-by: 's avatarNicolas Capens <nicolascapens@google.com> Reviewed-by: 's avatarAlexis Hétu <sugoi@google.com> Reviewed-by: 's avatarNicolas Capens <nicolascapens@google.com>
parent 9e013d46
...@@ -95,17 +95,18 @@ namespace sw ...@@ -95,17 +95,18 @@ namespace sw
llvm::Type *T(Type *t) llvm::Type *T(Type *t)
{ {
std::uintptr_t type = reinterpret_cast<std::uintptr_t>(t); uintptr_t type = reinterpret_cast<uintptr_t>(t);
if(type < EmulatedTypeCount) if(type < EmulatedTypeCount)
{ {
// Use 128-bit vectors to implement logically shorter ones.
switch(type) switch(type)
{ {
case Type_v2i32: return llvm::Type::getX86_MMXTy(*::context); case Type_v2i32: return T(Int4::getType());
case Type_v4i16: return llvm::Type::getX86_MMXTy(*::context); case Type_v4i16: return T(Short8::getType());
case Type_v2i16: return llvm::Type::getInt32Ty(*::context); case Type_v2i16: return T(Short8::getType());
case Type_v8i8: return llvm::Type::getX86_MMXTy(*::context); case Type_v8i8: return T(Byte16::getType());
case Type_v4i8: return llvm::Type::getInt32Ty(*::context); case Type_v4i8: return T(Byte16::getType());
case Type_v2f32: return llvm::VectorType::get(T(Float::getType()), 2); case Type_v2f32: return T(Float4::getType());
default: assert(false); default: assert(false);
} }
} }
...@@ -138,6 +139,26 @@ namespace sw ...@@ -138,6 +139,26 @@ namespace sw
return reinterpret_cast<BasicBlock*>(t); return reinterpret_cast<BasicBlock*>(t);
} }
static size_t typeSize(Type *type)
{
uintptr_t t = reinterpret_cast<uintptr_t>(type);
if(t < EmulatedTypeCount)
{
switch(t)
{
case Type_v2i32: return 8;
case Type_v4i16: return 8;
case Type_v2i16: return 4;
case Type_v8i8: return 8;
case Type_v4i8: return 4;
case Type_v2f32: return 8;
default: assert(false);
}
}
return T(type)->getPrimitiveSizeInBits() / 8;
}
Nucleus::Nucleus() Nucleus::Nucleus()
{ {
::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
...@@ -347,15 +368,11 @@ namespace sw ...@@ -347,15 +368,11 @@ namespace sw
void Nucleus::createRetVoid() void Nucleus::createRetVoid()
{ {
x86::emms();
::builder->CreateRetVoid(); ::builder->CreateRetVoid();
} }
void Nucleus::createRet(Value *v) void Nucleus::createRet(Value *v)
{ {
x86::emms();
::builder->CreateRet(v); ::builder->CreateRet(v);
} }
...@@ -474,28 +491,91 @@ namespace sw ...@@ -474,28 +491,91 @@ namespace sw
return V(::builder->CreateNot(v)); return V(::builder->CreateNot(v));
} }
Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align) Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
{ {
uintptr_t t = reinterpret_cast<uintptr_t>(type);
if(t < EmulatedTypeCount)
{
switch(t)
{
case Type_v2i32:
case Type_v4i16:
case Type_v8i8:
case Type_v2f32:
return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type)));
case Type_v2i16:
case Type_v4i8:
if(alignment != 0) // Not a local variable (all vectors are 128-bit).
{
Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment));
i = createZExt(i, Long::getType());
Value *v = V(createInsertElement(u, i, 0));
return createBitCast(v, T(T(type)));
}
break;
default:
assert(false);
}
}
assert(ptr->getType()->getContainedType(0) == T(type)); assert(ptr->getType()->getContainedType(0) == T(type));
return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, align))); return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment)));
} }
Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align) Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
{ {
uintptr_t t = reinterpret_cast<uintptr_t>(type);
if(t < EmulatedTypeCount)
{
switch(t)
{
case Type_v2i32:
case Type_v4i16:
case Type_v8i8:
case Type_v2f32:
createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment);
return value;
case Type_v2i16:
case Type_v4i8:
if(alignment != 0) // Not a local variable (all vectors are 128-bit).
{
createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
return value;
}
break;
default:
assert(false);
}
}
assert(ptr->getType()->getContainedType(0) == T(type)); assert(ptr->getType()->getContainedType(0) == T(type));
::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, align)); ::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment));
return value; return value;
} }
Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex) Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
{ {
if(unsignedIndex && sizeof(void*) == 8) if(sizeof(void*) == 8)
{
if(unsignedIndex)
{
index = createZExt(index, Long::getType());
}
else
{
index = createSExt(index, Long::getType());
}
index = createMul(index, createConstantLong((int64_t)typeSize(type)));
}
else
{ {
index = createZExt(index, Long::getType()); index = createMul(index, createConstantInt((int)typeSize(type)));
} }
assert(ptr->getType()->getContainedType(0) == T(type)); assert(ptr->getType()->getContainedType(0) == T(type));
return V(::builder->CreateGEP(ptr, index)); return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0)));
} }
Value *Nucleus::createAtomicAdd(Value *ptr, Value *value) Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
...@@ -540,6 +620,24 @@ namespace sw ...@@ -540,6 +620,24 @@ namespace sw
Value *Nucleus::createBitCast(Value *v, Type *destType) Value *Nucleus::createBitCast(Value *v, Type *destType)
{ {
// Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
// support for casting between scalars and wide vectors. Emulate them by writing to the stack and
// reading back as the destination type.
if(!v->getType()->isVectorTy() && T(destType)->isVectorTy())
{
Value *readAddress = allocateStackVariable(destType);
Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0)));
createStore(v, writeAddress, T(v->getType()));
return createLoad(readAddress, destType);
}
else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy())
{
Value *writeAddress = allocateStackVariable(T(v->getType()));
createStore(v, writeAddress, T(v->getType()));
Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
return createLoad(readAddress, destType);
}
return V(::builder->CreateBitCast(v, T(destType))); return V(::builder->CreateBitCast(v, T(destType)));
} }
...@@ -839,17 +937,6 @@ namespace sw ...@@ -839,17 +937,6 @@ namespace sw
return T(llvm::Type::getVoidTy(*::context)); return T(llvm::Type::getVoidTy(*::context));
} }
class MMX : public LValue<MMX>
{
public:
static Type *getType();
};
Type *MMX::getType()
{
return T(llvm::Type::getX86_MMXTy(*::context));
}
Bool::Bool(Argument<Bool> argument) Bool::Bool(Argument<Bool> argument)
{ {
storeValue(argument.value); storeValue(argument.value);
...@@ -1927,7 +2014,7 @@ namespace sw ...@@ -1927,7 +2014,7 @@ namespace sw
Byte4::Byte4(RValue<Byte8> cast) Byte4::Byte4(RValue<Byte8> cast)
{ {
storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType())); storeValue(Nucleus::createBitCast(cast.value, getType()));
} }
Byte4::Byte4(const Reference<Byte4> &rhs) Byte4::Byte4(const Reference<Byte4> &rhs)
...@@ -1949,9 +2036,7 @@ namespace sw ...@@ -1949,9 +2036,7 @@ namespace sw
Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{ {
int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(Byte::getType()), 8)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
Byte8::Byte8(RValue<Byte8> rhs) Byte8::Byte8(RValue<Byte8> rhs)
...@@ -1996,26 +2081,12 @@ namespace sw ...@@ -1996,26 +2081,12 @@ namespace sw
RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs) RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
{
return x86::paddb(lhs, rhs);
}
else
{
return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
}
} }
RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs) RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
{
return x86::psubb(lhs, rhs);
}
else
{
return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
}
} }
// RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs) // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
...@@ -2035,38 +2106,17 @@ namespace sw ...@@ -2035,38 +2106,17 @@ namespace sw
RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs) RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
{
return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
}
} }
RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs) RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
{
return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
}
} }
RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs) RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
{
return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
}
} }
// RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs) // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
...@@ -2141,14 +2191,7 @@ namespace sw ...@@ -2141,14 +2191,7 @@ namespace sw
RValue<Byte8> operator~(RValue<Byte8> val) RValue<Byte8> operator~(RValue<Byte8> val)
{ {
if(CPUID::supportsMMX2()) return RValue<Byte8>(Nucleus::createNot(val.value));
{
return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
}
else
{
return RValue<Byte8>(Nucleus::createNot(val.value));
}
} }
RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
...@@ -2163,48 +2206,26 @@ namespace sw ...@@ -2163,48 +2206,26 @@ namespace sw
RValue<Short4> Unpack(RValue<Byte4> x) RValue<Short4> Unpack(RValue<Byte4> x)
{ {
Value *int2 = Nucleus::createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Int::getType()), 2))), x.value, 0); int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8
Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType()); return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
} }
RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y) RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
{ {
Value *xx = Nucleus::createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Int::getType()), 2))), x.value, 0); return UnpackLow(As<Byte8>(x), As<Byte8>(y));
Value *yy = Nucleus::createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Int::getType()), 2))), y.value, 0);
return UnpackLow(As<Byte8>(xx), As<Byte8>(yy));
} }
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
{ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return x86::punpcklbw(x, y);
}
else
{
int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
}
} }
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
{ auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return x86::punpckhbw(x, y); return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
else
{
int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
}
} }
RValue<Int> SignMask(RValue<Byte8> x) RValue<Int> SignMask(RValue<Byte8> x)
...@@ -2230,7 +2251,7 @@ namespace sw ...@@ -2230,7 +2251,7 @@ namespace sw
SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{ {
int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(SByte::getType()), 8)))); Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType())); storeValue(Nucleus::createBitCast(vector, getType()));
} }
...@@ -2277,26 +2298,12 @@ namespace sw ...@@ -2277,26 +2298,12 @@ namespace sw
RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs) RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
{
return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
}
else
{
return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
}
} }
RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs) RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
{
return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
}
else
{
return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
}
} }
// RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs) // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
...@@ -2401,14 +2408,7 @@ namespace sw ...@@ -2401,14 +2408,7 @@ namespace sw
RValue<SByte8> operator~(RValue<SByte8> val) RValue<SByte8> operator~(RValue<SByte8> val)
{ {
if(CPUID::supportsMMX2()) return RValue<SByte8>(Nucleus::createNot(val.value));
{
return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
}
else
{
return RValue<SByte8>(Nucleus::createNot(val.value));
}
} }
RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y) RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
...@@ -2423,32 +2423,15 @@ namespace sw ...@@ -2423,32 +2423,15 @@ namespace sw
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
{ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
}
else
{
int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
}
} }
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
{ auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y))); return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
else
{
int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
}
} }
RValue<Int> SignMask(RValue<SByte8> x) RValue<Int> SignMask(RValue<SByte8> x)
...@@ -2523,7 +2506,7 @@ namespace sw ...@@ -2523,7 +2506,7 @@ namespace sw
Short2::Short2(RValue<Short4> cast) Short2::Short2(RValue<Short4> cast)
{ {
storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType())); storeValue(Nucleus::createBitCast(cast.value, getType()));
} }
Type *Short2::getType() Type *Short2::getType()
...@@ -2533,7 +2516,7 @@ namespace sw ...@@ -2533,7 +2516,7 @@ namespace sw
UShort2::UShort2(RValue<UShort4> cast) UShort2::UShort2(RValue<UShort4> cast)
{ {
storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType())); storeValue(Nucleus::createBitCast(cast.value, getType()));
} }
Type *UShort2::getType() Type *UShort2::getType()
...@@ -2543,38 +2526,21 @@ namespace sw ...@@ -2543,38 +2526,21 @@ namespace sw
Short4::Short4(RValue<Int> cast) Short4::Short4(RValue<Int> cast)
{ {
Value *extend = Nucleus::createZExt(cast.value, Long::getType()); Value *vector = loadValue();
Value *swizzle = Swizzle(As<Short4>(extend), 0x00).value; Value *element = Nucleus::createTrunc(cast.value, Short::getType());
Value *insert = Nucleus::createInsertElement(vector, element, 0);
Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
storeValue(swizzle); storeValue(swizzle);
} }
Short4::Short4(RValue<Int4> cast) Short4::Short4(RValue<Int4> cast)
{ {
int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
Value *packed; Value *packed = Nucleus::createShuffleVector(short8, short8, select);
Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
// FIXME: Use Swizzle<Short8>
if(!CPUID::supportsSSSE3())
{
int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
packed = createSwizzle4(int4, 0x88);
}
else
{
int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
}
Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
storeValue(short4); storeValue(short4);
} }
...@@ -2594,17 +2560,13 @@ namespace sw ...@@ -2594,17 +2560,13 @@ namespace sw
Short4::Short4(short xyzw) Short4::Short4(short xyzw)
{ {
int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(Short::getType()), 4)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
Short4::Short4(short x, short y, short z, short w) Short4::Short4(short x, short y, short z, short w)
{ {
int64_t constantVector[4] = {x, y, z, w}; int64_t constantVector[4] = {x, y, z, w};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(Short::getType()), 4)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
Short4::Short4(RValue<Short4> rhs) Short4::Short4(RValue<Short4> rhs)
...@@ -2687,38 +2649,17 @@ namespace sw ...@@ -2687,38 +2649,17 @@ namespace sw
RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs) RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
{
return x86::paddw(lhs, rhs);
}
else
{
return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
}
} }
RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs) RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
{
return x86::psubw(lhs, rhs);
}
else
{
return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
}
} }
RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs) RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
{
return x86::pmullw(lhs, rhs);
}
else
{
return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
}
} }
// RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs) // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
...@@ -2733,38 +2674,17 @@ namespace sw ...@@ -2733,38 +2674,17 @@ namespace sw
RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs) RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
{
return x86::pand(lhs, rhs);
}
else
{
return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
}
} }
RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs) RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
{
return x86::por(lhs, rhs);
}
else
{
return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
}
} }
RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs) RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
{
return x86::pxor(lhs, rhs);
}
else
{
return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
}
} }
RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs) RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
...@@ -2838,34 +2758,18 @@ namespace sw ...@@ -2838,34 +2758,18 @@ namespace sw
RValue<Short4> operator-(RValue<Short4> val) RValue<Short4> operator-(RValue<Short4> val)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createNeg(val.value));
{
return Short4(0, 0, 0, 0) - val;
}
else
{
return RValue<Short4>(Nucleus::createNeg(val.value));
}
} }
RValue<Short4> operator~(RValue<Short4> val) RValue<Short4> operator~(RValue<Short4> val)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createNot(val.value));
{
return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
}
else
{
return RValue<Short4>(Nucleus::createNot(val.value));
}
} }
RValue<Short4> RoundShort4(RValue<Float4> cast) RValue<Short4> RoundShort4(RValue<Float4> cast)
{ {
RValue<Int4> v4i32 = x86::cvtps2dq(cast); RValue<Int4> int4 = RoundInt(cast);
RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32); return As<Short4>(Pack(int4, int4));
return As<Short4>(Int2(As<Int4>(v8i16)));
} }
RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
...@@ -2900,73 +2804,50 @@ namespace sw ...@@ -2900,73 +2804,50 @@ namespace sw
RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y) RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
{ {
return x86::packsswb(x, y); auto result = x86::packsswb(x, y);
return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
} }
RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
{ return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return x86::punpcklwd(x, y);
}
else
{
int shuffle[4] = {0, 4, 1, 5};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
}
} }
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
{ auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return x86::punpckhwd(x, y); return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
else
{
int shuffle[4] = {2, 6, 3, 7};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
}
} }
RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select) RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
{ {
if(CPUID::supportsMMX2()) // Real type is v8i16
int shuffle[8] =
{ {
return x86::pshufw(x, select); (select >> 0) & 0x03,
} (select >> 2) & 0x03,
else (select >> 4) & 0x03,
{ (select >> 6) & 0x03,
return RValue<Short4>(createSwizzle4(x.value, select)); (select >> 0) & 0x03,
} (select >> 2) & 0x03,
(select >> 4) & 0x03,
(select >> 6) & 0x03,
};
return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
} }
RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i) RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
{ {
if(CPUID::supportsMMX2()) return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
{
return x86::pinsrw(val, Int(element), i);
}
else
{
return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
}
} }
RValue<Short> Extract(RValue<Short4> val, int i) RValue<Short> Extract(RValue<Short4> val, int i)
{ {
if(CPUID::supportsMMX2()) return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
{
return Short(x86::pextrw(val, i));
}
else
{
return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
}
} }
RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y) RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
...@@ -2991,50 +2872,34 @@ namespace sw ...@@ -2991,50 +2872,34 @@ namespace sw
UShort4::UShort4(RValue<Float4> cast, bool saturate) UShort4::UShort4(RValue<Float4> cast, bool saturate)
{ {
Float4 sat;
if(saturate) if(saturate)
{ {
if(CPUID::supportsSSE4_1()) if(CPUID::supportsSSE4_1())
{ {
sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
*this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
} }
else else
{ {
sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000)); *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
} }
} }
else else
{ {
sat = cast; *this = Short4(Int4(cast));
}
Int4 int4(sat);
if(!saturate || !CPUID::supportsSSE4_1())
{
*this = Short4(int4);
}
else
{
*this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4))));
} }
} }
UShort4::UShort4(unsigned short xyzw) UShort4::UShort4(unsigned short xyzw)
{ {
int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(UShort::getType()), 4)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
{ {
int64_t constantVector[4] = {x, y, z, w}; int64_t constantVector[4] = {x, y, z, w};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(UShort::getType()), 4)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
UShort4::UShort4(RValue<UShort4> rhs) UShort4::UShort4(RValue<UShort4> rhs)
...@@ -3119,74 +2984,32 @@ namespace sw ...@@ -3119,74 +2984,32 @@ namespace sw
RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs) RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
{
return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
}
} }
RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs) RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
{
return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
}
} }
RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs) RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
{
return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
}
} }
RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs) RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
{
return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
}
} }
RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs) RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
{
return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
}
} }
RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs) RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
{
return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
}
} }
RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs) RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
...@@ -3215,14 +3038,7 @@ namespace sw ...@@ -3215,14 +3038,7 @@ namespace sw
RValue<UShort4> operator~(RValue<UShort4> val) RValue<UShort4> operator~(RValue<UShort4> val)
{ {
if(CPUID::supportsMMX2()) return RValue<UShort4>(Nucleus::createNot(val.value));
{
return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
}
else
{
return RValue<UShort4>(Nucleus::createNot(val.value));
}
} }
RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
...@@ -3257,7 +3073,9 @@ namespace sw ...@@ -3257,7 +3073,9 @@ namespace sw
RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y) RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
{ {
return x86::packuswb(x, y); auto result = x86::packuswb(x, y);
return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
} }
Type *UShort4::getType() Type *UShort4::getType()
...@@ -3290,15 +3108,10 @@ namespace sw ...@@ -3290,15 +3108,10 @@ namespace sw
Short8::Short8(RValue<Short4> lo, RValue<Short4> hi) Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
{ {
Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16
Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
Value *long2 = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
long2 = Nucleus::createInsertElement(long2, loLong, 0);
long2 = Nucleus::createInsertElement(long2, hiLong, 1);
Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
storeValue(short8); storeValue(packed);
} }
RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs) RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
...@@ -3328,15 +3141,8 @@ namespace sw ...@@ -3328,15 +3141,8 @@ namespace sw
RValue<Int4> Abs(RValue<Int4> x) RValue<Int4> Abs(RValue<Int4> x)
{ {
if(CPUID::supportsSSSE3()) auto negative = x >> 31;
{ return (x ^ negative) - negative;
return x86::pabsd(x);
}
else
{
Int4 mask = (x >> 31);
return (mask ^ x) - mask;
}
} }
RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y) RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
...@@ -3374,15 +3180,10 @@ namespace sw ...@@ -3374,15 +3180,10 @@ namespace sw
UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi) UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
{ {
Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16
Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
Value *long2 = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))); storeValue(packed);
long2 = Nucleus::createInsertElement(long2, loLong, 0);
long2 = Nucleus::createInsertElement(long2, hiLong, 1);
Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
storeValue(short8);
} }
RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
...@@ -4263,19 +4064,13 @@ namespace sw ...@@ -4263,19 +4064,13 @@ namespace sw
Int2::Int2(RValue<Int4> cast) Int2::Int2(RValue<Int4> cast)
{ {
Value *long2 = Nucleus::createBitCast(cast.value, T(llvm::VectorType::get(T(Long::getType()), 2))); storeValue(Nucleus::createBitCast(cast.value, getType()));
Value *element = Nucleus::createExtractElement(long2, Long::getType(), 0);
Value *int2 = Nucleus::createBitCast(element, Int2::getType());
storeValue(int2);
} }
Int2::Int2(int x, int y) Int2::Int2(int x, int y)
{ {
int64_t constantVector[2] = {x, y}; int64_t constantVector[2] = {x, y};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(Int::getType()), 2)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
Int2::Int2(RValue<Int2> rhs) Int2::Int2(RValue<Int2> rhs)
...@@ -4297,26 +4092,10 @@ namespace sw ...@@ -4297,26 +4092,10 @@ namespace sw
Int2::Int2(RValue<Int> lo, RValue<Int> hi) Int2::Int2(RValue<Int> lo, RValue<Int> hi)
{ {
if(CPUID::supportsMMX2()) int shuffle[4] = {0, 4, 1, 5};
{ Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
// movd mm0, lo
// movd mm1, hi
// punpckldq mm0, mm1
Value *loLong = Nucleus::createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Int::getType()), 2))), lo.value, 0);
loLong = Nucleus::createInsertElement(loLong, V(llvm::ConstantInt::get(T(Int::getType()), 0)), 1);
Value *hiLong = Nucleus::createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Int::getType()), 2))), hi.value, 0);
hiLong = Nucleus::createInsertElement(hiLong, V(llvm::ConstantInt::get(T(Int::getType()), 0)), 1);
storeValue(As<Int2>(UnpackLow(As<Int2>(loLong), As<Int2>(hiLong))).value);
}
else
{
int shuffle[2] = {0, 1};
Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(llvm::VectorType::get(T(Int::getType()), 1))), Nucleus::createBitCast(hi.value, T(llvm::VectorType::get(T(Int::getType()), 1))), shuffle);
storeValue(Nucleus::createBitCast(packed, Int2::getType())); storeValue(Nucleus::createBitCast(packed, Int2::getType()));
}
} }
RValue<Int2> Int2::operator=(RValue<Int2> rhs) RValue<Int2> Int2::operator=(RValue<Int2> rhs)
...@@ -4344,26 +4123,12 @@ namespace sw ...@@ -4344,26 +4123,12 @@ namespace sw
RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs) RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
{
return x86::paddd(lhs, rhs);
}
else
{
return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
}
} }
RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs) RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
{
return x86::psubd(lhs, rhs);
}
else
{
return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
}
} }
// RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs) // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
...@@ -4383,38 +4148,17 @@ namespace sw ...@@ -4383,38 +4148,17 @@ namespace sw
RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs) RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
{
return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
}
} }
RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs) RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
{
return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
}
} }
RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs) RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
{
return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
}
} }
RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs) RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
...@@ -4493,63 +4237,30 @@ namespace sw ...@@ -4493,63 +4237,30 @@ namespace sw
RValue<Int2> operator~(RValue<Int2> val) RValue<Int2> operator~(RValue<Int2> val)
{ {
if(CPUID::supportsMMX2()) return RValue<Int2>(Nucleus::createNot(val.value));
{
return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
}
else
{
return RValue<Int2>(Nucleus::createNot(val.value));
}
} }
RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y) RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
{ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return x86::punpckldq(x, y);
}
else
{
int shuffle[2] = {0, 2};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return As<Short4>(packed);
}
} }
RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
{ {
if(CPUID::supportsMMX2()) int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
{ auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return x86::punpckhdq(x, y); return As<Short4>(Swizzle(lowHigh, 0xEE));
}
else
{
int shuffle[2] = {1, 3};
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
return As<Short4>(packed);
}
} }
RValue<Int> Extract(RValue<Int2> val, int i) RValue<Int> Extract(RValue<Int2> val, int i)
{ {
if(i == 0) return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
{
return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(llvm::VectorType::get(T(Int::getType()), 2))), Int::getType(), 0));
}
else
{
Int2 val2 = As<Int2>(UnpackHigh(val, val));
return Extract(val2, 0);
}
} }
RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i) RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
{ {
return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(llvm::VectorType::get(T(Int::getType()), 2))), element.value, i), Int2::getType())); return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
} }
Type *Int2::getType() Type *Int2::getType()
...@@ -4560,9 +4271,7 @@ namespace sw ...@@ -4560,9 +4271,7 @@ namespace sw
UInt2::UInt2(unsigned int x, unsigned int y) UInt2::UInt2(unsigned int x, unsigned int y)
{ {
int64_t constantVector[2] = {x, y}; int64_t constantVector[2] = {x, y};
Value *vector = V(Nucleus::createConstantVector(constantVector, T(llvm::VectorType::get(T(UInt::getType()), 2)))); storeValue(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
} }
UInt2::UInt2(RValue<UInt2> rhs) UInt2::UInt2(RValue<UInt2> rhs)
...@@ -4607,26 +4316,12 @@ namespace sw ...@@ -4607,26 +4316,12 @@ namespace sw
RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs) RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
{
return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
}
else
{
return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
}
} }
RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs) RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
{
return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
}
else
{
return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
}
} }
// RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs) // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
...@@ -4646,38 +4341,17 @@ namespace sw ...@@ -4646,38 +4341,17 @@ namespace sw
RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs) RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
{
return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
}
} }
RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs) RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
{
return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
}
} }
RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs) RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
{ {
if(CPUID::supportsMMX2()) return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
{
return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
}
else
{
return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
}
} }
RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs) RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
...@@ -4756,14 +4430,7 @@ namespace sw ...@@ -4756,14 +4430,7 @@ namespace sw
RValue<UInt2> operator~(RValue<UInt2> val) RValue<UInt2> operator~(RValue<UInt2> val)
{ {
if(CPUID::supportsMMX2()) return RValue<UInt2>(Nucleus::createNot(val.value));
{
return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
}
else
{
return RValue<UInt2>(Nucleus::createNot(val.value));
}
} }
Type *UInt2::getType() Type *UInt2::getType()
...@@ -4773,57 +4440,42 @@ namespace sw ...@@ -4773,57 +4440,42 @@ namespace sw
Int4::Int4(RValue<Byte4> cast) Int4::Int4(RValue<Byte4> cast)
{ {
Value *x = Nucleus::createBitCast(cast.value, Int::getType()); if(CPUID::supportsSSE4_1())
Value *a = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Int4::getType()))), x, 0);
Value *e;
if (CPUID::supportsSSE4_1())
{ {
e = x86::pmovzxbd(RValue<Int4>(a)).value; *this = x86::pmovzxbd(As<Byte16>(cast));
} }
else else
{ {
int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Value *b = Nucleus::createBitCast(a, Byte16::getType()); Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle); Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11}; int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Value *d = Nucleus::createBitCast(c, Short8::getType()); Value *c = Nucleus::createBitCast(b, Short8::getType());
e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2); Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
}
Value *f = Nucleus::createBitCast(e, Int4::getType()); *this = As<Int4>(d);
storeValue(f); }
} }
Int4::Int4(RValue<SByte4> cast) Int4::Int4(RValue<SByte4> cast)
{ {
Value *x = Nucleus::createBitCast(cast.value, Int::getType()); if(CPUID::supportsSSE4_1())
Value *a = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Int4::getType()))), x, 0);
Value *g;
if (CPUID::supportsSSE4_1())
{ {
g = x86::pmovsxbd(RValue<Int4>(a)).value; *this = x86::pmovsxbd(As<SByte16>(cast));
} }
else else
{ {
int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
Value *b = Nucleus::createBitCast(a, Byte16::getType()); Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Value *c = Nucleus::createShuffleVector(b, b, swizzle); Value *b = Nucleus::createShuffleVector(a, a, swizzle);
int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3}; int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Value *d = Nucleus::createBitCast(c, Short8::getType()); Value *c = Nucleus::createBitCast(b, Short8::getType());
Value *e = Nucleus::createShuffleVector(d, d, swizzle2); Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Value *f = Nucleus::createBitCast(e, Int4::getType()); *this = As<Int4>(d) >> 24;
// g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
g = x86::psrad(RValue<Int4>(f), 24).value;
} }
storeValue(g);
} }
Int4::Int4(RValue<Float4> cast) Int4::Int4(RValue<Float4> cast)
...@@ -4835,51 +4487,29 @@ namespace sw ...@@ -4835,51 +4487,29 @@ namespace sw
Int4::Int4(RValue<Short4> cast) Int4::Int4(RValue<Short4> cast)
{ {
Value *long2 = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Value *element = Nucleus::createBitCast(cast.value, Long::getType());
long2 = Nucleus::createInsertElement(long2, element, 0);
RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
if(CPUID::supportsSSE4_1()) if(CPUID::supportsSSE4_1())
{ {
storeValue(x86::pmovsxwd(vector).value); *this = x86::pmovsxwd(As<Short8>(cast));
} }
else else
{ {
Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3}; int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Value *c = Nucleus::createShuffleVector(b, b, swizzle); Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
Value *d = Nucleus::createBitCast(c, Int4::getType()); *this = As<Int4>(c) >> 16;
storeValue(d);
// Each Short is packed into each Int in the (Short | Short) format.
// Shifting by 16 will retrieve the original Short value.
// Shifting an Int will propagate the sign bit, which will work
// for both positive and negative values of a Short.
*this >>= 16;
} }
} }
Int4::Int4(RValue<UShort4> cast) Int4::Int4(RValue<UShort4> cast)
{ {
Value *long2 = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Value *element = Nucleus::createBitCast(cast.value, Long::getType());
long2 = Nucleus::createInsertElement(long2, element, 0);
RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
if(CPUID::supportsSSE4_1()) if(CPUID::supportsSSE4_1())
{ {
storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value); *this = x86::pmovzxwd(As<UShort8>(cast));
} }
else else
{ {
Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), swizzle); Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Value *d = Nucleus::createBitCast(c, Int4::getType()); *this = As<Int4>(c);
storeValue(d);
} }
} }
...@@ -4945,15 +4575,10 @@ namespace sw ...@@ -4945,15 +4575,10 @@ namespace sw
Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
{ {
Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32
Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
Value *long2 = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))); storeValue(packed);
long2 = Nucleus::createInsertElement(long2, loLong, 0);
long2 = Nucleus::createInsertElement(long2, hiLong, 1);
Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
storeValue(int4);
} }
Int4::Int4(RValue<Int> rhs) Int4::Int4(RValue<Int> rhs)
...@@ -5306,15 +4931,10 @@ namespace sw ...@@ -5306,15 +4931,10 @@ namespace sw
UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
{ {
Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32
Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
Value *long2 = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
long2 = Nucleus::createInsertElement(long2, loLong, 0);
long2 = Nucleus::createInsertElement(long2, hiLong, 1);
Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
storeValue(uint4); storeValue(packed);
} }
RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
...@@ -5786,11 +5406,7 @@ namespace sw ...@@ -5786,11 +5406,7 @@ namespace sw
Float2::Float2(RValue<Float4> cast) Float2::Float2(RValue<Float4> cast)
{ {
Value *int64x2 = Nucleus::createBitCast(cast.value, T(llvm::VectorType::get(T(Long::getType()), 2))); storeValue(Nucleus::createBitCast(cast.value, getType()));
Value *int64 = Nucleus::createExtractElement(int64x2, Long::getType(), 0);
Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
storeValue(float2);
} }
Type *Float2::getType() Type *Float2::getType()
...@@ -6018,7 +5634,7 @@ namespace sw ...@@ -6018,7 +5634,7 @@ namespace sw
int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType()))); Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType())); return As<Float4>(result);
} }
RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y) RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
...@@ -6055,9 +5671,9 @@ namespace sw ...@@ -6055,9 +5671,9 @@ namespace sw
return x86::sqrtps(x); return x86::sqrtps(x);
} }
RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i) RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
{ {
return RValue<Float4>(Nucleus::createInsertElement(val.value, element.value, i)); return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
} }
RValue<Float> Extract(RValue<Float4> x, int i) RValue<Float> Extract(RValue<Float4> x, int i)
...@@ -6098,10 +5714,10 @@ namespace sw ...@@ -6098,10 +5714,10 @@ namespace sw
RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select) RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
{ {
Value *vector = lhs.loadValue(); Value *vector = lhs.loadValue();
Value *shuffle = createMask4(vector, rhs.value, select); Value *result = createMask4(vector, rhs.value, select);
lhs.storeValue(shuffle); lhs.storeValue(result);
return RValue<Float4>(shuffle); return RValue<Float4>(result);
} }
RValue<Int> SignMask(RValue<Float4> x) RValue<Int> SignMask(RValue<Float4> x)
...@@ -6165,7 +5781,7 @@ namespace sw ...@@ -6165,7 +5781,7 @@ namespace sw
} }
else else
{ {
return Float4(Int4(x)); // Rounded toward zero return Float4(Int4(x));
} }
} }
...@@ -6175,7 +5791,7 @@ namespace sw ...@@ -6175,7 +5791,7 @@ namespace sw
if(CPUID::supportsSSE4_1()) if(CPUID::supportsSSE4_1())
{ {
frc = x - x86::floorps(x); frc = x - Floor(x);
} }
else else
{ {
...@@ -6220,7 +5836,7 @@ namespace sw ...@@ -6220,7 +5836,7 @@ namespace sw
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset) RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
{ {
return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset)), false)); return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
} }
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset) RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
...@@ -6320,20 +5936,6 @@ namespace sw ...@@ -6320,20 +5936,6 @@ namespace sw
return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value))); return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
} }
RValue<Int2> cvtps2pi(RValue<Float4> val)
{
llvm::Function *cvtps2pi = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtps2pi);
return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value)));
}
RValue<Int2> cvttps2pi(RValue<Float4> val)
{
llvm::Function *cvttps2pi = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvttps2pi);
return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value)));
}
RValue<Int4> cvtps2dq(RValue<Float4> val) RValue<Int4> cvtps2dq(RValue<Float4> val)
{ {
llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq); llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
...@@ -6440,103 +6042,6 @@ namespace sw ...@@ -6440,103 +6042,6 @@ namespace sw
return roundps(val, 2); return roundps(val, 2);
} }
RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
{
llvm::Function *cmpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cmp_ps);
return RValue<Float4>(V(::builder->CreateCall3(cmpps, x.value, y.value, V(Nucleus::createConstantByte(imm)))));
}
RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 0);
}
RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 1);
}
RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 2);
}
RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 3);
}
RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 4);
}
RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 5);
}
RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 6);
}
RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
{
return cmpps(x, y, 7);
}
RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
{
llvm::Function *cmpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cmp_ss);
Value *vector1 = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), x.value, 0);
Value *vector2 = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), y.value, 0);
return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(cmpss, vector1, vector2, V(Nucleus::createConstantByte(imm)))), Float::getType(), 0));
}
RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 0);
}
RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 1);
}
RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 2);
}
RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 3);
}
RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 4);
}
RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 5);
}
RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 6);
}
RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
{
return cmpss(x, y, 7);
}
RValue<Int4> pabsd(RValue<Int4> x) RValue<Int4> pabsd(RValue<Int4> x)
{ {
llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128); llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
...@@ -6546,247 +6051,114 @@ namespace sw ...@@ -6546,247 +6051,114 @@ namespace sw
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_padds_w); llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
return As<Short4>(V(::builder->CreateCall2(paddsw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value)));
} }
RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psubs_w); llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
return As<Short4>(V(::builder->CreateCall2(psubsw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value)));
} }
RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_paddus_w); llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
return As<UShort4>(V(::builder->CreateCall2(paddusw, As<MMX>(x).value, As<MMX>(y).value))); return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value)));
} }
RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psubus_w); llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
return As<UShort4>(V(::builder->CreateCall2(psubusw, As<MMX>(x).value, As<MMX>(y).value))); return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value)));
} }
RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y) RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
{ {
llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_padds_b); llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
return As<SByte8>(V(::builder->CreateCall2(paddsb, As<MMX>(x).value, As<MMX>(y).value))); return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value)));
} }
RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y) RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
{ {
llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psubs_b); llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
return As<SByte8>(V(::builder->CreateCall2(psubsb, As<MMX>(x).value, As<MMX>(y).value))); return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value)));
} }
RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y) RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
{ {
llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_paddus_b); llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
return As<Byte8>(V(::builder->CreateCall2(paddusb, As<MMX>(x).value, As<MMX>(y).value))); return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value)));
} }
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y) RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
{ {
llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psubus_b); llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
return As<Byte8>(V(::builder->CreateCall2(psubusb, As<MMX>(x).value, As<MMX>(y).value))); return As<Byte8>(V(::builder->CreateCall2(psubusb, x.value, y.value)));
}
RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *paddw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_padd_w);
return As<Short4>(V(::builder->CreateCall2(paddw, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *psubw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psub_w);
return As<Short4>(V(::builder->CreateCall2(psubw, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *pmullw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmull_w);
return As<Short4>(V(::builder->CreateCall2(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *pand = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pand);
return As<Short4>(V(::builder->CreateCall2(pand, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *por = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_por);
return As<Short4>(V(::builder->CreateCall2(por, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *pxor = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pxor);
return As<Short4>(V(::builder->CreateCall2(pxor, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
{
llvm::Function *pshufw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_pshuf_w);
return As<Short4>(V(::builder->CreateCall2(pshufw, As<MMX>(x).value, V(Nucleus::createConstantByte(y)))));
}
RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *punpcklwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_punpcklwd);
return As<Int2>(V(::builder->CreateCall2(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
{
llvm::Function *punpckhwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_punpckhwd);
return As<Int2>(V(::builder->CreateCall2(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
{
llvm::Function *pinsrw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pinsr_w);
return As<Short4>(V(::builder->CreateCall3(pinsrw, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i)))));
}
RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
{
llvm::Function *pextrw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pextr_w);
return RValue<Int>(V(::builder->CreateCall2(pextrw, As<MMX>(x).value, V(Nucleus::createConstantInt(i)))));
}
RValue<Short4> punpckldq(RValue<Int2> x, RValue<Int2> y)
{
llvm::Function *punpckldq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_punpckldq);
return As<Short4>(V(::builder->CreateCall2(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> punpckhdq(RValue<Int2> x, RValue<Int2> y)
{
llvm::Function *punpckhdq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_punpckhdq);
return As<Short4>(V(::builder->CreateCall2(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
{
llvm::Function *punpcklbw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_punpcklbw);
return As<Short4>(V(::builder->CreateCall2(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
{
llvm::Function *punpckhbw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_punpckhbw);
return As<Short4>(V(::builder->CreateCall2(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
{
llvm::Function *paddb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_padd_b);
return As<Byte8>(V(::builder->CreateCall2(paddb, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
{
llvm::Function *psubb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psub_b);
return As<Byte8>(V(::builder->CreateCall2(psubb, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
{
llvm::Function *paddd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_padd_d);
return As<Int2>(V(::builder->CreateCall2(paddd, As<MMX>(x).value, As<MMX>(y).value)));
}
RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
{
llvm::Function *psubd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psub_d);
return As<Int2>(V(::builder->CreateCall2(psubd, As<MMX>(x).value, As<MMX>(y).value)));
} }
RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pavg_w); llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
return As<UShort4>(V(::builder->CreateCall2(pavgw, As<MMX>(x).value, As<MMX>(y).value))); return As<UShort4>(V(::builder->CreateCall2(pavgw, x.value, y.value)));
} }
RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmaxs_w); llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
return As<Short4>(V(::builder->CreateCall2(pmaxsw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(pmaxsw, x.value, y.value)));
} }
RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmins_w); llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
return As<Short4>(V(::builder->CreateCall2(pminsw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(pminsw, x.value, y.value)));
} }
RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pcmpgt_w); llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
return As<Short4>(V(::builder->CreateCall2(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(pcmpgtw, x.value, y.value)));
} }
RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pcmpeq_w); llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
return As<Short4>(V(::builder->CreateCall2(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(pcmpeqw, x.value, y.value)));
} }
RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y) RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
{ {
llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pcmpgt_b); llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value))); return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, x.value, y.value)));
} }
RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y) RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
{ {
llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pcmpeq_b); llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value))); return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, x.value, y.value)));
} }
RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y) RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
{ {
llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_packssdw); llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
return As<Short4>(V(::builder->CreateCall2(packssdw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
} }
RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y) RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
...@@ -6798,16 +6170,16 @@ namespace sw ...@@ -6798,16 +6170,16 @@ namespace sw
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y) RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_packsswb); llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
return As<SByte8>(V(::builder->CreateCall2(packsswb, As<MMX>(x).value, As<MMX>(y).value))); return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
} }
RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y) RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_packuswb); llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value))); return As<Byte8>(V(::builder->CreateCall2(packuswb, x.value, y.value)));
} }
RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y) RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
...@@ -6829,9 +6201,9 @@ namespace sw ...@@ -6829,9 +6201,9 @@ namespace sw
RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y) RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
{ {
llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psrli_w); llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); return As<UShort4>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
} }
RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y) RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
...@@ -6843,9 +6215,9 @@ namespace sw ...@@ -6843,9 +6215,9 @@ namespace sw
RValue<Short4> psraw(RValue<Short4> x, unsigned char y) RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
{ {
llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psrai_w); llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); return As<Short4>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
} }
RValue<Short8> psraw(RValue<Short8> x, unsigned char y) RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
...@@ -6857,9 +6229,9 @@ namespace sw ...@@ -6857,9 +6229,9 @@ namespace sw
RValue<Short4> psllw(RValue<Short4> x, unsigned char y) RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
{ {
llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pslli_w); llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); return As<Short4>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
} }
RValue<Short8> psllw(RValue<Short8> x, unsigned char y) RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
...@@ -6871,9 +6243,9 @@ namespace sw ...@@ -6871,9 +6243,9 @@ namespace sw
RValue<Int2> pslld(RValue<Int2> x, unsigned char y) RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
{ {
llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pslli_d); llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); return As<Int2>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
} }
RValue<Int4> pslld(RValue<Int4> x, unsigned char y) RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
...@@ -6885,9 +6257,9 @@ namespace sw ...@@ -6885,9 +6257,9 @@ namespace sw
RValue<Int2> psrad(RValue<Int2> x, unsigned char y) RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
{ {
llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psrai_d); llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
return As<Int2>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); return As<Int2>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
} }
RValue<Int4> psrad(RValue<Int4> x, unsigned char y) RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
...@@ -6899,9 +6271,9 @@ namespace sw ...@@ -6899,9 +6271,9 @@ namespace sw
RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y) RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
{ {
llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_psrli_d); llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); return As<UInt2>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
} }
RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y) RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
...@@ -6941,23 +6313,23 @@ namespace sw ...@@ -6941,23 +6313,23 @@ namespace sw
RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmulh_w); llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
return As<Short4>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value))); return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
} }
RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmulhu_w); llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
return As<UShort4>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value))); return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
} }
RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y) RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmadd_wd); llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
return As<Int2>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value))); return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
} }
RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y) RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
...@@ -6990,64 +6362,37 @@ namespace sw ...@@ -6990,64 +6362,37 @@ namespace sw
RValue<Int> pmovmskb(RValue<Byte8> x) RValue<Int> pmovmskb(RValue<Byte8> x)
{ {
llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_pmovmskb); llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value))); return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF;
} }
//RValue<Int2> movd(RValue<Pointer<Int>> x) RValue<Int4> pmovzxbd(RValue<Byte16> x)
//{
// Value *element = Nucleus::createLoad(x.value);
//// Value *int2 = UndefValue::get(Int2::getType());
//// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(T(Int::getType()), 0));
// Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
// return RValue<Int2>(int2);
//}
//RValue<Int2> movdq2q(RValue<Int4> x)
//{
// Value *long2 = Nucleus::createBitCast(x.value, T(llvm::VectorType::get(T(Long::getType()), 2)));
// Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(T(Int::getType()), 0));
// return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
//}
RValue<Int4> pmovzxbd(RValue<Int4> x)
{ {
llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd); llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType())))); return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value)));
} }
RValue<Int4> pmovsxbd(RValue<Int4> x) RValue<Int4> pmovsxbd(RValue<SByte16> x)
{ {
llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd); llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType())))); return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value)));
} }
RValue<Int4> pmovzxwd(RValue<Int4> x) RValue<Int4> pmovzxwd(RValue<UShort8> x)
{ {
llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd); llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType())))); return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value)));
} }
RValue<Int4> pmovsxwd(RValue<Int4> x) RValue<Int4> pmovsxwd(RValue<Short8> x)
{ {
llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd); llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType())))); return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value)));
}
void emms()
{
llvm::Function *emms = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_mmx_emms);
V(::builder->CreateCall(emms));
} }
} }
} }
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef sw_Nucleus_hpp #ifndef sw_Nucleus_hpp
#define sw_Nucleus_hpp #define sw_Nucleus_hpp
#include <cassert>
#include <cstdarg> #include <cstdarg>
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
......
...@@ -22,8 +22,6 @@ namespace sw ...@@ -22,8 +22,6 @@ namespace sw
namespace x86 namespace x86
{ {
RValue<Int> cvtss2si(RValue<Float> val); RValue<Int> cvtss2si(RValue<Float> val);
RValue<Int2> cvtps2pi(RValue<Float4> val);
RValue<Int2> cvttps2pi(RValue<Float4> val);
RValue<Int4> cvtps2dq(RValue<Float4> val); RValue<Int4> cvtps2dq(RValue<Float4> val);
RValue<Float> rcpss(RValue<Float> val); RValue<Float> rcpss(RValue<Float> val);
...@@ -44,26 +42,6 @@ namespace sw ...@@ -44,26 +42,6 @@ namespace sw
RValue<Float4> floorps(RValue<Float4> val); RValue<Float4> floorps(RValue<Float4> val);
RValue<Float4> ceilps(RValue<Float4> val); RValue<Float4> ceilps(RValue<Float4> val);
RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm);
RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y);
RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm);
RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpless(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y);
RValue<Int4> pabsd(RValue<Int4> x); RValue<Int4> pabsd(RValue<Int4> x);
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y); RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y);
...@@ -75,26 +53,6 @@ namespace sw ...@@ -75,26 +53,6 @@ namespace sw
RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y); RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y); RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y);
RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> por(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> pshufw(RValue<Short4> x, unsigned char y);
RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y);
RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i);
RValue<Int> pextrw(RValue<Short4> x, unsigned int i);
RValue<Short4> punpckldq(RValue<Int2> x, RValue<Int2> y);
RValue<Short4> punpckhdq(RValue<Int2> x, RValue<Int2> y);
RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y);
RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y);
RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y);
RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y);
RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y); RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y);
RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y); RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y);
...@@ -141,12 +99,10 @@ namespace sw ...@@ -141,12 +99,10 @@ namespace sw
RValue<Int> movmskps(RValue<Float4> x); RValue<Int> movmskps(RValue<Float4> x);
RValue<Int> pmovmskb(RValue<Byte8> x); RValue<Int> pmovmskb(RValue<Byte8> x);
RValue<Int4> pmovzxbd(RValue<Int4> x); RValue<Int4> pmovzxbd(RValue<Byte16> x);
RValue<Int4> pmovsxbd(RValue<Int4> x); RValue<Int4> pmovsxbd(RValue<SByte16> x);
RValue<Int4> pmovzxwd(RValue<Int4> x); RValue<Int4> pmovzxwd(RValue<UShort8> x);
RValue<Int4> pmovsxwd(RValue<Int4> x); RValue<Int4> pmovsxwd(RValue<Short8> x);
void emms();
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment