Commit 83b8036b by Matt Wala

Lower casting operations that involve vector types.

Impacted instructions:

  bitcast {v4f32, v4i32, v8i16, v16i8} <-> {v4f32, v4i32, v8i16, v16i8}
  bitcast v8i1 <-> i8
  bitcast v16i1 <-> i16
  (There was already code present to handle trivial bitcasts like v16i1 <-> v16i1.)

  [sz]ext v4i1 -> v4i32
  [sz]ext v8i1 -> v8i16
  [sz]ext v16i1 -> v16i8

  trunc v4i32 -> v4i1
  trunc v8i16 -> v8i1
  trunc v16i8 -> v16i1

  [su]itofp v4i32 -> v4f32
  fpto[su]i v4f32 -> v4i32

Where there is a relatively simple lowering to x86 instructions, it has been used. Otherwise a helper call is used.

Some lowerings require materializing an integer vector with 1s in each entry. Since there is no support for vector constant pools, the constant is materialized purely through register operations.

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/383303003
parent e4da26f6
...@@ -39,10 +39,11 @@ const size_t InstX8632BrAttributesSize = ...@@ -39,10 +39,11 @@ const size_t InstX8632BrAttributesSize =
const struct TypeX8632Attributes_ { const struct TypeX8632Attributes_ {
const char *CvtString; // i (integer), s (single FP), d (double FP) const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank> const char *SdSsString; // ss, sd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *WidthString; // {byte,word,dword,qword} ptr const char *WidthString; // {byte,word,dword,qword} ptr
} TypeX8632Attributes[] = { } TypeX8632Attributes[] = {
#define X(tag, cvt, sdss, width) \ #define X(tag, cvt, sdss, pack, width) \
{ cvt, "" sdss, width } \ { cvt, "" sdss, pack, width } \
, ,
ICETYPEX8632_TABLE ICETYPEX8632_TABLE
#undef X #undef X
...@@ -448,8 +449,10 @@ template <> const char *InstX8632Addss::Opcode = "addss"; ...@@ -448,8 +449,10 @@ template <> const char *InstX8632Addss::Opcode = "addss";
template <> const char *InstX8632Sub::Opcode = "sub"; template <> const char *InstX8632Sub::Opcode = "sub";
template <> const char *InstX8632Subps::Opcode = "subps"; template <> const char *InstX8632Subps::Opcode = "subps";
template <> const char *InstX8632Subss::Opcode = "subss"; template <> const char *InstX8632Subss::Opcode = "subss";
template <> const char *InstX8632Psub::Opcode = "psub";
template <> const char *InstX8632Sbb::Opcode = "sbb"; template <> const char *InstX8632Sbb::Opcode = "sbb";
template <> const char *InstX8632And::Opcode = "and"; template <> const char *InstX8632And::Opcode = "and";
template <> const char *InstX8632Pand::Opcode = "pand";
template <> const char *InstX8632Or::Opcode = "or"; template <> const char *InstX8632Or::Opcode = "or";
template <> const char *InstX8632Xor::Opcode = "xor"; template <> const char *InstX8632Xor::Opcode = "xor";
template <> const char *InstX8632Pxor::Opcode = "pxor"; template <> const char *InstX8632Pxor::Opcode = "pxor";
...@@ -461,8 +464,12 @@ template <> const char *InstX8632Divps::Opcode = "divps"; ...@@ -461,8 +464,12 @@ template <> const char *InstX8632Divps::Opcode = "divps";
template <> const char *InstX8632Idiv::Opcode = "idiv"; template <> const char *InstX8632Idiv::Opcode = "idiv";
template <> const char *InstX8632Divss::Opcode = "divss"; template <> const char *InstX8632Divss::Opcode = "divss";
template <> const char *InstX8632Shl::Opcode = "shl"; template <> const char *InstX8632Shl::Opcode = "shl";
template <> const char *InstX8632Psll::Opcode = "psll";
template <> const char *InstX8632Shr::Opcode = "shr"; template <> const char *InstX8632Shr::Opcode = "shr";
template <> const char *InstX8632Sar::Opcode = "sar"; template <> const char *InstX8632Sar::Opcode = "sar";
template <> const char *InstX8632Psra::Opcode = "psra";
template <> const char *InstX8632Pcmpeq::Opcode = "pcmpeq";
template <> const char *InstX8632Pcmpgt::Opcode = "pcmpgt";
template <> void InstX8632Sqrtss::emit(const Cfg *Func) const { template <> void InstX8632Sqrtss::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
...@@ -690,7 +697,7 @@ void InstX8632Cmpxchg8b::dump(const Cfg *Func) const { ...@@ -690,7 +697,7 @@ void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
void InstX8632Cvt::emit(const Cfg *Func) const { void InstX8632Cvt::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
Str << "\tcvts" << TypeX8632Attributes[getSrc(0)->getType()].CvtString << "2s" Str << "\tcvt" << TypeX8632Attributes[getSrc(0)->getType()].CvtString << "2"
<< TypeX8632Attributes[getDest()->getType()].CvtString << "\t"; << TypeX8632Attributes[getDest()->getType()].CvtString << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
...@@ -701,8 +708,8 @@ void InstX8632Cvt::emit(const Cfg *Func) const { ...@@ -701,8 +708,8 @@ void InstX8632Cvt::emit(const Cfg *Func) const {
void InstX8632Cvt::dump(const Cfg *Func) const { void InstX8632Cvt::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func); dumpDest(Func);
Str << " = cvts" << TypeX8632Attributes[getSrc(0)->getType()].CvtString Str << " = cvt" << TypeX8632Attributes[getSrc(0)->getType()].CvtString
<< "2s" << TypeX8632Attributes[getDest()->getType()].CvtString << " "; << "2" << TypeX8632Attributes[getDest()->getType()].CvtString << " ";
dumpSources(Func); dumpSources(Func);
} }
...@@ -1000,6 +1007,20 @@ void InstX8632Fstp::dump(const Cfg *Func) const { ...@@ -1000,6 +1007,20 @@ void InstX8632Fstp::dump(const Cfg *Func) const {
Str << "\n"; Str << "\n";
} }
// Emits this instruction as "pcmpeq" followed by the pack suffix that
// TypeX8632Attributes records for the destination type ("b", "w", "d",
// or empty), using the two-address emission helper.
template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, sizeof(Mnemonic), "pcmpeq%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
// Emits this instruction as "pcmpgt" followed by the pack suffix that
// TypeX8632Attributes records for the destination type ("b", "w", "d",
// or empty), using the two-address emission helper.
template <> void InstX8632Pcmpgt::emit(const Cfg *Func) const {
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, sizeof(Mnemonic), "pcmpgt%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
void InstX8632Pop::emit(const Cfg *Func) const { void InstX8632Pop::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 0); assert(getSrcSize() == 0);
...@@ -1054,6 +1075,31 @@ void InstX8632Push::dump(const Cfg *Func) const { ...@@ -1054,6 +1075,31 @@ void InstX8632Push::dump(const Cfg *Func) const {
dumpSources(Func); dumpSources(Func);
} }
// Emits this instruction as "psll" followed by the destination type's pack
// suffix. Only v8i16 and v4i32 destinations are accepted; any other type
// trips the assertion in debug builds.
template <> void InstX8632Psll::emit(const Cfg *Func) const {
  assert(getDest()->getType() == IceType_v4i32 ||
         getDest()->getType() == IceType_v8i16);
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, sizeof(Mnemonic), "psll%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
// Emits this instruction as "psra" followed by the destination type's pack
// suffix. Only v8i16 and v4i32 destinations are accepted; any other type
// trips the assertion in debug builds.
template <> void InstX8632Psra::emit(const Cfg *Func) const {
  assert(getDest()->getType() == IceType_v4i32 ||
         getDest()->getType() == IceType_v8i16);
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, sizeof(Mnemonic), "psra%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
// Emits this instruction as "psub" followed by the pack suffix that
// TypeX8632Attributes records for the destination type ("b", "w", "d",
// or empty), using the two-address emission helper.
template <> void InstX8632Psub::emit(const Cfg *Func) const {
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, sizeof(Mnemonic), "psub%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
void InstX8632Ret::emit(const Cfg *Func) const { void InstX8632Ret::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Str << "\tret\n"; Str << "\tret\n";
......
...@@ -66,23 +66,23 @@ ...@@ -66,23 +66,23 @@
X(Br_p, "p", "jp") \ X(Br_p, "p", "jp") \
//#define X(tag, dump, emit) //#define X(tag, dump, emit)
#define ICETYPEX8632_TABLE \ #define ICETYPEX8632_TABLE \
/* tag, cvt, sdss, width */ \ /* tag, cvt, sdss, pack, width */ \
X(IceType_void, "?", "" , "???") \ X(IceType_void, "?", "" , "" , "???") \
X(IceType_i1, "i", "" , "byte ptr") \ X(IceType_i1, "si", "" , "" , "byte ptr") \
X(IceType_i8, "i", "" , "byte ptr") \ X(IceType_i8, "si", "" , "" , "byte ptr") \
X(IceType_i16, "i", "" , "word ptr") \ X(IceType_i16, "si", "" , "" , "word ptr") \
X(IceType_i32, "i", "" , "dword ptr") \ X(IceType_i32, "si", "" , "" , "dword ptr") \
X(IceType_i64, "i", "" , "qword ptr") \ X(IceType_i64, "si", "" , "" , "qword ptr") \
X(IceType_f32, "s", "ss", "dword ptr") \ X(IceType_f32, "ss", "ss", "" , "dword ptr") \
X(IceType_f64, "d", "sd", "qword ptr") \ X(IceType_f64, "sd", "sd", "" , "qword ptr") \
X(IceType_v4i1, "?", "" , "xmmword ptr") \ X(IceType_v4i1, "?", "" , "" , "xmmword ptr") \
X(IceType_v8i1, "?", "" , "xmmword ptr") \ X(IceType_v8i1, "?", "" , "" , "xmmword ptr") \
X(IceType_v16i1, "?", "" , "xmmword ptr") \ X(IceType_v16i1, "?", "" , "" , "xmmword ptr") \
X(IceType_v16i8, "?", "" , "xmmword ptr") \ X(IceType_v16i8, "?", "" , "b", "xmmword ptr") \
X(IceType_v8i16, "?", "" , "xmmword ptr") \ X(IceType_v8i16, "?", "" , "w", "xmmword ptr") \
X(IceType_v4i32, "?", "" , "xmmword ptr") \ X(IceType_v4i32, "dq", "" , "d", "xmmword ptr") \
X(IceType_v4f32, "?", "" , "xmmword ptr") \ X(IceType_v4f32, "ps", "" , "", "xmmword ptr") \
//#define X(tag, cvt, sdss, width) //#define X(tag, cvt, sdss, pack, width)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF #endif // SUBZERO_SRC_ICEINSTX8632_DEF
...@@ -168,8 +168,14 @@ public: ...@@ -168,8 +168,14 @@ public:
Mulss, Mulss,
Neg, Neg,
Or, Or,
Pand,
Pcmpeq,
Pcmpgt,
Pop, Pop,
Push, Push,
Psll,
Psra,
Psub,
Pxor, Pxor,
Ret, Ret,
Sar, Sar,
...@@ -453,7 +459,9 @@ typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub; ...@@ -453,7 +459,9 @@ typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub;
typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps; typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps;
typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss; typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss;
typedef InstX8632Binop<InstX8632::Sbb> InstX8632Sbb; typedef InstX8632Binop<InstX8632::Sbb> InstX8632Sbb;
typedef InstX8632Binop<InstX8632::Psub> InstX8632Psub;
typedef InstX8632Binop<InstX8632::And> InstX8632And; typedef InstX8632Binop<InstX8632::And> InstX8632And;
typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand;
typedef InstX8632Binop<InstX8632::Or> InstX8632Or; typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor; typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor; typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor;
...@@ -463,8 +471,12 @@ typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss; ...@@ -463,8 +471,12 @@ typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss;
typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps; typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps;
typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss; typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl; typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
typedef InstX8632Binop<InstX8632::Psll> InstX8632Psll;
typedef InstX8632Binop<InstX8632::Shr, true> InstX8632Shr; typedef InstX8632Binop<InstX8632::Shr, true> InstX8632Shr;
typedef InstX8632Binop<InstX8632::Sar, true> InstX8632Sar; typedef InstX8632Binop<InstX8632::Sar, true> InstX8632Sar;
typedef InstX8632Binop<InstX8632::Psra> InstX8632Psra;
typedef InstX8632Binop<InstX8632::Pcmpeq> InstX8632Pcmpeq;
typedef InstX8632Binop<InstX8632::Pcmpgt> InstX8632Pcmpgt;
typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv; typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
typedef InstX8632Ternop<InstX8632::Div> InstX8632Div; typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
......
...@@ -87,6 +87,8 @@ InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { ...@@ -87,6 +87,8 @@ InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
// The maximum number of arguments to pass in XMM registers // The maximum number of arguments to pass in XMM registers
const unsigned X86_MAX_XMM_ARGS = 4; const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const unsigned X86_CHAR_BIT = 8;
// In some cases, there are x-macros tables for both high-level and // In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value. // low-level instructions/operands that use the same enum key value.
...@@ -157,7 +159,7 @@ void xMacroIntegrityCheck() { ...@@ -157,7 +159,7 @@ void xMacroIntegrityCheck() {
// Define a temporary set of enum values based on low-level // Define a temporary set of enum values based on low-level
// table entries. // table entries.
enum _tmp_enum { enum _tmp_enum {
#define X(tag, cvt, sdss, width) _tmp_##tag, #define X(tag, cvt, sdss, pack, width) _tmp_##tag,
ICETYPEX8632_TABLE ICETYPEX8632_TABLE
#undef X #undef X
_num _num
...@@ -169,7 +171,7 @@ void xMacroIntegrityCheck() { ...@@ -169,7 +171,7 @@ void xMacroIntegrityCheck() {
#undef X #undef X
// Define a set of constants based on low-level table entries, // Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent. // and ensure the table entry keys are consistent.
#define X(tag, cvt, sdss, width) \ #define X(tag, cvt, sdss, pack, width) \
static const int _table2_##tag = _tmp_##tag; \ static const int _table2_##tag = _tmp_##tag; \
STATIC_ASSERT(_table1_##tag == _table2_##tag); STATIC_ASSERT(_table1_##tag == _table2_##tag);
ICETYPEX8632_TABLE; ICETYPEX8632_TABLE;
...@@ -1573,6 +1575,28 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1573,6 +1575,28 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
_mov(T_Hi, T_Lo); _mov(T_Hi, T_Lo);
_sar(T_Hi, Shift); _sar(T_Hi, Shift);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
} else if (isVectorType(Dest->getType())) {
Type DestTy = Dest->getType();
if (DestTy == IceType_v16i8) {
// onemask = materialize(1,1,...); dst = (src & onemask) > 0
Variable *OneMask = makeVectorOfOnes(Dest->getType());
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
Variable *Zeros = makeVectorOfZeros(Dest->getType());
_pcmpgt(T, Zeros);
_movp(Dest, T);
} else {
// width = width(elty) - 1; dest = (src << width) >> width
SizeT ShiftAmount =
X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_psll(T, ShiftConstant);
_psra(T, ShiftConstant);
_movp(Dest, T);
}
} else { } else {
// TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
// also copy to the high operand of a 64-bit variable. // also copy to the high operand of a 64-bit variable.
...@@ -1604,6 +1628,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1604,6 +1628,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
_movzx(T, Src0RM); _movzx(T, Src0RM);
_and(T, One); _and(T, One);
_mov(Dest, T); _mov(Dest, T);
} else if (isVectorType(Dest->getType())) {
// onemask = materialize(1,1,...); dest = onemask & src
Type DestTy = Dest->getType();
Variable *OneMask = makeVectorOfOnes(DestTy);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
} else { } else {
// t1 = movzx src; dst = t1 // t1 = movzx src; dst = t1
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
...@@ -1613,14 +1645,25 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1613,14 +1645,25 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
break; break;
} }
case InstCast::Trunc: { case InstCast::Trunc: {
Operand *Src0 = Inst->getSrc(0); if (isVectorType(Dest->getType())) {
if (Src0->getType() == IceType_i64) // onemask = materialize(1,1,...); dst = src & onemask
Src0 = loOperand(Src0); Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); Type Src0Ty = Src0RM->getType();
// t1 = trunc Src0RM; Dest = t1 Variable *OneMask = makeVectorOfOnes(Src0Ty);
Variable *T = NULL; Variable *T = makeReg(Dest->getType());
_mov(T, Src0RM); _movp(T, Src0RM);
_mov(Dest, T); _pand(T, OneMask);
_movp(Dest, T);
} else {
Operand *Src0 = Inst->getSrc(0);
if (Src0->getType() == IceType_i64)
Src0 = loOperand(Src0);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// t1 = trunc Src0RM; Dest = t1
Variable *T = NULL;
_mov(T, Src0RM);
_mov(Dest, T);
}
break; break;
} }
case InstCast::Fptrunc: case InstCast::Fptrunc:
...@@ -1633,7 +1676,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1633,7 +1676,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
break; break;
} }
case InstCast::Fptosi: case InstCast::Fptosi:
if (Dest->getType() == IceType_i64) { if (isVectorType(Dest->getType())) {
assert(Dest->getType() == IceType_v4i32 &&
Inst->getSrc(0)->getType() == IceType_v4f32);
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM);
_movp(Dest, T);
} else if (Dest->getType() == IceType_i64) {
// Use a helper for converting floating-point values to 64-bit // Use a helper for converting floating-point values to 64-bit
// integers. SSE2 appears to have no way to convert from xmm // integers. SSE2 appears to have no way to convert from xmm
// registers to something like the edx:eax register pair, and // registers to something like the edx:eax register pair, and
...@@ -1660,7 +1710,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1660,7 +1710,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
} }
break; break;
case InstCast::Fptoui: case InstCast::Fptoui:
if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) { if (isVectorType(Dest->getType())) {
assert(Dest->getType() == IceType_v4i32 &&
Inst->getSrc(0)->getType() == IceType_v4f32);
const SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
Call->addArg(Inst->getSrc(0));
lowerCall(Call);
} else if (Dest->getType() == IceType_i64 ||
Dest->getType() == IceType_i32) {
// Use a helper for both x86-32 and x86-64. // Use a helper for both x86-32 and x86-64.
split64(Dest); split64(Dest);
const SizeT MaxSrcs = 1; const SizeT MaxSrcs = 1;
...@@ -1687,7 +1745,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1687,7 +1745,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
} }
break; break;
case InstCast::Sitofp: case InstCast::Sitofp:
if (Inst->getSrc(0)->getType() == IceType_i64) { if (isVectorType(Dest->getType())) {
assert(Dest->getType() == IceType_v4f32 &&
Inst->getSrc(0)->getType() == IceType_v4i32);
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM);
_movp(Dest, T);
} else if (Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32. // Use a helper for x86-32.
const SizeT MaxSrcs = 1; const SizeT MaxSrcs = 1;
Type DestType = Dest->getType(); Type DestType = Dest->getType();
...@@ -1713,7 +1778,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1713,7 +1778,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
break; break;
case InstCast::Uitofp: { case InstCast::Uitofp: {
Operand *Src0 = Inst->getSrc(0); Operand *Src0 = Inst->getSrc(0);
if (Src0->getType() == IceType_i64 || Src0->getType() == IceType_i32) { if (isVectorType(Src0->getType())) {
assert(Dest->getType() == IceType_v4f32 &&
Src0->getType() == IceType_v4i32);
const SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);
Call->addArg(Src0);
lowerCall(Call);
} else if (Src0->getType() == IceType_i64 ||
Src0->getType() == IceType_i32) {
// Use a helper for x86-32 and x86-64. Also use a helper for // Use a helper for x86-32 and x86-64. Also use a helper for
// i32 on x86-32. // i32 on x86-32.
const SizeT MaxSrcs = 1; const SizeT MaxSrcs = 1;
...@@ -1752,6 +1825,18 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1752,6 +1825,18 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
switch (Dest->getType()) { switch (Dest->getType()) {
default: default:
llvm_unreachable("Unexpected Bitcast dest type"); llvm_unreachable("Unexpected Bitcast dest type");
case IceType_i8: {
assert(Src0->getType() == IceType_v8i1);
InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);
Call->addArg(Src0);
lowerCall(Call);
} break;
case IceType_i16: {
assert(Src0->getType() == IceType_v16i1);
InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);
Call->addArg(Src0);
lowerCall(Call);
} break;
case IceType_i32: case IceType_i32:
case IceType_f32: { case IceType_f32: {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
...@@ -1830,6 +1915,30 @@ void TargetX8632::lowerCast(const InstCast *Inst) { ...@@ -1830,6 +1915,30 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
_store(T_Hi, SpillHi); _store(T_Hi, SpillHi);
_movq(Dest, Spill); _movq(Dest, Spill);
} break; } break;
case IceType_v8i1: {
assert(Src0->getType() == IceType_i8);
InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);
Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode());
// Arguments to functions are required to be at least 32 bits wide.
lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
Call->addArg(Src0AsI32);
lowerCall(Call);
} break;
case IceType_v16i1: {
assert(Src0->getType() == IceType_i16);
InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);
Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode());
// Arguments to functions are required to be at least 32 bits wide.
lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
Call->addArg(Src0AsI32);
lowerCall(Call);
} break;
case IceType_v8i16:
case IceType_v16i8:
case IceType_v4i32:
case IceType_v4f32: {
_movp(Dest, legalizeToVar(Src0));
} break;
} }
break; break;
} }
...@@ -2875,6 +2984,29 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { ...@@ -2875,6 +2984,29 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
lowerCall(Call); lowerCall(Call);
} }
// Returns a register variable of vector type Ty (optionally pinned to
// RegNum) that is zeroed with register-only operations, because vector
// constants can be neither loaded nor emitted.
Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *ZeroVec = makeReg(Ty, RegNum);
  // The pxor below names ZeroVec as both operands; without this FakeDef
  // the live range of ZeroVec might be overestimated.
  Context.insert(InstFakeDef::create(Func, ZeroVec));
  // XOR of the register with itself clears it.
  _pxor(ZeroVec, ZeroVec);
  return ZeroVec;
}
// Returns a register variable of vector type Ty (optionally pinned to
// RegNum) holding the value 1 in every element. It is built with
// register-only operations, because vector constants can be neither loaded
// nor emitted.
Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
  // Start from an all-zero vector; this becomes the result.
  Variable *Ones = makeVectorOfZeros(Ty, RegNum);
  Variable *AllBitsSet = makeReg(Ty);
  // The pcmpeq below names AllBitsSet as both operands; the FakeDef keeps
  // its live range from being overestimated.
  Context.insert(InstFakeDef::create(Func, AllBitsSet));
  // Comparing a register with itself for equality yields -1 in each element.
  _pcmpeq(AllBitsSet, AllBitsSet);
  // 0 - (-1) == 1 in each element.
  _psub(Ones, AllBitsSet);
  return Ones;
}
// Helper for legalize() to emit the right code to lower an operand to a // Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type. // register of the appropriate type.
Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
...@@ -2937,19 +3069,9 @@ Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, ...@@ -2937,19 +3069,9 @@ Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
// overestimated. If the constant being lowered is a 64 bit value, // overestimated. If the constant being lowered is a 64 bit value,
// then the result should be split and the lo and hi components will // then the result should be split and the lo and hi components will
// need to go in uninitialized registers. // need to go in uninitialized registers.
if (isVectorType(From->getType()))
if (isVectorType(From->getType())) { return makeVectorOfZeros(From->getType());
// There is no support for loading or emitting vector constants, so From = Ctx->getConstantZero(From->getType());
// undef values are instead initialized in registers.
Variable *Reg = makeReg(From->getType(), RegNum);
// Insert a FakeDef, since otherwise the live range of Reg might
// be overestimated.
Context.insert(InstFakeDef::create(Func, Reg));
_pxor(Reg, Reg);
return Reg;
} else {
From = Ctx->getConstantZero(From->getType());
}
} }
// There should be no constants of vector type (other than undef). // There should be no constants of vector type (other than undef).
assert(!isVectorType(From->getType())); assert(!isVectorType(From->getType()));
......
...@@ -148,6 +148,10 @@ protected: ...@@ -148,6 +148,10 @@ protected:
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister); Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
// Returns a vector in a register with the given constant entries.
Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
Variable *makeVectorOfOnes(Type Ty, int32_t RegNum = Variable::NoRegister);
// The following are helpers that insert lowered x86 instructions // The following are helpers that insert lowered x86 instructions
// with minimal syntactic overhead, so that the lowering code can // with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical. // look as close to assembly as practical.
...@@ -272,12 +276,30 @@ protected: ...@@ -272,12 +276,30 @@ protected:
void _or(Variable *Dest, Operand *Src0) { void _or(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Or::create(Func, Dest, Src0)); Context.insert(InstX8632Or::create(Func, Dest, Src0));
} }
// Creates an InstX8632Pand (opcode "pand") from Dest and Src0 and inserts it
// into the current lowering context.
void _pand(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pand::create(Func, Dest, Src0));
}
// Creates an InstX8632Pcmpeq from Dest and Src0 and inserts it into the
// current lowering context. The emitted mnemonic is "pcmpeq" plus the pack
// suffix of Dest's type.
void _pcmpeq(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pcmpeq::create(Func, Dest, Src0));
}
// Creates an InstX8632Pcmpgt from Dest and Src0 and inserts it into the
// current lowering context. The emitted mnemonic is "pcmpgt" plus the pack
// suffix of Dest's type.
void _pcmpgt(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pcmpgt::create(Func, Dest, Src0));
}
void _pop(Variable *Dest) { void _pop(Variable *Dest) {
Context.insert(InstX8632Pop::create(Func, Dest)); Context.insert(InstX8632Pop::create(Func, Dest));
} }
void _push(Operand *Src0, bool SuppressStackAdjustment = false) { void _push(Operand *Src0, bool SuppressStackAdjustment = false) {
Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment)); Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment));
} }
// Creates an InstX8632Psll from Dest and Src0 and inserts it into the
// current lowering context. The emitter for Psll asserts that Dest is of
// type v8i16 or v4i32.
void _psll(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Psll::create(Func, Dest, Src0));
}
// Creates an InstX8632Psra from Dest and Src0 and inserts it into the
// current lowering context. The emitter for Psra asserts that Dest is of
// type v8i16 or v4i32.
void _psra(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Psra::create(Func, Dest, Src0));
}
// Creates an InstX8632Psub from Dest and Src0 and inserts it into the
// current lowering context. The emitted mnemonic is "psub" plus the pack
// suffix of Dest's type.
void _psub(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Psub::create(Func, Dest, Src0));
}
void _pxor(Variable *Dest, Operand *Src0) { void _pxor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pxor::create(Func, Dest, Src0)); Context.insert(InstX8632Pxor::create(Func, Dest, Src0));
} }
......
; This file tests bitcasts of vector type. For most operations, these
; should be lowered to a no-op on -O2.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s --check-prefix=OPTM1
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s
; Identity bitcast of <16 x i8>. The -O2 run expects ret to come directly
; after the entry label, i.e. no code is generated for the cast.
define <16 x i8> @test_bitcast_v16i8_to_v16i8(<16 x i8> %arg) {
entry:
%res = bitcast <16 x i8> %arg to <16 x i8>
ret <16 x i8> %res
; CHECK-LABEL: test_bitcast_v16i8_to_v16i8:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <16 x i8> to <8 x i16>. The -O2 run expects ret
; to come directly after the entry label, i.e. no code for the cast.
define <8 x i16> @test_bitcast_v16i8_to_v8i16(<16 x i8> %arg) {
entry:
%res = bitcast <16 x i8> %arg to <8 x i16>
ret <8 x i16> %res
; CHECK-LABEL: test_bitcast_v16i8_to_v8i16:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <16 x i8> to <4 x i32>. The -O2 run expects ret
; to come directly after the entry label, i.e. no code for the cast.
define <4 x i32> @test_bitcast_v16i8_to_v4i32(<16 x i8> %arg) {
entry:
%res = bitcast <16 x i8> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_bitcast_v16i8_to_v4i32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <16 x i8> to <4 x float>. The -O2 run expects
; ret to come directly after the entry label, i.e. no code for the cast.
define <4 x float> @test_bitcast_v16i8_to_v4f32(<16 x i8> %arg) {
entry:
%res = bitcast <16 x i8> %arg to <4 x float>
ret <4 x float> %res
; CHECK-LABEL: test_bitcast_v16i8_to_v4f32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <8 x i16> to <16 x i8>. The -O2 run expects ret
; to come directly after the entry label, i.e. no code for the cast.
define <16 x i8> @test_bitcast_v8i16_to_v16i8(<8 x i16> %arg) {
entry:
%res = bitcast <8 x i16> %arg to <16 x i8>
ret <16 x i8> %res
; CHECK-LABEL: test_bitcast_v8i16_to_v16i8:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Identity bitcast of <8 x i16>. The -O2 run expects ret to come directly
; after the entry label, i.e. no code is generated for the cast.
define <8 x i16> @test_bitcast_v8i16_to_v8i16(<8 x i16> %arg) {
entry:
%res = bitcast <8 x i16> %arg to <8 x i16>
ret <8 x i16> %res
; CHECK-LABEL: test_bitcast_v8i16_to_v8i16:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <8 x i16> to <4 x i32>. The -O2 run expects ret
; to come directly after the entry label, i.e. no code for the cast.
define <4 x i32> @test_bitcast_v8i16_to_v4i32(<8 x i16> %arg) {
entry:
%res = bitcast <8 x i16> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_bitcast_v8i16_to_v4i32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <8 x i16> to <4 x float>. The -O2 run expects
; ret to come directly after the entry label, i.e. no code for the cast.
define <4 x float> @test_bitcast_v8i16_to_v4f32(<8 x i16> %arg) {
entry:
%res = bitcast <8 x i16> %arg to <4 x float>
ret <4 x float> %res
; CHECK-LABEL: test_bitcast_v8i16_to_v4f32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <4 x i32> to <16 x i8>. The -O2 run expects ret
; to come directly after the entry label, i.e. no code for the cast.
define <16 x i8> @test_bitcast_v4i32_to_v16i8(<4 x i32> %arg) {
entry:
%res = bitcast <4 x i32> %arg to <16 x i8>
ret <16 x i8> %res
; CHECK-LABEL: test_bitcast_v4i32_to_v16i8:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <4 x i32> to <8 x i16>. The -O2 run expects ret
; to come directly after the entry label, i.e. no code for the cast.
define <8 x i16> @test_bitcast_v4i32_to_v8i16(<4 x i32> %arg) {
entry:
%res = bitcast <4 x i32> %arg to <8 x i16>
ret <8 x i16> %res
; CHECK-LABEL: test_bitcast_v4i32_to_v8i16:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Identity bitcast of <4 x i32>. The -O2 run expects ret to come directly
; after the entry label, i.e. no code is generated for the cast.
define <4 x i32> @test_bitcast_v4i32_to_v4i32(<4 x i32> %arg) {
entry:
%res = bitcast <4 x i32> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_bitcast_v4i32_to_v4i32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <4 x i32> to <4 x float>. The -O2 run expects
; ret to come directly after the entry label, i.e. no code for the cast.
define <4 x float> @test_bitcast_v4i32_to_v4f32(<4 x i32> %arg) {
entry:
%res = bitcast <4 x i32> %arg to <4 x float>
ret <4 x float> %res
; CHECK-LABEL: test_bitcast_v4i32_to_v4f32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <4 x float> to <16 x i8>. The -O2 run expects
; ret to come directly after the entry label, i.e. no code for the cast.
define <16 x i8> @test_bitcast_v4f32_to_v16i8(<4 x float> %arg) {
entry:
%res = bitcast <4 x float> %arg to <16 x i8>
ret <16 x i8> %res
; CHECK-LABEL: test_bitcast_v4f32_to_v16i8:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <4 x float> to <8 x i16>. The -O2 run expects
; ret to come directly after the entry label, i.e. no code for the cast.
define <8 x i16> @test_bitcast_v4f32_to_v8i16(<4 x float> %arg) {
entry:
%res = bitcast <4 x float> %arg to <8 x i16>
ret <8 x i16> %res
; CHECK-LABEL: test_bitcast_v4f32_to_v8i16:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Same-width vector bitcast, <4 x float> to <4 x i32>. The -O2 run expects
; ret to come directly after the entry label, i.e. no code for the cast.
define <4 x i32> @test_bitcast_v4f32_to_v4i32(<4 x float> %arg) {
entry:
%res = bitcast <4 x float> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_bitcast_v4f32_to_v4i32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Identity bitcast of <4 x float>. The -O2 run expects ret to come directly
; after the entry label, i.e. no code is generated for the cast.
define <4 x float> @test_bitcast_v4f32_to_v4f32(<4 x float> %arg) {
entry:
%res = bitcast <4 x float> %arg to <4 x float>
ret <4 x float> %res
; CHECK-LABEL: test_bitcast_v4f32_to_v4f32:
; CHECK: .L{{.*}}entry:
; CHECK-NEXT: ret
}
; Bitcast of <8 x i1> to i8 is lowered to a call to the Sz_bitcast_v8i1_to_i8
; helper. Both the -O2 run and the -Om1 run must contain that call.
; (The -Om1 directive prefix was previously misspelled, so FileCheck silently
; ignored it.)
define i8 @test_bitcast_v8i1_to_i8(<8 x i1> %arg) {
entry:
%res = bitcast <8 x i1> %arg to i8
ret i8 %res
; CHECK-LABEL: test_bitcast_v8i1_to_i8:
; CHECK: call Sz_bitcast_v8i1_to_i8
; OPTM1-LABEL: test_bitcast_v8i1_to_i8:
; OPTM1: call Sz_bitcast_v8i1_to_i8
}
; Bitcast of <16 x i1> to i16 is lowered to a call to the
; Sz_bitcast_v16i1_to_i16 helper. Both the -O2 run and the -Om1 run must
; contain that call. (The -Om1 directive prefix was previously misspelled, so
; FileCheck silently ignored it.)
define i16 @test_bitcast_v16i1_to_i16(<16 x i1> %arg) {
entry:
%res = bitcast <16 x i1> %arg to i16
ret i16 %res
; CHECK-LABEL: test_bitcast_v16i1_to_i16:
; CHECK: call Sz_bitcast_v16i1_to_i16
; OPTM1-LABEL: test_bitcast_v16i1_to_i16:
; OPTM1: call Sz_bitcast_v16i1_to_i16
}
; Bitcast of i8 to <8 x i1>. The i8 operand is obtained by truncating the i32
; argument. Both the -O2 and -Om1 runs expect a call to the
; Sz_bitcast_i8_to_v8i1 helper.
define <8 x i1> @test_bitcast_i8_to_v8i1(i32 %arg) {
entry:
%arg.trunc = trunc i32 %arg to i8
%res = bitcast i8 %arg.trunc to <8 x i1>
ret <8 x i1> %res
; CHECK-LABEL: test_bitcast_i8_to_v8i1:
; CHECK: call Sz_bitcast_i8_to_v8i1
; OPTM1-LABEL: test_bitcast_i8_to_v8i1:
; OPTM1: call Sz_bitcast_i8_to_v8i1
}
; Bitcast of a scalar i16 to <16 x i1>. The i16 operand is obtained by
; truncating the i32 argument. The checks below expect a call to the helper
; Sz_bitcast_i16_to_v16i1 under both the default prefix and the OPTM1 prefix.
define <16 x i1> @test_bitcast_i16_to_v16i1(i32 %arg) {
entry:
%arg.trunc = trunc i32 %arg to i16
%res = bitcast i16 %arg.trunc to <16 x i1>
ret <16 x i1> %res
; CHECK-LABEL: test_bitcast_i16_to_v16i1:
; CHECK: call Sz_bitcast_i16_to_v16i1
; OPTM1-LABEL: test_bitcast_i16_to_v16i1:
; OPTM1: call Sz_bitcast_i16_to_v16i1
}
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
; This file tests casting / conversion operations that apply to vector types.
; Bitcast operations are tested separately in vector-bitcast.ll.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s
; sext operations
; Sign-extends <16 x i1> to <16 x i8>. The checks below expect, in order:
; pxor, pcmpeqb, psubb, pand, then pxor and pcmpgtb.
; NOTE(review): presumably the first four instructions build a vector of 1s
; in registers and mask the low bit of each byte, and the trailing
; pxor/pcmpgtb compares against zero to widen that bit -- confirm against
; the lowering code.
define <16 x i8> @test_sext_v16i1_to_v16i8(<16 x i1> %arg) {
entry:
%res = sext <16 x i1> %arg to <16 x i8>
ret <16 x i8> %res
; CHECK-LABEL: test_sext_v16i1_to_v16i8:
; CHECK: pxor
; CHECK: pcmpeqb
; CHECK: psubb
; CHECK: pand
; CHECK: pxor
; CHECK: pcmpgtb
}
; Sign-extends <8 x i1> to <8 x i16>. The checks below expect a word-wise
; left shift by 15 (psllw) followed by a word-wise arithmetic right shift by
; 15 (psraw), which replicates bit 0 of each 16-bit element across the
; whole element.
define <8 x i16> @test_sext_v8i1_to_v8i16(<8 x i1> %arg) {
entry:
%res = sext <8 x i1> %arg to <8 x i16>
ret <8 x i16> %res
; CHECK-LABEL: test_sext_v8i1_to_v8i16:
; CHECK: psllw {{.*}}, 15
; CHECK: psraw {{.*}}, 15
}
; Sign-extends <4 x i1> to <4 x i32>. The checks below expect a dword-wise
; left shift by 31 (pslld) followed by a dword-wise arithmetic right shift
; by 31 (psrad), which replicates bit 0 of each 32-bit element across the
; whole element.
define <4 x i32> @test_sext_v4i1_to_v4i32(<4 x i1> %arg) {
entry:
%res = sext <4 x i1> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_sext_v4i1_to_v4i32:
; CHECK: pslld {{.*}}, 31
; CHECK: psrad {{.*}}, 31
}
; zext operations
; Zero-extends <16 x i1> to <16 x i8>. The checks below expect, in order:
; pxor, pcmpeqb, psubb, pand.
; NOTE(review): presumably pxor/pcmpeqb/psubb build a vector with 1 in every
; byte using only register operations, and pand masks the input with it --
; confirm against the lowering code.
define <16 x i8> @test_zext_v16i1_to_v16i8(<16 x i1> %arg) {
entry:
%res = zext <16 x i1> %arg to <16 x i8>
ret <16 x i8> %res
; CHECK-LABEL: test_zext_v16i1_to_v16i8:
; CHECK: pxor
; CHECK: pcmpeqb
; CHECK: psubb
; CHECK: pand
}
; Zero-extends <8 x i1> to <8 x i16>. The checks below expect, in order:
; pxor, pcmpeqw, psubw, pand.
; NOTE(review): presumably pxor/pcmpeqw/psubw build a vector with 1 in every
; word using only register operations, and pand masks the input with it --
; confirm against the lowering code.
define <8 x i16> @test_zext_v8i1_to_v8i16(<8 x i1> %arg) {
entry:
%res = zext <8 x i1> %arg to <8 x i16>
ret <8 x i16> %res
; CHECK-LABEL: test_zext_v8i1_to_v8i16:
; CHECK: pxor
; CHECK: pcmpeqw
; CHECK: psubw
; CHECK: pand
}
; Zero-extends <4 x i1> to <4 x i32>. The checks below expect, in order:
; pxor, pcmpeqd, psubd, pand.
; NOTE(review): presumably pxor/pcmpeqd/psubd build a vector with 1 in every
; dword using only register operations, and pand masks the input with it --
; confirm against the lowering code.
define <4 x i32> @test_zext_v4i1_to_v4i32(<4 x i1> %arg) {
entry:
%res = zext <4 x i1> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_zext_v4i1_to_v4i32:
; CHECK: pxor
; CHECK: pcmpeqd
; CHECK: psubd
; CHECK: pand
}
; trunc operations
; Truncates <16 x i8> to <16 x i1>. The checks below expect, in order:
; pxor, pcmpeqb, psubb, pand -- the same instruction sequence that the
; zext test for this element width expects.
; NOTE(review): presumably this keeps only the low bit of each byte by
; masking with a register-materialized vector of 1s -- confirm against the
; lowering code.
define <16 x i1> @test_trunc_v16i8_to_v16i1(<16 x i8> %arg) {
entry:
%res = trunc <16 x i8> %arg to <16 x i1>
ret <16 x i1> %res
; CHECK-LABEL: test_trunc_v16i8_to_v16i1:
; CHECK: pxor
; CHECK: pcmpeqb
; CHECK: psubb
; CHECK: pand
}
; Truncates <8 x i16> to <8 x i1>. The checks below expect, in order:
; pxor, pcmpeqw, psubw, pand -- the same instruction sequence that the
; zext test for this element width expects.
; NOTE(review): presumably this keeps only the low bit of each word by
; masking with a register-materialized vector of 1s -- confirm against the
; lowering code.
define <8 x i1> @test_trunc_v8i16_to_v8i1(<8 x i16> %arg) {
entry:
%res = trunc <8 x i16> %arg to <8 x i1>
ret <8 x i1> %res
; CHECK-LABEL: test_trunc_v8i16_to_v8i1:
; CHECK: pxor
; CHECK: pcmpeqw
; CHECK: psubw
; CHECK: pand
}
; Truncates <4 x i32> to <4 x i1>. The checks below expect, in order:
; pxor, pcmpeqd, psubd, pand -- the same instruction sequence that the
; zext test for this element width expects.
; NOTE(review): presumably this keeps only the low bit of each dword by
; masking with a register-materialized vector of 1s -- confirm against the
; lowering code.
define <4 x i1> @test_trunc_v4i32_to_v4i1(<4 x i32> %arg) {
entry:
%res = trunc <4 x i32> %arg to <4 x i1>
ret <4 x i1> %res
; CHECK-LABEL: test_trunc_v4i32_to_v4i1:
; CHECK: pxor
; CHECK: pcmpeqd
; CHECK: psubd
; CHECK: pand
}
; fpto[us]i operations
; Converts <4 x float> to <4 x i32> with fptosi. The check below expects a
; cvtps2dq instruction.
; NOTE(review): fptosi is specified to round toward zero, whereas cvtps2dq
; rounds according to the current MXCSR rounding mode; the truncating form
; is cvttps2dq. Confirm whether the lowering (and therefore this
; expectation) should use the truncating instruction.
define <4 x i32> @test_fptosi_v4f32_to_v4i32(<4 x float> %arg) {
entry:
%res = fptosi <4 x float> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_fptosi_v4f32_to_v4i32:
; CHECK: cvtps2dq
}
; Converts <4 x float> to <4 x i32> with fptoui. The check below expects a
; call to the helper Sz_fptoui_v4f32 rather than an inline instruction
; sequence.
define <4 x i32> @test_fptoui_v4f32_to_v4i32(<4 x float> %arg) {
entry:
%res = fptoui <4 x float> %arg to <4 x i32>
ret <4 x i32> %res
; CHECK-LABEL: test_fptoui_v4f32_to_v4i32:
; CHECK: call Sz_fptoui_v4f32
}
; [su]itofp operations
; Converts <4 x i32> to <4 x float> with sitofp. The check below expects a
; cvtdq2ps instruction.
define <4 x float> @test_sitofp_v4i32_to_v4f32(<4 x i32> %arg) {
entry:
%res = sitofp <4 x i32> %arg to <4 x float>
ret <4 x float> %res
; CHECK-LABEL: test_sitofp_v4i32_to_v4f32:
; CHECK: cvtdq2ps
}
; Converts <4 x i32> to <4 x float> with uitofp. The check below expects a
; call to the helper Sz_uitofp_v4i32 rather than an inline instruction
; sequence.
define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
entry:
%res = uitofp <4 x i32> %arg to <4 x float>
ret <4 x float> %res
; CHECK-LABEL: test_uitofp_v4i32_to_v4f32:
; CHECK: call Sz_uitofp_v4i32
}
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment