Commit e398428c by David Sehr

Fuse icmp/fcmp with select

Allows the optimization of pairs of operations, including 64-bit compares and selects as well as preparing for idioms (minsd, maxsd, etc.). BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1497033002 .
parent 8344bfe9
......@@ -321,6 +321,326 @@ entry:
; CHECK: fcmpTrueDouble:
; CHECK: mov {{.*}}, 1
; `fcmp false` never holds, so the select always yields %d.
define i32 @fcmpSelectFalseFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp false float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectFalseFloat:
; CHECK: mov {{.*}}, 0
; `fcmp false` never holds, so the select always yields %d.
define i32 @fcmpSelectFalseDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp false double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectFalseDouble:
; CHECK: mov {{.*}}, 0
; Yields %c when %a and %b are ordered and equal, %d otherwise.
define i32 @fcmpSelectOeqFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp oeq float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOeqFloat:
; CHECK: ucomiss
; CHECK: jne .
; CHECK: jp .
; Yields %c when %a and %b are ordered and equal, %d otherwise.
define i32 @fcmpSelectOeqDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp oeq double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOeqDouble:
; CHECK: ucomisd
; CHECK: jne .
; CHECK: jp .
; Yields %c when %a and %b are ordered and %a > %b, %d otherwise.
define i32 @fcmpSelectOgtFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ogt float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOgtFloat:
; CHECK: ucomiss
; CHECK: ja .
; Yields %c when %a and %b are ordered and %a > %b, %d otherwise.
define i32 @fcmpSelectOgtDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ogt double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOgtDouble:
; CHECK: ucomisd
; CHECK: ja .
; Yields %c when %a and %b are ordered and %a >= %b, %d otherwise.
define i32 @fcmpSelectOgeFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp oge float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOgeFloat:
; CHECK: ucomiss
; CHECK: jae .
; Yields %c when %a and %b are ordered and %a >= %b, %d otherwise.
define i32 @fcmpSelectOgeDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp oge double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOgeDouble:
; CHECK: ucomisd
; CHECK: jae .
; Yields %c when %a and %b are ordered and %a < %b, %d otherwise.
define i32 @fcmpSelectOltFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp olt float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOltFloat:
; CHECK: ucomiss
; CHECK: ja .
; Yields %c when %a and %b are ordered and %a < %b, %d otherwise.
define i32 @fcmpSelectOltDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp olt double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOltDouble:
; CHECK: ucomisd
; CHECK: ja .
; Yields %c when %a and %b are ordered and %a <= %b, %d otherwise.
define i32 @fcmpSelectOleFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ole float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOleFloat:
; CHECK: ucomiss
; CHECK: jae .
; Yields %c when %a and %b are ordered and %a <= %b, %d otherwise.
define i32 @fcmpSelectOleDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ole double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOleDouble:
; CHECK: ucomisd
; CHECK: jae .
; Yields %c when %a and %b are ordered and not equal, %d otherwise.
define i32 @fcmpSelectOneFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp one float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOneFloat:
; CHECK: ucomiss
; CHECK: jne .
; Yields %c when %a and %b are ordered and not equal, %d otherwise.
define i32 @fcmpSelectOneDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp one double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOneDouble:
; CHECK: ucomisd
; CHECK: jne .
; Yields %c when %a and %b are ordered (neither is a NaN), %d otherwise.
define i32 @fcmpSelectOrdFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ord float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOrdFloat:
; CHECK: ucomiss
; CHECK: jnp .
; Yields %c when %a and %b are ordered (neither is a NaN), %d otherwise.
define i32 @fcmpSelectOrdDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ord double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectOrdDouble:
; CHECK: ucomisd
; CHECK: jnp .
; Yields %c when %a and %b are unordered or equal, %d otherwise.
define i32 @fcmpSelectUeqFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ueq float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUeqFloat:
; CHECK: ucomiss
; CHECK: je .
; Yields %c when %a and %b are unordered or equal, %d otherwise.
define i32 @fcmpSelectUeqDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ueq double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUeqDouble:
; CHECK: ucomisd
; CHECK: je .
; Yields %c when %a and %b are unordered or %a > %b, %d otherwise.
define i32 @fcmpSelectUgtFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ugt float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUgtFloat:
; CHECK: ucomiss
; CHECK: jb .
; Yields %c when %a and %b are unordered or %a > %b, %d otherwise.
define i32 @fcmpSelectUgtDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ugt double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUgtDouble:
; CHECK: ucomisd
; CHECK: jb .
; Yields %c when %a and %b are unordered or %a >= %b, %d otherwise.
define i32 @fcmpSelectUgeFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp uge float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUgeFloat:
; CHECK: ucomiss
; CHECK: jbe .
; Yields %c when %a and %b are unordered or %a >= %b, %d otherwise.
define i32 @fcmpSelectUgeDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp uge double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUgeDouble:
; CHECK: ucomisd
; CHECK: jbe .
; Yields %c when %a and %b are unordered or %a < %b, %d otherwise.
define i32 @fcmpSelectUltFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ult float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUltFloat:
; CHECK: ucomiss
; CHECK: jb .
; Yields %c when %a and %b are unordered or %a < %b, %d otherwise.
define i32 @fcmpSelectUltDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ult double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUltDouble:
; CHECK: ucomisd
; CHECK: jb .
; Yields %c when %a and %b are unordered or %a <= %b, %d otherwise.
define i32 @fcmpSelectUleFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ule float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUleFloat:
; CHECK: ucomiss
; CHECK: jbe .
; Yields %c when %a and %b are unordered or %a <= %b, %d otherwise.
define i32 @fcmpSelectUleDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp ule double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUleDouble:
; CHECK: ucomisd
; CHECK: jbe .
; Yields %c when %a and %b are unordered or not equal, %d otherwise.
define i32 @fcmpSelectUneFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp une float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUneFloat:
; CHECK: ucomiss
; CHECK: je .
; CHECK: jnp .
; Yields %c when %a and %b are unordered or not equal, %d otherwise.
define i32 @fcmpSelectUneDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp une double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUneDouble:
; CHECK: ucomisd
; CHECK: je .
; CHECK: jnp .
; Yields %c when %a and %b are unordered (either is a NaN), %d otherwise.
define i32 @fcmpSelectUnoFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp uno float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUnoFloat:
; CHECK: ucomiss
; CHECK: jp .
; Yields %c when %a and %b are unordered (either is a NaN), %d otherwise.
define i32 @fcmpSelectUnoDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp uno double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectUnoDouble:
; CHECK: ucomisd
; CHECK: jp .
; `fcmp true` always holds, so the select always yields %c.
define i32 @fcmpSelectTrueFloat(float %a, float %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp true float %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectTrueFloat:
; CHECK: mov {{.*}}, 1
; `fcmp true` always holds, so the select always yields %c.
define i32 @fcmpSelectTrueDouble(double %a, double %b, i32 %c, i32 %d) {
entry:
  %pred = fcmp true double %a, %b
  %chosen = select i1 %pred, i32 %c, i32 %d
  ret i32 %chosen
}
; CHECK: fcmpSelectTrueDouble:
; CHECK: mov {{.*}}, 1
define <4 x i32> @fcmpFalseVector(<4 x float> %a, <4 x float> %b) {
entry:
%res.trunc = fcmp false <4 x float> %a, %b
......
......@@ -27,9 +27,15 @@
// For every predicate listed in FCMP_TABLE, declare two parallel families of
// extern "C" entry points: the unprefixed functions and their Subzero_-prefixed
// twins. Each family has scalar float/double compares returning bool, fused
// compare+select variants returning int, and a vector compare returning
// v4si32.
#define X(cmp) \
extern "C" bool fcmp##cmp##Float(float a, float b); \
extern "C" bool fcmp##cmp##Double(double a, double b); \
extern "C" int fcmpSelect##cmp##Float(float a, float b, int c, int d); \
extern "C" int fcmpSelect##cmp##Double(double a, double b, int c, int d); \
extern "C" v4si32 fcmp##cmp##Vector(v4f32 a, v4f32 b); \
extern "C" bool Subzero_fcmp##cmp##Float(float a, float b); \
extern "C" bool Subzero_fcmp##cmp##Double(double a, double b); \
extern "C" int Subzero_fcmpSelect##cmp##Float(float a, float b, int c, \
int d); \
extern "C" int Subzero_fcmpSelect##cmp##Double(double a, double b, int c, \
int d); \
extern "C" v4si32 Subzero_fcmp##cmp##Vector(v4f32 a, v4f32 b);
FCMP_TABLE;
#undef X
......@@ -59,17 +65,25 @@ void initializeValues() {
void testsScalar(size_t &TotalTests, size_t &Passes, size_t &Failures) {
typedef bool (*FuncTypeFloat)(float, float);
typedef bool (*FuncTypeDouble)(double, double);
typedef int (*FuncTypeFloatSelect)(float, float, int, int);
typedef int (*FuncTypeDoubleSelect)(double, double, int, int);
static struct {
const char *Name;
FuncTypeFloat FuncFloatSz;
FuncTypeFloat FuncFloatLlc;
FuncTypeDouble FuncDoubleSz;
FuncTypeDouble FuncDoubleLlc;
FuncTypeFloatSelect FuncFloatSelectSz;
FuncTypeFloatSelect FuncFloatSelectLlc;
FuncTypeDoubleSelect FuncDoubleSelectSz;
FuncTypeDoubleSelect FuncDoubleSelectLlc;
} Funcs[] = {
#define X(cmp) \
{ \
"fcmp" STR(cmp), Subzero_fcmp##cmp##Float, fcmp##cmp##Float, \
Subzero_fcmp##cmp##Double, fcmp##cmp##Double \
Subzero_fcmp##cmp##Double, fcmp##cmp##Double, \
Subzero_fcmpSelect##cmp##Float, fcmpSelect##cmp##Float, \
Subzero_fcmpSelect##cmp##Double, fcmpSelect##cmp##Double \
} \
,
FCMP_TABLE
......@@ -110,6 +124,34 @@ void testsScalar(size_t &TotalTests, size_t &Passes, size_t &Failures) {
<< Value2Double << "): sz=" << ResultSz
<< " llc=" << ResultLlc << "\n";
}
++TotalTests;
float Value1SelectFloat = Values[i];
float Value2SelectFloat = Values[j];
ResultSz = Funcs[f].FuncFloatSelectSz(Value1Float, Value2Float, 1, 2);
ResultLlc = Funcs[f].FuncFloatSelectLlc(Value1Float, Value2Float, 1, 2);
if (ResultSz == ResultLlc) {
++Passes;
} else {
++Failures;
std::cout << Funcs[f].Name << "SelectFloat(" << Value1Float << ", "
<< Value2Float << "): sz=" << ResultSz
<< " llc=" << ResultLlc << "\n";
}
++TotalTests;
double Value1SelectDouble = Values[i];
double Value2SelectDouble = Values[j];
ResultSz =
Funcs[f].FuncDoubleSelectSz(Value1Double, Value2Double, 1, 2);
ResultLlc =
Funcs[f].FuncDoubleSelectLlc(Value1Double, Value2Double, 1, 2);
if (ResultSz == ResultLlc) {
++Passes;
} else {
++Failures;
std::cout << Funcs[f].Name << "SelectDouble(" << Value1Double << ", "
<< Value2Double << "): sz=" << ResultSz
<< " llc=" << ResultLlc << "\n";
}
}
}
}
......
......@@ -447,15 +447,34 @@ public:
typename Traits::XmmRegister src);
void mulps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void minps(typename Traits::XmmRegister dst,
void minps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void minps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void maxps(typename Traits::XmmRegister dst,
void minss(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void minss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void andps(typename Traits::XmmRegister dst,
void maxps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void maxps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void andps(typename Traits::XmmRegister dst,
void maxss(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void orps(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
void maxss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void andnps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void andnps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void andps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void andps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void orps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void orps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void blendvps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
......@@ -466,9 +485,10 @@ public:
void pblendvb(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void cmpps(typename Traits::XmmRegister dst, typename Traits::XmmRegister src,
void cmpps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src,
typename Traits::Cond::CmppsCond CmpCondition);
void cmpps(typename Traits::XmmRegister dst,
void cmpps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src,
typename Traits::Cond::CmppsCond CmpCondition);
......@@ -492,10 +512,6 @@ public:
void set1ps(typename Traits::XmmRegister dst,
typename Traits::GPRRegister tmp, const Immediate &imm);
void minpd(typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void maxpd(typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void sqrtpd(typename Traits::XmmRegister dst);
void pshufd(Type Ty, typename Traits::XmmRegister dst,
......@@ -547,22 +563,11 @@ public:
void sqrtss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void xorpd(typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void xorpd(typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void xorps(typename Traits::XmmRegister dst,
void xorps(Type Ty, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void xorps(typename Traits::XmmRegister dst,
void xorps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void andpd(typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void andpd(typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void orpd(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
void insertps(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src, const Immediate &imm);
void insertps(Type Ty, typename Traits::XmmRegister dst,
......
......@@ -50,8 +50,8 @@ const MachineTraits<TargetX8632>::InstCmppsAttributesType
const MachineTraits<TargetX8632>::TypeAttributesType
MachineTraits<TargetX8632>::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
{ cvt, sdss, pack, width, fld } \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, width, fld } \
,
ICETYPEX8632_TABLE
#undef X
......
......@@ -212,22 +212,22 @@
//#define X(val, emit)
#define ICETYPEX8632_TABLE \
/* tag, element type, cvt , sdss, pack, width, fld */ \
X(void, void, "?", "", "", "", "") \
X(i1, void, "si", "", "", "b", "") \
X(i8, void, "si", "", "", "b", "") \
X(i16, void, "si", "", "", "w", "") \
X(i32, void, "si", "", "", "l", "") \
X(i64, void, "si", "", "", "q", "") \
X(f32, void, "ss", "ss", "d", "", "s") \
X(f64, void, "sd", "sd", "q", "", "l") \
X(v4i1, i32, "?", "", "d", "", "") \
X(v8i1, i16, "?", "", "w", "", "") \
X(v16i1, i8, "?", "", "b", "", "") \
X(v16i8, i8, "?", "", "b", "", "") \
X(v8i16, i16, "?", "", "w", "", "") \
X(v4i32, i32, "dq", "", "d", "", "") \
X(v4f32, f32, "ps", "", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pack, width, fld)
/* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF
......@@ -51,8 +51,8 @@ const MachineTraits<TargetX8664>::InstCmppsAttributesType
const MachineTraits<TargetX8664>::TypeAttributesType
MachineTraits<TargetX8664>::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
{ cvt, sdss, pack, width, fld } \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, width, fld } \
,
ICETYPEX8664_TABLE
#undef X
......
......@@ -292,22 +292,22 @@
//#define X(val, emit)
#define ICETYPEX8664_TABLE \
/* tag, element type, cvt , sdss, pack, width, fld */ \
X(void, void, "?", "", "", "", "") \
X(i1, void, "si", "", "", "b", "") \
X(i8, void, "si", "", "", "b", "") \
X(i16, void, "si", "", "", "w", "") \
X(i32, void, "si", "", "", "l", "") \
X(i64, void, "si", "", "", "q", "") \
X(f32, void, "ss", "ss", "d", "", "s") \
X(f64, void, "sd", "sd", "q", "", "l") \
X(v4i1, i32, "?", "", "d", "", "") \
X(v8i1, i16, "?", "", "w", "", "") \
X(v16i1, i8, "?", "", "b", "", "") \
X(v16i8, i8, "?", "", "b", "", "") \
X(v8i16, i16, "?", "", "w", "", "") \
X(v4i32, i32, "dq", "", "d", "", "") \
X(v4f32, f32, "ps", "", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pack, width, fld)
/* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF
......@@ -1105,6 +1105,84 @@ void InstX86Mulss<Machine>::emit(const Cfg *Func) const {
}
/// Textual emitter for InstX86Andnps. Does nothing unless BuildDefs::dump()
/// is set; otherwise appends the destination type's PdPsString suffix to the
/// opcode and hands the resulting mnemonic to emitTwoAddress().
template <class Machine>
void InstX86Andnps<Machine>::emit(const Cfg *Func) const {
  if (BuildDefs::dump()) {
    // The suffix is looked up by the destination variable's type.
    const Type DestTy = this->getDest()->getType();
    const char *Suffix =
        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString;
    char Mnemonic[30];
    snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "%s%s", this->Opcode,
             Suffix);
    this->emitTwoAddress(Mnemonic, this, Func);
  }
}
/// Textual emitter for InstX86Andps. Does nothing unless BuildDefs::dump()
/// is set; otherwise appends the destination type's PdPsString suffix to the
/// opcode and hands the resulting mnemonic to emitTwoAddress().
template <class Machine>
void InstX86Andps<Machine>::emit(const Cfg *Func) const {
  if (BuildDefs::dump()) {
    // The suffix is looked up by the destination variable's type.
    const Type DestTy = this->getDest()->getType();
    const char *Suffix =
        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString;
    char Mnemonic[30];
    snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "%s%s", this->Opcode,
             Suffix);
    this->emitTwoAddress(Mnemonic, this, Func);
  }
}
/// Textual emitter for InstX86Maxss. Does nothing unless BuildDefs::dump()
/// is set; otherwise appends the destination type's SdSsString suffix to the
/// opcode and hands the resulting mnemonic to emitTwoAddress().
template <class Machine>
void InstX86Maxss<Machine>::emit(const Cfg *Func) const {
  if (BuildDefs::dump()) {
    // The suffix is looked up by the destination variable's type.
    const Type DestTy = this->getDest()->getType();
    const char *Suffix =
        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].SdSsString;
    char Mnemonic[30];
    snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "%s%s", this->Opcode,
             Suffix);
    this->emitTwoAddress(Mnemonic, this, Func);
  }
}
/// Textual emitter for InstX86Minss. Does nothing unless BuildDefs::dump()
/// is set; otherwise appends the destination type's SdSsString suffix to the
/// opcode and hands the resulting mnemonic to emitTwoAddress().
template <class Machine>
void InstX86Minss<Machine>::emit(const Cfg *Func) const {
  if (BuildDefs::dump()) {
    // The suffix is looked up by the destination variable's type.
    const Type DestTy = this->getDest()->getType();
    const char *Suffix =
        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].SdSsString;
    char Mnemonic[30];
    snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "%s%s", this->Opcode,
             Suffix);
    this->emitTwoAddress(Mnemonic, this, Func);
  }
}
/// Textual emitter for InstX86Orps. Does nothing unless BuildDefs::dump()
/// is set; otherwise appends the destination type's PdPsString suffix to the
/// opcode and hands the resulting mnemonic to emitTwoAddress().
template <class Machine>
void InstX86Orps<Machine>::emit(const Cfg *Func) const {
  if (BuildDefs::dump()) {
    // The suffix is looked up by the destination variable's type.
    const Type DestTy = this->getDest()->getType();
    const char *Suffix =
        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString;
    char Mnemonic[30];
    snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "%s%s", this->Opcode,
             Suffix);
    this->emitTwoAddress(Mnemonic, this, Func);
  }
}
/// Textual emitter for InstX86Xorps. Does nothing unless BuildDefs::dump()
/// is set; otherwise appends the destination type's PdPsString suffix to the
/// opcode and hands the resulting mnemonic to emitTwoAddress().
template <class Machine>
void InstX86Xorps<Machine>::emit(const Cfg *Func) const {
  if (BuildDefs::dump()) {
    // The suffix is looked up by the destination variable's type.
    const Type DestTy = this->getDest()->getType();
    const char *Suffix =
        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString;
    char Mnemonic[30];
    snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "%s%s", this->Opcode,
             Suffix);
    this->emitTwoAddress(Mnemonic, this, Func);
  }
}
template <class Machine>
void InstX86Pmuludq<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......@@ -1624,10 +1702,11 @@ void InstX86Cmpps<Machine>::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 2);
assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid);
Type DestTy = this->Dest->getType();
Str << "\t";
Str << "cmp"
<< InstX86Base<Machine>::Traits::InstCmppsAttributes[Condition].EmitString
<< "ps"
<< InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString
<< "\t";
this->getSrc(1)->emit(Func);
Str << ", ";
......@@ -1646,14 +1725,16 @@ void InstX86Cmpps<Machine>::emitIAS(const Cfg *Func) const {
auto *Target = InstX86Base<Machine>::getTarget(Func);
const auto *SrcVar = llvm::cast<Variable>(this->getSrc(1));
if (SrcVar->hasReg()) {
Asm->cmpps(InstX86Base<Machine>::Traits::getEncodedXmm(
Asm->cmpps(this->getDest()->getType(),
InstX86Base<Machine>::Traits::getEncodedXmm(
this->getDest()->getRegNum()),
InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()),
Condition);
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
Target->stackVarToAsmOperand(SrcVar);
Asm->cmpps(InstX86Base<Machine>::Traits::getEncodedXmm(
Asm->cmpps(this->getDest()->getType(),
InstX86Base<Machine>::Traits::getEncodedXmm(
this->getDest()->getRegNum()),
SrcStackAddr, Condition);
}
......
......@@ -73,7 +73,7 @@ const size_t MachineTraits<TargetX8632>::TableIcmp64Size =
const MachineTraits<TargetX8632>::TableTypeX8632AttributesType
MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
{ IceType_##elementty } \
,
ICETYPEX8632_TABLE
......@@ -974,7 +974,7 @@ ICEINSTICMP_TABLE
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag,
ICETYPEX8632_TABLE
#undef X
_num
......@@ -986,7 +986,7 @@ ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
......
......@@ -853,6 +853,8 @@ template <> struct MachineTraits<TargetX8632> {
static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
......
......@@ -73,7 +73,7 @@ const size_t MachineTraits<TargetX8664>::TableIcmp64Size =
const MachineTraits<TargetX8664>::TableTypeX8664AttributesType
MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
{ IceType_##elementty } \
,
ICETYPEX8664_TABLE
......@@ -957,7 +957,7 @@ ICEINSTICMP_TABLE
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag,
ICETYPEX8664_TABLE
#undef X
_num
......@@ -969,7 +969,7 @@ ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
......
......@@ -832,6 +832,8 @@ template <> struct MachineTraits<TargetX8664> {
static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
......
......@@ -347,6 +347,12 @@ protected:
/// Creates a Traits::Insts::And from (Dest, Src0) and inserts it through
/// Context.
void _and(Variable *Dest, Operand *Src0) {
  auto *AndInst = Traits::Insts::And::create(Func, Dest, Src0);
  Context.insert(AndInst);
}
/// Creates a Traits::Insts::Andnps from (Dest, Src0) and inserts it through
/// Context.
void _andnps(Variable *Dest, Operand *Src0) {
  auto *AndnpsInst = Traits::Insts::Andnps::create(Func, Dest, Src0);
  Context.insert(AndnpsInst);
}
/// Creates a Traits::Insts::Andps from (Dest, Src0) and inserts it through
/// Context.
void _andps(Variable *Dest, Operand *Src0) {
  auto *AndpsInst = Traits::Insts::Andps::create(Func, Dest, Src0);
  Context.insert(AndpsInst);
}
/// Creates a Traits::Insts::AndRMW from the memory operand DestSrc0 and Src1,
/// and inserts it through Context.
void _and_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
  auto *AndRmwInst = Traits::Insts::AndRMW::create(Func, DestSrc0, Src1);
  Context.insert(AndRmwInst);
}
......@@ -468,26 +474,31 @@ protected:
Context.insert(Traits::Insts::Lea::create(Func, Dest, Src0));
}
/// Creates a Traits::Insts::Mfence and inserts it through Context.
void _mfence() {
  auto *FenceInst = Traits::Insts::Mfence::create(Func);
  Context.insert(FenceInst);
}
/// A move may redefine a register, which creates a "partial kill" for
/// liveness. Call this on such a move to flag it: when IsRedefinition is true
/// (the default) MovInst->setDestRedefined() is invoked; otherwise nothing
/// happens.
void _redefined(Inst *MovInst, bool IsRedefinition = true) {
  if (!IsRedefinition)
    return;
  MovInst->setDestRedefined();
}
/// If Dest=nullptr is passed in, then a new variable is created, marked as
/// infinite register allocation weight, and returned through the in/out Dest
/// argument.
void _mov(Variable *&Dest, Operand *Src0,
int32_t RegNum = Variable::NoRegister) {
Inst *_mov(Variable *&Dest, Operand *Src0,
int32_t RegNum = Variable::NoRegister) {
if (Dest == nullptr)
Dest = makeReg(Src0->getType(), RegNum);
Context.insert(Traits::Insts::Mov::create(Func, Dest, Src0));
}
void _mov_redefined(Variable *Dest, Operand *Src0) {
Inst *NewInst = Traits::Insts::Mov::create(Func, Dest, Src0);
NewInst->setDestRedefined();
Context.insert(NewInst);
return NewInst;
}
/// Creates a Traits::Insts::Movp from (Dest, Src0), inserts it through
/// Context, and returns the new instruction so the caller can annotate it.
Inst *_movp(Variable *Dest, Operand *Src0) {
  Inst *MovpInst = Traits::Insts::Movp::create(Func, Dest, Src0);
  Context.insert(MovpInst);
  return MovpInst;
}
/// Creates a Traits::Insts::Movd from (Dest, Src0) and inserts it through
/// Context.
void _movd(Variable *Dest, Operand *Src0) {
  auto *MovdInst = Traits::Insts::Movd::create(Func, Dest, Src0);
  Context.insert(MovdInst);
}
void _movp(Variable *Dest, Operand *Src0) {
Context.insert(Traits::Insts::Movp::create(Func, Dest, Src0));
}
/// Creates a Traits::Insts::Movq from (Dest, Src0) and inserts it through
/// Context.
void _movq(Variable *Dest, Operand *Src0) {
  auto *MovqInst = Traits::Insts::Movq::create(Func, Dest, Src0);
  Context.insert(MovqInst);
}
......@@ -500,6 +511,12 @@ protected:
/// Creates a Traits::Insts::Movzx from (Dest, Src0) and inserts it through
/// Context.
void _movzx(Variable *Dest, Operand *Src0) {
  auto *MovzxInst = Traits::Insts::Movzx::create(Func, Dest, Src0);
  Context.insert(MovzxInst);
}
/// Creates a Traits::Insts::Maxss from (Dest, Src0) and inserts it through
/// Context.
void _maxss(Variable *Dest, Operand *Src0) {
  auto *MaxssInst = Traits::Insts::Maxss::create(Func, Dest, Src0);
  Context.insert(MaxssInst);
}
/// Creates a Traits::Insts::Minss from (Dest, Src0) and inserts it through
/// Context.
void _minss(Variable *Dest, Operand *Src0) {
  auto *MinssInst = Traits::Insts::Minss::create(Func, Dest, Src0);
  Context.insert(MinssInst);
}
/// Creates a Traits::Insts::Mul from (Dest, Src0, Src1) and inserts it
/// through Context.
void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
  auto *MulInst = Traits::Insts::Mul::create(Func, Dest, Src0, Src1);
  Context.insert(MulInst);
}
......@@ -518,6 +535,9 @@ protected:
/// Creates a Traits::Insts::Or from (Dest, Src0) and inserts it through
/// Context.
void _or(Variable *Dest, Operand *Src0) {
  auto *OrInst = Traits::Insts::Or::create(Func, Dest, Src0);
  Context.insert(OrInst);
}
/// Creates a Traits::Insts::Orps from (Dest, Src0) and inserts it through
/// Context.
void _orps(Variable *Dest, Operand *Src0) {
  auto *OrpsInst = Traits::Insts::Orps::create(Func, Dest, Src0);
  Context.insert(OrpsInst);
}
/// Creates a Traits::Insts::OrRMW from the memory operand DestSrc0 and Src1,
/// and inserts it through Context.
void _or_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
  auto *OrRmwInst = Traits::Insts::OrRMW::create(Func, DestSrc0, Src1);
  Context.insert(OrRmwInst);
}
......@@ -663,6 +683,9 @@ protected:
/// Creates a Traits::Insts::Xor from (Dest, Src0) and inserts it through
/// Context.
void _xor(Variable *Dest, Operand *Src0) {
  auto *XorInst = Traits::Insts::Xor::create(Func, Dest, Src0);
  Context.insert(XorInst);
}
/// Creates a Traits::Insts::Xorps from (Dest, Src0) and inserts it through
/// Context.
void _xorps(Variable *Dest, Operand *Src0) {
  auto *XorpsInst = Traits::Insts::Xorps::create(Func, Dest, Src0);
  Context.insert(XorpsInst);
}
/// Creates a Traits::Insts::XorRMW from the memory operand DestSrc0 and Src1,
/// and inserts it through Context.
void _xor_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
  auto *XorRmwInst = Traits::Insts::XorRMW::create(Func, DestSrc0, Src1);
  Context.insert(XorRmwInst);
}
......@@ -751,31 +774,49 @@ private:
void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi,
Operand *Src1Lo, Variable *DestLo, Variable *DestHi);
/// Emit the code for a combined operation and branch, or set the destination
/// variable of the operation if Br == nullptr.
void lowerIcmpAndBr(const InstIcmp *Icmp, const InstBr *Br);
void lowerFcmpAndBr(const InstFcmp *Fcmp, const InstBr *Br);
void lowerArithAndBr(const InstArithmetic *Arith, const InstBr *Br);
/// Emit a setcc instruction if Br == nullptr; otherwise emit a branch.
void setccOrBr(typename Traits::Cond::BrCond Condition, Variable *Dest,
const InstBr *Br);
/// Emit a mov [1|0] instruction if Br == nullptr; otherwise emit a branch.
void movOrBr(bool IcmpResult, Variable *Dest, const InstBr *Br);
/// Emit the code for a combined operation and consumer instruction, or set
/// the destination variable of the operation if Consumer == nullptr.
void lowerIcmpAndConsumer(const InstIcmp *Icmp, const Inst *Consumer);
void lowerFcmpAndConsumer(const InstFcmp *Fcmp, const Inst *Consumer);
void lowerArithAndConsumer(const InstArithmetic *Arith, const Inst *Consumer);
/// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
/// specialized version of Consumer.
void setccOrConsumer(typename Traits::Cond::BrCond Condition, Variable *Dest,
const Inst *Consumer);
/// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
/// specialized version of Consumer.
void movOrConsumer(bool IcmpResult, Variable *Dest, const Inst *Consumer);
/// Emit the code for instructions with a vector type.
void lowerIcmpVector(const InstIcmp *Icmp);
void lowerFcmpVector(const InstFcmp *Icmp);
void lowerSelectVector(const InstSelect *Inst);
/// Helpers for select lowering.
void lowerSelectMove(Variable *Dest, typename Traits::Cond::BrCond Cond,
Operand *SrcT, Operand *SrcF);
void lowerSelectIntMove(Variable *Dest, typename Traits::Cond::BrCond Cond,
Operand *SrcT, Operand *SrcF);
/// Generic helper to move an arbitrary type from Src to Dest.
void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);
/// Optimizations for idiom recognition.
bool lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, const InstSelect *Select);
/// Complains loudly if invoked because the cpu can handle 64-bit types
/// natively.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type lowerIcmp64(const InstIcmp *,
const InstBr *) {
const Inst *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (lowerIcmp64)");
}
/// x86lowerIcmp64 handles 64-bit icmp lowering.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type
lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br);
lowerIcmp64(const InstIcmp *Icmp, const Inst *Consumer);
BoolFolding FoldingInfo;
};
......
......@@ -1650,13 +1650,15 @@ entry:
ret i64 %cond
}
; CHECK-LABEL: select64VarVar
; CHECK: mov
; CHECK: mov
; CHECK: cmp
; CHECK: jb
; CHECK: ja
; CHECK: cmp
; CHECK: jb
; CHECK: cmp
; CHECK: cmovne
; CHECK: mov
; CHECK: mov
;
; OPTM1-LABEL: select64VarVar
; OPTM1: cmp
......@@ -1683,13 +1685,15 @@ entry:
ret i64 %cond
}
; CHECK-LABEL: select64VarConst
; CHECK: mov
; CHECK: mov
; CHECK: cmp
; CHECK: jb
; CHECK: ja
; CHECK: cmp
; CHECK: jb
; CHECK: cmp
; CHECK: cmovne
; CHECK: mov
; CHECK: mov
;
; OPTM1-LABEL: select64VarConst
; OPTM1: cmp
......@@ -1720,13 +1724,15 @@ entry:
ret i64 %cond
}
; CHECK-LABEL: select64ConstVar
; CHECK: mov
; CHECK: mov
; CHECK: cmp
; CHECK: jb
; CHECK: ja
; CHECK: cmp
; CHECK: jb
; CHECK: cmp
; CHECK: cmove
; CHECK: mov
; CHECK: mov
;
; OPTM1-LABEL: select64ConstVar
; OPTM1: cmp
......
......@@ -815,9 +815,10 @@ entry:
ret float %cond
}
; CHECK-LABEL: selectFloatVarVar
; CHECK: movss
; CHECK: ucomiss
; CHECK: seta
; CHECK: fld
; CHECK: ja
; CHECK: movss
; ARM32-LABEL: selectFloatVarVar
; ARM32: vcmp.f32
; ARM32-OM1: vmovne.f32 s{{[0-9]+}}
......@@ -831,9 +832,10 @@ entry:
ret double %cond
}
; CHECK-LABEL: selectDoubleVarVar
; CHECK: movsd
; CHECK: ucomisd
; CHECK: seta
; CHECK: fld
; CHECK: ja
; CHECK: movsd
; ARM32-LABEL: selectDoubleVarVar
; ARM32: vcmp.f64
; ARM32-OM1: vmovne.f64 d{{[0-9]+}}
......
......@@ -38,7 +38,15 @@ define internal float @undef_float() {
entry:
ret float undef
; CHECK-LABEL: undef_float
; CHECK: pxor [[REG:xmm.]],[[REG]]
; CHECK: xorps [[REG:xmm.]],[[REG]]
; CHECK: fld
}
define internal double @undef_double() {
entry:
ret double undef
; CHECK-LABEL: undef_double
; CHECK: xorpd [[REG:xmm.]],[[REG]]
; CHECK: fld
}
......@@ -192,7 +200,7 @@ entry:
%val = insertelement <4 x float> %arg, float undef, i32 0
ret <4 x float> %val
; CHECK-LABEL: vector_insertelement_arg2
; CHECK: pxor [[REG:xmm.]],[[REG]]
; CHECK: xorps [[REG:xmm.]],[[REG]]
; CHECK: {{movss|insertps}} {{.*}},[[REG]]
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment