Commit ae15f0fd by John Porto

Subzero. X86. Lowers shufflevector using xmm instructions.

parent 4aae81af
...@@ -56,4 +56,4 @@ test: test_sync_atomic.cpp ...@@ -56,4 +56,4 @@ test: test_sync_atomic.cpp
[test_vector_ops] [test_vector_ops]
driver: test_vector_ops_main.cpp driver: test_vector_ops_main.cpp
test: test_vector_ops.ll test: test_vector_ops.cpp test_vector_ops_ll.ll
...@@ -18,8 +18,8 @@ ...@@ -18,8 +18,8 @@
#include "vectors.h" #include "vectors.h"
// The VectorOps<> class acts like Vectors<> but also has insertelement, // The VectorOps<> class acts like Vectors<> but also has insertelement,
// Subzero_insertelement, extractelement, and Subzero_extractelement // Subzero_insertelement, extractelement, Subzero_extractelement,
// fields. // shufflevector, Subzero_shufflevector, and shufflevector_count fields.
template <typename T> struct VectorOps; template <typename T> struct VectorOps;
#define FIELD(TYNAME, FIELDNAME) VectorOps<TYNAME>::FIELDNAME #define FIELD(TYNAME, FIELDNAME) VectorOps<TYNAME>::FIELDNAME
...@@ -28,15 +28,21 @@ template <typename T> struct VectorOps; ...@@ -28,15 +28,21 @@ template <typename T> struct VectorOps;
#define DECLARE_VECTOR_OPS(NAME) \ #define DECLARE_VECTOR_OPS(NAME) \
template <> struct VectorOps<NAME> : public Vectors<NAME> { \ template <> struct VectorOps<NAME> : public Vectors<NAME> { \
static Ty (*insertelement)(Ty, CastTy, int32_t); \ static Ty (*insertelement)(Ty, CastTy, int32_t); \
static Ty (*shufflevector)(Ty, Ty, uint32_t); \
static CastTy (*extractelement)(Ty, int32_t); \ static CastTy (*extractelement)(Ty, int32_t); \
static Ty (*Subzero_insertelement)(Ty, CastTy, int32_t); \ static Ty (*Subzero_insertelement)(Ty, CastTy, int32_t); \
static Ty (*Subzero_shufflevector)(Ty, Ty, uint32_t); \
static CastTy (*Subzero_extractelement)(Ty, int32_t); \ static CastTy (*Subzero_extractelement)(Ty, int32_t); \
static uint32_t (*shufflevector_count)(); \
}; \ }; \
extern "C" { \ extern "C" { \
TY(NAME) insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \ TY(NAME) insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \
TY(NAME) Subzero_insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \ TY(NAME) Subzero_insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \
CASTTY(NAME) extractelement_##NAME(TY(NAME), int32_t); \ CASTTY(NAME) extractelement_##NAME(TY(NAME), int32_t); \
CASTTY(NAME) Subzero_extractelement_##NAME(TY(NAME), int32_t); \ CASTTY(NAME) Subzero_extractelement_##NAME(TY(NAME), int32_t); \
TY(NAME) shufflevector_##NAME(TY(NAME), TY(NAME), uint32_t); \
TY(NAME) Subzero_shufflevector_##NAME(TY(NAME), TY(NAME), uint32_t); \
uint32_t shufflevector_count_##NAME(); \
} \ } \
TY(NAME) (*FIELD(NAME, insertelement))(TY(NAME), CASTTY(NAME), int32_t) = \ TY(NAME) (*FIELD(NAME, insertelement))(TY(NAME), CASTTY(NAME), int32_t) = \
&insertelement_##NAME; \ &insertelement_##NAME; \
...@@ -45,7 +51,12 @@ template <typename T> struct VectorOps; ...@@ -45,7 +51,12 @@ template <typename T> struct VectorOps;
CASTTY(NAME) (*FIELD(NAME, extractelement))(TY(NAME), int32_t) = \ CASTTY(NAME) (*FIELD(NAME, extractelement))(TY(NAME), int32_t) = \
&extractelement_##NAME; \ &extractelement_##NAME; \
CASTTY(NAME) (*FIELD(NAME, Subzero_extractelement))(TY(NAME), int32_t) = \ CASTTY(NAME) (*FIELD(NAME, Subzero_extractelement))(TY(NAME), int32_t) = \
&Subzero_extractelement_##NAME; &Subzero_extractelement_##NAME; \
TY(NAME) (*FIELD(NAME, shufflevector))(TY(NAME), TY(NAME), uint32_t) = \
&shufflevector_##NAME; \
TY(NAME) (*FIELD(NAME, Subzero_shufflevector))( \
TY(NAME), TY(NAME), uint32_t) = &Subzero_shufflevector_##NAME; \
uint32_t (*FIELD(NAME, shufflevector_count))() = &shufflevector_count_##NAME;
#define X(ty, eltty, castty) DECLARE_VECTOR_OPS(ty) #define X(ty, eltty, castty) DECLARE_VECTOR_OPS(ty)
VECTOR_TYPE_TABLE VECTOR_TYPE_TABLE
......
...@@ -130,6 +130,41 @@ void testExtractElement(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -130,6 +130,41 @@ void testExtractElement(size_t &TotalTests, size_t &Passes, size_t &Failures) {
free(TestVectors); free(TestVectors);
} }
/// Cross-tests the shufflevector implementations for vector type T.
///
/// For every ordered pair of test vectors (Vect0, Vect1) and every shuffle
/// selector Which in [0, VectorOps<T>::shufflevector_count()), the function
/// calls VectorOps<T>::shufflevector (reported as "llc") and
/// VectorOps<T>::Subzero_shufflevector (reported as "sz") and compares the two
/// results bytewise.
///
/// \param TotalTests incremented once per comparison performed.
/// \param Passes     incremented when both results are byte-identical.
/// \param Failures   incremented on a mismatch; the two inputs, the shuffle
///                   selector and both results are printed to std::cout.
template <typename T>
void testShuffleVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  typedef typename VectorOps<T>::Ty Ty;

  size_t NumTestVectors;
  Ty *TestVectors = getTestVectors<T>(NumTestVectors);

  for (size_t VI = 0; VI < NumTestVectors; ++VI) {
    Ty Vect0 = TestVectors[VI];
    for (size_t VJ = 0; VJ < NumTestVectors; ++VJ) {
      Ty Vect1 = TestVectors[VJ];
      for (uint32_t Which = 0; Which < VectorOps<T>::shufflevector_count();
           ++Which) {
        Ty ResultLlc = VectorOps<T>::shufflevector(Vect0, Vect1, Which);
        Ty ResultSz = VectorOps<T>::Subzero_shufflevector(Vect0, Vect1, Which);
        ++TotalTests;
        if (memcmp(&ResultLlc, &ResultSz, sizeof(ResultLlc)) == 0) {
          ++Passes;
          continue;
        }
        ++Failures;
        // Report the shuffle selector itself (Which), not the index of the
        // second test vector, so a failing case can be reproduced directly.
        std::cout << "shufflevector<" << VectorOps<T>::TypeName << ">(Vect0=";
        std::cout << vectAsString<T>(Vect0)
                  << ", Vect1=" << vectAsString<T>(Vect1)
                  << ", Which=" << Which << ")\n";
        std::cout << "llc=" << vectAsString<T>(ResultLlc) << "\n";
        std::cout << "sz =" << vectAsString<T>(ResultSz) << "\n";
      }
    }
  }

  // The test-vector array is released with free(), matching the original.
  free(TestVectors);
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
...@@ -157,6 +192,17 @@ int main(int argc, char *argv[]) { ...@@ -157,6 +192,17 @@ int main(int argc, char *argv[]) {
testExtractElement<v4ui32>(TotalTests, Passes, Failures); testExtractElement<v4ui32>(TotalTests, Passes, Failures);
testExtractElement<v4f32>(TotalTests, Passes, Failures); testExtractElement<v4f32>(TotalTests, Passes, Failures);
testShuffleVector<v4i1>(TotalTests, Passes, Failures);
testShuffleVector<v8i1>(TotalTests, Passes, Failures);
testShuffleVector<v16i1>(TotalTests, Passes, Failures);
testShuffleVector<v16si8>(TotalTests, Passes, Failures);
testShuffleVector<v16ui8>(TotalTests, Passes, Failures);
testShuffleVector<v8si16>(TotalTests, Passes, Failures);
testShuffleVector<v8ui16>(TotalTests, Passes, Failures);
testShuffleVector<v4si32>(TotalTests, Passes, Failures);
testShuffleVector<v4ui32>(TotalTests, Passes, Failures);
testShuffleVector<v4f32>(TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n"; << " Failures=" << Failures << "\n";
......
...@@ -432,6 +432,8 @@ public: ...@@ -432,6 +432,8 @@ public:
void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask); void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void pshufd(Type Ty, XmmRegister dst, const Address &src, void pshufd(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask); const Immediate &mask);
// punpckldq with an xmm destination and either an xmm register or a memory
// source. The leading Type parameter is unnamed here.
void punpckldq(Type, XmmRegister Dst, XmmRegister Src);
void punpckldq(Type, XmmRegister Dst, const Address &Src);
void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask); void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void shufps(Type Ty, XmmRegister dst, const Address &src, void shufps(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask); const Immediate &mask);
......
...@@ -1565,6 +1565,29 @@ void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst, ...@@ -1565,6 +1565,29 @@ void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
} }
template <typename TraitsType> template <typename TraitsType>
// Emits the register/register form of punpckldq. The bytes are written in this
// order: 0x66, whatever emitRexRB produces for (Dst, Src), 0x0F, 0x62, and
// finally the xmm register operand for the Dst/Src pair. The unnamed Type
// argument is not used.
void AssemblerX86Base<TraitsType>::punpckldq(Type, XmmRegister Dst,
XmmRegister Src) {
// NOTE(review): presumably guarantees buffer space for the bytes emitted
// below -- confirm against AssemblerBuffer::EnsureCapacity.
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
// NOTE(review): presumably emits a REX prefix only when Dst/Src require one --
// confirm against emitRexRB.
emitRexRB(RexTypeIrrelevant, Dst, Src);
emitUint8(0x0F);
emitUint8(0x62);
emitXmmRegisterOperand(Dst, Src);
}
// Emits the register/memory form of punpckldq. The bytes are written in this
// order: 0x66, the address-size override prefix (via
// emitAddrSizeOverridePrefix), whatever emitRex produces for (Src, Dst), 0x0F,
// 0x62, and finally the memory operand with Dst's encoding as the register
// field. The unnamed Type argument is not used.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::punpckldq(Type, XmmRegister Dst,
const Address &Src) {
// NOTE(review): presumably guarantees buffer space for the bytes emitted
// below -- confirm against AssemblerBuffer::EnsureCapacity.
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitAddrSizeOverridePrefix();
// Note the argument order: the address comes first, then the destination.
emitRex(RexTypeIrrelevant, Src, Dst);
emitUint8(0x0F);
emitUint8(0x62);
emitOperand(gprEncoding(Dst), Src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst, void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
XmmRegister src, XmmRegister src,
const Immediate &imm) { const Immediate &imm) {
......
...@@ -50,8 +50,8 @@ const TargetX8632Traits::InstCmppsAttributesType ...@@ -50,8 +50,8 @@ const TargetX8632Traits::InstCmppsAttributesType
const TargetX8632Traits::TypeAttributesType const TargetX8632Traits::TypeAttributesType
TargetX8632Traits::TypeAttributes[] = { TargetX8632Traits::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, width, fld } \ { cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
, ,
ICETYPEX8632_TABLE ICETYPEX8632_TABLE
#undef X #undef X
......
...@@ -212,22 +212,22 @@ ...@@ -212,22 +212,22 @@
//#define X(val, emit) //#define X(val, emit)
#define ICETYPEX8632_TABLE \ #define ICETYPEX8632_TABLE \
/* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \ /* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "") \ X(void, void, "?", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "b", "") \ X(i1, void, "si", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "b", "") \ X(i8, void, "si", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "w", "") \ X(i16, void, "si", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "l", "") \ X(i32, void, "si", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "q", "") \ X(i64, void, "si", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \ X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \ X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "", "") \ X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "", "") \ X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "", "") \ X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "", "") \ X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "", "") \ X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "", "") \ X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "") X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) //#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF #endif // SUBZERO_SRC_ICEINSTX8632_DEF
...@@ -51,8 +51,8 @@ const TargetX8664Traits::InstCmppsAttributesType ...@@ -51,8 +51,8 @@ const TargetX8664Traits::InstCmppsAttributesType
const TargetX8664Traits::TypeAttributesType const TargetX8664Traits::TypeAttributesType
TargetX8664Traits::TypeAttributes[] = { TargetX8664Traits::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, width, fld } \ { cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
, ,
ICETYPEX8664_TABLE ICETYPEX8664_TABLE
#undef X #undef X
......
...@@ -293,22 +293,22 @@ ...@@ -293,22 +293,22 @@
//#define X(val, emit) //#define X(val, emit)
#define ICETYPEX8664_TABLE \ #define ICETYPEX8664_TABLE \
/* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \ /* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "") \ X(void, void, "?", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "b", "") \ X(i1, void, "si", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "b", "") \ X(i8, void, "si", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "w", "") \ X(i16, void, "si", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "l", "") \ X(i32, void, "si", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "q", "") \ X(i64, void, "si", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \ X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \ X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "", "") \ X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "", "") \ X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "", "") \ X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "", "") \ X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "", "") \ X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "", "") \ X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "") X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, pack, width, fld) //#define X(tag, elementty, cvt, sdss, pdps, pack, unpack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF #endif // SUBZERO_SRC_ICEINSTX8664_DEF
...@@ -143,6 +143,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -143,6 +143,7 @@ template <typename TraitsType> struct InstImpl {
Pop, Pop,
Por, Por,
Pshufd, Pshufd,
Punpckl,
Psll, Psll,
Psra, Psra,
Psrl, Psrl,
...@@ -183,7 +184,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -183,7 +184,7 @@ template <typename TraitsType> struct InstImpl {
IacaEnd IacaEnd
}; };
enum SseSuffix { None, Packed, Scalar, Integral }; enum SseSuffix { None, Packed, Unpack, Scalar, Integral };
static const char *getWidthString(Type Ty); static const char *getWidthString(Type Ty);
static const char *getFldString(Type Ty); static const char *getFldString(Type Ty);
...@@ -841,6 +842,9 @@ template <typename TraitsType> struct InstImpl { ...@@ -841,6 +842,9 @@ template <typename TraitsType> struct InstImpl {
case InstX86Base::SseSuffix::Packed: case InstX86Base::SseSuffix::Packed:
SuffixString = Traits::TypeAttributes[DestTy].PdPsString; SuffixString = Traits::TypeAttributes[DestTy].PdPsString;
break; break;
case InstX86Base::SseSuffix::Unpack:
SuffixString = Traits::TypeAttributes[DestTy].UnpackString;
break;
case InstX86Base::SseSuffix::Scalar: case InstX86Base::SseSuffix::Scalar:
SuffixString = Traits::TypeAttributes[DestTy].SdSsString; SuffixString = Traits::TypeAttributes[DestTy].SdSsString;
break; break;
...@@ -2839,6 +2843,23 @@ template <typename TraitsType> struct InstImpl { ...@@ -2839,6 +2843,23 @@ template <typename TraitsType> struct InstImpl {
private: private:
InstX86IacaEnd(Cfg *Func); InstX86IacaEnd(Cfg *Func);
}; };
/// Instruction class for the Punpckl instruction kind. It is an
/// InstX86BaseBinopXmm specialization that uses the SseSuffix::Unpack suffix
/// kind and takes a destination variable plus one source operand.
/// NOTE(review): the meaning of the `false` template argument is defined by
/// InstX86BaseBinopXmm, which is not visible here -- confirm.
class InstX86Punpckl
: public InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
InstX86Base::SseSuffix::Unpack> {
public:
/// Constructs a new InstX86Punpckl in storage obtained from
/// Func->allocate<InstX86Punpckl>() and returns a pointer to it.
static InstX86Punpckl *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Punpckl>())
InstX86Punpckl(Func, Dest, Source);
}
private:
/// Private so that instances are only built through create(); simply forwards
/// its arguments to the base class constructor.
InstX86Punpckl(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
InstX86Base::SseSuffix::Unpack>(Func, Dest,
Source) {}
};
}; // struct InstImpl }; // struct InstImpl
/// struct Insts is a template that can be used to instantiate all the X86 /// struct Insts is a template that can be used to instantiate all the X86
...@@ -2960,6 +2981,8 @@ template <typename TraitsType> struct Insts { ...@@ -2960,6 +2981,8 @@ template <typename TraitsType> struct Insts {
using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart; using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart;
using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd; using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd;
using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl;
}; };
/// X86 Instructions have static data (particularly, opcodes and instruction /// X86 Instructions have static data (particularly, opcodes and instruction
...@@ -3189,6 +3212,9 @@ template <typename TraitsType> struct Insts { ...@@ -3189,6 +3212,9 @@ template <typename TraitsType> struct Insts {
template <> \ template <> \
template <> \ template <> \
const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd"; \ const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \
/* Inplace GPR ops */ \ /* Inplace GPR ops */ \
template <> \ template <> \
template <> \ template <> \
...@@ -3550,6 +3576,12 @@ template <typename TraitsType> struct Insts { ...@@ -3550,6 +3576,12 @@ template <typename TraitsType> struct Insts {
&InstImpl<TraitsType>::Assembler::psrl, \ &InstImpl<TraitsType>::Assembler::psrl, \
&InstImpl<TraitsType>::Assembler::psrl, \ &InstImpl<TraitsType>::Assembler::psrl, \
&InstImpl<TraitsType>::Assembler::psrl}; \ &InstImpl<TraitsType>::Assembler::psrl}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::punpckldq, \
&InstImpl<TraitsType>::Assembler::punpckldq}; \
} \ } \
} }
......
...@@ -101,7 +101,7 @@ const size_t TargetX8632Traits::TableIcmp64Size = ...@@ -101,7 +101,7 @@ const size_t TargetX8632Traits::TableIcmp64Size =
const TargetX8632Traits::TableTypeX8632AttributesType const TargetX8632Traits::TableTypeX8632AttributesType
TargetX8632Traits::TableTypeX8632Attributes[] = { TargetX8632Traits::TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \ { IceType_##elementty } \
, ,
ICETYPEX8632_TABLE ICETYPEX8632_TABLE
...@@ -459,7 +459,8 @@ ICEINSTICMP_TABLE ...@@ -459,7 +459,8 @@ ICEINSTICMP_TABLE
namespace dummy3 { namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries. // Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum { enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag, #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
_tmp_##tag,
ICETYPEX8632_TABLE ICETYPEX8632_TABLE
#undef X #undef X
_num _num
...@@ -471,7 +472,7 @@ ICETYPE_TABLE ...@@ -471,7 +472,7 @@ ICETYPE_TABLE
#undef X #undef X
// Define a set of constants based on low-level table entries, and ensure the // Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent. // table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \ static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \ static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
......
...@@ -976,13 +976,14 @@ public: ...@@ -976,13 +976,14 @@ public:
} InstCmppsAttributes[]; } InstCmppsAttributes[];
static const struct TypeAttributesType { static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP) const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank> const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank> const char *PdPsString; // ps, pd, or <blank>
const char *SpsdString; // ss, sd, ps, pd, or <blank> const char *SpsdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank> const char *PackString; // b, w, d, or <blank>
const char *WidthString; // b, w, l, q, or <blank> const char *UnpackString; // bw, wd, dq, or <blank>
const char *FldString; // s, l, or <blank> const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
} TypeAttributes[]; } TypeAttributes[];
static const char *InstSegmentRegNames[]; static const char *InstSegmentRegNames[];
......
...@@ -94,7 +94,7 @@ const size_t TargetX8664Traits::TableIcmp64Size = ...@@ -94,7 +94,7 @@ const size_t TargetX8664Traits::TableIcmp64Size =
const TargetX8664Traits::TableTypeX8664AttributesType const TargetX8664Traits::TableTypeX8664AttributesType
TargetX8664Traits::TableTypeX8664Attributes[] = { TargetX8664Traits::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \ { IceType_##elementty } \
, ,
ICETYPEX8664_TABLE ICETYPEX8664_TABLE
...@@ -787,7 +787,8 @@ ICEINSTICMP_TABLE ...@@ -787,7 +787,8 @@ ICEINSTICMP_TABLE
namespace dummy3 { namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries. // Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum { enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag, #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
_tmp_##tag,
ICETYPEX8664_TABLE ICETYPEX8664_TABLE
#undef X #undef X
_num _num
...@@ -799,7 +800,7 @@ ICETYPE_TABLE ...@@ -799,7 +800,7 @@ ICETYPE_TABLE
#undef X #undef X
// Define a set of constants based on low-level table entries, and ensure the // Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent. // table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \ #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \ static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \ static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
......
...@@ -1021,13 +1021,14 @@ public: ...@@ -1021,13 +1021,14 @@ public:
} InstCmppsAttributes[]; } InstCmppsAttributes[];
static const struct TypeAttributesType { static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP) const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank> const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank> const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank> const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank> const char *PackString; // b, w, d, or <blank>
const char *WidthString; // b, w, l, q, or <blank> const char *UnpackString; // bw, wd, dq, or <blank>
const char *FldString; // s, l, or <blank> const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
} TypeAttributes[]; } TypeAttributes[];
}; };
......
...@@ -801,6 +801,10 @@ protected: ...@@ -801,6 +801,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0); AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Por>(Dest, Src0); Context.insert<typename Traits::Insts::Por>(Dest, Src0);
} }
/// Inserts a Punpckl instruction with operands (Dest, Src0) into the current
/// lowering context.
void _punpckl(Variable *Dest, Operand *Src0) {
// The sandboxer object receives the addresses of both operands and must be
// constructed before the insert. NOTE(review): presumably it may rewrite the
// operands for memory sandboxing -- confirm against AutoMemorySandboxer.
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0);
}
void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) { void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1); AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1); Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1);
...@@ -1082,6 +1086,23 @@ private: ...@@ -1082,6 +1086,23 @@ private:
BoolFolding<Traits> FoldingInfo; BoolFolding<Traits> FoldingInfo;
/// Helpers for lowering ShuffleVector
/// @{
Variable *lowerShuffleVector_AllFromSameSrc(Variable *Src, SizeT Index0,
SizeT Index1, SizeT Index2,
SizeT Index3);
static constexpr SizeT IGNORE_INDEX = 0x80000000u;
Variable *lowerShuffleVector_TwoFromSameSrc(Variable *Src0, SizeT Index0,
SizeT Index1, Variable *Src1,
SizeT Index2, SizeT Index3);
static constexpr SizeT UNIFIED_INDEX_0 = 0;
static constexpr SizeT UNIFIED_INDEX_1 = 2;
Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Variable *Src0,
SizeT Index0,
Variable *Src1,
SizeT Index1);
/// @}
static FixupKind PcRelFixup; static FixupKind PcRelFixup;
static FixupKind AbsFixup; static FixupKind AbsFixup;
}; };
......
; Tests some optimized shufflevector lowerings. This list is by no means
; exhaustive; it is only a **basic** smoke test. The vector_ops crosstest has a
; broader range of test cases.
; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble -a -O2 \
; RUN: --allow-externally-defined-symbols | FileCheck %s --check-prefix=X86
declare void @useV4I32(<4 x i32> %t);
; Builds three different 4 x i32 vectors out of the elements of %a and %b using
; extractelement/insertelement chains, passes each one to @useV4I32, and checks
; the instructions that appear before each call.
define internal void @shuffleV4I32(<4 x i32> %a, <4 x i32> %b) {
; X86-LABEL: shuffleV4I32
; Extract every element of both inputs once; the chains below reuse them.
%a_0 = extractelement <4 x i32> %a, i32 0
%a_1 = extractelement <4 x i32> %a, i32 1
%a_2 = extractelement <4 x i32> %a, i32 2
%a_3 = extractelement <4 x i32> %a, i32 3
%b_0 = extractelement <4 x i32> %b, i32 0
%b_1 = extractelement <4 x i32> %b, i32 1
%b_2 = extractelement <4 x i32> %b, i32 2
%b_3 = extractelement <4 x i32> %b, i32 3
; %t0 = <a0, b0, a1, b1>; expected to be matched as a single punpckldq.
%t0_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
%t0_1 = insertelement <4 x i32> %t0_0, i32 %b_0, i32 1
%t0_2 = insertelement <4 x i32> %t0_1, i32 %a_1, i32 2
%t0 = insertelement <4 x i32> %t0_2, i32 %b_1, i32 3
; X86: punpckldq {{.*}}
call void @useV4I32(<4 x i32> %t0)
; X86: call
; %t1 = <a0, b1, b1, a0>; expected as a shufps followed by a pshufd that reads
; the shufps destination register.
%t1_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
%t1_1 = insertelement <4 x i32> %t1_0, i32 %b_1, i32 1
%t1_2 = insertelement <4 x i32> %t1_1, i32 %b_1, i32 2
%t1 = insertelement <4 x i32> %t1_2, i32 %a_0, i32 3
; X86: shufps [[T:xmm[0-9]+]],{{.*}},0x10
; X86: pshufd {{.*}},[[T]],0x28
call void @useV4I32(<4 x i32> %t1)
; X86: call
; %t2 = <a0, b3, a2, b2>; expected as a sequence of three shufps.
%t2_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
%t2_1 = insertelement <4 x i32> %t2_0, i32 %b_3, i32 1
%t2_2 = insertelement <4 x i32> %t2_1, i32 %a_2, i32 2
%t2 = insertelement <4 x i32> %t2_2, i32 %b_2, i32 3
; X86: shufps {{.*}},0x30
; X86: shufps {{.*}},0x22
; X86: shufps {{.*}},0x88
call void @useV4I32(<4 x i32> %t2)
; X86: call
ret void
}
...@@ -995,33 +995,79 @@ TEST_F(AssemblerX8632Test, Shufp) { ...@@ -995,33 +995,79 @@ TEST_F(AssemblerX8632Test, Shufp) {
reset(); \ reset(); \
} while (0) } while (0)
#define TestImplSingleXmmXmmUntyped(Dst, Src, Inst) \ #define TestImpl(Dst, Src) \
do { \ do { \
static constexpr char TestString[] = \ TestImplSingleXmmXmm(Dst, Src, pshufd); \
"(" #Dst ", " #Src ", " #Inst ", Untyped)"; \ TestImplSingleXmmAddr(Dst, pshufd); \
TestImplSingleXmmXmm(Dst, Src, shufps); \
TestImplSingleXmmAddr(Dst, shufps); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplSingleXmmAddr
#undef TestImplSingleXmmXmm
}
TEST_F(AssemblerX8632Test, Punpckldq) {
const Dqword V0(uint64_t(0x1111111122222222ull),
uint64_t(0x5555555577777777ull));
const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
uint64_t(0xCCCCCCCCDDDDDDDDull));
const Dqword Expected(uint64_t(0xBBBBBBBB22222222ull),
uint64_t(0xAAAAAAAA11111111ull));
#define TestImplXmmXmm(Dst, Src, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \ const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \ const uint32_t T1 = allocateDqword(); \
\ \
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src, \ __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
Immediate(Inst##Imm)); \ XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
#define TestImplXmmAddr(Dst, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \ test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \ test.setDqwordTo(T1, V1); \
test.run(); \ test.run(); \
\ \
ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString; \ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \ reset(); \
} while (0) } while (0)
#define TestImpl(Dst, Src) \ #define TestImpl(Dst, Src) \
do { \ do { \
TestImplSingleXmmXmm(Dst, Src, pshufd); \ TestImplXmmXmm(Dst, Src, punpckldq); \
TestImplSingleXmmAddr(Dst, pshufd); \ TestImplXmmAddr(Dst, punpckldq); \
TestImplSingleXmmXmm(Dst, Src, shufps); \
TestImplSingleXmmAddr(Dst, shufps); \
} while (0) } while (0)
TestImpl(xmm0, xmm1); TestImpl(xmm0, xmm1);
...@@ -1034,9 +1080,8 @@ TEST_F(AssemblerX8632Test, Shufp) { ...@@ -1034,9 +1080,8 @@ TEST_F(AssemblerX8632Test, Shufp) {
TestImpl(xmm7, xmm0); TestImpl(xmm7, xmm0);
#undef TestImpl #undef TestImpl
#undef TestImplSingleXmmXmmUntyped #undef TestImplXmmAddr
#undef TestImplSingleXmmAddr #undef TestImplXmmXmm
#undef TestImplSingleXmmXmm
} }
TEST_F(AssemblerX8632Test, Cvt) { TEST_F(AssemblerX8632Test, Cvt) {
......
...@@ -1083,6 +1083,81 @@ TEST_F(AssemblerX8664Test, Shufp) { ...@@ -1083,6 +1083,81 @@ TEST_F(AssemblerX8664Test, Shufp) {
#undef TestImplSingleXmmXmm #undef TestImplSingleXmmXmm
} }
// Assembles and runs punpckldq in both its xmm/xmm and xmm/memory forms for
// every xmm register and checks the value left in the destination register.
TEST_F(AssemblerX8664Test, Punpckldq) {
// V0 is loaded into the destination register; V1 is the source (register or
// memory). Expected is made of the two 32-bit halves of V0's first 64-bit
// constructor argument interleaved with the two halves of V1's first argument;
// the second arguments (0x5555.../0xCCCC...) do not appear in it.
const Dqword V0(uint64_t(0x1111111122222222ull),
uint64_t(0x5555555577777777ull));
const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
uint64_t(0xCCCCCCCCDDDDDDDDull));
const Dqword Expected(uint64_t(0xBBBBBBBB22222222ull),
uint64_t(0xAAAAAAAA11111111ull));
// Register/register case: loads T0 into Dst and T1 into Src with movups, emits
// Inst(Dst, Src), runs the code with T0=V0 and T1=V1, and checks Dst.
#define TestImplXmmXmm(Dst, Src, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Register/memory case: loads T0 into Dst with movups and uses the memory at
// dwordAddress(T1) directly as the source operand of Inst.
#define TestImplXmmAddr(Dst, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs both cases above for one (Dst, Src) register pair.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, punpckldq); \
TestImplXmmAddr(Dst, punpckldq); \
} while (0)
// Each of xmm0..xmm15 is used exactly once as Dst and once as Src.
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm8);
TestImpl(xmm8, xmm9);
TestImpl(xmm9, xmm10);
TestImpl(xmm10, xmm11);
TestImpl(xmm11, xmm12);
TestImpl(xmm12, xmm13);
TestImpl(xmm13, xmm14);
TestImpl(xmm14, xmm15);
TestImpl(xmm15, xmm0);
// The helper macros are local to this test.
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
TEST_F(AssemblerX8664Test, Cvt) { TEST_F(AssemblerX8664Test, Cvt) {
const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
const Dqword dq2ps32SrcValue(-5, 3, 100, 200); const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment