Commit 7638e275 by Nicolas Capens Committed by Nicolas Capens

Add x86 vector packing instructions.

BUG=swiftshader:15
Change-Id: I0d40fab6287130143693e8e4752859b7142a503d
Reviewed-on: https://chromium-review.googlesource.com/394007
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent f52cea4b
......@@ -437,6 +437,10 @@ public:
const Immediate &mask);
void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
void packss(Type Ty, XmmRegister Dst, const Address &Src);
void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
void packus(Type Ty, XmmRegister Dst, const Address &Src);
void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void shufps(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
......
......@@ -1628,6 +1628,78 @@ void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
}
/// Emits a signed-saturating pack with an XMM register source.
///
/// Byte sequence written, in order: the 0x66 prefix, whatever emitRexRB
/// produces for (Dst, Src), the 0x0F escape, one opcode byte selected by Ty,
/// and finally the register-register operand encoding.
///
/// \param Ty  IceType_v4i32 or IceType_v4f32 select opcode 0x6B (packssdw);
///            IceType_v8i16 selects opcode 0x63 (packsswb). Any other type
///            trips the assert and no opcode byte is written.
/// \param Dst destination XMM register.
/// \param Src source XMM register.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
                                          XmmRegister Src) {
  AssemblerBuffer::EnsureCapacity EnsureSpace(&Buffer);
  emitUint8(0x66);
  emitRexRB(RexTypeIrrelevant, Dst, Src);
  emitUint8(0x0F);
  switch (Ty) {
  case IceType_v4i32:
  case IceType_v4f32:
    emitUint8(0x6B);
    break;
  case IceType_v8i16:
    emitUint8(0x63);
    break;
  default:
    assert(false && "Unexpected vector pack operand type");
    break;
  }
  emitXmmRegisterOperand(Dst, Src);
}
/// Emits a signed-saturating pack with a memory source operand.
///
/// Byte sequence written, in order: the 0x66 prefix, whatever
/// emitAddrSizeOverridePrefix and emitRex produce for (Src, Dst), the 0x0F
/// escape, one opcode byte selected by Ty, and finally the operand encoding
/// for Dst and the address.
///
/// \param Ty  IceType_v4i32 or IceType_v4f32 select opcode 0x6B (packssdw);
///            IceType_v8i16 selects opcode 0x63 (packsswb). Any other type
///            trips the assert and no opcode byte is written.
/// \param Dst destination XMM register.
/// \param Src memory operand that supplies the second input.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
                                          const Address &Src) {
  AssemblerBuffer::EnsureCapacity EnsureSpace(&Buffer);
  emitUint8(0x66);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, Src, Dst);
  emitUint8(0x0F);
  switch (Ty) {
  case IceType_v4i32:
  case IceType_v4f32:
    emitUint8(0x6B);
    break;
  case IceType_v8i16:
    emitUint8(0x63);
    break;
  default:
    assert(false && "Unexpected vector pack operand type");
    break;
  }
  emitOperand(gprEncoding(Dst), Src);
}
/// Emits an unsigned-saturating pack with an XMM register source.
///
/// Byte sequence written, in order: the 0x66 prefix, whatever emitRexRB
/// produces for (Dst, Src), the 0x0F escape, the opcode byte(s) selected by
/// Ty, and finally the register-register operand encoding.
///
/// \param Ty  IceType_v4i32 or IceType_v4f32 select the two-byte tail
///            0x38 0x2B (packusdw, an SSE4.1 encoding); IceType_v8i16 selects
///            opcode 0x67 (packuswb). Any other type trips the assert and no
///            opcode byte is written.
/// \param Dst destination XMM register.
/// \param Src source XMM register.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
                                          XmmRegister Src) {
  AssemblerBuffer::EnsureCapacity EnsureSpace(&Buffer);
  emitUint8(0x66);
  emitRexRB(RexTypeIrrelevant, Dst, Src);
  emitUint8(0x0F);
  switch (Ty) {
  case IceType_v4i32:
  case IceType_v4f32:
    // Three-byte opcode map: 0F 38 2B.
    emitUint8(0x38);
    emitUint8(0x2B);
    break;
  case IceType_v8i16:
    emitUint8(0x67);
    break;
  default:
    assert(false && "Unexpected vector pack operand type");
    break;
  }
  emitXmmRegisterOperand(Dst, Src);
}
/// Emits an unsigned-saturating pack with a memory source operand.
///
/// Byte sequence written, in order: the 0x66 prefix, whatever
/// emitAddrSizeOverridePrefix and emitRex produce for (Src, Dst), the 0x0F
/// escape, the opcode byte(s) selected by Ty, and finally the operand
/// encoding for Dst and the address.
///
/// \param Ty  IceType_v4i32 or IceType_v4f32 select the two-byte tail
///            0x38 0x2B (packusdw, an SSE4.1 encoding); IceType_v8i16 selects
///            opcode 0x67 (packuswb). Any other type trips the assert and no
///            opcode byte is written.
/// \param Dst destination XMM register.
/// \param Src memory operand that supplies the second input.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
                                          const Address &Src) {
  AssemblerBuffer::EnsureCapacity EnsureSpace(&Buffer);
  emitUint8(0x66);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, Src, Dst);
  emitUint8(0x0F);
  switch (Ty) {
  case IceType_v4i32:
  case IceType_v4f32:
    // Three-byte opcode map: 0F 38 2B.
    emitUint8(0x38);
    emitUint8(0x2B);
    break;
  case IceType_v8i16:
    emitUint8(0x67);
    break;
  default:
    assert(false && "Unexpected vector pack operand type");
    break;
  }
  emitOperand(gprEncoding(Dst), Src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
XmmRegister src,
const Immediate &imm) {
......
......@@ -50,8 +50,8 @@ const TargetX8632Traits::InstCmppsAttributesType
const TargetX8632Traits::TypeAttributesType
TargetX8632Traits::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
{ cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld } \
,
ICETYPEX8632_TABLE
#undef X
......
......@@ -212,22 +212,22 @@
//#define X(val, emit)
#define ICETYPEX8632_TABLE \
/* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld)
/* tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "dq", "", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "wd", "", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "bw", "", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "bw", "", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "wd", "wb", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "dq", "dw", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "", "")
//#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF
......@@ -51,8 +51,8 @@ const TargetX8664Traits::InstCmppsAttributesType
const TargetX8664Traits::TypeAttributesType
TargetX8664Traits::TypeAttributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
{ cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld } \
,
ICETYPEX8664_TABLE
#undef X
......
......@@ -295,22 +295,22 @@
//#define X(val, emit)
#define ICETYPEX8664_TABLE \
/* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
//#define X(tag, elementty, cvt, sdss, pdps, pack, unpack, width, fld)
/* tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld */ \
X(void, void, "?", "", "", "", "", "", "", "", "") \
X(i1, void, "si", "", "", "", "", "", "", "b", "") \
X(i8, void, "si", "", "", "", "", "", "", "b", "") \
X(i16, void, "si", "", "", "", "", "", "", "w", "") \
X(i32, void, "si", "", "", "", "", "", "", "l", "") \
X(i64, void, "si", "", "", "", "", "", "", "q", "") \
X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "", "s") \
X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "", "l") \
X(v4i1, i32, "?", "", "", "", "d", "dq", "", "", "") \
X(v8i1, i16, "?", "", "", "", "w", "wd", "", "", "") \
X(v16i1, i8, "?", "", "", "", "b", "bw", "", "", "") \
X(v16i8, i8, "?", "", "", "", "b", "bw", "", "", "") \
X(v8i16, i16, "?", "", "", "", "w", "wd", "wb", "", "") \
X(v4i32, i32, "dq", "", "", "", "d", "dq", "dw", "", "") \
X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "", "")
//#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF
......@@ -145,6 +145,8 @@ template <typename TraitsType> struct InstImpl {
Pshufb,
Pshufd,
Punpckl,
Packss,
Packus,
Psll,
Psra,
Psrl,
......@@ -186,7 +188,7 @@ template <typename TraitsType> struct InstImpl {
IacaEnd
};
enum SseSuffix { None, Packed, Unpack, Scalar, Integral };
enum SseSuffix { None, Packed, Unpack, Scalar, Integral, Pack };
static const char *getWidthString(Type Ty);
static const char *getFldString(Type Ty);
......@@ -878,6 +880,9 @@ template <typename TraitsType> struct InstImpl {
SuffixString = Traits::TypeAttributes[DestTy].SdSsString;
break;
case InstX86Base::SseSuffix::Integral:
SuffixString = Traits::TypeAttributes[DestTy].IntegralString;
break;
case InstX86Base::SseSuffix::Pack:
SuffixString = Traits::TypeAttributes[DestTy].PackString;
break;
}
......@@ -934,7 +939,7 @@ template <typename TraitsType> struct InstImpl {
// Shift operations are always integral, and hence always need a suffix.
const Type DestTy = this->getDest()->getType();
this->emitTwoAddress(Func, this->Opcode,
Traits::TypeAttributes[DestTy].PackString);
Traits::TypeAttributes[DestTy].IntegralString);
}
void emitIAS(const Cfg *Func) const override {
this->validateVectorAddrMode();
......@@ -2927,6 +2932,38 @@ template <typename TraitsType> struct InstImpl {
Source) {}
};
/// Instruction class for the "packss" opcode, built on InstX86BaseBinopXmm
/// with the InstX86Base::Packss kind and the SseSuffix::Pack suffix selector
/// (the Pack suffix is looked up from TypeAttributes[...].PackString).
/// NOTE(review): the meaning of the `false` template argument is defined by
/// InstX86BaseBinopXmm, which is not visible here -- confirm before relying
/// on it.
class InstX86Packss
: public InstX86BaseBinopXmm<InstX86Base::Packss, false,
InstX86Base::SseSuffix::Pack> {
public:
/// Constructs a new InstX86Packss in storage obtained from
/// Func->allocate<InstX86Packss>() (placement new) and returns it.
static InstX86Packss *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Packss>())
InstX86Packss(Func, Dest, Source);
}
private:
// Private so that instances are only created through create(); simply
// forwards its arguments to the base-class constructor.
InstX86Packss(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Packss, false,
InstX86Base::SseSuffix::Pack>(Func, Dest,
Source) {}
};
/// Instruction class for the "packus" opcode, built on InstX86BaseBinopXmm
/// with the InstX86Base::Packus kind and the SseSuffix::Pack suffix selector
/// (the Pack suffix is looked up from TypeAttributes[...].PackString).
/// NOTE(review): the meaning of the `false` template argument is defined by
/// InstX86BaseBinopXmm, which is not visible here -- confirm before relying
/// on it.
class InstX86Packus
: public InstX86BaseBinopXmm<InstX86Base::Packus, false,
InstX86Base::SseSuffix::Pack> {
public:
/// Constructs a new InstX86Packus in storage obtained from
/// Func->allocate<InstX86Packus>() (placement new) and returns it.
static InstX86Packus *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Packus>())
InstX86Packus(Func, Dest, Source);
}
private:
// Private so that instances are only created through create(); simply
// forwards its arguments to the base-class constructor.
InstX86Packus(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Packus, false,
InstX86Base::SseSuffix::Pack>(Func, Dest,
Source) {}
};
}; // struct InstImpl
/// struct Insts is a template that can be used to instantiate all the X86
......@@ -3052,6 +3089,8 @@ template <typename TraitsType> struct Insts {
using Pshufb = typename InstImpl<TraitsType>::InstX86Pshufb;
using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl;
using Packss = typename InstImpl<TraitsType>::InstX86Packss;
using Packus = typename InstImpl<TraitsType>::InstX86Packus;
};
/// X86 Instructions have static data (particularly, opcodes and instruction
......@@ -3287,6 +3326,12 @@ template <typename TraitsType> struct Insts {
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Packss::Base::Opcode = "packss"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Packus::Base::Opcode = "packus"; \
/* Inplace GPR ops */ \
template <> \
template <> \
......@@ -3660,6 +3705,18 @@ template <typename TraitsType> struct Insts {
InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::punpckl, \
&InstImpl<TraitsType>::Assembler::punpckl}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Packss::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::packss, \
&InstImpl<TraitsType>::Assembler::packss}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Packus::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::packus, \
&InstImpl<TraitsType>::Assembler::packus}; \
} \
}
......
......@@ -2607,7 +2607,8 @@ void InstImpl<TraitsType>::InstX86Pextr::emit(const Cfg *Func) const {
assert(this->getSrcSize() == 2);
// pextrb and pextrd are SSE4.1 instructions.
Str << "\t" << this->Opcode
<< Traits::TypeAttributes[this->getSrc(0)->getType()].PackString << "\t";
<< Traits::TypeAttributes[this->getSrc(0)->getType()].IntegralString
<< "\t";
this->getSrc(1)->emit(Func);
Str << ", ";
this->getSrc(0)->emit(Func);
......@@ -2646,7 +2647,8 @@ void InstImpl<TraitsType>::InstX86Pinsr::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 3);
Str << "\t" << this->Opcode
<< Traits::TypeAttributes[this->getDest()->getType()].PackString << "\t";
<< Traits::TypeAttributes[this->getDest()->getType()].IntegralString
<< "\t";
this->getSrc(2)->emit(Func);
Str << ", ";
Operand *Src1 = this->getSrc(1);
......
......@@ -101,8 +101,8 @@ const size_t TargetX8632Traits::TableIcmp64Size =
const TargetX8632Traits::TableTypeX8632AttributesType
TargetX8632Traits::TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
{ IceType_##elty } \
,
ICETYPEX8632_TABLE
#undef X
......@@ -462,7 +462,7 @@ ICEINSTICMP_TABLE
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
_tmp_##tag,
ICETYPEX8632_TABLE
#undef X
......@@ -475,7 +475,7 @@ ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
......
......@@ -946,14 +946,15 @@ public:
} InstCmppsAttributes[];
static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpsdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpsdString; // ss, sd, ps, pd, or <blank>
const char *IntegralString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *PackString; // wb, dw, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
} TypeAttributes[];
static const char *InstSegmentRegNames[];
......
......@@ -94,8 +94,8 @@ const size_t TargetX8664Traits::TableIcmp64Size =
const TargetX8664Traits::TableTypeX8664AttributesType
TargetX8664Traits::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
{ IceType_##elty } \
,
ICETYPEX8664_TABLE
#undef X
......@@ -801,7 +801,7 @@ ICEINSTICMP_TABLE
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
_tmp_##tag,
ICETYPEX8664_TABLE
#undef X
......@@ -814,7 +814,7 @@ ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
......
......@@ -1022,14 +1022,15 @@ public:
} InstCmppsAttributes[];
static const struct TypeAttributesType {
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *PackString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *IntegralString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *PackString; // wb, dw, or <blank>
const char *WidthString; // b, w, l, q, or <blank>
const char *FldString; // s, l, or <blank>
} TypeAttributes[];
};
......
......@@ -815,6 +815,14 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0);
}
/// Inserts a Traits::Insts::Packss instruction for (Dest, Src0) through
/// Context. An AutoMemorySandboxer guard is constructed first with pointers to
/// both operands and lives until the function returns.
/// NOTE(review): presumably the guard may adjust the operands for sandboxed
/// memory access -- confirm against AutoMemorySandboxer.
void _packss(Variable *Dest, Operand *Src0) {
  using PackssInst = typename Traits::Insts::Packss;
  AutoMemorySandboxer<> Sandboxer(this, &Dest, &Src0);
  Context.insert<PackssInst>(Dest, Src0);
}
/// Inserts a Traits::Insts::Packus instruction for (Dest, Src0) through
/// Context. An AutoMemorySandboxer guard is constructed first with pointers to
/// both operands and lives until the function returns.
/// NOTE(review): presumably the guard may adjust the operands for sandboxed
/// memory access -- confirm against AutoMemorySandboxer.
void _packus(Variable *Dest, Operand *Src0) {
  using PackusInst = typename Traits::Insts::Packus;
  AutoMemorySandboxer<> Sandboxer(this, &Dest, &Src0);
  Context.insert<PackusInst>(Dest, Src0);
}
void _pshufb(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Pshufb>(Dest, Src0);
......
......@@ -1103,6 +1103,160 @@ TEST_F(AssemblerX8632Test, Punpckl) {
#undef TestImplXmmXmm
}
// Exercises the assembler's packss for v4i32 and v8i16, with both a register
// and a memory source, across eight destination/source register pairings.
// V0_<Ty> is loaded into the destination and V1_<Ty> is the source; in each
// Expected_<Ty> the first qword is V0 narrowed lane by lane with signed
// saturation and the second qword is V1 narrowed the same way.
TEST_F(AssemblerX8632Test, Packss) {
// 32-bit lanes (from the least-significant end of each qword):
// 0x00001234, 0x00010000 | 0x80000000, 0x7FFFFFFF
// -> 0x1234, 0x7FFF (clamped), 0x8000 (clamped), 0x7FFF (clamped).
const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
uint64_t(0x7FFFFFFF80000000ull));
// 0xFFFFFFFF, 0xFFFFFFFE | 0x00007FFE, 0x00008001
// -> 0xFFFF, 0xFFFE, 0x7FFE, 0x7FFF (clamped).
const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
uint64_t(0x0000800100007FFEull));
const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
uint64_t(0x7FFF7FFEFFFEFFFFull));
// 16-bit lanes: 0x0034, 0x0012, 0x0000, 0x0001 | 0x8000, 0x7FFF, 0xFFFF,
// 0xFFFE -> 0x34, 0x12, 0x00, 0x01, 0x80 (clamped), 0x7F (clamped), 0xFF,
// 0xFE.
const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
uint64_t(0xFFFEFFFF7FFF8000ull));
// 0x007E, 0x0081, 0x7FF8, 0x0005 | 0x0055, 0x0066, 0x0077, 0x0088
// -> 0x7E, 0x7F (clamped), 0x7F (clamped), 0x05, 0x55, 0x66, 0x77,
// 0x7F (clamped).
const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
uint64_t(0x0088007700660055ull));
const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
uint64_t(0x7F776655057F7F7Eull));
// Register source: load Dst from T0 and Src from T1, run Inst, then compare
// Dst against Expected_<Ty>.
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Memory source: load Dst from T0 and use T1's address directly as the
// instruction's source operand.
#define TestImplXmmAddr(Dst, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs all four (type x source kind) combinations for one register pairing.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, packss, v4i32); \
TestImplXmmAddr(Dst, packss, v4i32); \
TestImplXmmXmm(Dst, Src, packss, v8i16); \
TestImplXmmAddr(Dst, packss, v8i16); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
// Exercises the assembler's packus for v4i32 and v8i16, with both a register
// and a memory source, across eight destination/source register pairings.
// V0_<Ty> is loaded into the destination and V1_<Ty> is the source; in each
// Expected_<Ty> the first qword is V0 narrowed lane by lane with unsigned
// saturation (negative inputs become 0, too-large inputs become all ones) and
// the second qword is V1 narrowed the same way.
// NOTE(review): the v4i32 form is emitted as the 0F 38 2B (SSE4.1) encoding
// and nothing visible here checks for SSE4.1 support before running --
// confirm the test host requirement.
TEST_F(AssemblerX8632Test, Packus) {
// 32-bit lanes (from the least-significant end of each qword):
// 0x00001234, 0x00010000 | 0x80000000, 0x7FFFFFFF
// -> 0x1234, 0xFFFF (clamped), 0x0000 (negative), 0xFFFF (clamped).
const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
uint64_t(0x7FFFFFFF80000000ull));
// 0xFFFFFFFF, 0xFFFFFFFE | 0x00007FFE, 0x00008001
// -> 0x0000 (negative), 0x0000 (negative), 0x7FFE, 0x8001.
const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
uint64_t(0x0000800100007FFEull));
const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
uint64_t(0x80017FFE00000000ull));
// 16-bit lanes: 0x0034, 0x0012, 0x0000, 0x0001 | 0x8000, 0x7FFF, 0xFFFF,
// 0xFFFE -> 0x34, 0x12, 0x00, 0x01, 0x00 (negative), 0xFF (clamped),
// 0x00 (negative), 0x00 (negative).
const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
uint64_t(0xFFFEFFFF7FFF8000ull));
// 0x007E, 0x0081, 0x7FF8, 0x0005 | 0x0055, 0x0066, 0x0077, 0x0088
// -> 0x7E, 0x81, 0xFF (clamped), 0x05, 0x55, 0x66, 0x77, 0x88.
const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
uint64_t(0x0088007700660055ull));
const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
uint64_t(0x8877665505FF817Eull));
// Register source: load Dst from T0 and Src from T1, run Inst, then compare
// Dst against Expected_<Ty>.
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Memory source: load Dst from T0 and use T1's address directly as the
// instruction's source operand.
#define TestImplXmmAddr(Dst, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs all four (type x source kind) combinations for one register pairing.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, packus, v4i32); \
TestImplXmmAddr(Dst, packus, v4i32); \
TestImplXmmXmm(Dst, Src, packus, v8i16); \
TestImplXmmAddr(Dst, packus, v8i16); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
TEST_F(AssemblerX8632Test, Pshufb) {
const Dqword V0(uint64_t(0x1122334455667788ull),
uint64_t(0x99aabbccddeeff32ull));
......
......@@ -1169,6 +1169,160 @@ TEST_F(AssemblerX8664Test, Punpckl) {
#undef TestImplXmmXmm
}
// Exercises the assembler's packss for v4i32 and v8i16, with both a register
// and a memory source, across eight destination/source register pairings.
// V0_<Ty> is loaded into the destination and V1_<Ty> is the source; in each
// Expected_<Ty> the first qword is V0 narrowed lane by lane with signed
// saturation and the second qword is V1 narrowed the same way.
TEST_F(AssemblerX8664Test, Packss) {
// 32-bit lanes (from the least-significant end of each qword):
// 0x00001234, 0x00010000 | 0x80000000, 0x7FFFFFFF
// -> 0x1234, 0x7FFF (clamped), 0x8000 (clamped), 0x7FFF (clamped).
const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
uint64_t(0x7FFFFFFF80000000ull));
// 0xFFFFFFFF, 0xFFFFFFFE | 0x00007FFE, 0x00008001
// -> 0xFFFF, 0xFFFE, 0x7FFE, 0x7FFF (clamped).
const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
uint64_t(0x0000800100007FFEull));
const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
uint64_t(0x7FFF7FFEFFFEFFFFull));
// 16-bit lanes: 0x0034, 0x0012, 0x0000, 0x0001 | 0x8000, 0x7FFF, 0xFFFF,
// 0xFFFE -> 0x34, 0x12, 0x00, 0x01, 0x80 (clamped), 0x7F (clamped), 0xFF,
// 0xFE.
const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
uint64_t(0xFFFEFFFF7FFF8000ull));
// 0x007E, 0x0081, 0x7FF8, 0x0005 | 0x0055, 0x0066, 0x0077, 0x0088
// -> 0x7E, 0x7F (clamped), 0x7F (clamped), 0x05, 0x55, 0x66, 0x77,
// 0x7F (clamped).
const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
uint64_t(0x0088007700660055ull));
const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
uint64_t(0x7F776655057F7F7Eull));
// Register source: load Dst from T0 and Src from T1, run Inst, then compare
// Dst against Expected_<Ty>.
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Memory source: load Dst from T0 and use T1's address directly as the
// instruction's source operand.
#define TestImplXmmAddr(Dst, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs all four (type x source kind) combinations for one register pairing.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, packss, v4i32); \
TestImplXmmAddr(Dst, packss, v4i32); \
TestImplXmmXmm(Dst, Src, packss, v8i16); \
TestImplXmmAddr(Dst, packss, v8i16); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
// Exercises the assembler's packus for v4i32 and v8i16, with both a register
// and a memory source, across eight destination/source register pairings.
// V0_<Ty> is loaded into the destination and V1_<Ty> is the source; in each
// Expected_<Ty> the first qword is V0 narrowed lane by lane with unsigned
// saturation (negative inputs become 0, too-large inputs become all ones) and
// the second qword is V1 narrowed the same way.
// NOTE(review): the v4i32 form is emitted as the 0F 38 2B (SSE4.1) encoding
// and nothing visible here checks for SSE4.1 support before running --
// confirm the test host requirement.
TEST_F(AssemblerX8664Test, Packus) {
// 32-bit lanes (from the least-significant end of each qword):
// 0x00001234, 0x00010000 | 0x80000000, 0x7FFFFFFF
// -> 0x1234, 0xFFFF (clamped), 0x0000 (negative), 0xFFFF (clamped).
const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
uint64_t(0x7FFFFFFF80000000ull));
// 0xFFFFFFFF, 0xFFFFFFFE | 0x00007FFE, 0x00008001
// -> 0x0000 (negative), 0x0000 (negative), 0x7FFE, 0x8001.
const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
uint64_t(0x0000800100007FFEull));
const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
uint64_t(0x80017FFE00000000ull));
// 16-bit lanes: 0x0034, 0x0012, 0x0000, 0x0001 | 0x8000, 0x7FFF, 0xFFFF,
// 0xFFFE -> 0x34, 0x12, 0x00, 0x01, 0x00 (negative), 0xFF (clamped),
// 0x00 (negative), 0x00 (negative).
const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
uint64_t(0xFFFEFFFF7FFF8000ull));
// 0x007E, 0x0081, 0x7FF8, 0x0005 | 0x0055, 0x0066, 0x0077, 0x0088
// -> 0x7E, 0x81, 0xFF (clamped), 0x05, 0x55, 0x66, 0x77, 0x88.
const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
uint64_t(0x0088007700660055ull));
const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
uint64_t(0x8877665505FF817Eull));
// Register source: load Dst from T0 and Src from T1, run Inst, then compare
// Dst against Expected_<Ty>.
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Memory source: load Dst from T0 and use T1's address directly as the
// instruction's source operand.
#define TestImplXmmAddr(Dst, Inst, Ty) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0_##Ty); \
test.setDqwordTo(T1, V1_##Ty); \
test.run(); \
\
ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs all four (type x source kind) combinations for one register pairing.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, packus, v4i32); \
TestImplXmmAddr(Dst, packus, v4i32); \
TestImplXmmXmm(Dst, Src, packus, v8i16); \
TestImplXmmAddr(Dst, packus, v8i16); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
TEST_F(AssemblerX8664Test, Pshufb) {
const Dqword V0(uint64_t(0x1122334455667788ull),
uint64_t(0x99aabbccddeeff32ull));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment