Commit de29f120 by John Porto

Subzero. X86. Uses pshufb for shufflevector lowering.

This CL enables Subzero to lower shufflevector instructions on x86 using pshufb (only when SSE 4.1 is available).
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4136
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4077
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1917863004 .
parent ae15f0fd
...@@ -429,6 +429,8 @@ public: ...@@ -429,6 +429,8 @@ public:
void sqrtpd(XmmRegister dst); void sqrtpd(XmmRegister dst);
/// Emits pshufb with an XMM register as the source operand. The definition
/// leaves the Type parameter unnamed and does not use it.
void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
/// Emits pshufb with a memory address as the source operand. The definition
/// leaves the Type parameter unnamed and does not use it.
void pshufb(Type Ty, XmmRegister dst, const Address &src);
void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask); void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void pshufd(Type Ty, XmmRegister dst, const Address &src, void pshufd(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask); const Immediate &mask);
......
...@@ -1535,6 +1535,31 @@ void AssemblerX86Base<TraitsType>::set1ps(XmmRegister dst, GPRRegister tmp1, ...@@ -1535,6 +1535,31 @@ void AssemblerX86Base<TraitsType>::set1ps(XmmRegister dst, GPRRegister tmp1,
} }
template <typename TraitsType> template <typename TraitsType>
// Emits the register-register form of pshufb. The bytes are written in this
// order: the 0x66 prefix, whatever emitRexRB produces for (dst, src), the
// three opcode bytes 0x0F 0x38 0x00, and finally the operand encoding produced
// by emitXmmRegisterOperand. The Type parameter is unnamed and ignored.
void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
XmmRegister src) {
// Reserve buffer space before any byte is emitted.
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, dst, src);
// Opcode bytes for pshufb: 0F 38 00.
emitUint8(0x0F);
emitUint8(0x38);
emitUint8(0x00);
emitXmmRegisterOperand(dst, src);
}
// Emits the register-memory form of pshufb. The bytes are written in this
// order: the 0x66 prefix, the output of emitAddrSizeOverridePrefix(), whatever
// emitRex produces for (src, dst), the three opcode bytes 0x0F 0x38 0x00, and
// finally the memory operand with dst in the register field. The Type
// parameter is unnamed and ignored.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
const Address &src) {
// Reserve buffer space before any byte is emitted.
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
// NOTE(review): presumably a no-op on targets that never need an address-size
// override -- confirm against the helper's definition.
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, src, dst);
// Opcode bytes for pshufb: 0F 38 00.
emitUint8(0x0F);
emitUint8(0x38);
emitUint8(0x00);
emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst, void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
XmmRegister src, XmmRegister src,
const Immediate &imm) { const Immediate &imm) {
......
...@@ -142,6 +142,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -142,6 +142,7 @@ template <typename TraitsType> struct InstImpl {
Pmuludq, Pmuludq,
Pop, Pop,
Por, Por,
Pshufb,
Pshufd, Pshufd,
Punpckl, Punpckl,
Psll, Psll,
...@@ -2844,6 +2845,22 @@ template <typename TraitsType> struct InstImpl { ...@@ -2844,6 +2845,22 @@ template <typename TraitsType> struct InstImpl {
InstX86IacaEnd(Cfg *Func); InstX86IacaEnd(Cfg *Func);
}; };
/// Instruction node for pshufb, built on the shared InstX86BaseBinopXmm
/// template with the Pshufb kind and no SSE suffix. Instances are obtained
/// through create(); the constructor is private.
// NOTE(review): the meaning of the `false` template argument is defined by
// InstX86BaseBinopXmm, which is not visible here -- confirm before changing.
class InstX86Pshufb
: public InstX86BaseBinopXmm<InstX86Base::Pshufb, false,
InstX86Base::SseSuffix::None> {
public:
/// Placement-constructs a new InstX86Pshufb in storage obtained from
/// Func->allocate<InstX86Pshufb>() and returns it.
static InstX86Pshufb *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Pshufb>())
InstX86Pshufb(Func, Dest, Source);
}
private:
/// Forwards all arguments unchanged to the base-class constructor.
InstX86Pshufb(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Pshufb, false,
InstX86Base::SseSuffix::None>(Func, Dest,
Source) {}
};
class InstX86Punpckl class InstX86Punpckl
: public InstX86BaseBinopXmm<InstX86Base::Punpckl, false, : public InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
InstX86Base::SseSuffix::Unpack> { InstX86Base::SseSuffix::Unpack> {
...@@ -2982,6 +2999,7 @@ template <typename TraitsType> struct Insts { ...@@ -2982,6 +2999,7 @@ template <typename TraitsType> struct Insts {
using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart; using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart;
using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd; using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd;
using Pshufb = typename InstImpl<TraitsType>::InstX86Pshufb;
using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl; using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl;
}; };
...@@ -3214,6 +3232,9 @@ template <typename TraitsType> struct Insts { ...@@ -3214,6 +3232,9 @@ template <typename TraitsType> struct Insts {
const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd"; \ const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd"; \
template <> \ template <> \
template <> \ template <> \
const char *InstImpl<TraitsType>::InstX86Pshufb::Base::Opcode = "pshufb"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \ const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \
/* Inplace GPR ops */ \ /* Inplace GPR ops */ \
template <> \ template <> \
...@@ -3579,6 +3600,12 @@ template <typename TraitsType> struct Insts { ...@@ -3579,6 +3600,12 @@ template <typename TraitsType> struct Insts {
template <> \ template <> \
template <> \ template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Pshufb::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::pshufb, \
&InstImpl<TraitsType>::Assembler::pshufb}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \ InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::punpckldq, \ &InstImpl<TraitsType>::Assembler::punpckldq, \
&InstImpl<TraitsType>::Assembler::punpckldq}; \ &InstImpl<TraitsType>::Assembler::punpckldq}; \
......
...@@ -805,6 +805,10 @@ protected: ...@@ -805,6 +805,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0); AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0); Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0);
} }
/// Inserts a Pshufb instruction (Dest, Src0) into the current lowering
/// context.
void _pshufb(Variable *Dest, Operand *Src0) {
// The sandboxer is handed the addresses of both operands before the insert.
// NOTE(review): presumably it may rewrite them for sandboxed memory access --
// confirm against AutoMemorySandboxer.
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Pshufb>(Dest, Src0);
}
void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) { void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1); AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1); Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1);
...@@ -1101,6 +1105,21 @@ private: ...@@ -1101,6 +1105,21 @@ private:
SizeT Index0, SizeT Index0,
Variable *Src1, Variable *Src1,
SizeT Index1); SizeT Index1);
/// Selector value written into a pshufb mask byte for every destination lane
/// that does not take its byte from the source being shuffled (see
/// lowerShuffleVector_UsingPshufb).
static constexpr SizeT CLEAR_ALL_BITS = 0x80;
/// Running counter; lowerShuffleVector_NewMaskName() post-increments it and
/// embeds the previous value in each generated mask name.
SizeT PshufbMaskCount = 0;
/// Returns a newly generated name for a pshufb mask global, derived from the
/// current function's name (or its ID) and PshufbMaskCount.
GlobalString lowerShuffleVector_NewMaskName();
/// Adds a constant 16-byte global to the current function whose bytes are
/// Idx0..Idx15 in that order, and returns a relocatable constant (offset 0)
/// naming that global.
ConstantRelocatable *lowerShuffleVector_CreatePshufbMask(
int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
int8_t Idx15);
/// Lowers a two-source byte shuffle into Dest using two pshufb instructions
/// (one per source) whose results are combined with por. Each IdxN describes
/// destination byte N: bit 4 selects the source (0 for Src0, 1 for Src1) and
/// the low four bits select the byte within that source.
void lowerShuffleVector_UsingPshufb(Variable *Dest, Operand *Src0,
Operand *Src1, int8_t Idx0, int8_t Idx1,
int8_t Idx2, int8_t Idx3, int8_t Idx4,
int8_t Idx5, int8_t Idx6, int8_t Idx7,
int8_t Idx8, int8_t Idx9, int8_t Idx10,
int8_t Idx11, int8_t Idx12, int8_t Idx13,
int8_t Idx14, int8_t Idx15);
/// @} /// @}
static FixupKind PcRelFixup; static FixupKind PcRelFixup;
......
...@@ -5675,6 +5675,99 @@ inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, ...@@ -5675,6 +5675,99 @@ inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
} }
// Generates the name for the next pshufb mask global of the current function.
// Every call consumes one value of PshufbMaskCount. When dumping is compiled in
// and the function name is backed by a real string, the name is
// "Pshufb$<function>$<n>"; otherwise it is "$PS<function id>_<n>".
template <typename TraitsType>
GlobalString TargetX86Base<TraitsType>::lowerShuffleVector_NewMaskName() {
  GlobalString Owner = Func->getFunctionName();
  const std::string Sequence = std::to_string(PshufbMaskCount++);
  const bool UseReadableName = BuildDefs::dump() && Owner.hasStdString();
  const std::string Name =
      UseReadableName
          ? "Pshufb$" + Owner + "$" + Sequence
          : "$PS" + std::to_string(Owner.getID()) + "_" + Sequence;
  return GlobalString::createWithString(Ctx, Name);
}
// Creates a constant, 16-byte-wide global holding a pshufb control mask and
// returns a relocatable constant (offset 0) that refers to it.
//
// Idx0..Idx15 are the sixteen mask bytes, in memory order. The global is named
// via lowerShuffleVector_NewMaskName(), marked constant, aligned to the stack
// width of a v4i32, and registered with the current function.
template <typename TraitsType>
ConstantRelocatable *
TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask(
    int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
    int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
    int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
    int8_t Idx15) {
  static constexpr uint8_t NumElements = 16;
  const int8_t Indexes[NumElements] = {
      Idx0, Idx1, Idx2,  Idx3,  Idx4,  Idx5,  Idx6,  Idx7,
      Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15,
  };
  // Convert explicitly instead of brace-initializing a char array from int8_t
  // values: that is a narrowing conversion (ill-formed in list-initialization)
  // on hosts where plain char is unsigned.
  char Initializer[NumElements];
  for (uint8_t I = 0; I < NumElements; ++I) {
    Initializer[I] = static_cast<char>(Indexes[I]);
  }

  static constexpr Type V4VectorType = IceType_v4i32;
  const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType);
  auto *Mask = VariableDeclaration::create(Func->getGlobalPool());
  GlobalString MaskName = lowerShuffleVector_NewMaskName();
  Mask->setIsConstant(true);
  Mask->addInitializer(VariableDeclaration::DataInitializer::create(
      Func->getGlobalPool(), Initializer, NumElements));
  Mask->setName(MaskName);
  // Mask needs to be 16-byte aligned, or pshufb will seg fault.
  Mask->setAlignment(MaskAlignment);
  Func->addGlobal(Mask);

  constexpr RelocOffsetT Offset = 0;
  return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName));
}
// Lowers a two-source byte shuffle with pshufb.
//
// Idx0..Idx15 describe destination bytes 0..15. In each index, bit 4 names the
// source (0 -> Src0, 1 -> Src1) and the low four bits name the byte inside that
// source. Two masks are built, one per source; in each mask a lane that belongs
// to the other source holds CLEAR_ALL_BITS. Each source is copied into a
// temporary, shuffled with its own mask, and the two results are or-ed together
// before being moved into Dest.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb(
    Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1,
    int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6,
    int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11,
    int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) {
  const Type DestTy = Dest->getType();
  static constexpr bool NotRebased = false;
  static constexpr Variable *NoBase = nullptr;
  // We use void for the memory operand instead of DestTy because using the
  // latter causes a validation failure: the X86 Inst layer complains that
  // vector mem operands could be under aligned. Thus, using void we avoid the
  // validation error. Note that the mask global declaration is aligned, so it
  // can be used as an XMM mem operand.
  static constexpr Type MaskType = IceType_void;

  // Mask byte for one lane: the in-source byte index when the lane comes from
  // source S, CLEAR_ALL_BITS otherwise.
  const auto SelectorFor = [](int8_t N, int S) -> int8_t {
    return static_cast<int8_t>(((N & (1 << 4)) == (S << 4)) ? (N & 0xf)
                                                            : CLEAR_ALL_BITS);
  };
  // Builds the memory operand that addresses the mask for source S.
  const auto BuildMaskFor = [&](int S) {
    return X86OperandMem::create(
        Func, MaskType, NoBase,
        lowerShuffleVector_CreatePshufbMask(
            SelectorFor(Idx0, S), SelectorFor(Idx1, S), SelectorFor(Idx2, S),
            SelectorFor(Idx3, S), SelectorFor(Idx4, S), SelectorFor(Idx5, S),
            SelectorFor(Idx6, S), SelectorFor(Idx7, S), SelectorFor(Idx8, S),
            SelectorFor(Idx9, S), SelectorFor(Idx10, S), SelectorFor(Idx11, S),
            SelectorFor(Idx12, S), SelectorFor(Idx13, S),
            SelectorFor(Idx14, S), SelectorFor(Idx15, S)),
        NotRebased);
  };
  auto *Mask0M = BuildMaskFor(0);
  auto *Mask1M = BuildMaskFor(1);

  auto *Shuffled0 = makeReg(DestTy);
  auto *Shuffled1 = makeReg(DestTy);
  _movp(Shuffled0, legalize(Src0, Legal_Reg | Legal_Mem));
  _movp(Shuffled1, legalize(Src1, Legal_Reg | Legal_Mem));
  _pshufb(Shuffled1, Mask1M);
  _pshufb(Shuffled0, Mask0M);
  _por(Shuffled1, Shuffled0);
  _movp(Dest, Shuffled1);
}
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShuffleVector( void TargetX86Base<TraitsType>::lowerShuffleVector(
const InstShuffleVector *Instr) { const InstShuffleVector *Instr) {
auto *Dest = Instr->getDest(); auto *Dest = Instr->getDest();
...@@ -5687,9 +5780,68 @@ void TargetX86Base<TraitsType>::lowerShuffleVector( ...@@ -5687,9 +5780,68 @@ void TargetX86Base<TraitsType>::lowerShuffleVector(
switch (DestTy) { switch (DestTy) {
default: default:
llvm::report_fatal_error("Unexpected vector type.");
case IceType_v16i1:
case IceType_v16i8: {
if (InstructionSet < Traits::SSE4_1) {
// TODO(jpp): figure out how to lower with sse2.
break; break;
// TODO(jpp): figure out how to properly lower the remaining cases without }
// scalarization. static constexpr SizeT ExpectedNumElements = 16;
assert(ExpectedNumElements == Instr->getNumIndexes());
(void)ExpectedNumElements;
const SizeT Index0 = Instr->getIndex(0)->getValue();
const SizeT Index1 = Instr->getIndex(1)->getValue();
const SizeT Index2 = Instr->getIndex(2)->getValue();
const SizeT Index3 = Instr->getIndex(3)->getValue();
const SizeT Index4 = Instr->getIndex(4)->getValue();
const SizeT Index5 = Instr->getIndex(5)->getValue();
const SizeT Index6 = Instr->getIndex(6)->getValue();
const SizeT Index7 = Instr->getIndex(7)->getValue();
const SizeT Index8 = Instr->getIndex(8)->getValue();
const SizeT Index9 = Instr->getIndex(9)->getValue();
const SizeT Index10 = Instr->getIndex(10)->getValue();
const SizeT Index11 = Instr->getIndex(11)->getValue();
const SizeT Index12 = Instr->getIndex(12)->getValue();
const SizeT Index13 = Instr->getIndex(13)->getValue();
const SizeT Index14 = Instr->getIndex(14)->getValue();
const SizeT Index15 = Instr->getIndex(15)->getValue();
lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2,
Index3, Index4, Index5, Index6, Index7,
Index8, Index9, Index10, Index11, Index12,
Index13, Index14, Index15);
return;
}
case IceType_v8i1:
case IceType_v8i16: {
if (InstructionSet < Traits::SSE4_1) {
// TODO(jpp): figure out how to lower with sse2.
break;
}
static constexpr SizeT ExpectedNumElements = 8;
assert(ExpectedNumElements == Instr->getNumIndexes());
(void)ExpectedNumElements;
const SizeT Index0 = Instr->getIndex(0)->getValue();
const SizeT Index1 = Instr->getIndex(1)->getValue();
const SizeT Index2 = Instr->getIndex(2)->getValue();
const SizeT Index3 = Instr->getIndex(3)->getValue();
const SizeT Index4 = Instr->getIndex(4)->getValue();
const SizeT Index5 = Instr->getIndex(5)->getValue();
const SizeT Index6 = Instr->getIndex(6)->getValue();
const SizeT Index7 = Instr->getIndex(7)->getValue();
#define TO_BYTE_INDEX(I) ((I) << 1)
lowerShuffleVector_UsingPshufb(
Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1,
TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2),
TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3),
TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4),
TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5),
TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6),
TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7),
TO_BYTE_INDEX(Index7) + 1);
#undef TO_BYTE_INDEX
return;
}
case IceType_v4i1: case IceType_v4i1:
case IceType_v4i32: case IceType_v4i32:
case IceType_v4f32: { case IceType_v4f32: {
......
...@@ -1084,6 +1084,73 @@ TEST_F(AssemblerX8632Test, Punpckldq) { ...@@ -1084,6 +1084,73 @@ TEST_F(AssemblerX8632Test, Punpckldq) {
#undef TestImplXmmXmm #undef TestImplXmmXmm
} }
// Checks both pshufb encodings (register source and memory source) for every
// destination register xmm0..xmm7.
TEST_F(AssemblerX8632Test, Pshufb) {
// V0 is the data loaded into the destination register before the shuffle.
const Dqword V0(uint64_t(0x1122334455667788ull),
uint64_t(0x99aabbccddeeff32ull));
// V1 is the shuffle control operand. Two of its bytes (0x80 and 0x8b) have the
// high bit set; the bytes at the same positions in Expected are 0x00.
const Dqword V1(uint64_t(0x0204050380060708ull),
uint64_t(0x010306080a8b0c0dull));
const Dqword Expected(uint64_t(0x6644335500221132ull),
uint64_t(0x77552232ee00ccbbull));
// Register-source variant: loads T0 into Dst and T1 into Src, runs Inst on
// (Dst, Src), then compares Dst against Expected.
#define TestImplXmmXmm(Dst, Src, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Memory-source variant: loads T0 into Dst and runs Inst with the control
// operand read directly from the address of T1.
#define TestImplXmmAddr(Dst, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs both variants for one (Dst, Src) register pair.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, pshufb); \
TestImplXmmAddr(Dst, pshufb); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm0);
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
TEST_F(AssemblerX8632Test, Cvt) { TEST_F(AssemblerX8632Test, Cvt) {
const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
const Dqword dq2ps32SrcValue(-5, 3, 100, 200); const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
......
...@@ -1158,6 +1158,81 @@ TEST_F(AssemblerX8664Test, Punpckldq) { ...@@ -1158,6 +1158,81 @@ TEST_F(AssemblerX8664Test, Punpckldq) {
#undef TestImplXmmXmm #undef TestImplXmmXmm
} }
// Checks both pshufb encodings (register source and memory source) for every
// destination register xmm0..xmm15.
TEST_F(AssemblerX8664Test, Pshufb) {
// V0 is the data loaded into the destination register before the shuffle.
const Dqword V0(uint64_t(0x1122334455667788ull),
uint64_t(0x99aabbccddeeff32ull));
// V1 is the shuffle control operand. Two of its bytes (0x80 and 0x8b) have the
// high bit set; the bytes at the same positions in Expected are 0x00.
const Dqword V1(uint64_t(0x0204050380060708ull),
uint64_t(0x010306080a8b0c0dull));
const Dqword Expected(uint64_t(0x6644335500221132ull),
uint64_t(0x77552232ee00ccbbull));
// Register-source variant: loads T0 into Dst and T1 into Src, runs Inst on
// (Dst, Src), then compares Dst against Expected.
#define TestImplXmmXmm(Dst, Src, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Memory-source variant: loads T0 into Dst and runs Inst with the control
// operand read directly from the address of T1.
#define TestImplXmmAddr(Dst, Inst) \
do { \
static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
test.setDqwordTo(T1, V1); \
test.run(); \
\
ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
reset(); \
} while (0)
// Runs both variants for one (Dst, Src) register pair.
#define TestImpl(Dst, Src) \
do { \
TestImplXmmXmm(Dst, Src, pshufb); \
TestImplXmmAddr(Dst, pshufb); \
} while (0)
TestImpl(xmm0, xmm1);
TestImpl(xmm1, xmm2);
TestImpl(xmm2, xmm3);
TestImpl(xmm3, xmm4);
TestImpl(xmm4, xmm5);
TestImpl(xmm5, xmm6);
TestImpl(xmm6, xmm7);
TestImpl(xmm7, xmm8);
TestImpl(xmm8, xmm9);
TestImpl(xmm9, xmm10);
TestImpl(xmm10, xmm11);
TestImpl(xmm11, xmm12);
TestImpl(xmm12, xmm13);
TestImpl(xmm13, xmm14);
TestImpl(xmm14, xmm15);
TestImpl(xmm15, xmm0);
#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
TEST_F(AssemblerX8664Test, Cvt) { TEST_F(AssemblerX8664Test, Cvt) {
const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
const Dqword dq2ps32SrcValue(-5, 3, 100, 200); const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment