Commit 962befa4 by Jan Voung

emitIAS for Shld and Shrd and the ternary and three-address ops.

Give a different name to the crosstest .s and .o files depending on the CPU features as well. That way the SSE2 and SSE4.1 .s and .o are separate.

The encodings for Pextrw and Pextrb/d... make me sad.

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/656983002
parent abce6e56
......@@ -95,7 +95,7 @@ if __name__ == '__main__':
f.write(line)
f.close()
base_sz = '%s.O%s.%s' % (base, args.optlevel, args.target)
base_sz = '%s.O%s.%s.%s' % (base, args.optlevel, args.attr, args.target)
asm_sz = os.path.join(args.dir, base_sz + '.sz.s')
obj_sz = os.path.join(args.dir, base_sz + '.sz.o')
obj_llc = os.path.join(args.dir, base + '.llc.o')
......
......@@ -602,6 +602,30 @@ void emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
emitIASBytes(Func, Asm, StartPosition);
}
// Emits a shld/shrd-style instruction through the integrated assembler.
//
// Dest and Src1Op must both live in registers.  Src2Op is the shift count:
// either a 32-bit integer constant (emitted via Emitter.GPRGPRImm) or a
// variable assigned to ecx (emitted via Emitter.GPRGPR, which takes the count
// implicitly).  The operand type handed to the emitter is Src1Op's type.
void emitIASGPRShiftDouble(const Cfg *Func, const Variable *Dest,
                           const Operand *Src1Op, const Operand *Src2Op,
                           const x86::AssemblerX86::GPREmitterShiftD &Emitter) {
  x86::AssemblerX86 *Assembler = Func->getAssembler<x86::AssemblerX86>();
  const intptr_t Begin = Assembler->GetPosition();

  // The instruction also has a memory-destination form, but only the
  // register-destination form is ever emitted from here.
  assert(Dest->hasReg());
  const RegX8632::GPRRegister DstEnc =
      RegX8632::getEncodedGPR(Dest->getRegNum());

  // The second operand has to be a register.
  const Variable *ShiftSrc = llvm::cast<Variable>(Src1Op);
  assert(ShiftSrc->hasReg());
  const RegX8632::GPRRegister SrcEnc =
      RegX8632::getEncodedGPR(ShiftSrc->getRegNum());
  const Type OpTy = ShiftSrc->getType();

  // The count is either an immediate or the implicit CL register.
  const auto *ImmCount = llvm::dyn_cast<ConstantInteger32>(Src2Op);
  if (ImmCount == nullptr) {
    assert(llvm::cast<Variable>(Src2Op)->getRegNum() == RegX8632::Reg_ecx);
    (Assembler->*(Emitter.GPRGPR))(OpTy, DstEnc, SrcEnc);
  } else {
    (Assembler->*(Emitter.GPRGPRImm))(OpTy, DstEnc, SrcEnc,
                                      x86::Immediate(ImmCount->getValue()));
  }

  emitIASBytes(Func, Assembler, Begin);
}
void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
const Operand *Src,
const x86::AssemblerX86::XmmEmitterShiftOp &Emitter) {
......@@ -686,6 +710,37 @@ void emitIASCastRegOp(
emitIASBytes(Func, Asm, StartPosition);
}
// Emits a three-operand instruction whose last operand is an immediate.
//
// Template parameters:
//   DReg_t / SReg_t - register enum types of the destination and source
//                     (they may differ, e.g. GPR destination with Xmm source).
//   destEnc / srcEnc - map a register number to the matching encoded register.
//
// Dest must be in a register and Src1 must be a ConstantInteger32.  Src0 may
// be a register variable (Emitter.RegRegImm), a stack variable or an
// OperandX8632Mem (both Emitter.RegAddrImm).  Callers are allowed to leave an
// emitter slot null when that form does not apply to their instruction, so
// each slot is asserted to be non-null right before it is invoked.
template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
          SReg_t (*srcEnc)(int32_t)>
void emitIASThreeOpImmOps(
    const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src0,
    const Operand *Src1,
    const x86::AssemblerX86::ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
  intptr_t StartPosition = Asm->GetPosition();
  // This only handles Dest being a register, and Src1 being an immediate.
  assert(Dest->hasReg());
  DReg_t DestReg = destEnc(Dest->getRegNum());
  x86::Immediate Imm(llvm::cast<ConstantInteger32>(Src1)->getValue());
  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src0)) {
    if (SrcVar->hasReg()) {
      assert(Emitter.RegRegImm != nullptr);
      SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
      (Asm->*(Emitter.RegRegImm))(DispatchTy, DestReg, SrcReg, Imm);
    } else {
      // Source variable lives on the stack: use the register/address form.
      assert(Emitter.RegAddrImm != nullptr);
      x86::Address SrcStackAddr = static_cast<TargetX8632 *>(Func->getTarget())
                                      ->stackVarToAsmOperand(SrcVar);
      (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, SrcStackAddr, Imm);
    }
  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src0)) {
    // Check before the segment override so no bytes are emitted for an
    // instruction form the caller did not provide.
    assert(Emitter.RegAddrImm != nullptr);
    Mem->emitSegmentOverride(Asm);
    (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, Mem->toAsmAddress(Asm),
                                 Imm);
  } else {
    llvm_unreachable("Unexpected operand type");
  }
  emitIASBytes(Func, Asm, StartPosition);
}
void emitIASMovlikeXMM(const Cfg *Func, const Variable *Dest,
const Operand *Src,
const x86::AssemblerX86::XmmEmitterMovOps Emitter) {
......@@ -1174,6 +1229,21 @@ template <> void InstX8632Imul::emitIAS(const Cfg *Func) const {
}
}
// Integrated-assembler emission for insertps.  The instruction is two-address
// at this level (Dest is also source 0), requires at least SSE4.1, and takes
// source 1 as the inserted operand and source 2 as the immediate.
template <> void InstX8632Insertps::emitIAS(const Cfg *Func) const {
  typedef RegX8632::XmmRegister XmmReg;
  typedef x86::AssemblerX86::ThreeOpImmEmitter<XmmReg, XmmReg> EmitterTy;

  assert(getSrcSize() == 3);
  assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
         TargetX8632::SSE4_1);
  const Variable *Target = getDest();
  assert(Target == getSrc(0));

  // Register/register and register/address encoders, in that order.
  static const EmitterTy InsertpsEmitter = {&x86::AssemblerX86::insertps,
                                            &x86::AssemblerX86::insertps};
  emitIASThreeOpImmOps<XmmReg, XmmReg, RegX8632::getEncodedXmm,
                       RegX8632::getEncodedXmm>(
      Func, Target->getType(), Target, getSrc(1), getSrc(2), InsertpsEmitter);
}
template <> void InstX8632Cbwdq::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
......@@ -1275,6 +1345,17 @@ void InstX8632Shld::emit(const Cfg *Func) const {
Str << "\n";
}
void InstX8632Shld::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3);
assert(getDest() == getSrc(0));
const Variable *Dest = getDest();
const Operand *Src1 = getSrc(1);
const Operand *Src2 = getSrc(2);
static const x86::AssemblerX86::GPREmitterShiftD Emitter = {
&x86::AssemblerX86::shld, &x86::AssemblerX86::shld};
emitIASGPRShiftDouble(Func, Dest, Src1, Src2, Emitter);
}
void InstX8632Shld::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
......@@ -1301,6 +1382,17 @@ void InstX8632Shrd::emit(const Cfg *Func) const {
Str << "\n";
}
void InstX8632Shrd::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3);
assert(getDest() == getSrc(0));
const Variable *Dest = getDest();
const Operand *Src1 = getSrc(1);
const Operand *Src2 = getSrc(2);
static const x86::AssemblerX86::GPREmitterShiftD Emitter = {
&x86::AssemblerX86::shrd, &x86::AssemblerX86::shrd};
emitIASGPRShiftDouble(Func, Dest, Src1, Src2, Emitter);
}
void InstX8632Shrd::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
......@@ -2240,8 +2332,10 @@ template <> void InstX8632Pextr::emit(const Cfg *Func) const {
Str << "\t" << Opcode
<< TypeX8632Attributes[getSrc(0)->getType()].PackString << "\t";
Variable *Dest = getDest();
// pextrw must take a register dest.
assert(Dest->getType() != IceType_i16 || Dest->hasReg());
// pextrw must take a register dest. There is an SSE4.1 version that takes
// a memory dest, but we aren't using it. For uniformity, just restrict
// them all to have a register dest for now.
assert(Dest->hasReg());
Dest->asType(IceType_i32).emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
......@@ -2250,6 +2344,28 @@ template <> void InstX8632Pextr::emit(const Cfg *Func) const {
Str << "\n";
}
// Integrated-assembler emission for pextr{b,w,d}.  The encoding is selected
// from the destination's type; source 0 is the vector register and source 1
// is the element-index immediate.
template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const {
  assert(getSrcSize() == 2);
  const Variable *Dest = getDest();
  Type DispatchTy = Dest->getType();
  // pextrb and pextrd are SSE4.1 instructions, so anything other than the
  // 16-bit form needs SSE4.1 to be enabled on the target.
  assert(DispatchTy == IceType_i16 ||
         static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
             TargetX8632::SSE4_1);
  // pextrw must take a register dest. There is an SSE4.1 version that takes
  // a memory dest, but we aren't using it. For uniformity, just restrict
  // them all to have a register dest for now.
  assert(Dest->hasReg());
  // pextrw's Src(0) must be a register (both SSE4.1 and SSE2).
  assert(llvm::cast<Variable>(getSrc(0))->hasReg());
  // The source is always a register (asserted above), so no register/address
  // encoder is supplied and that slot is intentionally left null.
  static const x86::AssemblerX86::ThreeOpImmEmitter<
      RegX8632::GPRRegister, RegX8632::XmmRegister> Emitter = {
      &x86::AssemblerX86::pextr, nullptr};
  emitIASThreeOpImmOps<RegX8632::GPRRegister, RegX8632::XmmRegister,
                       RegX8632::getEncodedGPR, RegX8632::getEncodedXmm>(
      Func, DispatchTy, Dest, getSrc(0), getSrc(1), Emitter);
}
template <> void InstX8632Pinsr::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
......@@ -2278,6 +2394,52 @@ template <> void InstX8632Pinsr::emit(const Cfg *Func) const {
Str << "\n";
}
// Integrated-assembler emission for pinsr{b,w,d}.  Source 0 is the destination
// vector itself, source 1 is the scalar being inserted (its type selects the
// encoding) and source 2 is the element-index immediate.
template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const {
  typedef RegX8632::XmmRegister DstRegTy;
  typedef RegX8632::GPRRegister SrcRegTy;

  assert(getSrcSize() == 3);
  assert(getDest() == getSrc(0));

  const Operand *Inserted = getSrc(1);
  const Type ElemTy = Inserted->getType();
  // Only the 16-bit form is available without SSE4.1; pinsrb and pinsrd are
  // SSE4.1 instructions.
  assert(ElemTy == IceType_i16 ||
         static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
             TargetX8632::SSE4_1);

  // When the inserted operand is a register it is always encoded as an r32.
  // That works because the ByteReg encodings overlap the r32 encodings, but
  // it relies on the register allocator never picking "ah", whose encoding
  // does not overlap.
  static const x86::AssemblerX86::ThreeOpImmEmitter<DstRegTy, SrcRegTy>
      PinsrEmitter = {&x86::AssemblerX86::pinsr, &x86::AssemblerX86::pinsr};
  emitIASThreeOpImmOps<DstRegTy, SrcRegTy, RegX8632::getEncodedXmm,
                       RegX8632::getEncodedGPR>(
      Func, ElemTy, getDest(), Inserted, getSrc(2), PinsrEmitter);
}
// Integrated-assembler emission for pshufd: source 0 is the vector being
// shuffled and source 1 is the shuffle-control immediate.
template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const {
  typedef RegX8632::XmmRegister XmmReg;

  assert(getSrcSize() == 2);
  const Variable *Target = getDest();

  // Register/register and register/address encoders, in that order.
  static const x86::AssemblerX86::ThreeOpImmEmitter<XmmReg, XmmReg>
      PshufdEmitter = {&x86::AssemblerX86::pshufd, &x86::AssemblerX86::pshufd};
  emitIASThreeOpImmOps<XmmReg, XmmReg, RegX8632::getEncodedXmm,
                       RegX8632::getEncodedXmm>(
      Func, Target->getType(), Target, getSrc(0), getSrc(1), PshufdEmitter);
}
// Integrated-assembler emission for shufps.  Source 0 is the destination
// itself; source 1 is the second vector operand and source 2 is the
// shuffle-control immediate.
template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const {
  typedef RegX8632::XmmRegister XmmReg;

  assert(getSrcSize() == 3);
  const Variable *Target = getDest();
  assert(Target == getSrc(0));

  // Register/register and register/address encoders, in that order.
  static const x86::AssemblerX86::ThreeOpImmEmitter<XmmReg, XmmReg>
      ShufpsEmitter = {&x86::AssemblerX86::shufps, &x86::AssemblerX86::shufps};
  emitIASThreeOpImmOps<XmmReg, XmmReg, RegX8632::getEncodedXmm,
                       RegX8632::getEncodedXmm>(
      Func, Target->getType(), Target, getSrc(1), getSrc(2), ShufpsEmitter);
}
void InstX8632Pop::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 0);
......
......@@ -761,7 +761,7 @@ public:
getSrc(2)->emit(Func);
Str << "\n";
}
void emitIAS(const Cfg *Func) const override { emit(Func); }
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
......@@ -803,6 +803,7 @@ public:
getSrc(1)->emit(Func);
Str << "\n";
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
......@@ -976,6 +977,7 @@ public:
InstX8632Shld(Func, Dest, Source1, Source2);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Shld); }
......@@ -997,6 +999,7 @@ public:
InstX8632Shrd(Func, Dest, Source1, Source2);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Shrd); }
......@@ -1518,14 +1521,19 @@ template <> void InstX8632Sqrtss::emit(const Cfg *Func) const;
template <> void InstX8632Subss::emit(const Cfg *Func) const;
template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const;
template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const;
template <> void InstX8632Div::emitIAS(const Cfg *Func) const;
template <> void InstX8632Idiv::emitIAS(const Cfg *Func) const;
template <> void InstX8632Imul::emitIAS(const Cfg *Func) const;
template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const;
template <> void InstX8632Insertps::emitIAS(const Cfg *Func) const;
template <> void InstX8632Movd::emitIAS(const Cfg *Func) const;
template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const;
template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const;
} // end of namespace Ice
......
......@@ -940,6 +940,48 @@ void AssemblerX86::shufps(XmmRegister dst, XmmRegister src,
EmitUint8(imm.value());
}
// pshufd dst, src, imm8 (register source).  Bytes written: 66 0F 70, the
// register/register operand byte, then the immediate.  The type argument is
// unused.
void AssemblerX86::pshufd(Type /* Ty */, XmmRegister dst, XmmRegister src,
                          const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // The immediate is written as a single byte, so it has to fit in one.
  assert(imm.is_uint8());
  EmitUint8(0x66); // prefix
  EmitUint8(0x0F); // two-byte opcode escape
  EmitUint8(0x70); // opcode
  EmitXmmRegisterOperand(dst, src);
  EmitUint8(imm.value());
}
// pshufd dst, [src], imm8 (memory source).  Bytes written: 66 0F 70, the
// operand encoding for dst/src, then the immediate.  The type argument is
// unused.
void AssemblerX86::pshufd(Type /* Ty */, XmmRegister dst, const Address &src,
                          const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // The immediate is written as a single byte, so it has to fit in one.
  assert(imm.is_uint8());
  EmitUint8(0x66); // prefix
  EmitUint8(0x0F); // two-byte opcode escape
  EmitUint8(0x70); // opcode
  EmitOperand(dst, src);
  EmitUint8(imm.value());
}
// shufps dst, src, imm8 (register source).  Bytes written: 0F C6, the
// register/register operand byte, then the immediate.  The type argument is
// unused.
void AssemblerX86::shufps(Type /* Ty */, XmmRegister dst, XmmRegister src,
                          const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // The immediate is written as a single byte, so it has to fit in one.
  assert(imm.is_uint8());
  EmitUint8(0x0F); // two-byte opcode escape
  EmitUint8(0xC6); // opcode
  EmitXmmRegisterOperand(dst, src);
  EmitUint8(imm.value());
}
// shufps dst, [src], imm8 (memory source).  Bytes written: 0F C6, the operand
// encoding for dst/src, then the immediate.  The type argument is unused.
void AssemblerX86::shufps(Type /* Ty */, XmmRegister dst, const Address &src,
                          const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // The immediate is written as a single byte, so it has to fit in one.
  assert(imm.is_uint8());
  EmitUint8(0x0F); // two-byte opcode escape
  EmitUint8(0xC6); // opcode
  EmitOperand(dst, src);
  EmitUint8(imm.value());
}
void AssemblerX86::minpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
......@@ -1163,18 +1205,96 @@ void AssemblerX86::andpd(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst, src);
}
void AssemblerX86::pextrd(GPRRegister dst, XmmRegister src,
const Immediate &imm) {
void AssemblerX86::insertps(Type Ty, XmmRegister dst, XmmRegister src,
const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
assert(imm.is_uint8());
assert(isVectorFloatingType(Ty));
(void)Ty;
EmitUint8(0x66);
EmitUint8(0x0F);
EmitUint8(0x3A);
EmitUint8(0x16);
EmitOperand(src, Operand(dst));
EmitUint8(0x21);
EmitXmmRegisterOperand(dst, src);
EmitUint8(imm.value());
}
// insertps dst, [src], imm8 (memory source).  Bytes written: 66 0F 3A 21, the
// operand encoding for dst/src, then the immediate.  Ty is only validated
// (it must be a floating-point vector type); it does not affect the encoding.
void AssemblerX86::insertps(Type Ty, XmmRegister dst, const Address &src,
                            const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  assert(imm.is_uint8());
  assert(isVectorFloatingType(Ty));
  // Ty is referenced only by the assert above; keep release builds quiet.
  (void)Ty;

  EmitUint8(0x66); // prefix
  EmitUint8(0x0F); // opcode escape bytes ...
  EmitUint8(0x3A); // ... for the three-byte opcode
  EmitUint8(0x21); // opcode
  EmitOperand(dst, src);
  EmitUint8(imm.value());
}
// pinsr{b,w,d} dst, src, imm8 with a general-purpose register source.
//   Ty == i16     : 66 0F C4
//   byte-sized Ty : 66 0F 3A 20
//   otherwise     : 66 0F 3A 22
// followed in every case by the register/register operand byte and the
// immediate.
void AssemblerX86::pinsr(Type Ty, XmmRegister dst, GPRRegister src,
                         const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  assert(imm.is_uint8());

  // All forms start with the same two bytes.
  EmitUint8(0x66);
  EmitUint8(0x0F);
  if (Ty == IceType_i16) {
    EmitUint8(0xC4);
  } else {
    EmitUint8(0x3A);
    EmitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
  }
  // The GPR number is placed in the r/m slot by reusing the Xmm operand
  // emitter with the register re-typed.
  EmitXmmRegisterOperand(dst, XmmRegister(src));
  EmitUint8(imm.value());
}
// pinsr{b,w,d} dst, [src], imm8 with a memory source.
//   Ty == i16     : 66 0F C4
//   byte-sized Ty : 66 0F 3A 20
//   otherwise     : 66 0F 3A 22
// followed in every case by the operand encoding for dst/src and the
// immediate.
void AssemblerX86::pinsr(Type Ty, XmmRegister dst, const Address &src,
                         const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  assert(imm.is_uint8());

  // All forms start with the same two bytes.
  EmitUint8(0x66);
  EmitUint8(0x0F);
  if (Ty == IceType_i16) {
    EmitUint8(0xC4);
  } else {
    EmitUint8(0x3A);
    EmitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
  }
  EmitOperand(dst, src);
  EmitUint8(imm.value());
}
// pextr{b,w,d} dst, src, imm8 with a general-purpose register destination.
//   Ty == i16     : 66 0F C5,    operand byte built as (dst, src)
//   byte-sized Ty : 66 0F 3A 14, operand byte built as (src, dst)
//   otherwise     : 66 0F 3A 16, operand byte built as (src, dst)
// followed in every case by the immediate.
void AssemblerX86::pextr(Type Ty, GPRRegister dst, XmmRegister src,
                         const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  assert(imm.is_uint8());

  // All forms start with the same two bytes.
  EmitUint8(0x66);
  EmitUint8(0x0F);
  if (Ty == IceType_i16) {
    // pextrw (SSE2) is "RMI": the destination must be a register.
    EmitUint8(0xC5);
    EmitXmmRegisterOperand(XmmRegister(dst), src);
  } else {
    // The SSE4.1 versions are "MRI" because the destination can be memory,
    // so the two registers swap places in the operand byte.
    EmitUint8(0x3A);
    EmitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
    EmitXmmRegisterOperand(src, XmmRegister(dst));
  }
  EmitUint8(imm.value());
}
void AssemblerX86::pmovsxdq(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
......@@ -1863,52 +1983,70 @@ void AssemblerX86::sar(Type Ty, const Address &address, GPRRegister shifter) {
EmitGenericShift(7, Ty, address, shifter);
}
void AssemblerX86::shld(GPRRegister dst, GPRRegister src) {
// shld dst, src with the count taken implicitly (no immediate is encoded).
// Bytes written: an operand-size override when Ty is i16, then 0F A5 and the
// register operand byte.  Only i16 and i32 are accepted.
void AssemblerX86::shld(Type Ty, GPRRegister dst, GPRRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  const bool IsWord = (Ty == IceType_i16);
  assert(IsWord || Ty == IceType_i32);
  if (IsWord) {
    EmitOperandSizeOverride();
  }
  EmitUint8(0x0F);
  EmitUint8(0xA5);
  EmitRegisterOperand(src, dst);
}
void AssemblerX86::shld(GPRRegister dst, GPRRegister src,
// shld dst, src, imm8.  Bytes written: an operand-size override when Ty is
// i16, then 0F A4, the register operand byte and the low byte of the
// immediate.  Only i16 and i32 are accepted.
void AssemblerX86::shld(Type Ty, GPRRegister dst, GPRRegister src,
                        const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  const bool IsWord = (Ty == IceType_i16);
  assert(IsWord || Ty == IceType_i32);
  assert(imm.is_int8());
  if (IsWord) {
    EmitOperandSizeOverride();
  }
  EmitUint8(0x0F);
  EmitUint8(0xA4);
  EmitRegisterOperand(src, dst);
  // Only the low byte of the count is encoded.
  EmitUint8(imm.value() & 0xFF);
}
void AssemblerX86::shld(const Address &operand, GPRRegister src) {
void AssemblerX86::shld(Type Ty, const Address &operand, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
assert(Ty == IceType_i16 || Ty == IceType_i32);
if (Ty == IceType_i16)
EmitOperandSizeOverride();
EmitUint8(0x0F);
EmitUint8(0xA5);
EmitOperand(src, Operand(operand));
EmitOperand(src, operand);
}
void AssemblerX86::shrd(GPRRegister dst, GPRRegister src) {
// shrd dst, src with the count taken implicitly (no immediate is encoded).
// Bytes written: an operand-size override when Ty is i16, then 0F AD and the
// register operand byte.  Only i16 and i32 are accepted.
void AssemblerX86::shrd(Type Ty, GPRRegister dst, GPRRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  const bool IsWord = (Ty == IceType_i16);
  assert(IsWord || Ty == IceType_i32);
  if (IsWord) {
    EmitOperandSizeOverride();
  }
  EmitUint8(0x0F);
  EmitUint8(0xAD);
  EmitRegisterOperand(src, dst);
}
void AssemblerX86::shrd(GPRRegister dst, GPRRegister src,
// shrd dst, src, imm8.  Bytes written: an operand-size override when Ty is
// i16, then 0F AC, the register operand byte and the low byte of the
// immediate.  Only i16 and i32 are accepted.
void AssemblerX86::shrd(Type Ty, GPRRegister dst, GPRRegister src,
                        const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  const bool IsWord = (Ty == IceType_i16);
  assert(IsWord || Ty == IceType_i32);
  assert(imm.is_int8());
  if (IsWord) {
    EmitOperandSizeOverride();
  }
  EmitUint8(0x0F);
  EmitUint8(0xAC);
  EmitRegisterOperand(src, dst);
  // Only the low byte of the count is encoded.
  EmitUint8(imm.value() & 0xFF);
}
void AssemblerX86::shrd(const Address &dst, GPRRegister src) {
void AssemblerX86::shrd(Type Ty, const Address &dst, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
assert(Ty == IceType_i16 || Ty == IceType_i32);
if (Ty == IceType_i16)
EmitOperandSizeOverride();
EmitUint8(0x0F);
EmitUint8(0xAD);
EmitOperand(src, Operand(dst));
EmitOperand(src, dst);
}
void AssemblerX86::neg(Type Ty, GPRRegister reg) {
......
......@@ -382,10 +382,20 @@ public:
};
struct GPREmitterShiftOp {
TypedEmitGPRGPR GPRGPR;
TypedEmitGPRImm GPRImm;
// Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are not.
// In practice, we always normalize the Dest to a Register first.
TypedEmitGPRGPR GPRGPR;
TypedEmitGPRImm GPRImm;
};
typedef void (AssemblerX86::*TypedEmitGPRGPRImm)(Type, GPRRegister,
GPRRegister,
const Immediate &);
  // Pair of encoder member-function pointers for the shift-double
  // instructions (shld/shrd).  Users brace-initialize this struct, so the
  // field order below is significant: first the form without an immediate,
  // then the form with one.
  struct GPREmitterShiftD {
    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice
    // we always normalize Dest to a Register first.
    // Encoder for (Type, dst, src) with no explicit count operand.
    TypedEmitGPRGPR GPRGPR;
    // Encoder for (Type, dst, src, immediate count).
    TypedEmitGPRGPRImm GPRGPRImm;
  };
typedef void (AssemblerX86::*TypedEmitAddrGPR)(Type, const Address &,
......@@ -433,6 +443,19 @@ public:
TypedEmitAddr RegAddr;
};
// Three operand (potentially) cross Xmm/GPR instructions.
// The last operand must be an immediate.
template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
typedef void (AssemblerX86::*TypedEmitRegRegImm)(Type, DReg_t, SReg_t,
const Immediate &);
typedef void (AssemblerX86::*TypedEmitRegAddrImm)(Type, DReg_t,
const Address &,
const Immediate &);
TypedEmitRegRegImm RegRegImm;
TypedEmitRegAddrImm RegAddrImm;
};
/*
* Emit Machine Instructions.
*/
......@@ -570,6 +593,13 @@ public:
void sqrtpd(XmmRegister dst);
void shufpd(XmmRegister dst, XmmRegister src, const Immediate &mask);
void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void pshufd(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void shufps(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
void cvtdq2ps(Type, XmmRegister dst, const Address &src);
......@@ -604,7 +634,19 @@ public:
void orpd(XmmRegister dst, XmmRegister src);
void pextrd(GPRRegister dst, XmmRegister src, const Immediate &imm);
void insertps(Type Ty, XmmRegister dst, XmmRegister src,
const Immediate &imm);
void insertps(Type Ty, XmmRegister dst, const Address &src,
const Immediate &imm);
void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
void pinsr(Type Ty, XmmRegister dst, const Address &src,
const Immediate &imm);
void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
void pextr(Type Ty, GPRRegister dst, const Address &src,
const Immediate &imm);
void pmovsxdq(XmmRegister dst, XmmRegister src);
void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
......@@ -715,12 +757,12 @@ public:
void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
void sar(Type Ty, const Address &address, GPRRegister shifter);
void shld(GPRRegister dst, GPRRegister src);
void shld(GPRRegister dst, GPRRegister src, const Immediate &imm);
void shld(const Address &operand, GPRRegister src);
void shrd(GPRRegister dst, GPRRegister src);
void shrd(GPRRegister dst, GPRRegister src, const Immediate &imm);
void shrd(const Address &dst, GPRRegister src);
void shld(Type Ty, GPRRegister dst, GPRRegister src);
void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
void shld(Type Ty, const Address &operand, GPRRegister src);
void shrd(Type Ty, GPRRegister dst, GPRRegister src);
void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
void shrd(Type Ty, const Address &dst, GPRRegister src);
void neg(Type Ty, GPRRegister reg);
void neg(Type Ty, const Address &addr);
......
......@@ -161,4 +161,138 @@ entry:
; CHECK: 89 51 80 mov dword ptr [ecx - 128], edx
; CHECK: 89 91 00 01 00 00 mov dword ptr [ecx + 256], edx
; The 16-bit pinsrw/pextrw (SSE2) are quite different from
; the pinsr{b,d}/pextr{b,d} (SSE4.1).
define <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, i32 %elt3, i32 %elt4) {
entry:
%elt12 = add i32 %elt1, %elt2
%elt34 = add i32 %elt3, %elt4
%res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2
%res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3
ret <4 x i32> %res3
}
; CHECK-LABEL: test_pinsrd:
; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0, e{{.*}}, 1
; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0, e{{.*}}, 2
; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0, e{{.*}}, 3
define <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) {
entry:
%elt1 = trunc i32 %elt1_w to i8
%elt2 = trunc i32 %elt2_w to i8
%elt3 = trunc i32 %elt3_w to i8
%elt4 = trunc i32 %elt4_w to i8
%elt12 = add i8 %elt1, %elt2
%elt34 = add i8 %elt3, %elt4
%res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1
%res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7
%res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15
ret <16 x i8> %res3
}
; CHECK-LABEL: test_pinsrb:
; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0, e{{.*}}, 1
; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0, e{{.*}}, 7
; CHECK-DAG: 66 0f 3a 20 {{.*}} 0f pinsrb xmm0, byte ptr {{.*}}, 15
define <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) {
entry:
%elt1 = trunc i32 %elt1_w to i16
%elt2 = trunc i32 %elt2_w to i16
%elt3 = trunc i32 %elt3_w to i16
%elt4 = trunc i32 %elt4_w to i16
%elt12 = add i16 %elt1, %elt2
%elt34 = add i16 %elt3, %elt4
%res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1
%res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4
%res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7
ret <8 x i16> %res3
}
; CHECK-LABEL: test_pinsrw:
; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0, e{{.*}}, 1
; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0, e{{.*}}, 4
; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0, e{{.*}}, 7
define i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4) {
entry:
switch i32 %c, label %three [i32 0, label %zero
i32 1, label %one
i32 2, label %two]
zero:
%res0 = extractelement <4 x i32> %vec1, i32 0
ret i32 %res0
one:
%res1 = extractelement <4 x i32> %vec2, i32 1
ret i32 %res1
two:
%res2 = extractelement <4 x i32> %vec3, i32 2
ret i32 %res2
three:
%res3 = extractelement <4 x i32> %vec4, i32 3
ret i32 %res3
}
; CHECK-LABEL: test_pextrd:
; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax, xmm0, 0
; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax, xmm1, 1
; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax, xmm2, 2
; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax, xmm3, 3
define i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, <16 x i8> %vec4) {
entry:
switch i32 %c, label %three [i32 0, label %zero
i32 1, label %one
i32 2, label %two]
zero:
%res0 = extractelement <16 x i8> %vec1, i32 0
%res0_ext = zext i8 %res0 to i32
ret i32 %res0_ext
one:
%res1 = extractelement <16 x i8> %vec2, i32 6
%res1_ext = zext i8 %res1 to i32
ret i32 %res1_ext
two:
%res2 = extractelement <16 x i8> %vec3, i32 12
%res2_ext = zext i8 %res2 to i32
ret i32 %res2_ext
three:
%res3 = extractelement <16 x i8> %vec4, i32 15
%res3_ext = zext i8 %res3 to i32
ret i32 %res3_ext
}
; CHECK-LABEL: test_pextrb:
; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax, xmm0, 0
; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax, xmm1, 6
; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax, xmm2, 12
; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax, xmm3, 15
define i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, <8 x i16> %vec4) {
entry:
switch i32 %c, label %three [i32 0, label %zero
i32 1, label %one
i32 2, label %two]
zero:
%res0 = extractelement <8 x i16> %vec1, i32 0
%res0_ext = zext i16 %res0 to i32
ret i32 %res0_ext
one:
%res1 = extractelement <8 x i16> %vec2, i32 2
%res1_ext = zext i16 %res1 to i32
ret i32 %res1_ext
two:
%res2 = extractelement <8 x i16> %vec3, i32 5
%res2_ext = zext i16 %res2 to i32
ret i32 %res2_ext
three:
%res3 = extractelement <8 x i16> %vec4, i32 7
%res3_ext = zext i16 %res3 to i32
ret i32 %res3_ext
}
; CHECK-LABEL: test_pextrw:
; CHECK-DAG: 66 0f c5 c0 00 pextrw eax, xmm0, 0
; CHECK-DAG: 66 0f c5 c1 02 pextrw eax, xmm1, 2
; CHECK-DAG: 66 0f c5 c2 05 pextrw eax, xmm2, 5
; CHECK-DAG: 66 0f c5 c3 07 pextrw eax, xmm3, 7
; ERRORS-NOT: ICE translation error
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment