Commit e11f878a by David Sehr

Use three-address form of imul

Previously we did not take advantage of the three-address versions of the imul instruction. With this change we are able to avoid some copies before imuls. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1365433004 .
parent 578f1161
......@@ -751,6 +751,11 @@ public:
// Multiply forms that take a single explicit register or memory operand.
void imul(Type Ty, typename Traits::GPRRegister reg);
void imul(Type Ty, const typename Traits::Address &address);
// Three-address multiply by an immediate. The source operand is a register
// (first overload) or a memory operand (second overload); dst is a separate
// register operand. Both definitions assert that Ty is i16 or i32.
void imul(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src, const Immediate &imm);
void imul(Type Ty, typename Traits::GPRRegister dst,
const typename Traits::Address &address, const Immediate &imm);
// NOTE(review): presumably the unsigned counterparts of the one-operand imul
// forms above -- confirm against the definitions.
void mul(Type Ty, typename Traits::GPRRegister reg);
void mul(Type Ty, const typename Traits::Address &address);
......
......@@ -2581,6 +2581,46 @@ void AssemblerX86Base<Machine>::imul(Type Ty,
}
// Encodes the three-address multiply "imul dst, src, imm" where the source is
// a register. Only 16-bit and 32-bit operand types are accepted.
template <class Machine>
void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
                                     typename Traits::GPRRegister src,
                                     const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
  assert(Ty == IceType_i16 || Ty == IceType_i32);
  if (Ty == IceType_i16)
    emitOperandSizeOverride();
  emitRexRB(Ty, dst, src);
  // Opcode 0x6B is followed by a single immediate byte; opcode 0x69 is
  // followed by an immediate sized according to Ty.
  const bool UseByteImm = imm.is_int8();
  emitUint8(UseByteImm ? 0x6B : 0x69);
  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
  if (UseByteImm)
    emitUint8(imm.value() & 0xFF);
  else
    emitImmediate(Ty, imm);
}
// Encodes the three-address multiply "imul dst, [address], imm" where the
// source is a memory operand. Only 16-bit and 32-bit operand types are
// accepted.
template <class Machine>
void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
                                     const typename Traits::Address &address,
                                     const Immediate &imm) {
  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
  assert(Ty == IceType_i16 || Ty == IceType_i32);
  if (Ty == IceType_i16)
    emitOperandSizeOverride();
  emitRex(Ty, address, dst);
  // Opcode 0x6B is followed by a single immediate byte; opcode 0x69 is
  // followed by an immediate sized according to Ty.
  const bool UseByteImm = imm.is_int8();
  emitUint8(UseByteImm ? 0x6B : 0x69);
  emitOperand(gprEncoding(dst), address);
  if (UseByteImm)
    emitUint8(imm.value() & 0xFF);
  else
    emitImmediate(Ty, imm);
}
template <class Machine>
void AssemblerX86Base<Machine>::mul(Type Ty, typename Traits::GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
if (Ty == IceType_i16)
......
......@@ -68,6 +68,7 @@ public:
Icmp,
Idiv,
Imul,
ImulImm,
Insertps,
Jmp,
Label,
......@@ -1622,6 +1623,25 @@ private:
};
// Three-address multiply instruction with operands (Dest, Source0, Source1).
// Its emit() and emitIAS() implementations assert that Source1 is a Constant.
template <class Machine>
class InstX86ImulImm
    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::ImulImm> {
public:
  // Builds a new instruction in storage obtained from Func's allocator.
  static InstX86ImulImm *create(Cfg *Func, Variable *Dest, Operand *Source0,
                                Operand *Source1) {
    auto *Storage = Func->allocate<InstX86ImulImm>();
    return new (Storage) InstX86ImulImm(Func, Dest, Source0, Source1);
  }

  void emit(const Cfg *Func) const override;
  void emitIAS(const Cfg *Func) const override;

private:
  // Construction goes through create(); the constructor only forwards its
  // arguments to the three-address base class.
  InstX86ImulImm(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::ImulImm>(
            Func, Dest, Source0, Source1) {}
};
template <class Machine>
class InstX86Mulps
: public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulps, true> {
public:
......@@ -2790,6 +2810,7 @@ template <class Machine> struct Insts {
using XorRMW = InstX86XorRMW<Machine>;
using Pxor = InstX86Pxor<Machine>;
using Imul = InstX86Imul<Machine>;
using ImulImm = InstX86ImulImm<Machine>;
using Mulps = InstX86Mulps<Machine>;
using Mulss = InstX86Mulss<Machine>;
using Pmull = InstX86Pmull<Machine>;
......@@ -2897,6 +2918,7 @@ template <class Machine> struct Insts {
template <> const char *InstX86XorRMW<Machine>::Base::Opcode = "xor"; \
template <> const char *InstX86Pxor<Machine>::Base::Opcode = "pxor"; \
template <> const char *InstX86Imul<Machine>::Base::Opcode = "imul"; \
template <> const char *InstX86ImulImm<Machine>::Base::Opcode = "imul"; \
template <> const char *InstX86Mulps<Machine>::Base::Opcode = "mulps"; \
template <> const char *InstX86Mulss<Machine>::Base::Opcode = "mulss"; \
template <> const char *InstX86Pmull<Machine>::Base::Opcode = "pmull"; \
......
......@@ -1330,8 +1330,8 @@ void InstX86Imul<Machine>::emitIAS(const Cfg *Func) const {
&InstX86Base<Machine>::Traits::Assembler::imul};
emitIASOpTyGPR<Machine>(Func, Ty, this->getSrc(1), Emitter);
} else {
// We only use imul as a two-address instruction even though there is a 3
// operand version when one of the operands is a constant.
// The two-address version is used when multiplying by a non-constant
// or doing an 8-bit multiply.
assert(Var == this->getSrc(0));
static const typename InstX86Base<
Machine>::Traits::Assembler::GPREmitterRegOp Emitter = {
......@@ -1343,6 +1343,43 @@ void InstX86Imul<Machine>::emitIAS(const Cfg *Func) const {
}
// Textual emission of the three-address multiply. Prints "imul" with a width
// suffix, followed by the constant, the source operand, and the destination,
// in that order. Does nothing when dumping is compiled out.
template <class Machine>
void InstX86ImulImm<Machine>::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Out = Func->getContext()->getStrEmit();
  assert(this->getSrcSize() == 2);
  Variable *Result = this->getDest();
  const auto ResultTy = Result->getType();
  assert(ResultTy == IceType_i16 || ResultTy == IceType_i32);
  auto *Multiplicand = this->getSrc(0);
  auto *Multiplier = this->getSrc(1);
  assert(llvm::isa<Constant>(Multiplier));
  Out << "\timul" << this->getWidthString(ResultTy) << "\t";
  Multiplier->emit(Func);
  Out << ", ";
  Multiplicand->emit(Func);
  Out << ", ";
  Result->emit(Func);
}
// Integrated-assembler emission of the three-address multiply. Hands the
// destination, the source operand, and the constant to emitIASThreeOpImmOps
// together with the two assembler imul overloads taking an immediate.
template <class Machine>
void InstX86ImulImm<Machine>::emitIAS(const Cfg *Func) const {
  // Local shorthands for the machine-specific types used below.
  using TraitsTy = typename InstX86Base<Machine>::Traits;
  using AsmTy = typename TraitsTy::Assembler;
  using GPRTy = typename TraitsTy::RegisterSet::GPRRegister;

  assert(this->getSrcSize() == 2);
  const Variable *Dest = this->getDest();
  Type Ty = Dest->getType();
  assert(llvm::isa<Constant>(this->getSrc(1)));

  // Both entries name "imul"; the member types of the emitter struct select
  // the matching overload for each slot.
  static const typename AsmTy::template ThreeOpImmEmitter<GPRTy, GPRTy>
      Emitter = {&AsmTy::imul, &AsmTy::imul};

  emitIASThreeOpImmOps<Machine, GPRTy, GPRTy,
                       TraitsTy::RegisterSet::getEncodedGPR,
                       TraitsTy::RegisterSet::getEncodedGPR>(
      Func, Ty, Dest, this->getSrc(0), this->getSrc(1), Emitter);
}
template <class Machine>
void InstX86Insertps<Machine>::emitIAS(const Cfg *Func) const {
assert(this->getSrcSize() == 3);
assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
......
......@@ -428,6 +428,9 @@ protected:
// Creates an Imul instruction from Dest and Src0 and inserts it into Context.
void _imul(Variable *Dest, Operand *Src0) {
  auto *NewInst = Traits::Insts::Imul::create(Func, Dest, Src0);
  Context.insert(NewInst);
}
// Creates an ImulImm instruction (Dest, Src0, constant Imm) and inserts it
// into Context.
void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
  auto *NewInst = Traits::Insts::ImulImm::create(Func, Dest, Src0, Imm);
  Context.insert(NewInst);
}
// Creates an Insertps instruction from Dest, Src0 and Src1 and inserts it into
// Context.
void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
  auto *NewInst = Traits::Insts::Insertps::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
......
......@@ -1618,11 +1618,17 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
if (isByteSizedArithType(Dest->getType())) {
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
_imul(T, Src0 == Src1 ? T : Src1);
_mov(Dest, T);
} else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
T = makeReg(Dest->getType());
_imul_imm(T, Src0, ImmConst);
_mov(Dest, T);
} else {
_mov(T, Src0);
_imul(T, Src0 == Src1 ? T : Src1);
_mov(Dest, T);
}
_imul(T, Src0 == Src1 ? T : Src1);
_mov(Dest, T);
break;
case InstArithmetic::Shl:
_mov(T, Src0);
......
......@@ -250,6 +250,88 @@ entry:
; CHECK-LABEL: testMul32Imm16Neg
; CHECK: 69 c0 01 ff ff ff imul eax,eax,0xffffff01
; 32-bit multiply of %a by 232, a constant outside the signed 8-bit range.
; %a is used again by the add after the multiply.
define i32 @testMul32Imm32ThreeAddress(i32 %a) {
entry:
%mul = mul i32 232, %a
%add = add i32 %mul, %a
ret i32 %add
}
; CHECK-LABEL: testMul32Imm32ThreeAddress
; CHECK: 69 c8 e8 00 00 00 imul ecx,eax,0xe8
; 32-bit multiply of a value loaded from the address in %addr_arg by 232, a
; constant outside the signed 8-bit range.
define i32 @testMul32Mem32Imm32ThreeAddress(i32 %addr_arg) {
entry:
%__1 = inttoptr i32 %addr_arg to i32*
%a = load i32, i32* %__1, align 1
%mul = mul i32 232, %a
ret i32 %mul
}
; CHECK-LABEL: testMul32Mem32Imm32ThreeAddress
; CHECK: 69 00 e8 00 00 00 imul eax,DWORD PTR [eax],0xe8
; 32-bit multiply of %a by 127, a constant that fits in a signed 8-bit value.
; %a is used again by the add after the multiply.
define i32 @testMul32Imm8ThreeAddress(i32 %a) {
entry:
%mul = mul i32 127, %a
%add = add i32 %mul, %a
ret i32 %add
}
; CHECK-LABEL: testMul32Imm8ThreeAddress
; CHECK: 6b c8 7f imul ecx,eax,0x7f
; 32-bit multiply of a value loaded from the address in %addr_arg by 127, a
; constant that fits in a signed 8-bit value.
define i32 @testMul32Mem32Imm8ThreeAddress(i32 %addr_arg) {
entry:
%__1 = inttoptr i32 %addr_arg to i32*
%a = load i32, i32* %__1, align 1
%mul = mul i32 127, %a
ret i32 %mul
}
; CHECK-LABEL: testMul32Mem32Imm8ThreeAddress
; CHECK: 6b 00 7f imul eax,DWORD PTR [eax],0x7f
; 16-bit multiply: %a is truncated to i16 and multiplied by 232, a constant
; outside the signed 8-bit range. The truncated value is used again by the add,
; and the sum is zero-extended for the return.
define i32 @testMul16Imm16ThreeAddress(i32 %a) {
entry:
%arg_i16 = trunc i32 %a to i16
%mul = mul i16 232, %arg_i16
%add = add i16 %mul, %arg_i16
%result = zext i16 %add to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Imm16ThreeAddress
; CHECK: 66 69 c8 e8 00 imul cx,ax,0xe8
; 16-bit multiply of a value loaded from the address in %addr_arg by 232, a
; constant outside the signed 8-bit range. The product is zero-extended for the
; return.
define i32 @testMul16Mem16Imm16ThreeAddress(i32 %addr_arg) {
entry:
%__1 = inttoptr i32 %addr_arg to i16*
%a = load i16, i16* %__1, align 1
%mul = mul i16 232, %a
%result = zext i16 %mul to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Mem16Imm16ThreeAddress
; CHECK: 66 69 00 e8 00 imul ax,WORD PTR [eax],0xe8
; 16-bit multiply: %a is truncated to i16 and multiplied by 127, a constant
; that fits in a signed 8-bit value. The truncated value is used again by the
; add, and the sum is zero-extended for the return.
define i32 @testMul16Imm8ThreeAddress(i32 %a) {
entry:
%arg_i16 = trunc i32 %a to i16
%mul = mul i16 127, %arg_i16
%add = add i16 %mul, %arg_i16
%result = zext i16 %add to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Imm8ThreeAddress
; CHECK: 66 6b c8 7f imul cx,ax,0x7f
; 16-bit multiply of a value loaded from the address in %addr_arg by 127, a
; constant that fits in a signed 8-bit value. The product is zero-extended for
; the return.
define i32 @testMul16Mem16Imm8ThreeAddress(i32 %addr_arg) {
entry:
%__1 = inttoptr i32 %addr_arg to i16*
%a = load i16, i16* %__1, align 1
%mul = mul i16 127, %a
%result = zext i16 %mul to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Mem16Imm8ThreeAddress
; CHECK: 66 6b 00 7f imul ax,WORD PTR [eax],0x7f
; The GPR shift instructions either allow an 8-bit immediate or
; have a special encoding for "1".
define internal i32 @testShl16Imm8(i32 %arg) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment