Commit 0ac50dcf by Jan Voung

Handle imul, pcmpeq, pcmpgt.

Be sure to legalize 8-bit imul immediates (the 8-bit imul has only the r/m form). Add a test for that, and cover a couple of other ops too.

There is a one-byte-shorter form when Dest/Src0 == EAX and Src1 is not an immediate, but that isn't taken advantage of.

Go ahead and add the optimization for 8-bit immediates for i16/i32 (not allowed for i8). It shows up sometimes in spec, e.g., to multiply by 10. There is also a lot of multiply by 4, which we could strength-reduce.

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/617593002
parent 3a569183
......@@ -720,6 +720,12 @@ template <>
const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pandn::Emitter = {
&x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn, NULL};
// Emitter table for the packed compare-equal instruction. Both populated
// entries are overloads of AssemblerX86::pcmpeq; the third entry is NULL.
// NOTE(review): the two entries presumably select the xmm/xmm and
// xmm/address forms, in that order -- confirm against XmmEmitterTwoOps.
template <>
const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pcmpeq::Emitter = {
&x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq, NULL};
// Emitter table for the packed compare-greater-than instruction. Both
// populated entries are overloads of AssemblerX86::pcmpgt; the third entry is
// NULL.
// NOTE(review): the two entries presumably select the xmm/xmm and
// xmm/address forms, in that order -- confirm against XmmEmitterTwoOps.
template <>
const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pcmpgt::Emitter = {
&x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt, NULL};
// Emitter table for pmuludq. Both populated entries are overloads of
// AssemblerX86::pmuludq; the third entry is NULL.
// NOTE(review): the two entries presumably select the xmm/xmm and
// xmm/address forms, in that order -- confirm against XmmEmitterTwoOps.
template <>
const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pmuludq::Emitter = {
&x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq, NULL};
template <>
......@@ -904,6 +910,30 @@ template <> void InstX8632Imul::emit(const Cfg *Func) const {
}
}
// Emits imul through the integrated assembler.
//
// Byte-sized destinations use the one-operand form, which requires the first
// source to already live in eax; every other type uses the two-address
// "dest *= src" form.
template <> void InstX8632Imul::emitIAS(const Cfg *Func) const {
  assert(getSrcSize() == 2);
  const Variable *DestVar = getDest();
  const Type OpTy = DestVar->getType();
  const Operand *Multiplier = getSrc(1);

  if (!isByteSizedArithType(OpTy)) {
    // imul is only ever used as a two-address instruction here, even though
    // a three-operand encoding exists when one operand is a constant.
    assert(DestVar == getSrc(0));
    const x86::AssemblerX86::GPREmitterRegOp RegOpEmitter = {
        &x86::AssemblerX86::imul, &x86::AssemblerX86::imul,
        &x86::AssemblerX86::imul};
    emitIASRegOpTyGPR(Func, OpTy, DestVar, Multiplier, RegOpEmitter);
    return;
  }

  // The 8-bit imul exists only as "imul r/m8", so the first source must have
  // been placed in eax by lowering.
  Variable *Multiplicand = llvm::dyn_cast<Variable>(getSrc(0));
  (void)Multiplicand; // Referenced only by the assert below.
  assert(Multiplicand && Multiplicand->getRegNum() == RegX8632::Reg_eax);
  const x86::AssemblerX86::GPREmitterOneOp OneOpEmitter = {
      &x86::AssemblerX86::imul, &x86::AssemblerX86::imul};
  emitIASOpTyGPR(Func, OpTy, getSrc(1), OneOpEmitter);
}
template <> void InstX8632Cbwdq::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
......
......@@ -837,7 +837,7 @@ typedef InstX8632BinopGPR<InstX8632::Or> InstX8632Or;
typedef InstX8632BinopXmm<InstX8632::Por, false> InstX8632Por;
typedef InstX8632BinopGPR<InstX8632::Xor> InstX8632Xor;
typedef InstX8632BinopXmm<InstX8632::Pxor, false> InstX8632Pxor;
typedef InstX8632Binop<InstX8632::Imul> InstX8632Imul;
typedef InstX8632BinopGPR<InstX8632::Imul> InstX8632Imul;
typedef InstX8632BinopXmm<InstX8632::Mulps, true> InstX8632Mulps;
typedef InstX8632BinopXmm<InstX8632::Mulss, false> InstX8632Mulss;
typedef InstX8632Binop<InstX8632::Pmull> InstX8632Pmull;
......@@ -850,8 +850,8 @@ typedef InstX8632Binop<InstX8632::Psll> InstX8632Psll;
typedef InstX8632Binop<InstX8632::Shr, true> InstX8632Shr;
typedef InstX8632Binop<InstX8632::Sar, true> InstX8632Sar;
typedef InstX8632Binop<InstX8632::Psra> InstX8632Psra;
typedef InstX8632Binop<InstX8632::Pcmpeq> InstX8632Pcmpeq;
typedef InstX8632Binop<InstX8632::Pcmpgt> InstX8632Pcmpgt;
typedef InstX8632BinopXmm<InstX8632::Pcmpeq, true> InstX8632Pcmpeq;
typedef InstX8632BinopXmm<InstX8632::Pcmpgt, true> InstX8632Pcmpgt;
// TODO: movss is only a binary operation when the source and dest
// operands are both registers. In other cases, it behaves like a copy
// (mov-like) operation. Eventually, InstX8632Movss should assert that
......@@ -1450,6 +1450,7 @@ template <> void InstX8632Subss::emit(const Cfg *Func) const;
template <> void InstX8632Div::emitIAS(const Cfg *Func) const;
template <> void InstX8632Idiv::emitIAS(const Cfg *Func) const;
template <> void InstX8632Imul::emitIAS(const Cfg *Func) const;
template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const;
template <> void InstX8632Movd::emitIAS(const Cfg *Func) const;
......
......@@ -1581,10 +1581,12 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
//
// The 8-bit version of imul only allows the form "imul r/m8"
// where T must be in eax.
if (isByteSizedArithType(Dest->getType()))
if (isByteSizedArithType(Dest->getType())) {
_mov(T, Src0, RegX8632::Reg_eax);
else
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
} else {
_mov(T, Src0);
}
_imul(T, Src1);
_mov(Dest, T);
break;
......
......@@ -1056,15 +1056,62 @@ void AssemblerX86::pmovsxdq(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst, src);
}
void AssemblerX86::pcmpeqq(XmmRegister dst, XmmRegister src) {
void AssemblerX86::pcmpeq(Type Ty, XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
EmitUint8(0x38);
EmitUint8(0x29);
if (isByteSizedArithType(Ty)) {
EmitUint8(0x74);
} else if (Ty == IceType_i16) {
EmitUint8(0x75);
} else {
EmitUint8(0x76);
}
EmitXmmRegisterOperand(dst, src);
}
// Encodes a packed compare-equal of xmm register dst against memory operand
// src. The byte sequence is 66 0F <op> followed by the operand encoding, where
// <op> is 74 for byte-sized types, 75 for IceType_i16 and 76 for anything else
// (PCMPEQB / PCMPEQW / PCMPEQD in the Intel manual).
void AssemblerX86::pcmpeq(Type Ty, XmmRegister dst, const Address &src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0x0F);
  if (isByteSizedArithType(Ty))
    EmitUint8(0x74);
  else
    EmitUint8(Ty == IceType_i16 ? 0x75 : 0x76);
  EmitOperand(dst, src);
}
// Encodes a packed signed compare-greater-than between two xmm registers.
// The byte sequence is 66 0F <op> followed by the register operand encoding,
// where <op> is 64 for byte-sized types, 65 for IceType_i16 and 66 for
// anything else (PCMPGTB / PCMPGTW / PCMPGTD in the Intel manual).
void AssemblerX86::pcmpgt(Type Ty, XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0x0F);
  if (isByteSizedArithType(Ty))
    EmitUint8(0x64);
  else
    EmitUint8(Ty == IceType_i16 ? 0x65 : 0x66);
  EmitXmmRegisterOperand(dst, src);
}
// Encodes a packed signed compare-greater-than of xmm register dst against
// memory operand src. The byte sequence is 66 0F <op> followed by the operand
// encoding, where <op> is 64 for byte-sized types, 65 for IceType_i16 and 66
// for anything else (PCMPGTB / PCMPGTW / PCMPGTD in the Intel manual).
void AssemblerX86::pcmpgt(Type Ty, XmmRegister dst, const Address &src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0x0F);
  if (isByteSizedArithType(Ty))
    EmitUint8(0x64);
  else
    EmitUint8(Ty == IceType_i16 ? 0x65 : 0x66);
  EmitOperand(dst, src);
}
void AssemblerX86::roundsd(XmmRegister dst, XmmRegister src,
RoundingMode mode) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
......@@ -1489,36 +1536,61 @@ void AssemblerX86::idiv(Type Ty, const Address &addr) {
EmitOperand(7, addr);
}
void AssemblerX86::imull(GPRRegister dst, GPRRegister src) {
void AssemblerX86::imul(Type Ty, GPRRegister dst, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
assert(Ty == IceType_i16 || Ty == IceType_i32);
if (Ty == IceType_i16)
EmitOperandSizeOverride();
EmitUint8(0x0F);
EmitUint8(0xAF);
EmitOperand(dst, Operand(src));
}
void AssemblerX86::imull(GPRRegister reg, const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x69);
EmitOperand(reg, Operand(reg));
EmitImmediate(BrokenType, imm);
EmitRegisterOperand(dst, src);
}
void AssemblerX86::imull(GPRRegister reg, const Address &address) {
// Encodes the two-operand "imul reg, [address]" form (0F AF /r). Only i16 and
// i32 are accepted; i16 is selected with an operand-size override prefix.
void AssemblerX86::imul(Type Ty, GPRRegister reg, const Address &address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  const bool IsWord = (Ty == IceType_i16);
  assert(IsWord || Ty == IceType_i32);
  if (IsWord) {
    EmitOperandSizeOverride();
  }
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg, address);
}
void AssemblerX86::imull(GPRRegister reg) {
void AssemblerX86::imul(Type Ty, GPRRegister reg, const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF7);
EmitOperand(5, Operand(reg));
assert(Ty == IceType_i16 || Ty == IceType_i32);
if (Ty == IceType_i16)
EmitOperandSizeOverride();
if (imm.is_int8()) {
EmitUint8(0x6B);
EmitRegisterOperand(reg, reg);
EmitUint8(imm.value() & 0xFF);
} else {
EmitUint8(0x69);
EmitRegisterOperand(reg, reg);
EmitImmediate(Ty, imm);
}
}
void AssemblerX86::imull(const Address &address) {
void AssemblerX86::imul(Type Ty, GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF7);
if (Ty == IceType_i16)
EmitOperandSizeOverride();
if (isByteSizedArithType(Ty))
EmitUint8(0xF6);
else
EmitUint8(0xF7);
EmitRegisterOperand(5, reg);
}
// Encodes the one-operand "imul [address]" form. Byte-sized types use opcode
// F6 and all other types use F7; i16 additionally gets an operand-size
// override prefix. The value 5 handed to EmitOperand is the /5 opcode
// extension that the Intel manual assigns to one-operand imul.
void AssemblerX86::imul(Type Ty, const Address &address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (Ty == IceType_i16) {
    EmitOperandSizeOverride();
  }
  EmitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
  EmitOperand(5, address);
}
......
......@@ -535,7 +535,11 @@ public:
void pextrd(GPRRegister dst, XmmRegister src, const Immediate &imm);
void pmovsxdq(XmmRegister dst, XmmRegister src);
void pcmpeqq(XmmRegister dst, XmmRegister src);
void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
enum RoundingMode {
kRoundToNearest = 0x0,
......@@ -609,12 +613,12 @@ public:
void idiv(Type Ty, GPRRegister reg);
void idiv(Type Ty, const Address &address);
void imull(GPRRegister dst, GPRRegister src);
void imull(GPRRegister reg, const Immediate &imm);
void imull(GPRRegister reg, const Address &address);
void imul(Type Ty, GPRRegister dst, GPRRegister src);
void imul(Type Ty, GPRRegister reg, const Immediate &imm);
void imul(Type Ty, GPRRegister reg, const Address &address);
void imull(GPRRegister reg);
void imull(const Address &address);
void imul(Type Ty, GPRRegister reg);
void imul(Type Ty, const Address &address);
void mul(Type Ty, GPRRegister reg);
void mul(Type Ty, const Address &address);
......
......@@ -171,5 +171,88 @@ entry:
; CHECK-LABEL: testSub8Imm8
; CHECK: 2c 7d sub al, 125
; imul has some shorter 8-bit immediate encodings.
; It also has a shorter encoding for eax, but we don't do that yet.
; i16 multiply by 99, then add 1 and zero-extend back to i32. 99 fits in a
; signed 8-bit immediate, so the expected encoding after this function is the
; short 6b form with a 66 operand-size prefix.
define internal i32 @testMul16Imm8(i32 %arg) {
entry:
%arg_i16 = trunc i32 %arg to i16
%tmp = mul i16 %arg_i16, 99
%result_i16 = add i16 %tmp, 1
%result = zext i16 %result_i16 to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Imm8
; CHECK: 66 6b c0 63 imul ax, ax, 99
; CHECK-NEXT: add ax, 1
; i16 multiply by -111, then add 1 and zero-extend back to i32. -111 still
; fits in a signed 8-bit immediate (byte 0x91), so the short 6b form with a 66
; prefix is expected.
define internal i32 @testMul16Imm8Neg(i32 %arg) {
entry:
%arg_i16 = trunc i32 %arg to i16
%tmp = mul i16 %arg_i16, -111
%result_i16 = add i16 %tmp, 1
%result = zext i16 %result_i16 to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Imm8Neg
; CHECK: 66 6b c0 91 imul ax, ax, 145
; CHECK-NEXT: add ax, 1
; i16 multiply by 1024, then add 1 and zero-extend back to i32. 1024 does not
; fit in 8 bits, so the expected encoding is the 69 form with a 16-bit
; immediate (bytes 00 04) and a 66 prefix.
define internal i32 @testMul16Imm16(i32 %arg) {
entry:
%arg_i16 = trunc i32 %arg to i16
%tmp = mul i16 %arg_i16, 1024
%result_i16 = add i16 %tmp, 1
%result = zext i16 %result_i16 to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Imm16
; CHECK: 66 69 c0 00 04 imul ax, ax, 1024
; CHECK-NEXT: add ax, 1
; i16 multiply by -256, then add 1 and zero-extend back to i32. -256 is below
; the signed 8-bit range, so the expected encoding is the 69 form with a
; 16-bit immediate (bytes 00 ff) and a 66 prefix.
define internal i32 @testMul16Imm16Neg(i32 %arg) {
entry:
%arg_i16 = trunc i32 %arg to i16
%tmp = mul i16 %arg_i16, -256
%result_i16 = add i16 %tmp, 1
%result = zext i16 %result_i16 to i32
ret i32 %result
}
; CHECK-LABEL: testMul16Imm16Neg
; CHECK: 66 69 c0 00 ff imul ax, ax, 65280
; CHECK-NEXT: add ax, 1
; i32 multiply by 99. 99 fits in a signed 8-bit immediate, so the expected
; encoding is the short 6b form with no prefix.
define internal i32 @testMul32Imm8(i32 %arg) {
entry:
%result = mul i32 %arg, 99
ret i32 %result
}
; CHECK-LABEL: testMul32Imm8
; CHECK: 6b c0 63 imul eax, eax, 99
; i32 multiply by -111. -111 fits in a signed 8-bit immediate (byte 0x91), so
; the expected encoding is the short 6b form with no prefix.
define internal i32 @testMul32Imm8Neg(i32 %arg) {
entry:
%result = mul i32 %arg, -111
ret i32 %result
}
; CHECK-LABEL: testMul32Imm8Neg
; CHECK: 6b c0 91 imul eax, eax, -111
; i32 multiply by 1024. 1024 does not fit in 8 bits, so the expected encoding
; is the 69 form with a full 32-bit immediate (bytes 00 04 00 00).
define internal i32 @testMul32Imm16(i32 %arg) {
entry:
%result = mul i32 %arg, 1024
ret i32 %result
}
; CHECK-LABEL: testMul32Imm16
; CHECK: 69 c0 00 04 00 00 imul eax, eax, 1024
; i32 multiply by -256. -256 is below the signed 8-bit range, so the expected
; encoding is the 69 form with a full 32-bit immediate (bytes 00 ff ff ff).
define internal i32 @testMul32Imm16Neg(i32 %arg) {
entry:
%result = mul i32 %arg, -256
ret i32 %result
}
; CHECK-LABEL: testMul32Imm16Neg
; CHECK: 69 c0 00 ff ff ff imul eax, eax, 4294967040
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
; This tries to be a comprehensive test of i8 operations.
; RUN: %p2i -i %s --args -O2 --verbose none \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
; RUN: %p2i -i %s --args -Om1 --verbose none \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
; RUN: %p2i -i %s --args --verbose none | FileCheck --check-prefix=ERRORS %s
; RUN: %p2i -i %s --insts | %szdiff %s | FileCheck --check-prefix=DUMP %s
; i8 add of both arguments (b + a) after truncation; the result is
; zero-extended back to i32. The expected add uses an 8-bit register
; (al/bl/cl/dl).
define internal i32 @add8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%add = add i8 %b_8, %a_8
%ret = zext i8 %add to i32
ret i32 %ret
}
; CHECK-LABEL: add8Bit
; CHECK: add {{[abcd]l}}
; i8 add of the truncated first argument and the constant 123; %b is unused.
; The expected add uses an 8-bit register (al/bl/cl/dl).
define internal i32 @add8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%add = add i8 %a_8, 123
%ret = zext i8 %add to i32
ret i32 %ret
}
; CHECK-LABEL: add8BitConst
; CHECK: add {{[abcd]l}}
; i8 subtract of the truncated arguments (b - a); the result is zero-extended
; back to i32.
; NOTE(review): the sub expectation after this function uses the XCHECK
; prefix, which none of the run lines select, so it is presumably not
; enforced -- confirm that this is intentional.
define internal i32 @sub8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%sub = sub i8 %b_8, %a_8
%ret = zext i8 %sub to i32
ret i32 %ret
}
; CHECK-LABEL: sub8Bit
; XCHECK: sub {{[abcd]l}}
; i8 subtract of the constant 123 from the truncated first argument; %b is
; unused.
; NOTE(review): the sub expectation after this function uses the XCHECK
; prefix, which none of the run lines select, so it is presumably not
; enforced -- confirm that this is intentional.
define internal i32 @sub8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%sub = sub i8 %a_8, 123
%ret = zext i8 %sub to i32
ret i32 %ret
}
; CHECK-LABEL: sub8BitConst
; XCHECK: sub {{[abcd]l}}
; i8 multiply of the truncated arguments (b * a); the result is zero-extended
; back to i32. The expected multiply takes an 8-bit register or a byte memory
; operand.
define internal i32 @mul8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%mul = mul i8 %b_8, %a_8
%ret = zext i8 %mul to i32
ret i32 %ret
}
; CHECK-LABEL: mul8Bit
; CHECK: mul {{[abcd]l|byte ptr}}
; i8 multiply of the truncated first argument by the constant 56; %b is
; unused. The 8-bit imul has no immediate form, so the expected output first
; moves 56 into a register or memory operand and then multiplies by it.
define internal i32 @mul8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%mul = mul i8 %a_8, 56
%ret = zext i8 %mul to i32
ret i32 %ret
}
; CHECK-LABEL: mul8BitConst
; 8-bit imul only accepts r/m, not imm
; CHECK: mov {{.*}}, 56
; CHECK: mul {{[abcd]l|byte ptr}}
; i8 unsigned divide of the truncated arguments (b / a); the quotient is
; zero-extended back to i32. The expected div takes an 8-bit register or a
; byte memory operand.
define internal i32 @udiv8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%udiv = udiv i8 %b_8, %a_8
%ret = zext i8 %udiv to i32
ret i32 %ret
}
; CHECK-LABEL: udiv8Bit
; CHECK: div {{[abcd]l|byte ptr}}
; i8 unsigned divide of the truncated first argument by the constant 123; %b
; is unused. The expected div takes an 8-bit register or a byte memory
; operand.
define internal i32 @udiv8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%udiv = udiv i8 %a_8, 123
%ret = zext i8 %udiv to i32
ret i32 %ret
}
; CHECK-LABEL: udiv8BitConst
; CHECK: div {{[abcd]l|byte ptr}}
; i8 unsigned remainder of the truncated arguments (b % a); the result is
; zero-extended back to i32. The remainder is expected to be produced by a
; div on an 8-bit register or a byte memory operand.
define internal i32 @urem8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%urem = urem i8 %b_8, %a_8
%ret = zext i8 %urem to i32
ret i32 %ret
}
; CHECK-LABEL: urem8Bit
; CHECK: div {{[abcd]l|byte ptr}}
; i8 unsigned remainder of the truncated first argument by the constant 123;
; %b is unused. The remainder is expected to be produced by a div on an 8-bit
; register or a byte memory operand.
define internal i32 @urem8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%urem = urem i8 %a_8, 123
%ret = zext i8 %urem to i32
ret i32 %ret
}
; CHECK-LABEL: urem8BitConst
; CHECK: div {{[abcd]l|byte ptr}}
; i8 signed divide of the truncated arguments (b / a); the quotient is
; zero-extended back to i32. The expected idiv takes an 8-bit register or a
; byte memory operand.
define internal i32 @sdiv8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%sdiv = sdiv i8 %b_8, %a_8
%ret = zext i8 %sdiv to i32
ret i32 %ret
}
; CHECK-LABEL: sdiv8Bit
; CHECK: idiv {{[abcd]l|byte ptr}}
; i8 signed divide of the truncated first argument by the constant 123; %b is
; unused. The expected idiv takes an 8-bit register or a byte memory operand.
define internal i32 @sdiv8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%sdiv = sdiv i8 %a_8, 123
%ret = zext i8 %sdiv to i32
ret i32 %ret
}
; CHECK-LABEL: sdiv8BitConst
; CHECK: idiv {{[abcd]l|byte ptr}}
; i8 signed remainder of the truncated arguments (b % a); the result is
; zero-extended back to i32. The remainder is expected to be produced by an
; idiv on an 8-bit register or a byte memory operand.
define internal i32 @srem8Bit(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%b_8 = trunc i32 %b to i8
%srem = srem i8 %b_8, %a_8
%ret = zext i8 %srem to i32
ret i32 %ret
}
; CHECK-LABEL: srem8Bit
; CHECK: idiv {{[abcd]l|byte ptr}}
; i8 signed remainder of the truncated first argument by the constant 123; %b
; is unused. The remainder is expected to be produced by an idiv on an 8-bit
; register or a byte memory operand.
define internal i32 @srem8BitConst(i32 %a, i32 %b) {
entry:
%a_8 = trunc i32 %a to i8
%srem = srem i8 %a_8, 123
%ret = zext i8 %srem to i32
ret i32 %ret
}
; CHECK-LABEL: srem8BitConst
; CHECK: idiv {{[abcd]l|byte ptr}}
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment