Commit f48b320c by Jim Stichnoth

Subzero: Use a setcc sequence for better icmp lowering.

For an example like:
  %a = icmp eq i32 %b, %c
The original icmp lowering sequence for i8/i16/i32 was something like:
  cmpl b, c
  movb 1, a
  je label
  movb 0, a
label:
The improved sequence is:
  cmpl b, c
  sete a
In O2 mode, this doesn't help when successive compare/branch instructions are fused, but it does help when the boolean result needs to be saved and later used.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1118353005
parent 0e7e412e
......@@ -339,6 +339,9 @@ InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
addSource(Source);
}
InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond)
: InstX8632(Func, InstX8632::Setcc, 0, Dest), Condition(Cond) {}
InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked)
: InstX8632Lockable(Func, InstX8632::Xadd, 2,
......@@ -2726,6 +2729,35 @@ void InstX8632Ret::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstX8632Setcc::emit(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\tset" << InstX8632BrAttributes[Condition].DisplayString << "\t";
Dest->emit(Func);
}
void InstX8632Setcc::emitIAS(const Cfg *Func) const {
assert(Condition != CondX86::Br_None);
assert(getDest()->getType() == IceType_i1);
assert(getSrcSize() == 0);
X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
if (getDest()->hasReg())
Asm->setcc(Condition, RegX8632::getEncodedByteReg(getDest()->getRegNum()));
else
Asm->setcc(Condition, static_cast<TargetX8632 *>(Func->getTarget())
->stackVarToAsmOperand(getDest()));
return;
}
void InstX8632Setcc::dump(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "setcc." << InstX8632BrAttributes[Condition].DisplayString << " ";
dumpDest(Func);
}
void InstX8632Xadd::emit(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
......
......@@ -242,6 +242,7 @@ public:
Rol,
Sar,
Sbb,
Setcc,
Shl,
Shld,
Shr,
......@@ -1585,6 +1586,30 @@ private:
~InstX8632Ret() override {}
};
// Conditional set-byte instruction (x86 "set<cc>").  It has a destination
// variable and no source operands; the stored condition code selects which
// set<cc> variant is emitted.  Instances are obtained only through create().
class InstX8632Setcc : public InstX8632 {
  // Not default-constructible, copyable, or assignable.
  InstX8632Setcc() = delete;
  InstX8632Setcc(const InstX8632Setcc &) = delete;
  InstX8632Setcc &operator=(const InstX8632Setcc &) = delete;

public:
  // Constructs a new instruction in storage obtained from Func->allocate()
  // and returns a pointer to it.
  static InstX8632Setcc *create(Cfg *Func, Variable *Dest,
                                CondX86::BrCond Cond) {
    return new (Func->allocate<InstX8632Setcc>())
        InstX8632Setcc(Func, Dest, Cond);
  }
  // Textual assembly output.
  void emit(const Cfg *Func) const override;
  // Integrated-assembler (binary) output.
  void emitIAS(const Cfg *Func) const override;
  // Debug dump output.
  void dump(const Cfg *Func) const override;
  // Reports whether Inst is a Setcc instruction, based on its kind tag.
  static bool classof(const Inst *Inst) { return isClassof(Inst, Setcc); }

private:
  InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond);
  ~InstX8632Setcc() override {}
  // Condition under which the destination byte is set; fixed at construction.
  const CondX86::BrCond Condition;
};
// Exchanging Add instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand), then loads the sum
// of the two values into the destination operand. The destination may be
......
......@@ -2748,12 +2748,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
// cmp b, c
Operand *Src0RM =
legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
InstX8632Label *Label = InstX8632Label::create(Func, this);
_cmp(Src0RM, Src1);
_mov(Dest, One);
_br(getIcmp32Mapping(Inst->getCondition()), Label);
_mov_nonkillable(Dest, Zero);
Context.insert(Label);
_setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
}
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
......
......@@ -415,6 +415,9 @@ protected:
// Creates an InstX8632Sbb from Dest and Src0 and inserts it via
// Context.insert().
void _sbb(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Sbb::create(Func, Dest, Src0));
}
// Creates an InstX8632Setcc that writes Dest under the given Condition and
// inserts it via Context.insert().
void _setcc(Variable *Dest, CondX86::BrCond Condition) {
  InstX8632Setcc *NewInst = InstX8632Setcc::create(Func, Dest, Condition);
  Context.insert(NewInst);
}
// Creates an InstX8632Shl from Dest and Src0 and inserts it via
// Context.insert().
void _shl(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Shl::create(Func, Dest, Src0));
}
......
......@@ -156,6 +156,13 @@ void AssemblerX8632::setcc(CondX86::BrCond condition, ByteRegister dst) {
EmitUint8(0xC0 + dst);
}
// Encodes setcc with a memory destination: the byte 0x0F, then 0x90 plus the
// condition code, then the operand encoding for `address` (emitted with a
// first argument of 0).
void AssemblerX8632::setcc(CondX86::BrCond condition, const Address &address) {
  constexpr int EscapeByte = 0x0F;
  constexpr int SetccOpcodeBase = 0x90;
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(EscapeByte);
  EmitUint8(SetccOpcodeBase + condition);
  EmitOperand(0, address);
}
void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
if (isByteSizedType(Ty)) {
......
......@@ -492,6 +492,7 @@ public:
void popal();
void setcc(CondX86::BrCond condition, ByteRegister dst);
void setcc(CondX86::BrCond condition, const Address &address);
void mov(Type Ty, GPRRegister dst, const Immediate &src);
void mov(Type Ty, GPRRegister dst, GPRRegister src);
......
......@@ -96,22 +96,23 @@ entry:
; boundary should not trigger nop padding.
define void @label_at_boundary(i32 %arg) {
entry:
%cmp = icmp eq i32 %arg, 0
call void @call_target()
; bundle boundary
%addr_short = bitcast [2 x i8]* @global_short to i16*
store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
%cmp = icmp eq i32 %arg, 0 ; 23-byte lowering sequence
%blah = select i1 %cmp, i32 3, i32 5 ; 23-byte lowering sequence
; label is here
store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
ret void
}
; CHECK-LABEL: label_at_boundary
; CHECK: call
; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for icmp.
; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for select.
; CHECK-NEXT: 20: {{.*}} mov WORD PTR
; CHECK-NEXT: 29: {{.*}} cmp DWORD PTR
; CHECK-NEXT: 29: {{.*}} cmp BYTE PTR
; CHECK-NEXT: 2e: {{.*}} mov DWORD PTR
; CHECK-NEXT: 36: {{.*}} je 40
; CHECK-NEXT: 36: {{.*}} jne 40
; CHECK-NEXT: 38: {{.*}} mov DWORD PTR
; CHECK-NEXT: 40: {{.*}} mov WORD PTR
......
......@@ -282,7 +282,7 @@ entry:
}
; CHECK-LABEL: selectI8Var
; CHECK: cmp
; CHECK: jl
; CHECK: setl
; CHECK: mov {{[a-d]l}}
define internal i32 @testPhi8(i32 %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
......
......@@ -53,7 +53,7 @@ target:
; OM1-LABEL: testCondFallthroughToNextBlock
; OM1: cmp {{.*}},0x7b
; OM1: jge
; OM1: setge
; OM1: cmp
; OM1: jne
; OM1: jmp
......@@ -88,7 +88,7 @@ target:
; OM1-LABEL: testCondTargetNextBlock
; OM1: cmp {{.*}},0x7b
; OM1: jge
; OM1: setge
; OM1: cmp
; OM1: jne
; OM1: jmp
......
......@@ -41,7 +41,7 @@ done:
; OM1-LABEL: test_atomic_cmpxchg_loop
; OM1: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
; OM1: cmp
; OM1: je
; OM1: sete
; OM1: call
; Still works if the compare operands are flipped.
......@@ -130,4 +130,4 @@ done:
; O2: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
; O2: mov {{.*}}
; O2: cmp
; O2: je
; O2: sete
......@@ -22,9 +22,7 @@ target:
; put in the right place.
; CHECK-LABEL: testPhi1
; CHECK: cmp {{.*}},0x0
; CHECK: mov {{.*}},0x1
; CHECK: jg
; CHECK: mov {{.*}},0x0
; CHECK: setg
; CHECK: mov [[PHI:.*]],
; CHECK: cmp {{.*}},0x0
; CHECK: je
......
......@@ -46,5 +46,5 @@ for.end:
; such atrocious code (by design).
; OPTM1-LABEL: simple_loop
; OPTM1: cmp {{.*}},0x0
; OPTM1: jg
; OPTM1: setl
; OPTM1: ret
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment