Commit f48b320c by Jim Stichnoth

Subzero: Use a setcc sequence for better icmp lowering.

For an example like:

    %a = icmp eq i32 %b, %c

The original icmp lowering sequence for i8/i16/i32 was something like:

    cmpl b, c
    movb 1, a
    je label
    movb 0, a
    label:

The improved sequence is:

    cmpl b, c
    sete a

In O2 mode, this doesn't help when successive compare/branch instructions are fused, but it does help when the boolean result needs to be saved and later used.

BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/1118353005
parent 0e7e412e
...@@ -339,6 +339,9 @@ InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source) ...@@ -339,6 +339,9 @@ InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
addSource(Source); addSource(Source);
} }
InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond)
: InstX8632(Func, InstX8632::Setcc, 0, Dest), Condition(Cond) {}
InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked) bool Locked)
: InstX8632Lockable(Func, InstX8632::Xadd, 2, : InstX8632Lockable(Func, InstX8632::Xadd, 2,
...@@ -2726,6 +2729,35 @@ void InstX8632Ret::dump(const Cfg *Func) const { ...@@ -2726,6 +2729,35 @@ void InstX8632Ret::dump(const Cfg *Func) const {
dumpSources(Func); dumpSources(Func);
} }
void InstX8632Setcc::emit(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\tset" << InstX8632BrAttributes[Condition].DisplayString << "\t";
Dest->emit(Func);
}
void InstX8632Setcc::emitIAS(const Cfg *Func) const {
assert(Condition != CondX86::Br_None);
assert(getDest()->getType() == IceType_i1);
assert(getSrcSize() == 0);
X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
if (getDest()->hasReg())
Asm->setcc(Condition, RegX8632::getEncodedByteReg(getDest()->getRegNum()));
else
Asm->setcc(Condition, static_cast<TargetX8632 *>(Func->getTarget())
->stackVarToAsmOperand(getDest()));
return;
}
void InstX8632Setcc::dump(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "setcc." << InstX8632BrAttributes[Condition].DisplayString << " ";
dumpDest(Func);
}
void InstX8632Xadd::emit(const Cfg *Func) const { void InstX8632Xadd::emit(const Cfg *Func) const {
if (!ALLOW_DUMP) if (!ALLOW_DUMP)
return; return;
......
...@@ -242,6 +242,7 @@ public: ...@@ -242,6 +242,7 @@ public:
Rol, Rol,
Sar, Sar,
Sbb, Sbb,
Setcc,
Shl, Shl,
Shld, Shld,
Shr, Shr,
...@@ -1585,6 +1586,30 @@ private: ...@@ -1585,6 +1586,30 @@ private:
~InstX8632Ret() override {} ~InstX8632Ret() override {}
}; };
// Conditional set-byte instruction.  It has a destination variable and a
// branch-style condition code, and no source operands of its own.
// Instances are created only through create(); the constructor and
// destructor are private, and copying is disabled.
class InstX8632Setcc : public InstX8632 {
  InstX8632Setcc() = delete;
  // Delete this class's own copy constructor.  (This previously named
  // InstX8632Cmov as the parameter type, which did not declare a copy
  // constructor for InstX8632Setcc at all.)
  InstX8632Setcc(const InstX8632Setcc &) = delete;
  InstX8632Setcc &operator=(const InstX8632Setcc &) = delete;

public:
  // Constructs a new instruction in storage obtained from Func->allocate().
  //   Func - the Cfg that supplies the storage.
  //   Dest - the variable the instruction writes.
  //   Cond - the condition code selecting the setcc variant.
  static InstX8632Setcc *create(Cfg *Func, Variable *Dest,
                                CondX86::BrCond Cond) {
    return new (Func->allocate<InstX8632Setcc>())
        InstX8632Setcc(Func, Dest, Cond);
  }
  void emit(const Cfg *Func) const override;
  void emitIAS(const Cfg *Func) const override;
  void dump(const Cfg *Func) const override;
  // Reports whether Inst is tagged with the Setcc instruction kind.
  static bool classof(const Inst *Inst) { return isClassof(Inst, Setcc); }

private:
  InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond);
  ~InstX8632Setcc() override {}

  // Condition code fixed at construction time.
  const CondX86::BrCond Condition;
};
// Exchanging Add instruction. Exchanges the first operand (destination // Exchanging Add instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand), then loads the sum // operand) with the second operand (source operand), then loads the sum
// of the two values into the destination operand. The destination may be // of the two values into the destination operand. The destination may be
......
...@@ -2748,12 +2748,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2748,12 +2748,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
// cmp b, c // cmp b, c
Operand *Src0RM = Operand *Src0RM =
legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
InstX8632Label *Label = InstX8632Label::create(Func, this);
_cmp(Src0RM, Src1); _cmp(Src0RM, Src1);
_mov(Dest, One); _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
_br(getIcmp32Mapping(Inst->getCondition()), Label);
_mov_nonkillable(Dest, Zero);
Context.insert(Label);
} }
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
......
...@@ -415,6 +415,9 @@ protected: ...@@ -415,6 +415,9 @@ protected:
void _sbb(Variable *Dest, Operand *Src0) { void _sbb(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Sbb::create(Func, Dest, Src0)); Context.insert(InstX8632Sbb::create(Func, Dest, Src0));
} }
// Creates a setcc instruction that writes Dest according to Condition and
// inserts it into the current lowering context.
void _setcc(Variable *Dest, CondX86::BrCond Condition) {
  auto *SetInst = InstX8632Setcc::create(Func, Dest, Condition);
  Context.insert(SetInst);
}
void _shl(Variable *Dest, Operand *Src0) { void _shl(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Shl::create(Func, Dest, Src0)); Context.insert(InstX8632Shl::create(Func, Dest, Src0));
} }
......
...@@ -156,6 +156,13 @@ void AssemblerX8632::setcc(CondX86::BrCond condition, ByteRegister dst) { ...@@ -156,6 +156,13 @@ void AssemblerX8632::setcc(CondX86::BrCond condition, ByteRegister dst) {
EmitUint8(0xC0 + dst); EmitUint8(0xC0 + dst);
} }
// Emits setcc with a memory destination.  The byte stream is 0x0F, then
// 0x90 plus the condition code, then the operand encoding for `address`
// with 0 passed as EmitOperand's first argument.
// NOTE(review): this matches the x86 SETcc r/m8 form (0F 90+cc); confirm the
// meaning of EmitOperand's first argument against its definition.
void AssemblerX8632::setcc(CondX86::BrCond condition, const Address &address) {
  const int escape_byte = 0x0F;
  const int setcc_opcode_base = 0x90;
  const int operand_field = 0;

  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(escape_byte);
  EmitUint8(setcc_opcode_base + condition);
  EmitOperand(operand_field, address);
}
void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) { void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_); AssemblerBuffer::EnsureCapacity ensured(&buffer_);
if (isByteSizedType(Ty)) { if (isByteSizedType(Ty)) {
......
...@@ -492,6 +492,7 @@ public: ...@@ -492,6 +492,7 @@ public:
void popal(); void popal();
void setcc(CondX86::BrCond condition, ByteRegister dst); void setcc(CondX86::BrCond condition, ByteRegister dst);
void setcc(CondX86::BrCond condition, const Address &address);
void mov(Type Ty, GPRRegister dst, const Immediate &src); void mov(Type Ty, GPRRegister dst, const Immediate &src);
void mov(Type Ty, GPRRegister dst, GPRRegister src); void mov(Type Ty, GPRRegister dst, GPRRegister src);
......
...@@ -96,22 +96,23 @@ entry: ...@@ -96,22 +96,23 @@ entry:
; boundary should not trigger nop padding. ; boundary should not trigger nop padding.
define void @label_at_boundary(i32 %arg) { define void @label_at_boundary(i32 %arg) {
entry: entry:
%cmp = icmp eq i32 %arg, 0
call void @call_target() call void @call_target()
; bundle boundary ; bundle boundary
%addr_short = bitcast [2 x i8]* @global_short to i16* %addr_short = bitcast [2 x i8]* @global_short to i16*
store i16 0, i16* %addr_short, align 1 ; 9-byte instruction store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
%cmp = icmp eq i32 %arg, 0 ; 23-byte lowering sequence %blah = select i1 %cmp, i32 3, i32 5 ; 23-byte lowering sequence
; label is here ; label is here
store i16 0, i16* %addr_short, align 1 ; 9-byte instruction store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
ret void ret void
} }
; CHECK-LABEL: label_at_boundary ; CHECK-LABEL: label_at_boundary
; CHECK: call ; CHECK: call
; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for icmp. ; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for select.
; CHECK-NEXT: 20: {{.*}} mov WORD PTR ; CHECK-NEXT: 20: {{.*}} mov WORD PTR
; CHECK-NEXT: 29: {{.*}} cmp DWORD PTR ; CHECK-NEXT: 29: {{.*}} cmp BYTE PTR
; CHECK-NEXT: 2e: {{.*}} mov DWORD PTR ; CHECK-NEXT: 2e: {{.*}} mov DWORD PTR
; CHECK-NEXT: 36: {{.*}} je 40 ; CHECK-NEXT: 36: {{.*}} jne 40
; CHECK-NEXT: 38: {{.*}} mov DWORD PTR ; CHECK-NEXT: 38: {{.*}} mov DWORD PTR
; CHECK-NEXT: 40: {{.*}} mov WORD PTR ; CHECK-NEXT: 40: {{.*}} mov WORD PTR
......
...@@ -282,7 +282,7 @@ entry: ...@@ -282,7 +282,7 @@ entry:
} }
; CHECK-LABEL: selectI8Var ; CHECK-LABEL: selectI8Var
; CHECK: cmp ; CHECK: cmp
; CHECK: jl ; CHECK: setl
; CHECK: mov {{[a-d]l}} ; CHECK: mov {{[a-d]l}}
define internal i32 @testPhi8(i32 %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) { define internal i32 @testPhi8(i32 %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
......
...@@ -53,7 +53,7 @@ target: ...@@ -53,7 +53,7 @@ target:
; OM1-LABEL: testCondFallthroughToNextBlock ; OM1-LABEL: testCondFallthroughToNextBlock
; OM1: cmp {{.*}},0x7b ; OM1: cmp {{.*}},0x7b
; OM1: jge ; OM1: setge
; OM1: cmp ; OM1: cmp
; OM1: jne ; OM1: jne
; OM1: jmp ; OM1: jmp
...@@ -88,7 +88,7 @@ target: ...@@ -88,7 +88,7 @@ target:
; OM1-LABEL: testCondTargetNextBlock ; OM1-LABEL: testCondTargetNextBlock
; OM1: cmp {{.*}},0x7b ; OM1: cmp {{.*}},0x7b
; OM1: jge ; OM1: setge
; OM1: cmp ; OM1: cmp
; OM1: jne ; OM1: jne
; OM1: jmp ; OM1: jmp
......
...@@ -41,7 +41,7 @@ done: ...@@ -41,7 +41,7 @@ done:
; OM1-LABEL: test_atomic_cmpxchg_loop ; OM1-LABEL: test_atomic_cmpxchg_loop
; OM1: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}} ; OM1: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
; OM1: cmp ; OM1: cmp
; OM1: je ; OM1: sete
; OM1: call ; OM1: call
; Still works if the compare operands are flipped. ; Still works if the compare operands are flipped.
...@@ -130,4 +130,4 @@ done: ...@@ -130,4 +130,4 @@ done:
; O2: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}} ; O2: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
; O2: mov {{.*}} ; O2: mov {{.*}}
; O2: cmp ; O2: cmp
; O2: je ; O2: sete
...@@ -22,9 +22,7 @@ target: ...@@ -22,9 +22,7 @@ target:
; put in the right place. ; put in the right place.
; CHECK-LABEL: testPhi1 ; CHECK-LABEL: testPhi1
; CHECK: cmp {{.*}},0x0 ; CHECK: cmp {{.*}},0x0
; CHECK: mov {{.*}},0x1 ; CHECK: setg
; CHECK: jg
; CHECK: mov {{.*}},0x0
; CHECK: mov [[PHI:.*]], ; CHECK: mov [[PHI:.*]],
; CHECK: cmp {{.*}},0x0 ; CHECK: cmp {{.*}},0x0
; CHECK: je ; CHECK: je
......
...@@ -46,5 +46,5 @@ for.end: ...@@ -46,5 +46,5 @@ for.end:
; such atrocious code (by design). ; such atrocious code (by design).
; OPTM1-LABEL: simple_loop ; OPTM1-LABEL: simple_loop
; OPTM1: cmp {{.*}},0x0 ; OPTM1: cmp {{.*}},0x0
; OPTM1: jg ; OPTM1: setl
; OPTM1: ret ; OPTM1: ret
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment