Commit daf096cd by David Sehr

Improve bool folding

Fold and/or followed by branch to eliminate cmp. Also, fold fcmp instructions into branches similarly to what was done for icmp instructions. BUG= R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1436623002 .
parent 3e859b73
...@@ -720,9 +720,11 @@ private: ...@@ -720,9 +720,11 @@ private:
void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi, void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi,
Operand *Src1Lo, Variable *DestLo, Variable *DestHi); Operand *Src1Lo, Variable *DestLo, Variable *DestHi);
/// Emit the code for a combined compare and branch, or sets the destination /// Emit the code for a combined operation and branch, or set the destination
/// variable of the compare if branch is nullptr. /// variable of the operation if Br == nullptr.
void lowerIcmpAndBr(const InstIcmp *Icmp, const InstBr *Br); void lowerIcmpAndBr(const InstIcmp *Icmp, const InstBr *Br);
void lowerFcmpAndBr(const InstFcmp *Fcmp, const InstBr *Br);
void lowerArithAndBr(const InstArithmetic *Arith, const InstBr *Br);
/// Emit a setcc instruction if Br == nullptr; otherwise emit a branch. /// Emit a setcc instruction if Br == nullptr; otherwise emit a branch.
void setccOrBr(typename Traits::Cond::BrCond Condition, Variable *Dest, void setccOrBr(typename Traits::Cond::BrCond Condition, Variable *Dest,
......
...@@ -81,7 +81,8 @@ public: ...@@ -81,7 +81,8 @@ public:
PK_Icmp32, PK_Icmp32,
PK_Icmp64, PK_Icmp64,
PK_Fcmp, PK_Fcmp,
PK_Trunc PK_Trunc,
PK_Arith // A flag-setting arithmetic instruction.
}; };
/// Currently the actual enum values are not used (other than CK_None), but we /// Currently the actual enum values are not used (other than CK_None), but we
...@@ -125,10 +126,21 @@ BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { ...@@ -125,10 +126,21 @@ BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
return PK_Icmp32; return PK_Icmp32;
return PK_Icmp64; return PK_Icmp64;
} }
return PK_None; // TODO(stichnot): remove this
if (llvm::isa<InstFcmp>(Instr)) if (llvm::isa<InstFcmp>(Instr))
return PK_Fcmp; return PK_Fcmp;
if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
switch (Arith->getOp()) {
default:
return PK_None;
case InstArithmetic::And:
case InstArithmetic::Or:
return PK_Arith;
}
}
}
return PK_None; // TODO(stichnot): remove this
if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
switch (Cast->getCastKind()) { switch (Cast->getCastKind()) {
default: default:
...@@ -1925,9 +1937,16 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { ...@@ -1925,9 +1937,16 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst);
return; return;
} }
case BoolFolding::PK_Fcmp: {
lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst);
return;
}
case BoolFolding::PK_Arith: {
lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst);
return;
}
} }
} }
Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
Constant *Zero = Ctx->getConstantZero(IceType_i32); Constant *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(Src0, Zero); _cmp(Src0, Zero);
...@@ -2540,20 +2559,26 @@ void TargetX86Base<Machine>::lowerExtractElement( ...@@ -2540,20 +2559,26 @@ void TargetX86Base<Machine>::lowerExtractElement(
template <class Machine> template <class Machine>
void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
constexpr InstBr *Br = nullptr;
lowerFcmpAndBr(Inst, Br);
}
template <class Machine>
void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
const InstBr *Br) {
Operand *Src0 = Inst->getSrc(0); Operand *Src0 = Inst->getSrc(0);
Operand *Src1 = Inst->getSrc(1); Operand *Src1 = Inst->getSrc(1);
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
if (Br)
llvm::report_fatal_error("vector compare/branch cannot be folded");
InstFcmp::FCond Condition = Inst->getCondition(); InstFcmp::FCond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition); size_t Index = static_cast<size_t>(Condition);
assert(Index < Traits::TableFcmpSize); assert(Index < Traits::TableFcmpSize);
if (Traits::TableFcmp[Index].SwapVectorOperands) { if (Traits::TableFcmp[Index].SwapVectorOperands)
Operand *T = Src0; std::swap(Src0, Src1);
Src0 = Src1;
Src1 = T;
}
Variable *T = nullptr; Variable *T = nullptr;
...@@ -2633,24 +2658,39 @@ void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { ...@@ -2633,24 +2658,39 @@ void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
_ucomiss(T, Src1RM); _ucomiss(T, Src1RM);
if (!HasC2) { if (!HasC2) {
assert(Traits::TableFcmp[Index].Default); assert(Traits::TableFcmp[Index].Default);
_setcc(Dest, Traits::TableFcmp[Index].C1); setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br);
return; return;
} }
} }
Constant *Default = int32_t IntDefault = Traits::TableFcmp[Index].Default;
Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default); if (Br == nullptr) {
_mov(Dest, Default); Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
if (HasC1) { _mov(Dest, Default);
typename Traits::Insts::Label *Label = if (HasC1) {
Traits::Insts::Label::create(Func, this); typename Traits::Insts::Label *Label =
_br(Traits::TableFcmp[Index].C1, Label); Traits::Insts::Label::create(Func, this);
if (HasC2) { _br(Traits::TableFcmp[Index].C1, Label);
_br(Traits::TableFcmp[Index].C2, Label); if (HasC2) {
_br(Traits::TableFcmp[Index].C2, Label);
}
Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
_mov_redefined(Dest, NonDefault);
Context.insert(Label);
} }
Constant *NonDefault = } else {
Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default); CfgNode *TrueSucc = Br->getTargetTrue();
_mov_redefined(Dest, NonDefault); CfgNode *FalseSucc = Br->getTargetFalse();
Context.insert(Label); if (IntDefault != 0)
std::swap(TrueSucc, FalseSucc);
if (HasC1) {
_br(Traits::TableFcmp[Index].C1, FalseSucc);
if (HasC2) {
_br(Traits::TableFcmp[Index].C2, FalseSucc);
}
_br(TrueSucc);
return;
}
_br(FalseSucc);
} }
} }
...@@ -2960,6 +3000,37 @@ void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest, ...@@ -2960,6 +3000,37 @@ void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest,
} }
template <class Machine> template <class Machine>
/// Lower a flag-setting AND/OR (\p Arith) together with the conditional branch
/// (\p Br) that consumes its result, so that no separate compare is emitted.
///
/// \param Arith the producing arithmetic instruction; only InstArithmetic::And
///        and InstArithmetic::Or are handled.
/// \param Br the consuming branch. Unlike the compare-based folding helpers,
///        this function has no non-branch lowering, so \p Br must not be
///        nullptr; a null \p Br is reported as a fatal error.
void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith,
                                             const InstBr *Br) {
  // Fail fast, before any instruction is emitted, instead of dereferencing a
  // null branch at the end of the function.
  if (Br == nullptr)
    llvm::report_fatal_error("arithmetic/branch folding requires a branch");

  Variable *T = nullptr;
  Operand *Src0 = legalize(Arith->getSrc(0));
  Operand *Src1 = legalize(Arith->getSrc(1));
  Variable *Dest = Arith->getDest();
  switch (Arith->getOp()) {
  default:
    llvm_unreachable("arithmetic operator not AND or OR");
    break;
  case InstArithmetic::And:
    _mov(T, Src0);
    // Test cannot have an address in the second position. Since T is
    // guaranteed to be a register and Src1 could be a memory load, ensure
    // that the second argument is a register.
    if (llvm::isa<Constant>(Src1))
      _test(T, Src1);
    else
      _test(Src1, T);
    break;
  case InstArithmetic::Or:
    _mov(T, Src0);
    _or(T, Src1);
    break;
  }
  // NOTE(review): the fake use/def pair presumably keeps T live and marks Dest
  // as defined even though the arithmetic result is never copied into Dest --
  // confirm against the liveness analysis.
  Context.insert(InstFakeUse::create(Func, T));
  Context.insert(InstFakeDef::create(Func, Dest));
  // Branch on the flags produced by test/or: take the true target when the
  // result is non-zero, otherwise the false target.
  _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
}
template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0); Operand *SourceVectNotLegalized = Inst->getSrc(0);
Operand *ElementToInsertNotLegalized = Inst->getSrc(1); Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
......
; This tries to be a comprehensive test of f32 and f64 compare operations. ; This tries to be a comprehensive test of f32 and f64 compare operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \ ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols | FileCheck %s ; RUN: -allow-externally-defined-symbols | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \ ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols | FileCheck %s ; RUN: -allow-externally-defined-symbols | FileCheck %s \
; RUN: --check-prefix=CHECK-OM1
; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \ ; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
; RUN: --target arm32 -i %s --args -O2 \ ; RUN: --target arm32 -i %s --args -O2 \
...@@ -42,13 +40,21 @@ if.end3: ; preds = %if.then2, %if.end ...@@ -42,13 +40,21 @@ if.end3: ; preds = %if.then2, %if.end
} }
; CHECK-LABEL: fcmpEq ; CHECK-LABEL: fcmpEq
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: jne ; CHECK-NEXT: jne
; CHECK-NEXT: jp ; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: jne ; CHECK-NEXT: jne
; CHECK-NEXT: jp ; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func ; CHECK: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpEq
; CHECK-OM1: ucomiss
; CHECK-OM1: jne
; CHECK-OM1-NEXT: jp
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1: jne
; CHECK-OM1-NEXT: jp
; ARM32-LABEL: fcmpEq ; ARM32-LABEL: fcmpEq
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32: vmrs ; ARM32: vmrs
...@@ -86,13 +92,26 @@ if.end3: ; preds = %if.then2, %if.end ...@@ -86,13 +92,26 @@ if.end3: ; preds = %if.then2, %if.end
} }
; CHECK-LABEL: fcmpNe ; CHECK-LABEL: fcmpNe
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: jne ; CHECK-NEXT: jne
; CHECK-NEXT: jp ; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: jmp
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: jne ; CHECK-NEXT: jne
; CHECK-NEXT: jp ; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: jmp
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpNe
; CHECK-OM1: ucomiss
; CHECK-OM1: jne
; CHECK-OM1: jp
; CHECK-OM1: jmp
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1: jne
; CHECK-OM1: jp
; CHECK-OM1: jmp
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpNe ; ARM32-LABEL: fcmpNe
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32: vmrs ; ARM32: vmrs
...@@ -127,11 +146,18 @@ if.end3: ; preds = %if.then2, %if.end ...@@ -127,11 +146,18 @@ if.end3: ; preds = %if.then2, %if.end
} }
; CHECK-LABEL: fcmpGt ; CHECK-LABEL: fcmpGt
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: seta ; CHECK-NEXT: jbe
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: seta ; CHECK-NEXT: jbe
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpGt
; CHECK-OM1: ucomiss
; CHECK-OM1: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpGt ; ARM32-LABEL: fcmpGt
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32: vmrs ; ARM32: vmrs
...@@ -166,11 +192,18 @@ if.end3: ; preds = %if.end, %if.then2 ...@@ -166,11 +192,18 @@ if.end3: ; preds = %if.end, %if.then2
} }
; CHECK-LABEL: fcmpGe ; CHECK-LABEL: fcmpGe
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: setb ; CHECK-NEXT: jb
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: setb ; CHECK-NEXT: jb
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpGe
; CHECK-OM1: ucomiss
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpGe ; ARM32-LABEL: fcmpGe
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32: vmrs ; ARM32: vmrs
...@@ -205,11 +238,18 @@ if.end3: ; preds = %if.then2, %if.end ...@@ -205,11 +238,18 @@ if.end3: ; preds = %if.then2, %if.end
} }
; CHECK-LABEL: fcmpLt ; CHECK-LABEL: fcmpLt
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: seta ; CHECK-NEXT: jbe
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: seta ; CHECK-NEXT: jbe
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpLt
; CHECK-OM1: ucomiss
; CHECK-OM1-NEXT: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1-NEXT: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpLt ; ARM32-LABEL: fcmpLt
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32: vmrs ; ARM32: vmrs
...@@ -244,11 +284,18 @@ if.end3: ; preds = %if.end, %if.then2 ...@@ -244,11 +284,18 @@ if.end3: ; preds = %if.end, %if.then2
} }
; CHECK-LABEL: fcmpLe ; CHECK-LABEL: fcmpLe
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: setb ; CHECK-NEXT: jb
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: setb ; CHECK-NEXT: jb
; CHECK: call {{.*}} R_{{.*}} func ; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpLe
; CHECK-OM1: ucomiss
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpLe ; ARM32-LABEL: fcmpLe
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32: vmrs ; ARM32: vmrs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment