Commit daf096cd by David Sehr

Improve bool folding

Fold and/or followed by branch to eliminate cmp. Also, fold fcmp instructions into branches similarly to what was done for icmp instructions. BUG= R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1436623002 .
parent 3e859b73
......@@ -720,9 +720,11 @@ private:
void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi,
Operand *Src1Lo, Variable *DestLo, Variable *DestHi);
/// Emit the code for a combined compare and branch, or sets the destination
/// variable of the compare if branch is nullptr.
/// Emit the code for a combined operation and branch, or set the destination
/// variable of the operation if Br == nullptr.
void lowerIcmpAndBr(const InstIcmp *Icmp, const InstBr *Br);
void lowerFcmpAndBr(const InstFcmp *Fcmp, const InstBr *Br);
void lowerArithAndBr(const InstArithmetic *Arith, const InstBr *Br);
/// Emit a setcc instruction if Br == nullptr; otherwise emit a branch.
void setccOrBr(typename Traits::Cond::BrCond Condition, Variable *Dest,
......
......@@ -81,7 +81,8 @@ public:
PK_Icmp32,
PK_Icmp64,
PK_Fcmp,
PK_Trunc
PK_Trunc,
PK_Arith // A flag-setting arithmetic instruction.
};
/// Currently the actual enum values are not used (other than CK_None), but we
......@@ -125,10 +126,21 @@ BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
return PK_Icmp32;
return PK_Icmp64;
}
return PK_None; // TODO(stichnot): remove this
if (llvm::isa<InstFcmp>(Instr))
return PK_Fcmp;
if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
switch (Arith->getOp()) {
default:
return PK_None;
case InstArithmetic::And:
case InstArithmetic::Or:
return PK_Arith;
}
}
}
return PK_None; // TODO(stichnot): remove this
if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
switch (Cast->getCastKind()) {
default:
......@@ -1925,9 +1937,16 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst);
return;
}
case BoolFolding::PK_Fcmp: {
lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst);
return;
}
case BoolFolding::PK_Arith: {
lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst);
return;
}
}
}
Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(Src0, Zero);
......@@ -2540,20 +2559,26 @@ void TargetX86Base<Machine>::lowerExtractElement(
/// Lowers a standalone fcmp instruction. There is no branch to fold the
/// compare into, so the work is delegated to lowerFcmpAndBr with a null
/// branch argument.
template <class Machine>
void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
  lowerFcmpAndBr(Inst, /*Br=*/nullptr);
}
template <class Machine>
void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
const InstBr *Br) {
Operand *Src0 = Inst->getSrc(0);
Operand *Src1 = Inst->getSrc(1);
Variable *Dest = Inst->getDest();
if (isVectorType(Dest->getType())) {
if (Br)
llvm::report_fatal_error("vector compare/branch cannot be folded");
InstFcmp::FCond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition);
assert(Index < Traits::TableFcmpSize);
if (Traits::TableFcmp[Index].SwapVectorOperands) {
Operand *T = Src0;
Src0 = Src1;
Src1 = T;
}
if (Traits::TableFcmp[Index].SwapVectorOperands)
std::swap(Src0, Src1);
Variable *T = nullptr;
......@@ -2633,24 +2658,39 @@ void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
_ucomiss(T, Src1RM);
if (!HasC2) {
assert(Traits::TableFcmp[Index].Default);
_setcc(Dest, Traits::TableFcmp[Index].C1);
setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br);
return;
}
}
Constant *Default =
Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default);
_mov(Dest, Default);
if (HasC1) {
typename Traits::Insts::Label *Label =
Traits::Insts::Label::create(Func, this);
_br(Traits::TableFcmp[Index].C1, Label);
if (HasC2) {
_br(Traits::TableFcmp[Index].C2, Label);
int32_t IntDefault = Traits::TableFcmp[Index].Default;
if (Br == nullptr) {
Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
_mov(Dest, Default);
if (HasC1) {
typename Traits::Insts::Label *Label =
Traits::Insts::Label::create(Func, this);
_br(Traits::TableFcmp[Index].C1, Label);
if (HasC2) {
_br(Traits::TableFcmp[Index].C2, Label);
}
Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
_mov_redefined(Dest, NonDefault);
Context.insert(Label);
}
Constant *NonDefault =
Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default);
_mov_redefined(Dest, NonDefault);
Context.insert(Label);
} else {
CfgNode *TrueSucc = Br->getTargetTrue();
CfgNode *FalseSucc = Br->getTargetFalse();
if (IntDefault != 0)
std::swap(TrueSucc, FalseSucc);
if (HasC1) {
_br(Traits::TableFcmp[Index].C1, FalseSucc);
if (HasC2) {
_br(Traits::TableFcmp[Index].C2, FalseSucc);
}
_br(TrueSucc);
return;
}
_br(FalseSucc);
}
}
......@@ -2960,6 +3000,37 @@ void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest,
}
/// Lowers an And/Or arithmetic instruction together with the conditional
/// branch that consumes its result, so that the branch can follow the
/// operation directly instead of going through a separate compare.
///
/// \param Arith the arithmetic instruction being folded; only
///        InstArithmetic::And and InstArithmetic::Or are accepted, any other
///        opcode reaches llvm_unreachable.
/// \param Br the branch whose true/false targets receive control.
///
/// NOTE(review): Br is dereferenced unconditionally at the end of this
/// function, so a null Br is not handled here -- confirm that every caller
/// passes a non-null branch.
template <class Machine>
void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith,
const InstBr *Br) {
// T starts out null and is handed to _mov below, which is presumably where
// the temporary gets created -- TODO confirm against _mov's definition.
Variable *T = nullptr;
// Both sources are legalized up front, before any instruction for the
// operation itself is emitted.
Operand *Src0 = legalize(Arith->getSrc(0));
Operand *Src1 = legalize(Arith->getSrc(1));
Variable *Dest = Arith->getDest();
switch (Arith->getOp()) {
default:
llvm_unreachable("arithmetic operator not AND or OR");
break;
case InstArithmetic::And:
_mov(T, Src0);
// Test cannot have an address in the second position. Since T is
// guaranteed to be a register and Src1 could be a memory load, ensure
// that the second argument is a register.
if (llvm::isa<Constant>(Src1))
_test(T, Src1);
else
_test(Src1, T);
break;
case InstArithmetic::Or:
// Unlike the And case, the Or is emitted as a real _or into T.
_mov(T, Src0);
_or(T, Src1);
break;
}
// T is only marked as used and Dest only marked as defined: no instruction
// above writes Dest. Presumably these fake instructions keep liveness
// analysis consistent for the folded producer -- TODO confirm.
Context.insert(InstFakeUse::create(Func, T));
Context.insert(InstFakeDef::create(Func, Dest));
// Branch on the "not equal" condition left by the test/or above: the true
// target is taken on Br_ne, the false target otherwise.
_br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
}
template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0);
Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
......
; This tries to be a comprehensive test of f32 and f64 compare operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols | FileCheck %s
; RUN: -allow-externally-defined-symbols | FileCheck %s \
; RUN: --check-prefix=CHECK-OM1
; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
; RUN: --target arm32 -i %s --args -O2 \
......@@ -42,13 +40,21 @@ if.end3: ; preds = %if.then2, %if.end
}
; CHECK-LABEL: fcmpEq
; CHECK: ucomiss
; CHECK: jne
; CHECK-NEXT: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: jne
; CHECK-NEXT: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpEq
; CHECK-OM1: ucomiss
; CHECK-OM1: jne
; CHECK-OM1-NEXT: jp
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1: jne
; CHECK-OM1-NEXT: jp
; ARM32-LABEL: fcmpEq
; ARM32: vcmp.f32
; ARM32: vmrs
......@@ -86,13 +92,26 @@ if.end3: ; preds = %if.then2, %if.end
}
; CHECK-LABEL: fcmpNe
; CHECK: ucomiss
; CHECK: jne
; CHECK-NEXT: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jmp
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: jne
; CHECK-NEXT: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jmp
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpNe
; CHECK-OM1: ucomiss
; CHECK-OM1: jne
; CHECK-OM1: jp
; CHECK-OM1: jmp
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1: jne
; CHECK-OM1: jp
; CHECK-OM1: jmp
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpNe
; ARM32: vcmp.f32
; ARM32: vmrs
......@@ -127,11 +146,18 @@ if.end3: ; preds = %if.then2, %if.end
}
; CHECK-LABEL: fcmpGt
; CHECK: ucomiss
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jbe
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jbe
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpGt
; CHECK-OM1: ucomiss
; CHECK-OM1: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpGt
; ARM32: vcmp.f32
; ARM32: vmrs
......@@ -166,11 +192,18 @@ if.end3: ; preds = %if.end, %if.then2
}
; CHECK-LABEL: fcmpGe
; CHECK: ucomiss
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jb
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jb
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpGe
; CHECK-OM1: ucomiss
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpGe
; ARM32: vcmp.f32
; ARM32: vmrs
......@@ -205,11 +238,18 @@ if.end3: ; preds = %if.then2, %if.end
}
; CHECK-LABEL: fcmpLt
; CHECK: ucomiss
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jbe
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jbe
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpLt
; CHECK-OM1: ucomiss
; CHECK-OM1-NEXT: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1-NEXT: seta
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpLt
; ARM32: vcmp.f32
; ARM32: vmrs
......@@ -244,11 +284,18 @@ if.end3: ; preds = %if.end, %if.then2
}
; CHECK-LABEL: fcmpLe
; CHECK: ucomiss
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jb
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK-NEXT: jb
; CHECK-NEXT: call {{.*}} R_{{.*}} func
; CHECK-OM1-LABEL: fcmpLe
; CHECK-OM1: ucomiss
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; CHECK-OM1: ucomisd
; CHECK-OM1-NEXT: setb
; CHECK-OM1: call {{.*}} R_{{.*}} func
; ARM32-LABEL: fcmpLe
; ARM32: vcmp.f32
; ARM32: vmrs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment