Commit 47b6ba6d, authored by Nicolas Capens, committed by Jim Stichnoth

Optimize floating-point minimum/maximum operations.

Combine a floating-point compare + select into min/max operations on x86 using SSE2 instructions, where equivalent. Only handles the most common cases for now (but for both scalars and vectors):

  x > y ? x : y -> max
  x < y ? x : y -> min

BUG=swiftshader:19
Change-Id: Ic87bfa90cefd8014af5624d85a2ecef1c891e328
Reviewed-on: https://chromium-review.googlesource.com/439814
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent eb568f55
......@@ -260,10 +260,12 @@ template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
invalidateProducersOnStore(&Instr);
// Check whether Instr is a valid producer.
Variable *Var = Instr.getDest();
if (Var // only consider instructions with an actual dest var
&& Var->getType() == IceType_i1 // only bool-type dest vars
&& getProducerKind(&Instr) != PK_None) { // white-listed instructions
Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
if (Var) { // only consider instructions with an actual dest var
if (isBooleanType(Var->getType())) { // only bool-type dest vars
if (getProducerKind(&Instr) != PK_None) { // white-listed instructions
Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
}
}
}
// Check each src variable against the map.
FOREACH_VAR_IN_INST(Var, Instr) {
......@@ -3335,9 +3337,6 @@ void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
Operand *Src1 = Fcmp->getSrc(1);
Variable *Dest = Fcmp->getDest();
if (isVectorType(Dest->getType()))
llvm::report_fatal_error("Vector compare/branch cannot be folded");
if (Consumer != nullptr) {
if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
if (lowerOptimizeFcmpSelect(Fcmp, Select))
......@@ -3345,6 +3344,13 @@ void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
}
}
if (isVectorType(Dest->getType())) {
lowerFcmp(Fcmp);
if (Consumer != nullptr)
lowerSelectVector(llvm::cast<InstSelect>(Consumer));
return;
}
// Lowering a = fcmp cond, b, c
// ucomiss b, c /* only if C1 != Br_None */
// /* but swap b,c order if SwapOperands==true */
......@@ -3509,8 +3515,12 @@ void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
Operand *Src1 = legalize(Icmp->getSrc(1));
Variable *Dest = Icmp->getDest();
if (isVectorType(Dest->getType()))
llvm::report_fatal_error("Vector compare/branch cannot be folded");
if (isVectorType(Dest->getType())) {
lowerIcmp(Icmp);
if (Consumer != nullptr)
lowerSelectVector(llvm::cast<InstSelect>(Consumer));
return;
}
if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
lowerIcmp64(Icmp, Consumer);
......@@ -6616,11 +6626,6 @@ template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
Variable *Dest = Select->getDest();
if (isVectorType(Dest->getType())) {
lowerSelectVector(Select);
return;
}
Operand *Condition = Select->getCondition();
// Handle folding opportunities.
if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
......@@ -6640,6 +6645,11 @@ void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
}
}
if (isVectorType(Dest->getType())) {
lowerSelectVector(Select);
return;
}
Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
Operand *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(CmpResult, Zero);
......@@ -6746,24 +6756,47 @@ bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
Operand *CmpSrc1 = Fcmp->getSrc(1);
Operand *SelectSrcT = Select->getTrueOperand();
Operand *SelectSrcF = Select->getFalseOperand();
Variable *SelectDest = Select->getDest();
if (CmpSrc0->getType() != SelectSrcT->getType())
// TODO(capn): also handle swapped compare/select operand order.
if (CmpSrc0 != SelectSrcT || CmpSrc1 != SelectSrcF)
return false;
// TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here.
// TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here.
InstFcmp::FCond Condition = Fcmp->getCondition();
switch (Condition) {
default:
return false;
case InstFcmp::True:
break;
case InstFcmp::False:
case InstFcmp::Ogt:
case InstFcmp::Olt:
(void)CmpSrc0;
(void)CmpSrc1;
(void)SelectSrcT;
(void)SelectSrcF;
break;
case InstFcmp::Ogt: {
Variable *T = makeReg(SelectDest->getType());
if (isScalarFloatingType(SelectSrcT->getType())) {
_mov(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_maxss(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_mov(SelectDest, T);
} else {
_movp(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_maxps(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_movp(SelectDest, T);
}
return true;
} break;
case InstFcmp::Olt: {
Variable *T = makeReg(SelectSrcT->getType());
if (isScalarFloatingType(SelectSrcT->getType())) {
_mov(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_minss(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_mov(SelectDest, T);
} else {
_movp(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_minps(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_movp(SelectDest, T);
}
return true;
} break;
}
return false;
}
......@@ -6794,6 +6827,7 @@ void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Instr) {
Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
if (InstructionSet >= Traits::SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps or
// pblendw.
......
......@@ -1000,9 +1000,7 @@ entry:
}
; CHECK-LABEL: selectFloatVarVar
; CHECK: movss
; CHECK: ucomiss
; CHECK: ja
; CHECK: movss
; CHECK: minss
; ARM32-LABEL: selectFloatVarVar
; ARM32: vcmp.f32
; ARM32-OM1: vmovne.f32 s{{[0-9]+}}
......@@ -1019,9 +1017,7 @@ entry:
}
; CHECK-LABEL: selectDoubleVarVar
; CHECK: movsd
; CHECK: ucomisd
; CHECK: ja
; CHECK: movsd
; CHECK: minsd
; ARM32-LABEL: selectDoubleVarVar
; ARM32: vcmp.f64
; ARM32-OM1: vmovne.f64 d{{[0-9]+}}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment