Commit 47b6ba6d by Nicolas Capens Committed by Jim Stichnoth

Optimize floating-point minimum/maximum operations.

Combine a floating-point compare + select into min/max operations on x86 using SSE2 instructions, where equivalent.

Only handles the most common cases for now (but for both scalars and vectors):

    x > y ? x : y -> max
    x < y ? x : y -> min

BUG=swiftshader:19
Change-Id: Ic87bfa90cefd8014af5624d85a2ecef1c891e328
Reviewed-on: https://chromium-review.googlesource.com/439814
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent eb568f55
...@@ -260,10 +260,12 @@ template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) { ...@@ -260,10 +260,12 @@ template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
invalidateProducersOnStore(&Instr); invalidateProducersOnStore(&Instr);
// Check whether Instr is a valid producer. // Check whether Instr is a valid producer.
Variable *Var = Instr.getDest(); Variable *Var = Instr.getDest();
if (Var // only consider instructions with an actual dest var if (Var) { // only consider instructions with an actual dest var
&& Var->getType() == IceType_i1 // only bool-type dest vars if (isBooleanType(Var->getType())) { // only bool-type dest vars
&& getProducerKind(&Instr) != PK_None) { // white-listed instructions if (getProducerKind(&Instr) != PK_None) { // white-listed instructions
Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr); Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
}
}
} }
// Check each src variable against the map. // Check each src variable against the map.
FOREACH_VAR_IN_INST(Var, Instr) { FOREACH_VAR_IN_INST(Var, Instr) {
...@@ -3335,9 +3337,6 @@ void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, ...@@ -3335,9 +3337,6 @@ void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
Operand *Src1 = Fcmp->getSrc(1); Operand *Src1 = Fcmp->getSrc(1);
Variable *Dest = Fcmp->getDest(); Variable *Dest = Fcmp->getDest();
if (isVectorType(Dest->getType()))
llvm::report_fatal_error("Vector compare/branch cannot be folded");
if (Consumer != nullptr) { if (Consumer != nullptr) {
if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
if (lowerOptimizeFcmpSelect(Fcmp, Select)) if (lowerOptimizeFcmpSelect(Fcmp, Select))
...@@ -3345,6 +3344,13 @@ void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, ...@@ -3345,6 +3344,13 @@ void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
} }
} }
if (isVectorType(Dest->getType())) {
lowerFcmp(Fcmp);
if (Consumer != nullptr)
lowerSelectVector(llvm::cast<InstSelect>(Consumer));
return;
}
// Lowering a = fcmp cond, b, c // Lowering a = fcmp cond, b, c
// ucomiss b, c /* only if C1 != Br_None */ // ucomiss b, c /* only if C1 != Br_None */
// /* but swap b,c order if SwapOperands==true */ // /* but swap b,c order if SwapOperands==true */
...@@ -3509,8 +3515,12 @@ void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp, ...@@ -3509,8 +3515,12 @@ void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
Operand *Src1 = legalize(Icmp->getSrc(1)); Operand *Src1 = legalize(Icmp->getSrc(1));
Variable *Dest = Icmp->getDest(); Variable *Dest = Icmp->getDest();
if (isVectorType(Dest->getType())) if (isVectorType(Dest->getType())) {
llvm::report_fatal_error("Vector compare/branch cannot be folded"); lowerIcmp(Icmp);
if (Consumer != nullptr)
lowerSelectVector(llvm::cast<InstSelect>(Consumer));
return;
}
if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
lowerIcmp64(Icmp, Consumer); lowerIcmp64(Icmp, Consumer);
...@@ -6616,11 +6626,6 @@ template <typename TraitsType> ...@@ -6616,11 +6626,6 @@ template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
Variable *Dest = Select->getDest(); Variable *Dest = Select->getDest();
if (isVectorType(Dest->getType())) {
lowerSelectVector(Select);
return;
}
Operand *Condition = Select->getCondition(); Operand *Condition = Select->getCondition();
// Handle folding opportunities. // Handle folding opportunities.
if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
...@@ -6640,6 +6645,11 @@ void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { ...@@ -6640,6 +6645,11 @@ void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
} }
} }
if (isVectorType(Dest->getType())) {
lowerSelectVector(Select);
return;
}
Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
Operand *Zero = Ctx->getConstantZero(IceType_i32); Operand *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(CmpResult, Zero); _cmp(CmpResult, Zero);
...@@ -6746,24 +6756,47 @@ bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect( ...@@ -6746,24 +6756,47 @@ bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
Operand *CmpSrc1 = Fcmp->getSrc(1); Operand *CmpSrc1 = Fcmp->getSrc(1);
Operand *SelectSrcT = Select->getTrueOperand(); Operand *SelectSrcT = Select->getTrueOperand();
Operand *SelectSrcF = Select->getFalseOperand(); Operand *SelectSrcF = Select->getFalseOperand();
Variable *SelectDest = Select->getDest();
if (CmpSrc0->getType() != SelectSrcT->getType()) // TODO(capn): also handle swapped compare/select operand order.
if (CmpSrc0 != SelectSrcT || CmpSrc1 != SelectSrcF)
return false; return false;
// TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here.
InstFcmp::FCond Condition = Fcmp->getCondition(); InstFcmp::FCond Condition = Fcmp->getCondition();
switch (Condition) { switch (Condition) {
default: default:
return false; return false;
case InstFcmp::True: case InstFcmp::True:
break;
case InstFcmp::False: case InstFcmp::False:
case InstFcmp::Ogt:
case InstFcmp::Olt:
(void)CmpSrc0;
(void)CmpSrc1;
(void)SelectSrcT;
(void)SelectSrcF;
break; break;
case InstFcmp::Ogt: {
Variable *T = makeReg(SelectDest->getType());
if (isScalarFloatingType(SelectSrcT->getType())) {
_mov(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_maxss(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_mov(SelectDest, T);
} else {
_movp(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_maxps(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_movp(SelectDest, T);
}
return true;
} break;
case InstFcmp::Olt: {
Variable *T = makeReg(SelectSrcT->getType());
if (isScalarFloatingType(SelectSrcT->getType())) {
_mov(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_minss(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_mov(SelectDest, T);
} else {
_movp(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem));
_minps(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem));
_movp(SelectDest, T);
}
return true;
} break;
} }
return false; return false;
} }
...@@ -6794,6 +6827,7 @@ void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Instr) { ...@@ -6794,6 +6827,7 @@ void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Instr) {
Variable *T = makeReg(SrcTy); Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
if (InstructionSet >= Traits::SSE4_1) { if (InstructionSet >= Traits::SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps or // TODO(wala): If the condition operand is a constant, use blendps or
// pblendw. // pblendw.
......
...@@ -1000,9 +1000,7 @@ entry: ...@@ -1000,9 +1000,7 @@ entry:
} }
; CHECK-LABEL: selectFloatVarVar ; CHECK-LABEL: selectFloatVarVar
; CHECK: movss ; CHECK: movss
; CHECK: ucomiss ; CHECK: minss
; CHECK: ja
; CHECK: movss
; ARM32-LABEL: selectFloatVarVar ; ARM32-LABEL: selectFloatVarVar
; ARM32: vcmp.f32 ; ARM32: vcmp.f32
; ARM32-OM1: vmovne.f32 s{{[0-9]+}} ; ARM32-OM1: vmovne.f32 s{{[0-9]+}}
...@@ -1019,9 +1017,7 @@ entry: ...@@ -1019,9 +1017,7 @@ entry:
} }
; CHECK-LABEL: selectDoubleVarVar ; CHECK-LABEL: selectDoubleVarVar
; CHECK: movsd ; CHECK: movsd
; CHECK: ucomisd ; CHECK: minsd
; CHECK: ja
; CHECK: movsd
; ARM32-LABEL: selectDoubleVarVar ; ARM32-LABEL: selectDoubleVarVar
; ARM32: vcmp.f64 ; ARM32: vcmp.f64
; ARM32-OM1: vmovne.f64 d{{[0-9]+}} ; ARM32-OM1: vmovne.f64 d{{[0-9]+}}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment