Commit 45f51a26 by Manasij Mukherjee

Short Circuit Evaluation

Split Nodes whenever an early jump is possible by short circuiting boolean operations. Nodes are split after conservatively checking for side effects, which include definition of multi block variables, function calls and instructions involving memory. BUG=None R=stichnot@chromium.org Review URL: https://codereview.chromium.org/2069923004 .
parent d615c861
...@@ -640,6 +640,69 @@ void Cfg::localCSE() { ...@@ -640,6 +640,69 @@ void Cfg::localCSE() {
} }
} }
void Cfg::shortCircuitJumps() {
// Split Nodes whenever an early jump is possible.
// __N :
// a = <something>
// Instruction 1 without side effect
// ... b = <something> ...
// Instruction N without side effect
// t1 = or a b
// br t1 __X __Y
//
// is transformed into:
// __N :
// a = <something>
// br a __X __N_ext
//
// __N_ext :
// Instruction 1 without side effect
// ... b = <something> ...
// Instruction N without side effect
// br b __X __Y
// (Similar logic for AND, jump to false instead of true target.)
TimerMarker T(TimerStack::TT_shortCircuit, this);
getVMetadata()->init(VMK_Uses);
auto NodeStack = this->getNodes();
CfgUnorderedMap<SizeT, CfgVector<CfgNode *>> Splits;
while (!NodeStack.empty()) {
auto *Node = NodeStack.back();
NodeStack.pop_back();
auto NewNode = Node->shortCircuit();
if (NewNode) {
NodeStack.push_back(NewNode);
NodeStack.push_back(Node);
Splits[Node->getIndex()].push_back(NewNode);
}
}
// Insert nodes in the right place
NodeList NewList;
NewList.reserve(Nodes.size());
CfgUnorderedSet<SizeT> Inserted;
for (auto *Node : Nodes) {
if (Inserted.find(Node->getIndex()) != Inserted.end())
continue; // already inserted
NodeList Stack{Node};
while (!Stack.empty()) {
auto *Current = Stack.back();
Stack.pop_back();
Inserted.insert(Current->getIndex());
NewList.push_back(Current);
for (auto *Next : Splits[Current->getIndex()]) {
Stack.push_back(Next);
}
}
}
SizeT NodeIndex = 0;
for (auto *Node : NewList) {
Node->resetIndex(NodeIndex++);
}
Nodes = NewList;
}
void Cfg::doArgLowering() { void Cfg::doArgLowering() {
TimerMarker T(TimerStack::TT_doArgLowering, this); TimerMarker T(TimerStack::TT_doArgLowering, this);
getTarget()->lowerArguments(); getTarget()->lowerArguments();
......
...@@ -201,6 +201,7 @@ public: ...@@ -201,6 +201,7 @@ public:
void reorderNodes(); void reorderNodes();
void shuffleNodes(); void shuffleNodes();
void localCSE(); void localCSE();
void shortCircuitJumps();
/// Scan allocas to determine whether we need to use a frame pointer. /// Scan allocas to determine whether we need to use a frame pointer.
/// If SortAndCombine == true, merge all the fixed-size allocas in the /// If SortAndCombine == true, merge all the fixed-size allocas in the
......
...@@ -51,6 +51,22 @@ void CfgNode::appendInst(Inst *Instr) { ...@@ -51,6 +51,22 @@ void CfgNode::appendInst(Inst *Instr) {
} }
} }
/// Substitutes New for Old as a predecessor of this node: every in-edge entry
/// equal to Old is overwritten with New, and every Phi argument in this node
/// whose label is Old is relabelled to New.
void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
  // Overwrite in place so the position of each in-edge is preserved.
  for (auto &Pred : InEdges) {
    if (Pred == Old)
      Pred = New;
  }
  // Keep the Phi labels in step with the in-edge list.
  for (auto &PhiInst : getPhis()) {
    auto *Phi = llvm::cast<InstPhi>(&PhiInst);
    const SizeT NumArgs = Phi->getSrcSize();
    for (SizeT ArgIdx = 0; ArgIdx != NumArgs; ++ArgIdx) {
      if (Phi->getLabel(ArgIdx) != Old)
        continue;
      Phi->setLabel(ArgIdx, New);
    }
  }
}
namespace { namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) { template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
const bool DoDelete = const bool DoDelete =
...@@ -1472,4 +1488,156 @@ void CfgNode::profileExecutionCount(VariableDeclaration *Var) { ...@@ -1472,4 +1488,156 @@ void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
Insts.push_front(Instr); Insts.push_front(Instr);
} }
/// Removes the first occurrence of In from this node's in-edge list.
/// If In is not currently recorded as a predecessor, the list is left
/// unchanged.
void CfgNode::removeInEdge(CfgNode *In) {
  auto Pos = std::find(InEdges.begin(), InEdges.end(), In);
  // Erasing the end iterator is undefined behaviour, so only erase when the
  // edge was actually found.
  if (Pos != InEdges.end())
    InEdges.erase(Pos);
}
/// Attempts to split this node so that a conditional branch on the result of
/// a boolean Or/And can exit early on the first operand alone.
///
/// On success the instructions following the first operand's definition are
/// moved into a freshly created node, this node is terminated by a new branch
/// on the first operand, and the original branch (now in the new node) tests
/// the remaining operand. Returns the new node, or nullptr when this node is
/// left untouched.
CfgNode *CfgNode::shortCircuit() {
auto *Func = getCfg();
auto *Last = &getInsts().back();
Variable *Condition = nullptr;
InstBr *Br = nullptr;
// Only a conditional branch whose condition is a Variable is a candidate.
if ((Br = llvm::dyn_cast<InstBr>(Last))) {
if (!Br->isUnconditional()) {
Condition = llvm::dyn_cast<Variable>(Br->getCondition());
}
}
if (Condition == nullptr)
return nullptr;
auto *JumpOnTrue = Br->getTargetTrue();
auto *JumpOnFalse = Br->getTargetFalse();
bool FoundOr = false;
bool FoundAnd = false;
InstArithmetic *TopLevelBoolOp = nullptr;
// Walk backwards looking for a live arithmetic Or/And that defines the branch
// condition. A definition of Condition that is not an Or/And does not stop the
// search; the scan simply continues towards the start of the node.
for (auto &Inst : reverse_range(getInsts())) {
if (Inst.isDeleted())
continue;
if (Inst.getDest() == Condition) {
if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);
if (FoundOr || FoundAnd) {
TopLevelBoolOp = Arith;
break;
}
}
}
}
if (!TopLevelBoolOp)
return nullptr;
// True when Opr appears among the source operands of Instr.
auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
if (Instr->getSrc(i) == Opr)
return true;
}
return false;
};
// Find the earliest instruction in this node whose destination is one of the
// two operands of the boolean op; the split happens right after it.
// NOTE(review): unlike the scan above, deleted instructions are not skipped
// here -- confirm that is intended.
Inst *FirstOperandDef = nullptr;
for (auto &Inst : getInsts()) {
if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
FirstOperandDef = &Inst;
break;
}
}
if (FirstOperandDef == nullptr) {
return nullptr;
}
// Check for side effects. Every live instruction from the first operand's
// definition (inclusive) up to the first branch/return must have a
// destination, report no side effects, and define a single-block variable;
// otherwise the split is abandoned.
auto It = Ice::instToIterator(FirstOperandDef);
while (It != getInsts().end()) {
if (It->isDeleted()) {
++It;
continue;
}
if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
break;
}
auto *Dest = It->getDest();
if (It->getDest() == nullptr || It->hasSideEffects() ||
!Func->getVMetadata()->isSingleBlock(Dest)) {
// Relying on short circuit eval here.
// getVMetadata()->isSingleBlock(Dest)
// will segfault if It->getDest() == nullptr
return nullptr;
}
It++;
}
// Create the continuation node and move everything after the first operand's
// definition (including the original branch) into it.
auto *NewNode = Func->makeNode();
NewNode->setLoopNestDepth(getLoopNestDepth());
It = Ice::instToIterator(FirstOperandDef);
It++; // Have to split after the def
NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
getInsts().end());
// In dump-enabled builds, give the two halves distinguishable names.
if (BuildDefs::dump()) {
NewNode->setName(getName().append("_2"));
setName(getName().append("_1"));
}
// Point edges properly: NewNode is entered from this node and inherits all of
// this node's former successors; this node is left with NewNode as its only
// out-edge until the early-exit edge is added below.
NewNode->addInEdge(this);
for (auto *Out : getOutEdges()) {
NewNode->addOutEdge(Out);
Out->addInEdge(NewNode);
}
removeAllOutEdges();
addOutEdge(NewNode);
// Manage Phi instructions of successors: each Phi argument that arrives from
// this node gets a duplicate argument labelled with NewNode.
// NOTE(review): the argument labelled with this node is kept, even in the
// successor that this node no longer branches to -- confirm that is handled
// elsewhere.
for (auto *Succ : NewNode->getOutEdges()) {
for (auto &Inst : Succ->getPhis()) {
auto *Phi = llvm::cast<InstPhi>(&Inst);
for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
if (Phi->getLabel(i) == this) {
Phi->addArgument(Phi->getSrc(i), NewNode);
}
}
}
}
// Create new Br instruction that terminates this node by testing the first
// operand. Or: a true first operand goes straight to the true target,
// otherwise fall into NewNode. And: a false first operand goes straight to
// the false target, otherwise fall into NewNode. The target that is no longer
// reached directly from this node drops its in-edge from this node.
InstBr *NewInst = nullptr;
if (FoundOr) {
addOutEdge(JumpOnTrue);
JumpOnFalse->removeInEdge(this);
NewInst =
InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
} else if (FoundAnd) {
addOutEdge(JumpOnFalse);
JumpOnTrue->removeInEdge(this);
NewInst =
InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
} else {
// NOTE(review): appears unreachable, since TopLevelBoolOp is only set when
// FoundOr or FoundAnd is true; if it were reached, the node would already
// have been split and rewired.
return nullptr;
}
assert(NewInst != nullptr);
appendInst(NewInst);
// The original branch now only needs to test the operand that was not used
// for the early exit.
Operand *UnusedOperand = nullptr;
assert(TopLevelBoolOp->getSrcSize() == 2);
if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
UnusedOperand = TopLevelBoolOp->getSrc(1);
else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
UnusedOperand = TopLevelBoolOp->getSrc(0);
assert(UnusedOperand);
Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br
// The combined boolean result is no longer consumed by the branch.
TopLevelBoolOp->setDeleted();
return NewNode;
}
} // end of namespace Ice } // end of namespace Ice
...@@ -116,10 +116,14 @@ public: ...@@ -116,10 +116,14 @@ public:
void addOutEdge(CfgNode *Out) { OutEdges.push_back(Out); } void addOutEdge(CfgNode *Out) { OutEdges.push_back(Out); }
void addInEdge(CfgNode *In) { InEdges.push_back(In); } void addInEdge(CfgNode *In) { InEdges.push_back(In); }
void replaceInEdge(CfgNode *Old, CfgNode *New);
void removeAllOutEdges() { OutEdges.clear(); }
void removeInEdge(CfgNode *In);
bool hasSingleOutEdge() const { bool hasSingleOutEdge() const {
return (getOutEdges().size() == 1 || getOutEdges()[0] == getOutEdges()[1]); return (getOutEdges().size() == 1 || getOutEdges()[0] == getOutEdges()[1]);
} }
CfgNode *shortCircuit();
private: private:
CfgNode(Cfg *Func, SizeT Number) CfgNode(Cfg *Func, SizeT Number)
......
...@@ -140,13 +140,16 @@ struct dev_list_flag {}; ...@@ -140,13 +140,16 @@ struct dev_list_flag {};
"information to stdout at the end of program execution."), \ "information to stdout at the end of program execution."), \
cl::init(false)) \ cl::init(false)) \
\ \
X(EnableExperimental, bool, dev_opt_flag, "enable-experimental", \ X(EnableExperimental, bool, dev_opt_flag, "enable-experimental", \
cl::desc("Enable Optimizations not yet part of O2"), \ cl::desc("Enable Optimizations not yet part of O2"), \
cl::init(false)) \ cl::init(false)) \
\ \
X(EnablePhiEdgeSplit, bool, dev_opt_flag, "phi-edge-split", \ X(EnablePhiEdgeSplit, bool, dev_opt_flag, "phi-edge-split", \
cl::desc("Enable edge splitting for Phi lowering"), cl::init(true)) \ cl::desc("Enable edge splitting for Phi lowering"), cl::init(true)) \
\ \
X(EnableShortCircuit, bool, dev_opt_flag, "enable-sc", \
cl::desc("Split Nodes for short circuit evaluation"), cl::init(false)) \
\
X(ExcludedRegisters, std::string, dev_list_flag, "reg-exclude", \ X(ExcludedRegisters, std::string, dev_list_flag, "reg-exclude", \
cl::CommaSeparated, cl::desc("Don't use specified registers")) \ cl::CommaSeparated, cl::desc("Don't use specified registers")) \
\ \
......
...@@ -77,7 +77,9 @@ const struct InstIcmpAttributes_ { ...@@ -77,7 +77,9 @@ const struct InstIcmpAttributes_ {
Inst::Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest) Inst::Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest)
: Kind(Kind), Number(Func->newInstNumber()), Dest(Dest), MaxSrcs(MaxSrcs), : Kind(Kind), Number(Func->newInstNumber()), Dest(Dest), MaxSrcs(MaxSrcs),
Srcs(Func->allocateArrayOf<Operand *>(MaxSrcs)), LiveRangesEnded(0) {} LiveRangesEnded(0) {
Srcs.reserve(MaxSrcs);
}
const char *Inst::getInstName() const { const char *Inst::getInstName() const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -393,7 +395,7 @@ InstLoad::InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr) ...@@ -393,7 +395,7 @@ InstLoad::InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr)
InstPhi::InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest) InstPhi::InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest)
: InstHighLevel(Func, Phi, MaxSrcs, Dest) { : InstHighLevel(Func, Phi, MaxSrcs, Dest) {
Labels = Func->allocateArrayOf<CfgNode *>(MaxSrcs); Labels.reserve(MaxSrcs);
} }
// TODO: A Switch instruction (and maybe others) can add duplicate edges. We // TODO: A Switch instruction (and maybe others) can add duplicate edges. We
...@@ -401,7 +403,8 @@ InstPhi::InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest) ...@@ -401,7 +403,8 @@ InstPhi::InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest)
// are the same for duplicate edges), though it seems the current lowering code // are the same for duplicate edges), though it seems the current lowering code
// is OK with this situation. // is OK with this situation.
void InstPhi::addArgument(Operand *Source, CfgNode *Label) { void InstPhi::addArgument(Operand *Source, CfgNode *Label) {
Labels[getSrcSize()] = Label; assert(Label);
Labels.push_back(Label);
addSource(Source); addSource(Source);
} }
......
...@@ -103,13 +103,13 @@ public: ...@@ -103,13 +103,13 @@ public:
Variable *getDest() const { return Dest; } Variable *getDest() const { return Dest; }
SizeT getSrcSize() const { return NumSrcs; } SizeT getSrcSize() const { return Srcs.size(); }
Operand *getSrc(SizeT I) const { Operand *getSrc(SizeT I) const {
assert(I < getSrcSize()); assert(I < getSrcSize());
return Srcs[I]; return Srcs[I];
} }
void replaceSource(SizeT Index, Operand *Replacement) { void replaceSource(SizeT Index, Operand *Replacement) {
assert(Index < NumSrcs); assert(Index < getSrcSize());
assert(!isDeleted()); assert(!isDeleted());
assert(LiveRangesEnded == 0); assert(LiveRangesEnded == 0);
// Invalidates liveness info because the use Srcs[Index] is removed. // Invalidates liveness info because the use Srcs[Index] is removed.
...@@ -189,8 +189,7 @@ protected: ...@@ -189,8 +189,7 @@ protected:
Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest); Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest);
void addSource(Operand *Src) { void addSource(Operand *Src) {
assert(Src); assert(Src);
assert(NumSrcs < MaxSrcs); Srcs.push_back(Src);
Srcs[NumSrcs++] = Src;
} }
void setLastUse(SizeT VarIndex) { void setLastUse(SizeT VarIndex) {
if (VarIndex < CHAR_BIT * sizeof(LiveRangesEnded)) if (VarIndex < CHAR_BIT * sizeof(LiveRangesEnded))
...@@ -199,7 +198,7 @@ protected: ...@@ -199,7 +198,7 @@ protected:
void resetLastUses() { LiveRangesEnded = 0; } void resetLastUses() { LiveRangesEnded = 0; }
/// The destroy() method lets the instruction cleanly release any memory that /// The destroy() method lets the instruction cleanly release any memory that
/// was allocated via the Cfg's allocator. /// was allocated via the Cfg's allocator.
virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); } virtual void destroy(Cfg *) {}
const InstKind Kind; const InstKind Kind;
/// Number is the instruction number for describing live ranges. /// Number is the instruction number for describing live ranges.
...@@ -226,8 +225,8 @@ protected: ...@@ -226,8 +225,8 @@ protected:
Variable *Dest; Variable *Dest;
const SizeT MaxSrcs; // only used for assert const SizeT MaxSrcs; // only used for assert
SizeT NumSrcs = 0;
Operand **Srcs; CfgVector<Operand *> Srcs;
/// LiveRangesEnded marks which Variables' live ranges end in this /// LiveRangesEnded marks which Variables' live ranges end in this
/// instruction. An instruction can have an arbitrary number of source /// instruction. An instruction can have an arbitrary number of source
...@@ -666,15 +665,12 @@ public: ...@@ -666,15 +665,12 @@ public:
private: private:
InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest); InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest);
void destroy(Cfg *Func) override { void destroy(Cfg *Func) override { Inst::destroy(Func); }
Func->deallocateArrayOf<CfgNode *>(Labels);
Inst::destroy(Func);
}
/// Labels[] duplicates the InEdges[] information in the enclosing CfgNode, /// Labels[] duplicates the InEdges[] information in the enclosing CfgNode,
/// but the Phi instruction is created before InEdges[] is available, so it's /// but the Phi instruction is created before InEdges[] is available, so it's
/// more complicated to share the list. /// more complicated to share the list.
CfgNode **Labels; CfgVector<CfgNode *> Labels;
}; };
/// Ret instruction. The return value is captured in getSrc(0), but if there is /// Ret instruction. The return value is captured in getSrc(0), but if there is
......
...@@ -447,6 +447,10 @@ template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() { ...@@ -447,6 +447,10 @@ template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
Func->localCSE(); Func->localCSE();
Func->dump("After Local CSE"); Func->dump("After Local CSE");
} }
if (getFlags().getEnableShortCircuit()) {
Func->shortCircuitJumps();
Func->dump("After Short Circuiting");
}
if (!getFlags().getEnablePhiEdgeSplit()) { if (!getFlags().getEnablePhiEdgeSplit()) {
// Lower Phi instructions. // Lower Phi instructions.
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
X(qTransPush) \ X(qTransPush) \
X(regAlloc) \ X(regAlloc) \
X(renumberInstructions) \ X(renumberInstructions) \
X(shortCircuit) \
X(szmain) \ X(szmain) \
X(translate) \ X(translate) \
X(translateFunctions) \ X(translateFunctions) \
......
; Test on -enable-sc if basic blocks are split when short circuit evaluation
; is possible for boolean expressions
; REQUIRES: allow_dump
; RUN: %p2i -i %s --filetype=asm --target x8632 --args \
; RUN: -O2 -enable-sc | FileCheck %s --check-prefix=ENABLE \
; RUN: --check-prefix=CHECK
; RUN: %p2i -i %s --filetype=asm --target x8632 --args \
; RUN: -O2 | FileCheck %s --check-prefix=NOENABLE \
; RUN: --check-prefix=CHECK
; Test input: each of the five i32 arguments is truncated to i1 and the
; results are combined into a single branch condition,
;   %t8 = ((%t0 | %t1) & %t2) | (%t3 & %t4)
; so the entry block ends in a conditional branch on a nested or/and tree.
define internal i32 @short_circuit(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
i32 %arg5) {
%t0 = trunc i32 %arg1 to i1
%t1 = trunc i32 %arg2 to i1
%t2 = trunc i32 %arg3 to i1
%t3 = trunc i32 %arg4 to i1
%t4 = trunc i32 %arg5 to i1
; Left subtree: (%t0 | %t1) & %t2
%t5 = or i1 %t0, %t1
%t6 = and i1 %t5, %t2
; Right subtree: %t3 & %t4
%t7 = and i1 %t3, %t4
; Top-level or feeding the branch.
%t8 = or i1 %t6, %t7
br i1 %t8, label %target_true, label %target_false
target_true:
ret i32 1
target_false:
ret i32 0
}
; CHECK-LABEL: short_circuit
; NOENABLE: .Lshort_circuit$__0:
; ENABLE: .Lshort_circuit$__0_1_1:
; ENABLE: .Lshort_circuit$__0_1_2:
; ENABLE: .Lshort_circuit$__0_2:
; CHECK: .Lshort_circuit$target_true:
; CHECK: .Lshort_circuit$target_false:
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment