Commit adf352bc by Manasij Mukherjee

Improve LoopAnalyzer Interface

Make LoopAnalyzer compute loop bodies and depth only. Move the logic for finding out loop headers and pre-headers to LoopInfo, which provides a visitor to iterate over the loops and easy access to the information. This does not change the core algorithm. BUG=None R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/2149803005 .
parent 017a5538
...@@ -643,41 +643,29 @@ void Cfg::localCSE() { ...@@ -643,41 +643,29 @@ void Cfg::localCSE() {
void Cfg::loopInvariantCodeMotion() { void Cfg::loopInvariantCodeMotion() {
TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this); TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this);
// Does not introduce new nodes as of now. // Does not introduce new nodes as of now.
for (auto &Pair : LoopInfo) { for (auto &Loop : LoopInfo) {
CfgNode *Header = Nodes[Pair.first]; CfgNode *Header = Loop.Header;
assert(Header); assert(Header);
if (Header->getLoopNestDepth() < 1) if (Header->getLoopNestDepth() < 1)
continue; return;
CfgNode *PreHeader = nullptr; CfgNode *PreHeader = Loop.PreHeader;
for (auto *Pred : Header->getInEdges()) {
if (Pred->getLoopNestDepth() == Header->getLoopNestDepth() - 1) {
if (PreHeader == nullptr) {
PreHeader = Pred;
} else {
PreHeader = nullptr;
break;
// Do not consider cases with two incoming edges.
// Will require insertion of nodes.
}
}
}
if (PreHeader == nullptr || PreHeader->getInsts().size() == 0) { if (PreHeader == nullptr || PreHeader->getInsts().size() == 0) {
continue; // to next loop return; // try next loop
} }
auto &Insts = PreHeader->getInsts(); auto &Insts = PreHeader->getInsts();
auto &LastInst = Insts.back(); auto &LastInst = Insts.back();
Insts.pop_back(); Insts.pop_back();
for (auto *Inst : findLoopInvariantInstructions(Pair.first)) { for (auto *Inst : findLoopInvariantInstructions(Loop.Body)) {
PreHeader->appendInst(Inst); PreHeader->appendInst(Inst);
} }
PreHeader->appendInst(&LastInst); PreHeader->appendInst(&LastInst);
} }
} }
Ice::CfgVector<Inst *> CfgVector<Inst *>
Cfg::findLoopInvariantInstructions(Ice::SizeT LoopHeaderIndex) { Cfg::findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> &Body) {
CfgUnorderedSet<Inst *> InvariantInsts; CfgUnorderedSet<Inst *> InvariantInsts;
CfgUnorderedSet<Variable *> InvariantVars; CfgUnorderedSet<Variable *> InvariantVars;
for (auto *Var : getArgs()) { for (auto *Var : getArgs()) {
...@@ -686,7 +674,7 @@ Cfg::findLoopInvariantInstructions(Ice::SizeT LoopHeaderIndex) { ...@@ -686,7 +674,7 @@ Cfg::findLoopInvariantInstructions(Ice::SizeT LoopHeaderIndex) {
bool Changed = false; bool Changed = false;
do { do {
Changed = false; Changed = false;
for (auto NodeIndex : LoopInfo[LoopHeaderIndex]) { for (auto NodeIndex : Body) {
auto *Node = Nodes[NodeIndex]; auto *Node = Nodes[NodeIndex];
CfgVector<std::reference_wrapper<Inst>> Insts(Node->getInsts().begin(), CfgVector<std::reference_wrapper<Inst>> Insts(Node->getInsts().begin(),
Node->getInsts().end()); Node->getInsts().end());
...@@ -1437,7 +1425,7 @@ void Cfg::genFrame() { ...@@ -1437,7 +1425,7 @@ void Cfg::genFrame() {
// Runs the loop analysis on this function and stores the result in the
// LoopInfo member. The work is accounted to the TT_computeLoopNestDepth timer.
void Cfg::generateLoopInfo() { void Cfg::generateLoopInfo() {
TimerMarker T(TimerStack::TT_computeLoopNestDepth, this); TimerMarker T(TimerStack::TT_computeLoopNestDepth, this);
LoopInfo = LoopAnalyzer(this).getLoopInfo(); LoopInfo = ComputeLoopInfo(this);
} }
// This is a lightweight version of live-range-end calculation. Marks the last // This is a lightweight version of live-range-end calculation. Marks the last
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "IceClFlags.h" #include "IceClFlags.h"
#include "IceDefs.h" #include "IceDefs.h"
#include "IceGlobalContext.h" #include "IceGlobalContext.h"
#include "IceLoopAnalyzer.h"
#include "IceStringPool.h" #include "IceStringPool.h"
#include "IceTypes.h" #include "IceTypes.h"
...@@ -301,7 +302,8 @@ private: ...@@ -301,7 +302,8 @@ private:
uint32_t CombinedAlignment, InstList &Insts, uint32_t CombinedAlignment, InstList &Insts,
AllocaBaseVariableType BaseVariableType); AllocaBaseVariableType BaseVariableType);
void findRematerializable(); void findRematerializable();
CfgVector<Inst *> findLoopInvariantInstructions(SizeT LoopHeaderIndex); CfgVector<Inst *>
findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> &Body);
GlobalContext *Ctx; GlobalContext *Ctx;
uint32_t SequenceNumber; /// output order for emission uint32_t SequenceNumber; /// output order for emission
...@@ -332,12 +334,12 @@ private: ...@@ -332,12 +334,12 @@ private:
/// Globals required by this CFG. Mostly used for the profiler's globals. /// Globals required by this CFG. Mostly used for the profiler's globals.
std::unique_ptr<VariableDeclarationList> GlobalInits; std::unique_ptr<VariableDeclarationList> GlobalInits;
CfgVector<InstJumpTable *> JumpTables; CfgVector<InstJumpTable *> JumpTables;
CfgUnorderedMap<SizeT, CfgVector<SizeT>> LoopInfo;
/// CurrentNode is maintained during dumping/emitting just for validating /// CurrentNode is maintained during dumping/emitting just for validating
/// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but /// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but
/// before global operations like register allocation, resetCurrentNode() /// before global operations like register allocation, resetCurrentNode()
/// should be called to avoid spurious validation failures. /// should be called to avoid spurious validation failures.
const CfgNode *CurrentNode = nullptr; const CfgNode *CurrentNode = nullptr;
CfgVector<Loop> LoopInfo;
public: public:
static void TlsInit() { CfgAllocatorTraits::init(); } static void TlsInit() { CfgAllocatorTraits::init(); }
......
...@@ -16,8 +16,107 @@ ...@@ -16,8 +16,107 @@
#include "IceCfg.h" #include "IceCfg.h"
#include "IceCfgNode.h" #include "IceCfgNode.h"
#include <algorithm>
namespace Ice { namespace Ice {
/// Analyze a function's CFG for loops. The CFG must not change during the
/// lifetime of this object.
class LoopAnalyzer {
public:
explicit LoopAnalyzer(Cfg *Func);
/// Use Tarjan's strongly connected components algorithm to identify outermost
/// to innermost loops. By deleting the head of the loop from the graph, inner
/// loops can be found. This assumes that the head node is not shared between
/// loops but instead all paths to the head come from 'continue' constructs.
///
/// This only computes the loop nest depth within the function and does not
/// take into account whether the function was called from within a loop.
///
/// Returns a copy of the loop bodies collected in Loops; each body is a set
/// of CfgNode indices.
// TODO(ascull): this currently uses an extension of Tarjan's algorithm which
// is bounded linear. ncbray suggests another algorithm which is linear in
// practice but not bounded linear. I think it also finds dominators.
// http://lenx.100871.net/papers/loop-SAS.pdf
CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; }
private:
LoopAnalyzer() = delete;
LoopAnalyzer(const LoopAnalyzer &) = delete;
LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
void computeLoopNestDepth();
using IndexT = uint32_t;
/// Index value of a node that has not been visited yet (see isVisited()).
static constexpr IndexT UndefinedIndex = 0;
/// Initial value of NextIndex.
static constexpr IndexT FirstDefinedIndex = 1;
// TODO(ascull): classify the other fields
/// A CfgNode decorated with the per-node state used by the analysis: visit
/// index, low-link, on-stack flag, deleted flag and a successor cursor.
class LoopNode {
LoopNode() = delete;
LoopNode operator=(const LoopNode &) = delete;
public:
explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
LoopNode(const LoopNode &) = default;
void reset();
NodeList::const_iterator successorsEnd() const;
NodeList::const_iterator currentSuccessor() const { return Succ; }
void nextSuccessor() { ++Succ; }
/// Mark the node as visited by setting both Index and LowLink to VisitIndex.
void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
bool isVisited() const { return Index != UndefinedIndex; }
IndexT getIndex() const { return Index; }
/// Lower LowLink to NewLink if NewLink is smaller; otherwise leave it as is.
void tryLink(IndexT NewLink) {
if (NewLink < LowLink)
LowLink = NewLink;
}
IndexT getLowLink() const { return LowLink; }
void setOnStack(bool NewValue = true) { OnStack = NewValue; }
bool isOnStack() const { return OnStack; }
void setDeleted() { Deleted = true; }
bool isDeleted() const { return Deleted; }
void incrementLoopNestDepth();
bool hasSelfEdge() const;
CfgNode *getNode() { return BB; }
private:
CfgNode *BB;
NodeList::const_iterator Succ;
IndexT Index;
IndexT LowLink;
bool OnStack;
// Initialized here (not in reset()) because reset() returns early once the
// node is deleted.
bool Deleted = false;
};
using LoopNodeList = CfgVector<LoopNode>;
using LoopNodePtrList = CfgVector<LoopNode *>;
/// Process the node as part of Tarjan's algorithm and return either a
/// node to recurse into or nullptr when the node has been fully processed.
LoopNode *processNode(LoopNode &Node);
/// The function to analyze for loops.
Cfg *const Func;
/// A list of decorated nodes in the same order as Func->getNodes() which
/// means the node's index will also be valid in this list.
LoopNodeList AllNodes;
/// This is used as a replacement for the call stack.
LoopNodePtrList WorkStack;
/// Track which loop a node belongs to.
LoopNodePtrList LoopStack;
/// The index to assign to the next visited node.
IndexT NextIndex = FirstDefinedIndex;
/// The number of nodes which have been marked deleted. This is used to track
/// when the iteration should end.
LoopNodePtrList::size_type NumDeletedNodes = 0;
/// All the Loops, in descending order of size
// NOTE(review): processNode() only appends to this list; the descending-size
// order is explicitly established by the std::sort in ComputeLoopInfo().
// Confirm whether the order is also guaranteed here.
CfgVector<CfgUnorderedSet<SizeT>> Loops;
};
void LoopAnalyzer::LoopNode::reset() { void LoopAnalyzer::LoopNode::reset() {
if (Deleted) if (Deleted)
return; return;
...@@ -142,12 +241,12 @@ LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) { ...@@ -142,12 +241,12 @@ LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
if (*It == &Node) { if (*It == &Node) {
(*It)->setDeleted(); (*It)->setDeleted();
++NumDeletedNodes; ++NumDeletedNodes;
CfgVector<SizeT> LoopNodes; CfgUnorderedSet<SizeT> LoopNodes;
for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end(); for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end();
++LoopIter) { ++LoopIter) {
LoopNodes.push_back((*LoopIter)->getNode()->getIndex()); LoopNodes.insert((*LoopIter)->getNode()->getIndex());
} }
Loops[(*It)->getNode()->getIndex()] = LoopNodes; Loops.push_back(LoopNodes);
LoopStack.erase(It.base() - 1, LoopStack.end()); LoopStack.erase(It.base() - 1, LoopStack.end());
break; break;
} }
...@@ -155,5 +254,55 @@ LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) { ...@@ -155,5 +254,55 @@ LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
return nullptr; return nullptr;
} }
/// Builds the list of simple loops of \p Func from the loop bodies reported
/// by LoopAnalyzer.
///
/// The bodies are sorted by descending size and then examined one by one. A
/// body is kept only if exactly one CFG edge enters it from a node outside
/// the body; the target of that edge is the loop's Header and its source is
/// the PreHeader. Bodies with more than one entering edge are dropped, and so
/// are bodies with no entering edge at all (no header can be identified for
/// them, and dereferencing a null Header must be avoided).
///
/// \param Func the function whose CFG is analyzed.
/// \return one Loop (Header, PreHeader, Body) per retained loop body, in
///         descending order of body size.
CfgVector<Loop> ComputeLoopInfo(Cfg *Func) {
  auto LoopBodies = LoopAnalyzer(Func).getLoopBodies();

  CfgVector<Loop> Loops;
  Loops.reserve(LoopBodies.size());
  std::sort(
      LoopBodies.begin(), LoopBodies.end(),
      [](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) {
        return A.size() > B.size();
      });

  for (auto &LoopBody : LoopBodies) {
    // Find the unique node that is entered from outside the loop body.
    CfgNode *Header = nullptr;
    bool IsSimpleLoop = true;
    for (auto NodeIndex : LoopBody) {
      CfgNode *Cur = Func->getNodes()[NodeIndex];
      for (auto *Prev : Cur->getInEdges()) {
        if (LoopBody.find(Prev->getIndex()) ==
            LoopBody.end()) { // coming from outside
          if (Header == nullptr) {
            Header = Cur;
          } else {
            // A second entering edge: not a simple loop.
            Header = nullptr;
            IsSimpleLoop = false;
            break;
          }
        }
      }
      if (!IsSimpleLoop) {
        break;
      }
    }
    // Skip loops with several entering edges, and loops that are never
    // entered from outside the body (Header would be null below).
    if (!IsSimpleLoop || Header == nullptr)
      continue; // To next potential loop

    // The pre-header is the single predecessor of the header that lies
    // outside the loop body; it stays null if there is more than one.
    CfgNode *PreHeader = nullptr;
    for (auto *Prev : Header->getInEdges()) {
      if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) {
        if (PreHeader == nullptr) {
          PreHeader = Prev;
        } else {
          PreHeader = nullptr;
          break;
        }
      }
    }

    Loops.emplace_back(Header, PreHeader, LoopBody);
  }
  return Loops;
}
} // end of namespace Ice } // end of namespace Ice
...@@ -18,105 +18,16 @@ ...@@ -18,105 +18,16 @@
namespace Ice { namespace Ice {
/// Analyze a function's CFG for loops. The CFG must not change during the struct Loop {
/// lifetime of this object. Loop(CfgNode *Header, CfgNode *PreHeader, CfgUnorderedSet<SizeT> Body)
class LoopAnalyzer { : Header(Header), PreHeader(PreHeader), Body(Body) {}
LoopAnalyzer() = delete; CfgNode *Header;
LoopAnalyzer(const LoopAnalyzer &) = delete; CfgNode *PreHeader;
LoopAnalyzer &operator=(const LoopAnalyzer &) = delete; CfgUnorderedSet<SizeT> Body; // Node IDs
public:
explicit LoopAnalyzer(Cfg *Func);
/// Use Tarjan's strongly connected components algorithm to identify outermost
/// to innermost loops. By deleting the head of the loop from the graph, inner
/// loops can be found. This assumes that the head node is not shared between
/// loops but instead all paths to the head come from 'continue' constructs.
///
/// This only computes the loop nest depth within the function and does not
/// take into account whether the function was called from within a loop.
// TODO(ascull): this currently uses a extension of Tarjan's algorithm with
// is bounded linear. ncbray suggests another algorithm which is linear in
// practice but not bounded linear. I think it also finds dominators.
// http://lenx.100871.net/papers/loop-SAS.pdf
CfgUnorderedMap<SizeT, CfgVector<SizeT>> getLoopInfo() { return Loops; }
private:
void computeLoopNestDepth();
using IndexT = uint32_t;
static constexpr IndexT UndefinedIndex = 0;
static constexpr IndexT FirstDefinedIndex = 1;
// TODO(ascull): classify the other fields
class LoopNode {
LoopNode() = delete;
LoopNode operator=(const LoopNode &) = delete;
public:
explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
LoopNode(const LoopNode &) = default;
void reset();
NodeList::const_iterator successorsEnd() const;
NodeList::const_iterator currentSuccessor() const { return Succ; }
void nextSuccessor() { ++Succ; }
void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
bool isVisited() const { return Index != UndefinedIndex; }
IndexT getIndex() const { return Index; }
void tryLink(IndexT NewLink) {
if (NewLink < LowLink)
LowLink = NewLink;
}
IndexT getLowLink() const { return LowLink; }
void setOnStack(bool NewValue = true) { OnStack = NewValue; }
bool isOnStack() const { return OnStack; }
void setDeleted() { Deleted = true; }
bool isDeleted() const { return Deleted; }
void incrementLoopNestDepth();
bool hasSelfEdge() const;
CfgNode *getNode() { return BB; }
private:
CfgNode *BB;
NodeList::const_iterator Succ;
IndexT Index;
IndexT LowLink;
bool OnStack;
bool Deleted = false;
};
using LoopNodeList = CfgVector<LoopNode>;
using LoopNodePtrList = CfgVector<LoopNode *>;
/// Process the node as part as part of Tarjan's algorithm and return either a
/// node to recurse into or nullptr when the node has been fully processed.
LoopNode *processNode(LoopNode &Node);
/// The function to analyze for loops.
Cfg *const Func;
/// A list of decorated nodes in the same order as Func->getNodes() which
/// means the node's index will also be valid in this list.
LoopNodeList AllNodes;
/// This is used as a replacement for the call stack.
LoopNodePtrList WorkStack;
/// Track which loop a node belongs to.
LoopNodePtrList LoopStack;
/// The index to assign to the next visited node.
IndexT NextIndex = FirstDefinedIndex;
/// The number of nodes which have been marked deleted. This is used to track
/// when the iteration should end.
LoopNodePtrList::size_type NumDeletedNodes = 0;
/// Detailed loop information
CfgUnorderedMap<SizeT, CfgVector<SizeT>> Loops;
}; };
CfgVector<Loop> ComputeLoopInfo(Cfg *Func);
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICELOOPANALYZER_H #endif // SUBZERO_SRC_ICELOOPANALYZER_H
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment