Commit aa6c1093 by Andrew Scull

This improves the variable use weight by taking into account use in loops. It
further improves spec2k performance and fixes the regression in ammp. Loops are identified using an extension to Tarjan's algorithm. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1318553003.
parent efb89713
...@@ -195,6 +195,7 @@ SRCS = \ ...@@ -195,6 +195,7 @@ SRCS = \
IceInstX8664.cpp \ IceInstX8664.cpp \
IceIntrinsics.cpp \ IceIntrinsics.cpp \
IceLiveness.cpp \ IceLiveness.cpp \
IceLoopAnalyzer.cpp \
IceOperand.cpp \ IceOperand.cpp \
IceRegAlloc.cpp \ IceRegAlloc.cpp \
IceRNG.cpp \ IceRNG.cpp \
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "IceInst.h" #include "IceInst.h"
#include "IceInstVarIter.h" #include "IceInstVarIter.h"
#include "IceLiveness.h" #include "IceLiveness.h"
#include "IceLoopAnalyzer.h"
#include "IceOperand.h" #include "IceOperand.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
...@@ -463,10 +464,16 @@ void Cfg::genFrame() { ...@@ -463,10 +464,16 @@ void Cfg::genFrame() {
getTarget()->addEpilog(Node); getTarget()->addEpilog(Node);
} }
// This is a lightweight version of live-range-end calculation. Marks void Cfg::computeLoopNestDepth() {
// the last use of only those variables whose definition and uses are TimerMarker T(TimerStack::TT_computeLoopNestDepth, this);
// completely with a single block. It is a quick single pass and LoopAnalyzer LA(this);
// doesn't need to iterate until convergence. LA.computeLoopNestDepth();
}
// This is a lightweight version of live-range-end calculation. Marks the last
// use of only those variables whose definition and uses are completely with a
// single block. It is a quick single pass and doesn't need to iterate until
// convergence.
void Cfg::livenessLightweight() { void Cfg::livenessLightweight() {
TimerMarker T(TimerStack::TT_livenessLightweight, this); TimerMarker T(TimerStack::TT_livenessLightweight, this);
getVMetadata()->init(VMK_Uses); getVMetadata()->init(VMK_Uses);
...@@ -602,12 +609,11 @@ bool Cfg::validateLiveness() const { ...@@ -602,12 +609,11 @@ bool Cfg::validateLiveness() const {
} }
void Cfg::contractEmptyNodes() { void Cfg::contractEmptyNodes() {
// If we're decorating the asm output with register liveness info, // If we're decorating the asm output with register liveness info, this
// this information may become corrupted or incorrect after // information may become corrupted or incorrect after contracting nodes that
// contracting nodes that contain only redundant assignments. As // contain only redundant assignments. As such, we disable this pass when
// such, we disable this pass when DecorateAsm is specified. This // DecorateAsm is specified. This may make the resulting code look more
// may make the resulting code look more branchy, but it should have // branchy, but it should have no effect on the register assignments.
// no effect on the register assignments.
if (Ctx->getFlags().getDecorateAsm()) if (Ctx->getFlags().getDecorateAsm())
return; return;
for (CfgNode *Node : Nodes) { for (CfgNode *Node : Nodes) {
......
...@@ -86,7 +86,9 @@ public: ...@@ -86,7 +86,9 @@ public:
/// @{ /// @{
void setEntryNode(CfgNode *EntryNode) { Entry = EntryNode; } void setEntryNode(CfgNode *EntryNode) { Entry = EntryNode; }
CfgNode *getEntryNode() const { return Entry; } CfgNode *getEntryNode() const { return Entry; }
/// Create a node and append it to the end of the linearized list. /// Create a node and append it to the end of the linearized list. The loop
/// nest depth of the new node may not be valid if it is created after
/// computeLoopNestDepth.
CfgNode *makeNode(); CfgNode *makeNode();
SizeT getNumNodes() const { return Nodes.size(); } SizeT getNumNodes() const { return Nodes.size(); }
const NodeList &getNodes() const { return Nodes; } const NodeList &getNodes() const { return Nodes; }
...@@ -189,6 +191,7 @@ public: ...@@ -189,6 +191,7 @@ public:
void doNopInsertion(); void doNopInsertion();
void genCode(); void genCode();
void genFrame(); void genFrame();
void computeLoopNestDepth();
void livenessLightweight(); void livenessLightweight();
void liveness(LivenessMode Mode); void liveness(LivenessMode Mode);
bool validateLiveness() const; bool validateLiveness() const;
......
...@@ -219,6 +219,11 @@ void CfgNode::deletePhis() { ...@@ -219,6 +219,11 @@ void CfgNode::deletePhis() {
// not contain duplicates. // not contain duplicates.
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
CfgNode *NewNode = Func->makeNode(); CfgNode *NewNode = Func->makeNode();
// Depth is the minimum as it works if both are the same, but if one is
// outside the loop and the other is inside, the new node should be placed
// outside and not be executed multiple times within the loop.
NewNode->setLoopNestDepth(
std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
if (BuildDefs::dump()) if (BuildDefs::dump())
NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
std::to_string(EdgeIndex)); std::to_string(EdgeIndex));
...@@ -1175,9 +1180,11 @@ void CfgNode::dump(Cfg *Func) const { ...@@ -1175,9 +1180,11 @@ void CfgNode::dump(Cfg *Func) const {
Func->setCurrentNode(this); Func->setCurrentNode(this);
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
Liveness *Liveness = Func->getLiveness(); Liveness *Liveness = Func->getLiveness();
if (Func->isVerbose(IceV_Instructions)) { if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
Str << getName() << ":\n"; Str << getName() << ":\n";
} // Dump the loop nest depth
if (Func->isVerbose(IceV_Loop))
Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
// Dump list of predecessor nodes. // Dump list of predecessor nodes.
if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) { if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
Str << " // preds = "; Str << " // preds = ";
......
...@@ -46,6 +46,10 @@ public: ...@@ -46,6 +46,10 @@ public:
return ".L" + Func->getFunctionName() + "$" + getName(); return ".L" + Func->getFunctionName() + "$" + getName();
} }
/// Increase this node's recorded loop nest depth by one.
void incrementLoopNestDepth() { ++LoopNestDepth; }
/// Overwrite this node's recorded loop nest depth with NewDepth.
void setLoopNestDepth(SizeT NewDepth) { LoopNestDepth = NewDepth; }
/// Return this node's recorded loop nest depth.
SizeT getLoopNestDepth() const { return LoopNestDepth; }
/// The HasReturn flag indicates that this node contains a return /// The HasReturn flag indicates that this node contains a return
/// instruction and therefore needs an epilog. /// instruction and therefore needs an epilog.
void setHasReturn() { HasReturn = true; } void setHasReturn() { HasReturn = true; }
...@@ -111,6 +115,7 @@ private: ...@@ -111,6 +115,7 @@ private:
SizeT Number; /// label index SizeT Number; /// label index
Cfg::IdentifierIndexType NameIndex = Cfg::IdentifierIndexType NameIndex =
Cfg::IdentifierIndexInvalid; /// index into Cfg::NodeNames table Cfg::IdentifierIndexInvalid; /// index into Cfg::NodeNames table
SizeT LoopNestDepth = 0; /// the loop nest depth of this node
bool HasReturn = false; /// does this block need an epilog? bool HasReturn = false; /// does this block need an epilog?
bool NeedsPlacement = false; bool NeedsPlacement = false;
bool NeedsAlignment = false; /// is sandboxing required? bool NeedsAlignment = false; /// is sandboxing required?
......
...@@ -234,6 +234,7 @@ cl::list<Ice::VerboseItem> VerboseList( ...@@ -234,6 +234,7 @@ cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_Random, "random", "Randomization details"), clEnumValN(Ice::IceV_Random, "random", "Randomization details"),
clEnumValN(Ice::IceV_Folding, "fold", "Instruction folding details"), clEnumValN(Ice::IceV_Folding, "fold", "Instruction folding details"),
clEnumValN(Ice::IceV_RMW, "rmw", "ReadModifyWrite optimization"), clEnumValN(Ice::IceV_RMW, "rmw", "ReadModifyWrite optimization"),
clEnumValN(Ice::IceV_Loop, "loop", "Loop nest depth analysis"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"), clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_Most, "most", clEnumValN(Ice::IceV_Most, "most",
"Use all verbose options except 'regalloc'"), "Use all verbose options except 'regalloc'"),
......
...@@ -225,6 +225,7 @@ enum VerboseItem { ...@@ -225,6 +225,7 @@ enum VerboseItem {
IceV_Random = 1 << 10, IceV_Random = 1 << 10,
IceV_Folding = 1 << 11, IceV_Folding = 1 << 11,
IceV_RMW = 1 << 12, IceV_RMW = 1 << 12,
IceV_Loop = 1 << 13,
IceV_All = ~IceV_None, IceV_All = ~IceV_None,
IceV_Most = IceV_All & ~IceV_LinearScan IceV_Most = IceV_All & ~IceV_LinearScan
}; };
......
...@@ -412,6 +412,10 @@ InstStore::InstStore(Cfg *Func, Operand *Data, Operand *Addr) ...@@ -412,6 +412,10 @@ InstStore::InstStore(Cfg *Func, Operand *Data, Operand *Addr)
addSource(Data); addSource(Data);
} }
/// Return the operand in source slot 2 (the slot written by setRmwBeacon) as
/// a Variable, or nullptr if that operand is not a Variable.
Variable *InstStore::getRmwBeacon() const {
  Operand *BeaconOperand = getSrc(2);
  return llvm::dyn_cast<Variable>(BeaconOperand);
}
void InstStore::setRmwBeacon(Variable *Beacon) { void InstStore::setRmwBeacon(Variable *Beacon) {
Dest = llvm::dyn_cast<Variable>(getData()); Dest = llvm::dyn_cast<Variable>(getData());
Srcs[2] = Beacon; Srcs[2] = Beacon;
......
...@@ -693,7 +693,7 @@ public: ...@@ -693,7 +693,7 @@ public:
} }
Operand *getAddr() const { return getSrc(1); } Operand *getAddr() const { return getSrc(1); }
Operand *getData() const { return getSrc(0); } Operand *getData() const { return getSrc(0); }
Variable *getRmwBeacon() const { return llvm::dyn_cast<Variable>(getSrc(2)); } Variable *getRmwBeacon() const;
void setRmwBeacon(Variable *Beacon); void setRmwBeacon(Variable *Beacon);
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return Inst->getKind() == Store; } static bool classof(const Inst *Inst) { return Inst->getKind() == Store; }
......
//===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the loop analysis on the CFG.
///
//===----------------------------------------------------------------------===//
#include "IceLoopAnalyzer.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
namespace Ice {
/// Return the node to its unvisited state ready for a new traversal: rewind the
/// successor cursor to the first out-edge, clear the visit index and low link,
/// and mark it as not on the stack. Nodes already marked deleted are left
/// untouched.
void LoopAnalyzer::LoopNode::reset() {
  if (!Deleted) {
    Succ = BB->getOutEdges().begin();
    LowLink = UndefinedIndex;
    Index = UndefinedIndex;
    OnStack = false;
  }
}
/// Return the past-the-end iterator of the wrapped basic block's out-edges,
/// i.e. the value currentSuccessor() reaches once every successor was seen.
NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const {
  const auto &OutEdges = BB->getOutEdges();
  return OutEdges.end();
}
/// Forward to the wrapped basic block so that its loop nest depth is increased
/// by one.
void LoopAnalyzer::LoopNode::incrementLoopNestDepth() {
BB->incrementLoopNestDepth();
}
/// Build the analyzer for Func by wrapping each of the function's CFG nodes in
/// a LoopNode, in the same order as Func->getNodes().
LoopAnalyzer::LoopAnalyzer(Cfg *Func) : Func(Func) {
  const NodeList &CfgNodes = Func->getNodes();
  const auto NumNodes = CfgNodes.size();

  // Reserve all storage up front. Vectors are used rather than a stack
  // container because a stack cannot reserve capacity (nor bulk-erase, which
  // the analysis relies on later).
  AllNodes.reserve(NumNodes);
  WorkStack.reserve(NumNodes);
  LoopStack.reserve(NumNodes);

  // One LoopNode per basic block of the function's CFG.
  for (CfgNode *BB : CfgNodes)
    AllNodes.emplace_back(BB);
}
void LoopAnalyzer::computeLoopNestDepth() {
assert(AllNodes.size() == Func->getNodes().size());
assert(NextIndex == FirstDefinedIndex);
assert(NumDeletedNodes == 0);
while (NumDeletedNodes < AllNodes.size()) {
// Prepare to run Tarjan's
for (LoopNode &Node : AllNodes)
Node.reset();
assert(WorkStack.empty());
assert(LoopStack.empty());
for (LoopNode &Node : AllNodes) {
if (Node.isDeleted() || Node.isVisited())
continue;
WorkStack.push_back(&Node);
while (!WorkStack.empty()) {
LoopNode &WorkNode = *WorkStack.back();
if (LoopNode *Succ = processNode(WorkNode))
WorkStack.push_back(Succ);
else
WorkStack.pop_back();
}
}
}
}
/// Perform one step of the iterative Tarjan traversal on Node.
///
/// On the first call for a node it is given the next visit index and pushed
/// onto LoopStack. On later calls (after the traversal has returned from a
/// successor) the successor's low link is folded in and iteration over the
/// remaining successors resumes.
///
/// \returns an unvisited successor that the caller must process next, or
/// nullptr once Node has been fully processed.
///
/// When Node turns out to be the root of a strongly connected component:
///  - a component of several nodes is a loop: every member's loop nest depth
///    is incremented and the root (the loop head) is marked deleted so that
///    inner loops can be discovered by a later pass;
///  - a component consisting of Node alone is a loop only if Node has an edge
///    to itself, in which case its depth is incremented; either way the node
///    is marked deleted.
LoopAnalyzer::LoopNode *
LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
  if (!Node.isVisited()) {
    Node.visit(NextIndex++);
    LoopStack.push_back(&Node);
    Node.setOnStack();
  } else {
    // Returning to a node after having recursed into Succ so continue
    // iterating through successors after using the Succ.LowLink value that was
    // computed in the recursion.
    LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
    Node.tryLink(Succ.getLowLink());
    Node.nextSuccessor();
  }

  // Visit the successors and recurse into unvisited nodes. The recursion could
  // cause the iteration to be suspended but it will resume as the stack is
  // unwound.
  auto SuccEnd = Node.successorsEnd();
  for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) {
    LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];

    if (Succ.isDeleted())
      continue;

    if (!Succ.isVisited())
      return &Succ;
    else if (Succ.isOnStack())
      Node.tryLink(Succ.getIndex());
  }

  // Only the root of a strongly connected component has LowLink == Index.
  if (Node.getLowLink() != Node.getIndex())
    return nullptr;

  // A component made of this node alone: it is a loop only when the block
  // branches back to itself.
  if (LoopStack.back() == &Node) {
    LoopStack.back()->setOnStack(false);

    // AllNodes mirrors Func->getNodes() element for element, so Node's
    // position in AllNodes identifies the basic block it wraps.
    const auto NodePos =
        static_cast<LoopNodeList::size_type>(&Node - AllNodes.data());
    CfgNode *Self = Func->getNodes()[NodePos];
    for (CfgNode *Out : Self->getOutEdges()) {
      if (Out == Self) {
        Node.incrementLoopNestDepth();
        break;
      }
    }

    LoopStack.back()->setDeleted();
    ++NumDeletedNodes;
    LoopStack.pop_back();
    return nullptr;
  }

  // Reaching here means a loop has been found! It consists of the nodes on
  // the top of the stack, down until the current node being processed, Node,
  // is found.
  for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
    (*It)->setOnStack(false);
    (*It)->incrementLoopNestDepth();
    // Remove the loop from the stack and delete the head node
    if (*It == &Node) {
      (*It)->setDeleted();
      ++NumDeletedNodes;
      // It.base() - 1 is the forward iterator addressing the same element as
      // It, so this drops Node and everything above it in one erase.
      LoopStack.erase(It.base() - 1, LoopStack.end());
      break;
    }
  }

  return nullptr;
}
} // end of namespace Ice
//===- subzero/src/IceLoopAnalyzer.h - Loop Analysis ------------*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This analysis identifies loops in the CFG.
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICELOOPANALYZER_H
#define SUBZERO_SRC_ICELOOPANALYZER_H
#include "IceDefs.h"
namespace Ice {
/// Analyze a function's CFG for loops. The CFG must not change during the
/// lifetime of this object.
class LoopAnalyzer {
  LoopAnalyzer() = delete;
  LoopAnalyzer(const LoopAnalyzer &) = delete;
  LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;

public:
  explicit LoopAnalyzer(Cfg *Func);

  /// Use Tarjan's strongly connected components algorithm to identify outermost
  /// to innermost loops. By deleting the head of the loop from the graph, inner
  /// loops can be found. This assumes that the head node is not shared between
  /// loops but instead all paths to the head come from 'continue' constructs.
  ///
  /// This only computes the loop nest depth within the function and does not
  /// take into account whether the function was called from within a loop.
  void computeLoopNestDepth();

private:
  using IndexT = uint32_t;
  /// Index value meaning "not visited yet"; real visit indices start at
  /// FirstDefinedIndex.
  static constexpr IndexT UndefinedIndex = 0;
  static constexpr IndexT FirstDefinedIndex = 1;

  // TODO(ascull): classify the other fields
  /// A CFG node decorated with the per-node state of the traversal.
  class LoopNode {
    LoopNode() = delete;
    LoopNode &operator=(const LoopNode &) = delete;

  public:
    explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
    LoopNode(const LoopNode &) = default;

    /// Return the node to the unvisited state unless it has been deleted.
    void reset();

    NodeList::const_iterator successorsEnd() const;
    NodeList::const_iterator currentSuccessor() const { return Succ; }
    void nextSuccessor() { ++Succ; }

    /// Mark the node visited; its index and low link both start at VisitIndex.
    void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
    bool isVisited() const { return Index != UndefinedIndex; }
    IndexT getIndex() const { return Index; }

    /// Lower the low link to NewLink if NewLink is smaller.
    void tryLink(IndexT NewLink) {
      if (NewLink < LowLink)
        LowLink = NewLink;
    }
    IndexT getLowLink() const { return LowLink; }

    void setOnStack(bool NewValue = true) { OnStack = NewValue; }
    bool isOnStack() const { return OnStack; }

    void setDeleted() { Deleted = true; }
    bool isDeleted() const { return Deleted; }

    void incrementLoopNestDepth();

  private:
    /// The basic block this node decorates.
    CfgNode *BB;
    /// Cursor into BB's out-edges: the successor currently being examined.
    NodeList::const_iterator Succ;
    /// Visit index, or UndefinedIndex while unvisited.
    IndexT Index;
    /// Smallest index recorded through visit() and tryLink().
    IndexT LowLink;
    bool OnStack;
    /// Once set, reset() no longer touches this node.
    bool Deleted = false;
  };

  using LoopNodeList = std::vector<LoopNode, CfgLocalAllocator<LoopNode>>;
  using LoopNodePtrList =
      std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>>;

  /// Process the node as part of Tarjan's algorithm and return either a node
  /// to recurse into or nullptr when the node has been fully processed.
  LoopNode *processNode(LoopNode &Node);

  /// The function to analyze for loops.
  Cfg *const Func;
  /// A list of decorated nodes in the same order as Func->getNodes() which
  /// means the node's index will also be valid in this list.
  LoopNodeList AllNodes;
  /// This is used as a replacement for the call stack.
  LoopNodePtrList WorkStack;
  /// Track which loop a node belongs to.
  LoopNodePtrList LoopStack;
  /// The index to assign to the next visited node.
  IndexT NextIndex = FirstDefinedIndex;
  /// The number of nodes which have been marked deleted. This is used to track
  /// when the iteration should end.
  LoopNodePtrList::size_type NumDeletedNodes = 0;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICELOOPANALYZER_H
...@@ -148,18 +148,26 @@ Variable *Variable::asType(Type Ty) { ...@@ -148,18 +148,26 @@ Variable *Variable::asType(Type Ty) {
RegWeight Variable::getWeight(const Cfg *Func) const { RegWeight Variable::getWeight(const Cfg *Func) const {
VariablesMetadata *VMetadata = Func->getVMetadata(); VariablesMetadata *VMetadata = Func->getVMetadata();
return RegWeight(mustHaveReg() return mustHaveReg() ? RegWeight(RegWeight::Inf)
? RegWeight::Inf : mustNotHaveReg() ? RegWeight(RegWeight::Zero)
: mustNotHaveReg() ? RegWeight::Zero : VMetadata->getUseWeight(this);
: VMetadata->getUseWeight(this));
} }
void VariableTracking::markUse(MetadataKind TrackingKind, const Inst *Instr, void VariableTracking::markUse(MetadataKind TrackingKind, const Inst *Instr,
CfgNode *Node, bool IsImplicit) { CfgNode *Node, bool IsImplicit) {
(void)TrackingKind; (void)TrackingKind;
// TODO(ascull): get the loop nest depth from CfgNode // Increment the use weight depending on the loop nest depth. The weight is
UseWeight += 1; // exponential in the nest depth as inner loops are expected to be executed
// an exponentially greater number of times.
constexpr uint32_t LogLoopTripCountEstimate = 2; // 2^2 = 4
constexpr SizeT MaxShift = sizeof(uint32_t) * CHAR_BIT - 1;
constexpr SizeT MaxLoopNestDepth = MaxShift / LogLoopTripCountEstimate;
const uint32_t LoopNestDepth =
std::min(Node->getLoopNestDepth(), MaxLoopNestDepth);
const uint32_t ThisUseWeight = uint32_t(1)
<< LoopNestDepth * LogLoopTripCountEstimate;
UseWeight.addWeight(ThisUseWeight);
if (MultiBlock == MBS_MultiBlock) if (MultiBlock == MBS_MultiBlock)
return; return;
...@@ -391,9 +399,9 @@ CfgNode *VariablesMetadata::getLocalUseNode(const Variable *Var) const { ...@@ -391,9 +399,9 @@ CfgNode *VariablesMetadata::getLocalUseNode(const Variable *Var) const {
return Metadata[VarNum].getNode(); return Metadata[VarNum].getNode();
} }
uint32_t VariablesMetadata::getUseWeight(const Variable *Var) const { RegWeight VariablesMetadata::getUseWeight(const Variable *Var) const {
if (!isTracked(Var)) if (!isTracked(Var))
return 1; // conservative answer return RegWeight(1); // conservative answer
SizeT VarNum = Var->getIndex(); SizeT VarNum = Var->getIndex();
return Metadata[VarNum].getUseWeight(); return Metadata[VarNum].getUseWeight();
} }
......
...@@ -321,13 +321,15 @@ public: ...@@ -321,13 +321,15 @@ public:
explicit RegWeight(uint32_t Weight) : Weight(Weight) {} explicit RegWeight(uint32_t Weight) : Weight(Weight) {}
RegWeight(const RegWeight &) = default; RegWeight(const RegWeight &) = default;
RegWeight &operator=(const RegWeight &) = default; RegWeight &operator=(const RegWeight &) = default;
const static uint32_t Inf = ~0; /// Force regalloc to give a register const static uint32_t Inf = ~0; /// Force regalloc to give a register
const static uint32_t Zero = 0; /// Force regalloc NOT to give a register const static uint32_t Zero = 0; /// Force regalloc NOT to give a register
const static uint32_t Max = Inf - 1; /// Max natural weight.
void addWeight(uint32_t Delta) { void addWeight(uint32_t Delta) {
if (Delta == Inf) if (Delta == Inf)
Weight = Inf; Weight = Inf;
else if (Weight != Inf) else if (Weight != Inf)
Weight += Delta; if (Utils::add_overflow(Weight, Delta, &Weight) || Weight == Inf)
Weight = Max;
} }
void addWeight(const RegWeight &Other) { addWeight(Other.Weight); } void addWeight(const RegWeight &Other) { addWeight(Other.Weight); }
void setWeight(uint32_t Val) { Weight = Val; } void setWeight(uint32_t Val) { Weight = Val; }
...@@ -579,7 +581,7 @@ public: ...@@ -579,7 +581,7 @@ public:
const Inst *getSingleDefinition() const; const Inst *getSingleDefinition() const;
const InstDefList &getLatterDefinitions() const { return Definitions; } const InstDefList &getLatterDefinitions() const { return Definitions; }
CfgNode *getNode() const { return SingleUseNode; } CfgNode *getNode() const { return SingleUseNode; }
uint32_t getUseWeight() const { return UseWeight; } RegWeight getUseWeight() const { return UseWeight; }
void markUse(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node, void markUse(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node,
bool IsImplicit); bool IsImplicit);
void markDef(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node); void markDef(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node);
...@@ -594,7 +596,7 @@ private: ...@@ -594,7 +596,7 @@ private:
InstDefList Definitions; /// Only used if Kind==VMK_All InstDefList Definitions; /// Only used if Kind==VMK_All
const Inst *FirstOrSingleDefinition = const Inst *FirstOrSingleDefinition =
nullptr; /// Is a copy of Definitions[0] if Kind==VMK_All nullptr; /// Is a copy of Definitions[0] if Kind==VMK_All
uint32_t UseWeight = 0; RegWeight UseWeight;
}; };
/// VariablesMetadata analyzes and summarizes the metadata for the complete set /// VariablesMetadata analyzes and summarizes the metadata for the complete set
...@@ -649,7 +651,7 @@ public: ...@@ -649,7 +651,7 @@ public:
/// Returns the total use weight computed as the sum of uses multiplied by a /// Returns the total use weight computed as the sum of uses multiplied by a
/// loop nest depth factor for each use. /// loop nest depth factor for each use.
uint32_t getUseWeight(const Variable *Var) const; RegWeight getUseWeight(const Variable *Var) const;
private: private:
const Cfg *Func; const Cfg *Func;
......
//===- subzero/src/IceSwitchLowering.cpp - Switch lowering -----------------==// //===- subzero/src/IceSwitchLowering.cpp - Switch lowering ----------------===//
// //
// The Subzero Code Generator // The Subzero Code Generator
// //
......
...@@ -325,6 +325,13 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { ...@@ -325,6 +325,13 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() {
Func->dump("After Phi lowering"); Func->dump("After Phi lowering");
} }
// Run this early so it can be used to focus optimizations on potentially hot
// code.
// TODO(stichnot,ascull): currently only used for regalloc not expensive high
// level optimizations which could be focused on potentially hot code.
Func->computeLoopNestDepth();
Func->dump("After loop nest depth analysis");
// Address mode optimization. // Address mode optimization.
Func->getVMetadata()->init(VMK_SingleDefs); Func->getVMetadata()->init(VMK_SingleDefs);
Func->doAddressOpt(); Func->doAddressOpt();
...@@ -365,8 +372,9 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { ...@@ -365,8 +372,9 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() {
return; return;
Func->dump("After x86 codegen"); Func->dump("After x86 codegen");
// Register allocation. This requires instruction renumbering and full // Register allocation. This requires instruction renumbering and full
// liveness analysis. // liveness analysis. Loops must be identified before liveness so variable
// use weights are correct.
Func->renumberInstructions(); Func->renumberInstructions();
if (Func->hasError()) if (Func->hasError())
return; return;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
X(O2) \ X(O2) \
X(Om1) \ X(Om1) \
X(advancedPhiLowering) \ X(advancedPhiLowering) \
X(computeLoopNestDepth) \
X(convertToIce) \ X(convertToIce) \
X(deletePhis) \ X(deletePhis) \
X(doAddressOpt) \ X(doAddressOpt) \
......
...@@ -70,6 +70,18 @@ public: ...@@ -70,6 +70,18 @@ public:
(X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y))); (X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
} }
/// Adds x to y and stores the (wrapped, modulo 2^32) result in *sum.
///
/// \param x   first addend
/// \param y   second addend
/// \param sum receives x + y; must not be null
/// \returns true if the addition overflowed, false otherwise.
static inline bool add_overflow(uint32_t x, uint32_t y, uint32_t *sum) {
  static_assert(std::is_same<uint32_t, unsigned>::value, "Must match type");
  // __has_builtin is itself an extension: using it in an #if on a compiler
  // that does not define it is a preprocessing error, so test for it first.
#if defined(__has_builtin)
#if __has_builtin(__builtin_uadd_overflow)
  return __builtin_uadd_overflow(x, y, sum);
#else
  *sum = x + y;
  // Unsigned addition wraps, so the sum is smaller than an addend exactly
  // when the true result did not fit.
  return *sum < x;
#endif
#else
  *sum = x + y;
  // Unsigned addition wraps, so the sum is smaller than an addend exactly
  // when the true result did not fit.
  return *sum < x;
#endif
}
/// Return true if X is already aligned by N, where N is a power of 2. /// Return true if X is already aligned by N, where N is a power of 2.
template <typename T> static inline bool IsAligned(T X, intptr_t N) { template <typename T> static inline bool IsAligned(T X, intptr_t N) {
assert(llvm::isPowerOf2_64(N)); assert(llvm::isPowerOf2_64(N));
......
; Test that the loop nest depth is correctly calculated for basic blocks.
; REQUIRES: allow_dump
; Single threaded so that the dumps used for checking happen in order
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 --verbose=loop \
; RUN: --threads=0 | FileCheck %s
define void @test_single_loop(i32 %a32) {
entry:
%a = trunc i32 %a32 to i1
br label %loop0
loop0: ; <-+
br label %loop1 ; |
loop1: ; |
br i1 %a, label %loop0, label %out ; --+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_single_loop_with_continue(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0
loop0: ; <-+
br label %loop1 ; |
loop1: ; |
br i1 %a, label %loop0, label %loop2 ; --+
loop2: ; |
br i1 %b, label %loop0, label %out ; --+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop2:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_multiple_exits(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0
loop0: ; <-+
br label %loop1 ; |
loop1: ; |
br i1 %a, label %loop2, label %out ; --+-+
loop2: ; | |
br i1 %b, label %loop0, label %out ; --+ |
; |
out: ; <---+
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop2:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_two_nested_loops(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0_0
loop0_0: ; <---+
br label %loop1_0 ; |
loop1_0: ; <-+ |
br label %loop1_1 ; | |
loop1_1: ; | |
br i1 %a, label %loop1_0, label %loop0_1 ; --+ |
loop0_1: ; |
br i1 %b, label %loop0_0, label %out ; ----+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_two_nested_loops_with_continue(i32 %a32, i32 %b32, i32 %c32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
%c = trunc i32 %c32 to i1
br label %loop0_0
loop0_0: ; <---+
br label %loop1_0 ; |
loop1_0: ; <-+ |
br label %loop1_1 ; | |
loop1_1: ; | |
br i1 %a, label %loop1_0, label %loop1_2 ; --+ |
loop1_2: ; | |
br i1 %a, label %loop1_0, label %loop0_1 ; --+ |
loop0_1: ; |
br i1 %b, label %loop0_0, label %out ; ----+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_2:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_multiple_nested_loops(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0_0
loop0_0: ; <---+
br label %loop1_0 ; |
loop1_0: ; <-+ |
br label %loop1_1 ; | |
loop1_1: ; | |
br i1 %a, label %loop1_0, label %loop0_1 ; --+ |
loop0_1: ; |
br label %loop2_0 ; |
loop2_0: ; <-+ |
br label %loop2_1 ; | |
loop2_1: ; | |
br i1 %a, label %loop2_0, label %loop0_2 ; --+ |
loop0_2: ; |
br i1 %b, label %loop0_0, label %out ; ----+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop2_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop2_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_2:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_three_nested_loops(i32 %a32, i32 %b32, i32 %c32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
%c = trunc i32 %c32 to i1
br label %loop0_0
loop0_0: ; <-----+
br label %loop1_0 ; |
loop1_0: ; <---+ |
br label %loop2_0 ; | |
loop2_0: ; <-+ | |
br label %loop2_1 ; | | |
loop2_1: ; | | |
br i1 %a, label %loop2_0, label %loop1_1 ; --+ | |
loop1_1: ; | |
br i1 %b, label %loop1_0, label %loop0_1 ; ----+ |
loop0_1: ; |
br i1 %c, label %loop0_0, label %out ; ------+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop2_0:
; CHECK-NEXT: LoopNestDepth = 3
; CHECK-NEXT: loop2_1:
; CHECK-NEXT: LoopNestDepth = 3
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_diamond(i32 %a32) {
entry:
%a = trunc i32 %a32 to i1
br i1 %a, label %left, label %right
left:
br label %out
right:
br label %out
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: left:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: right:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment