Commit aa6c1093 by Andrew Scull

This improves the variable use weight by taking into account use in loops. It

further improves spec2k performance and fixes the regression in ammp. Loops are identified using an extension to Tarjan's algorithm. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1318553003.
parent efb89713
......@@ -195,6 +195,7 @@ SRCS = \
IceInstX8664.cpp \
IceIntrinsics.cpp \
IceLiveness.cpp \
IceLoopAnalyzer.cpp \
IceOperand.cpp \
IceRegAlloc.cpp \
IceRNG.cpp \
......
......@@ -24,6 +24,7 @@
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceLoopAnalyzer.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"
......@@ -463,10 +464,16 @@ void Cfg::genFrame() {
getTarget()->addEpilog(Node);
}
// This is a lightweight version of live-range-end calculation. Marks
// the last use of only those variables whose definition and uses are
// completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence.
void Cfg::computeLoopNestDepth() {
TimerMarker T(TimerStack::TT_computeLoopNestDepth, this);
LoopAnalyzer LA(this);
LA.computeLoopNestDepth();
}
// This is a lightweight version of live-range-end calculation. Marks the last
// use of only those variables whose definition and uses are completely with a
// single block. It is a quick single pass and doesn't need to iterate until
// convergence.
void Cfg::livenessLightweight() {
TimerMarker T(TimerStack::TT_livenessLightweight, this);
getVMetadata()->init(VMK_Uses);
......@@ -602,12 +609,11 @@ bool Cfg::validateLiveness() const {
}
void Cfg::contractEmptyNodes() {
// If we're decorating the asm output with register liveness info,
// this information may become corrupted or incorrect after
// contracting nodes that contain only redundant assignments. As
// such, we disable this pass when DecorateAsm is specified. This
// may make the resulting code look more branchy, but it should have
// no effect on the register assignments.
// If we're decorating the asm output with register liveness info, this
// information may become corrupted or incorrect after contracting nodes that
// contain only redundant assignments. As such, we disable this pass when
// DecorateAsm is specified. This may make the resulting code look more
// branchy, but it should have no effect on the register assignments.
if (Ctx->getFlags().getDecorateAsm())
return;
for (CfgNode *Node : Nodes) {
......
......@@ -86,7 +86,9 @@ public:
/// @{
void setEntryNode(CfgNode *EntryNode) { Entry = EntryNode; }
CfgNode *getEntryNode() const { return Entry; }
/// Create a node and append it to the end of the linearized list.
/// Create a node and append it to the end of the linearized list. The loop
/// nest depth of the new node may not be valid if it is created after
/// computeLoopNestDepth.
CfgNode *makeNode();
SizeT getNumNodes() const { return Nodes.size(); }
const NodeList &getNodes() const { return Nodes; }
......@@ -189,6 +191,7 @@ public:
void doNopInsertion();
void genCode();
void genFrame();
void computeLoopNestDepth();
void livenessLightweight();
void liveness(LivenessMode Mode);
bool validateLiveness() const;
......
......@@ -219,6 +219,11 @@ void CfgNode::deletePhis() {
// not contain duplicates.
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
CfgNode *NewNode = Func->makeNode();
// Depth is the minimum as it works if both are the same, but if one is
// outside the loop and the other is inside, the new node should be placed
// outside and not be executed multiple times within the loop.
NewNode->setLoopNestDepth(
std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
if (BuildDefs::dump())
NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
std::to_string(EdgeIndex));
......@@ -1175,9 +1180,11 @@ void CfgNode::dump(Cfg *Func) const {
Func->setCurrentNode(this);
Ostream &Str = Func->getContext()->getStrDump();
Liveness *Liveness = Func->getLiveness();
if (Func->isVerbose(IceV_Instructions)) {
if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
Str << getName() << ":\n";
}
// Dump the loop nest depth
if (Func->isVerbose(IceV_Loop))
Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
// Dump list of predecessor nodes.
if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
Str << " // preds = ";
......
......@@ -46,6 +46,10 @@ public:
return ".L" + Func->getFunctionName() + "$" + getName();
}
void incrementLoopNestDepth() { ++LoopNestDepth; }
void setLoopNestDepth(SizeT NewDepth) { LoopNestDepth = NewDepth; }
SizeT getLoopNestDepth() const { return LoopNestDepth; }
/// The HasReturn flag indicates that this node contains a return
/// instruction and therefore needs an epilog.
void setHasReturn() { HasReturn = true; }
......@@ -111,6 +115,7 @@ private:
SizeT Number; /// label index
Cfg::IdentifierIndexType NameIndex =
Cfg::IdentifierIndexInvalid; /// index into Cfg::NodeNames table
SizeT LoopNestDepth = 0; /// the loop nest depth of this node
bool HasReturn = false; /// does this block need an epilog?
bool NeedsPlacement = false;
bool NeedsAlignment = false; /// is sandboxing required?
......
......@@ -234,6 +234,7 @@ cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_Random, "random", "Randomization details"),
clEnumValN(Ice::IceV_Folding, "fold", "Instruction folding details"),
clEnumValN(Ice::IceV_RMW, "rmw", "ReadModifyWrite optimization"),
clEnumValN(Ice::IceV_Loop, "loop", "Loop nest depth analysis"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_Most, "most",
"Use all verbose options except 'regalloc'"),
......
......@@ -225,6 +225,7 @@ enum VerboseItem {
IceV_Random = 1 << 10,
IceV_Folding = 1 << 11,
IceV_RMW = 1 << 12,
IceV_Loop = 1 << 13,
IceV_All = ~IceV_None,
IceV_Most = IceV_All & ~IceV_LinearScan
};
......
......@@ -412,6 +412,10 @@ InstStore::InstStore(Cfg *Func, Operand *Data, Operand *Addr)
addSource(Data);
}
Variable *InstStore::getRmwBeacon() const {
return llvm::dyn_cast<Variable>(getSrc(2));
}
void InstStore::setRmwBeacon(Variable *Beacon) {
Dest = llvm::dyn_cast<Variable>(getData());
Srcs[2] = Beacon;
......
......@@ -693,7 +693,7 @@ public:
}
Operand *getAddr() const { return getSrc(1); }
Operand *getData() const { return getSrc(0); }
Variable *getRmwBeacon() const { return llvm::dyn_cast<Variable>(getSrc(2)); }
Variable *getRmwBeacon() const;
void setRmwBeacon(Variable *Beacon);
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return Inst->getKind() == Store; }
......
//===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the loop analysis on the CFG.
///
//===----------------------------------------------------------------------===//
#include "IceLoopAnalyzer.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
namespace Ice {
void LoopAnalyzer::LoopNode::reset() {
if (Deleted)
return;
Succ = BB->getOutEdges().begin();
Index = LowLink = UndefinedIndex;
OnStack = false;
}
NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const {
return BB->getOutEdges().end();
}
void LoopAnalyzer::LoopNode::incrementLoopNestDepth() {
BB->incrementLoopNestDepth();
}
LoopAnalyzer::LoopAnalyzer(Cfg *Func) : Func(Func) {
const NodeList &Nodes = Func->getNodes();
// Allocate memory ahead of time. This is why a vector is used instead of a
// stack which doesn't support reserving (or bulk erasure used below).
AllNodes.reserve(Nodes.size());
WorkStack.reserve(Nodes.size());
LoopStack.reserve(Nodes.size());
// Create the LoopNodes from the function's CFG
for (CfgNode *Node : Nodes)
AllNodes.emplace_back(Node);
}
void LoopAnalyzer::computeLoopNestDepth() {
assert(AllNodes.size() == Func->getNodes().size());
assert(NextIndex == FirstDefinedIndex);
assert(NumDeletedNodes == 0);
while (NumDeletedNodes < AllNodes.size()) {
// Prepare to run Tarjan's
for (LoopNode &Node : AllNodes)
Node.reset();
assert(WorkStack.empty());
assert(LoopStack.empty());
for (LoopNode &Node : AllNodes) {
if (Node.isDeleted() || Node.isVisited())
continue;
WorkStack.push_back(&Node);
while (!WorkStack.empty()) {
LoopNode &WorkNode = *WorkStack.back();
if (LoopNode *Succ = processNode(WorkNode))
WorkStack.push_back(Succ);
else
WorkStack.pop_back();
}
}
}
}
LoopAnalyzer::LoopNode *
LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
if (!Node.isVisited()) {
Node.visit(NextIndex++);
LoopStack.push_back(&Node);
Node.setOnStack();
} else {
// Returning to a node after having recursed into Succ so continue
// iterating through successors after using the Succ.LowLink value that was
// computed in the recursion.
LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
Node.tryLink(Succ.getLowLink());
Node.nextSuccessor();
}
// Visit the successors and recurse into unvisited nodes. The recursion could
// cause the iteration to be suspended but it will resume as the stack is
// unwound.
auto SuccEnd = Node.successorsEnd();
for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) {
LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
if (Succ.isDeleted())
continue;
if (!Succ.isVisited())
return &Succ;
else if (Succ.isOnStack())
Node.tryLink(Succ.getIndex());
}
if (Node.getLowLink() != Node.getIndex())
return nullptr;
// Single node means no loop in the CFG
if (LoopStack.back() == &Node) {
LoopStack.back()->setOnStack(false);
LoopStack.back()->setDeleted();
++NumDeletedNodes;
LoopStack.pop_back();
return nullptr;
}
// Reaching here means a loop has been found! It consists of the nodes on
// the top of the stack, down until the current node being processed, Node,
// is found.
for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
(*It)->setOnStack(false);
(*It)->incrementLoopNestDepth();
// Remove the loop from the stack and delete the head node
if (*It == &Node) {
(*It)->setDeleted();
++NumDeletedNodes;
LoopStack.erase(It.base() - 1, LoopStack.end());
break;
}
}
return nullptr;
}
} // end of namespace Ice
//===- subzero/src/IceLoopAnalyzer.h - Loop Analysis ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This analysis identifies loops in the CFG.
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICELOOPANALYZER_H
#define SUBZERO_SRC_ICELOOPANALYZER_H
#include "IceDefs.h"
namespace Ice {
/// Analyze a function's CFG for loops. The CFG must not change during the
/// lifetime of this object.
class LoopAnalyzer {
LoopAnalyzer() = delete;
LoopAnalyzer(const LoopAnalyzer &) = delete;
LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
public:
explicit LoopAnalyzer(Cfg *Func);
/// Use Tarjan's strongly connected components algorithm to identify outermost
/// to innermost loops. By deleting the head of the loop from the graph, inner
/// loops can be found. This assumes that the head node is not shared between
/// loops but instead all paths to the head come from 'continue' constructs.
///
/// This only computes the loop nest depth within the function and does not
/// take into account whether the function was called from within a loop.
void computeLoopNestDepth();
private:
using IndexT = uint32_t;
static constexpr IndexT UndefinedIndex = 0;
static constexpr IndexT FirstDefinedIndex = 1;
// TODO(ascull): classify the other fields
class LoopNode {
LoopNode() = delete;
LoopNode operator=(const LoopNode &) = delete;
public:
explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
LoopNode(const LoopNode &) = default;
void reset();
NodeList::const_iterator successorsEnd() const;
NodeList::const_iterator currentSuccessor() const { return Succ; }
void nextSuccessor() { ++Succ; }
void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
bool isVisited() const { return Index != UndefinedIndex; }
IndexT getIndex() const { return Index; }
void tryLink(IndexT NewLink) {
if (NewLink < LowLink)
LowLink = NewLink;
}
IndexT getLowLink() const { return LowLink; }
void setOnStack(bool NewValue = true) { OnStack = NewValue; }
bool isOnStack() const { return OnStack; }
void setDeleted() { Deleted = true; }
bool isDeleted() const { return Deleted; }
void incrementLoopNestDepth();
private:
CfgNode *BB;
NodeList::const_iterator Succ;
IndexT Index;
IndexT LowLink;
bool OnStack;
bool Deleted = false;
};
using LoopNodeList = std::vector<LoopNode, CfgLocalAllocator<LoopNode>>;
using LoopNodePtrList =
std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>>;
/// Process the node as part as part of Tarjan's algorithm and return either
/// a node to recurse into or nullptr when the node has been fully processed.
LoopNode *processNode(LoopNode &Node);
/// The fuction to analyze for loops.
Cfg *const Func;
/// A list of decorated nodes in the same order as Func->getNodes() which
/// means the node's index will also be valid in this list.
LoopNodeList AllNodes;
/// This is used as a replacement for the call stack.
LoopNodePtrList WorkStack;
/// Track which loop a node belongs to.
LoopNodePtrList LoopStack;
/// The index to assign to the next visited node.
IndexT NextIndex = FirstDefinedIndex;
/// The number of nodes which have been marked deleted. This is used to track
/// when the iteration should end.
LoopNodePtrList::size_type NumDeletedNodes = 0;
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICELOOPANALYZER_H
......@@ -148,18 +148,26 @@ Variable *Variable::asType(Type Ty) {
RegWeight Variable::getWeight(const Cfg *Func) const {
VariablesMetadata *VMetadata = Func->getVMetadata();
return RegWeight(mustHaveReg()
? RegWeight::Inf
: mustNotHaveReg() ? RegWeight::Zero
: VMetadata->getUseWeight(this));
return mustHaveReg() ? RegWeight(RegWeight::Inf)
: mustNotHaveReg() ? RegWeight(RegWeight::Zero)
: VMetadata->getUseWeight(this);
}
void VariableTracking::markUse(MetadataKind TrackingKind, const Inst *Instr,
CfgNode *Node, bool IsImplicit) {
(void)TrackingKind;
// TODO(ascull): get the loop nest depth from CfgNode
UseWeight += 1;
// Increment the use weight depending on the loop nest depth. The weight is
// exponential in the nest depth as inner loops are expected to be executed
// an exponentially greater number of times.
constexpr uint32_t LogLoopTripCountEstimate = 2; // 2^2 = 4
constexpr SizeT MaxShift = sizeof(uint32_t) * CHAR_BIT - 1;
constexpr SizeT MaxLoopNestDepth = MaxShift / LogLoopTripCountEstimate;
const uint32_t LoopNestDepth =
std::min(Node->getLoopNestDepth(), MaxLoopNestDepth);
const uint32_t ThisUseWeight = uint32_t(1)
<< LoopNestDepth * LogLoopTripCountEstimate;
UseWeight.addWeight(ThisUseWeight);
if (MultiBlock == MBS_MultiBlock)
return;
......@@ -391,9 +399,9 @@ CfgNode *VariablesMetadata::getLocalUseNode(const Variable *Var) const {
return Metadata[VarNum].getNode();
}
uint32_t VariablesMetadata::getUseWeight(const Variable *Var) const {
RegWeight VariablesMetadata::getUseWeight(const Variable *Var) const {
if (!isTracked(Var))
return 1; // conservative answer
return RegWeight(1); // conservative answer
SizeT VarNum = Var->getIndex();
return Metadata[VarNum].getUseWeight();
}
......
......@@ -321,13 +321,15 @@ public:
explicit RegWeight(uint32_t Weight) : Weight(Weight) {}
RegWeight(const RegWeight &) = default;
RegWeight &operator=(const RegWeight &) = default;
const static uint32_t Inf = ~0; /// Force regalloc to give a register
const static uint32_t Zero = 0; /// Force regalloc NOT to give a register
const static uint32_t Inf = ~0; /// Force regalloc to give a register
const static uint32_t Zero = 0; /// Force regalloc NOT to give a register
const static uint32_t Max = Inf - 1; /// Max natural weight.
void addWeight(uint32_t Delta) {
if (Delta == Inf)
Weight = Inf;
else if (Weight != Inf)
Weight += Delta;
if (Utils::add_overflow(Weight, Delta, &Weight) || Weight == Inf)
Weight = Max;
}
void addWeight(const RegWeight &Other) { addWeight(Other.Weight); }
void setWeight(uint32_t Val) { Weight = Val; }
......@@ -579,7 +581,7 @@ public:
const Inst *getSingleDefinition() const;
const InstDefList &getLatterDefinitions() const { return Definitions; }
CfgNode *getNode() const { return SingleUseNode; }
uint32_t getUseWeight() const { return UseWeight; }
RegWeight getUseWeight() const { return UseWeight; }
void markUse(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node,
bool IsImplicit);
void markDef(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node);
......@@ -594,7 +596,7 @@ private:
InstDefList Definitions; /// Only used if Kind==VMK_All
const Inst *FirstOrSingleDefinition =
nullptr; /// Is a copy of Definitions[0] if Kind==VMK_All
uint32_t UseWeight = 0;
RegWeight UseWeight;
};
/// VariablesMetadata analyzes and summarizes the metadata for the complete set
......@@ -649,7 +651,7 @@ public:
/// Returns the total use weight computed as the sum of uses multiplied by a
/// loop nest depth factor for each use.
uint32_t getUseWeight(const Variable *Var) const;
RegWeight getUseWeight(const Variable *Var) const;
private:
const Cfg *Func;
......
//===- subzero/src/IceSwitchLowering.cpp - Switch lowering -----------------==//
//===- subzero/src/IceSwitchLowering.cpp - Switch lowering ----------------===//
//
// The Subzero Code Generator
//
......
......@@ -325,6 +325,13 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() {
Func->dump("After Phi lowering");
}
// Run this early so it can be used to focus optimizations on potentially hot
// code.
// TODO(stichnot,ascull): currently only used for regalloc not expensive high
// level optimizations which could be focused on potentially hot code.
Func->computeLoopNestDepth();
Func->dump("After loop nest depth analysis");
// Address mode optimization.
Func->getVMetadata()->init(VMK_SingleDefs);
Func->doAddressOpt();
......@@ -365,8 +372,9 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() {
return;
Func->dump("After x86 codegen");
// Register allocation. This requires instruction renumbering and full
// liveness analysis.
// Register allocation. This requires instruction renumbering and full
// liveness analysis. Loops must be identified before liveness so variable
// use weights are correct.
Func->renumberInstructions();
if (Func->hasError())
return;
......
......@@ -20,6 +20,7 @@
X(O2) \
X(Om1) \
X(advancedPhiLowering) \
X(computeLoopNestDepth) \
X(convertToIce) \
X(deletePhis) \
X(doAddressOpt) \
......
......@@ -70,6 +70,18 @@ public:
(X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
}
/// Adds x to y and stores the result in sum. Returns true if the addition
/// overflowed.
static inline bool add_overflow(uint32_t x, uint32_t y, uint32_t *sum) {
static_assert(std::is_same<uint32_t, unsigned>::value, "Must match type");
#if __has_builtin(__builtin_uadd_overflow)
return __builtin_uadd_overflow(x, y, sum);
#else
*sum = x + y;
return WouldOverflowAdd(x, y);
#endif
}
/// Return true if X is already aligned by N, where N is a power of 2.
template <typename T> static inline bool IsAligned(T X, intptr_t N) {
assert(llvm::isPowerOf2_64(N));
......
; Test the the loop nest depth is correctly calculated for basic blocks.
; REQUIRES: allow_dump
; Single threaded so that the dumps used for checking happen in order
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 --verbose=loop \
; RUN: --threads=0 | FileCheck %s
define void @test_single_loop(i32 %a32) {
entry:
%a = trunc i32 %a32 to i1
br label %loop0
loop0: ; <-+
br label %loop1 ; |
loop1: ; |
br i1 %a, label %loop0, label %out ; --+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_single_loop_with_continue(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0
loop0: ; <-+
br label %loop1 ; |
loop1: ; |
br i1 %a, label %loop0, label %loop2 ; --+
loop2: ; |
br i1 %b, label %loop0, label %out ; --+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop2:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_multiple_exits(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0
loop0: ; <-+
br label %loop1 ; |
loop1: ; |
br i1 %a, label %loop2, label %out ; --+-+
loop2: ; | |
br i1 %b, label %loop0, label %out ; --+ |
; |
out: ; <---+
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop2:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_two_nested_loops(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0_0
loop0_0: ; <---+
br label %loop1_0 ; |
loop1_0: ; <-+ |
br label %loop1_1 ; | |
loop1_1: ; | |
br i1 %a, label %loop1_0, label %loop0_1 ; --+ |
loop0_1: ; |
br i1 %b, label %loop0_0, label %out ; ----+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_two_nested_loops_with_continue(i32 %a32, i32 %b32, i32 %c32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
%c = trunc i32 %c32 to i1
br label %loop0_0
loop0_0: ; <---+
br label %loop1_0 ; |
loop1_0: ; <-+ |
br label %loop1_1 ; | |
loop1_1: ; | |
br i1 %a, label %loop1_0, label %loop1_2 ; --+ |
loop1_2: ; | |
br i1 %a, label %loop1_0, label %loop0_1 ; --+ |
loop0_1: ; |
br i1 %b, label %loop0_0, label %out ; ----+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_2:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_multiple_nested_loops(i32 %a32, i32 %b32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
br label %loop0_0
loop0_0: ; <---+
br label %loop1_0 ; |
loop1_0: ; <-+ |
br label %loop1_1 ; | |
loop1_1: ; | |
br i1 %a, label %loop1_0, label %loop0_1 ; --+ |
loop0_1: ; |
br label %loop2_0 ; |
loop2_0: ; <-+ |
br label %loop2_1 ; | |
loop2_1: ; | |
br i1 %a, label %loop2_0, label %loop0_2 ; --+ |
loop0_2: ; |
br i1 %b, label %loop0_0, label %out ; ----+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop2_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop2_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_2:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_three_nested_loops(i32 %a32, i32 %b32, i32 %c32) {
entry:
%a = trunc i32 %a32 to i1
%b = trunc i32 %b32 to i1
%c = trunc i32 %c32 to i1
br label %loop0_0
loop0_0: ; <-----+
br label %loop1_0 ; |
loop1_0: ; <---+ |
br label %loop2_0 ; | |
loop2_0: ; <-+ | |
br label %loop2_1 ; | | |
loop2_1: ; | | |
br i1 %a, label %loop2_0, label %loop1_1 ; --+ | |
loop1_1: ; | |
br i1 %b, label %loop1_0, label %loop0_1 ; ----+ |
loop0_1: ; |
br i1 %c, label %loop0_0, label %out ; ------+
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: loop0_0:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: loop1_0:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop2_0:
; CHECK-NEXT: LoopNestDepth = 3
; CHECK-NEXT: loop2_1:
; CHECK-NEXT: LoopNestDepth = 3
; CHECK-NEXT: loop1_1:
; CHECK-NEXT: LoopNestDepth = 2
; CHECK-NEXT: loop0_1:
; CHECK-NEXT: LoopNestDepth = 1
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
define void @test_diamond(i32 %a32) {
entry:
%a = trunc i32 %a32 to i1
br i1 %a, label %left, label %right
left:
br label %out
right:
br label %out
out:
ret void
}
; CHECK-LABEL: After loop nest depth analysis
; CHECK-NEXT: entry:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: left:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: right:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-NEXT: out:
; CHECK-NEXT: LoopNestDepth = 0
; CHECK-LABEL: Before RMW
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment