Commit 7cd926d6 by Manasij Mukherjee

Live Range Splitting after initial Register Allocation

After register allocation is done once, this pass targets the variables that do not get registers, break them into multiple variables with shorter (at most spanning a basic block) live ranges. After discarding the new variables with too few uses, the register allocator is run again and the new variables that manage to get registers are inserted. BUG=None R=stichnot@chromium.org Review URL: https://codereview.chromium.org/2172313002 .
parent fb068e84
......@@ -459,8 +459,11 @@ void CfgNode::advancedPhiLowering() {
// Undo the effect of the phi instruction on this node's live-in set by
// marking the phi dest variable as live on entry.
SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
assert(!Func->getLiveness()->getLiveIn(this)[VarNum]);
Func->getLiveness()->getLiveIn(this)[VarNum] = true;
auto &LiveIn = Func->getLiveness()->getLiveIn(this);
if (VarNum < LiveIn.size()) {
assert(!LiveIn[VarNum]);
LiveIn[VarNum] = true;
}
Phi->setDeleted();
}
}
......@@ -867,15 +870,15 @@ void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
Variable *Var = Liveness->getVariable(i, this);
if (LB > LE) {
Var->addLiveRange(FirstInstNum, LE);
Var->addLiveRange(LB, LastInstNum + 1);
Var->addLiveRange(FirstInstNum, LE, this);
Var->addLiveRange(LB, LastInstNum + 1, this);
// Assert that Var is a global variable by checking that its liveness
// index is less than the number of globals. This ensures that the
// LiveInAndOut[] access is valid.
assert(i < Liveness->getNumGlobalVars());
LiveInAndOut[i] = false;
} else {
Var->addLiveRange(LB, LE);
Var->addLiveRange(LB, LE, this);
}
if (i == i1)
++IBB;
......@@ -887,7 +890,7 @@ void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
i = LiveInAndOut.find_next(i)) {
Variable *Var = Liveness->getVariable(i, this);
if (Liveness->getRangeMask(Var->getIndex()))
Var->addLiveRange(FirstInstNum, LastInstNum + 1);
Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
}
}
......
......@@ -175,6 +175,10 @@ struct dev_list_flag {};
"this executable."), \
cl::init(false)) \
\
X(SplitGlobalVars, bool, dev_opt_flag, "split-global-vars", \
cl::desc("Global live range splitting"), \
cl::init(false)) \
\
X(InputFileFormat, llvm::NaClFileFormat, dev_opt_flag, "bitcode-format", \
cl::desc("Define format of input file:"), \
cl::values(clEnumValN(llvm::LLVMFormat, "llvm", "LLVM file (default)"), \
......
......@@ -39,6 +39,7 @@
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <system_error>
#include <unordered_map>
......@@ -129,6 +130,8 @@ using AssignList = InstList;
template <typename T> using CfgList = std::list<T, CfgLocalAllocator<T>>;
template <typename T, typename H = std::hash<T>, typename Eq = std::equal_to<T>>
using CfgUnorderedSet = std::unordered_set<T, H, Eq, CfgLocalAllocator<T>>;
template <typename T, typename Cmp = std::less<T>>
using CfgSet = std::set<T, Cmp, CfgLocalAllocator<T>>;
template <typename T, typename U, typename H = std::hash<T>,
typename Eq = std::equal_to<T>>
using CfgUnorderedMap =
......
......@@ -191,6 +191,7 @@ public:
virtual bool isRedundantAssign() const { return false; }
virtual ~Inst() = default;
void replaceDest(Variable *Var) { Dest = Var; }
protected:
Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest);
......
......@@ -101,14 +101,21 @@ bool operator==(const RegWeight &A, const RegWeight &B) {
return !(B < A) && !(A < B);
}
void LiveRange::addSegment(InstNumberT Start, InstNumberT End) {
if (!Range.empty()) {
// Check for merge opportunity.
InstNumberT CurrentEnd = Range.back().second;
assert(Start >= CurrentEnd);
if (Start == CurrentEnd) {
Range.back().second = End;
return;
void LiveRange::addSegment(InstNumberT Start, InstNumberT End, CfgNode *Node) {
if (getFlags().getSplitGlobalVars()) {
// Disable merging to make sure a live range 'segment' has a single node.
// Might be possible to enable when the target segment has the same node.
assert(NodeMap.find(Start) == NodeMap.end());
NodeMap[Start] = Node;
} else {
if (!Range.empty()) {
// Check for merge opportunity.
InstNumberT CurrentEnd = Range.back().second;
assert(Start >= CurrentEnd);
if (Start == CurrentEnd) {
Range.back().second = End;
return;
}
}
}
Range.push_back(RangeElementType(Start, End));
......
......@@ -603,6 +603,9 @@ bool operator==(const RegWeight &A, const RegWeight &B);
/// interval.
class LiveRange {
public:
using RangeElementType = std::pair<InstNumberT, InstNumberT>;
/// RangeType is arena-allocated from the Cfg's allocator.
using RangeType = CfgVector<RangeElementType>;
LiveRange() = default;
/// Special constructor for building a kill set. The advantage is that we can
/// reserve the right amount of space in advance.
......@@ -618,7 +621,10 @@ public:
Range.clear();
untrim();
}
void addSegment(InstNumberT Start, InstNumberT End);
void addSegment(InstNumberT Start, InstNumberT End, CfgNode *Node = nullptr);
void addSegment(RangeElementType Segment, CfgNode *Node = nullptr) {
addSegment(Segment.first, Segment.second, Node);
}
bool endsBefore(const LiveRange &Other) const;
bool overlaps(const LiveRange &Other, bool UseTrimmed = false) const;
......@@ -637,11 +643,18 @@ public:
void dump(Ostream &Str) const;
SizeT getNumSegments() const { return Range.size(); }
const RangeType &getSegments() const { return Range; }
CfgNode *getNodeForSegment(InstNumberT Begin) {
auto Iter = NodeMap.find(Begin);
assert(Iter != NodeMap.end());
return Iter->second;
}
private:
using RangeElementType = std::pair<InstNumberT, InstNumberT>;
/// RangeType is arena-allocated from the Cfg's allocator.
using RangeType = CfgVector<RangeElementType>;
RangeType Range;
CfgUnorderedMap<InstNumberT, CfgNode *> NodeMap;
/// TrimmedBegin is an optimization for the overlaps() computation. Since the
/// linear-scan algorithm always calls it as overlaps(Cur) and Cur advances
/// monotonically according to live range start, we can optimize overlaps() by
......@@ -760,9 +773,10 @@ public:
const LiveRange &getLiveRange() const { return Live; }
void setLiveRange(const LiveRange &Range) { Live = Range; }
void resetLiveRange() { Live.reset(); }
void addLiveRange(InstNumberT Start, InstNumberT End) {
void addLiveRange(InstNumberT Start, InstNumberT End,
CfgNode *Node = nullptr) {
assert(!getIgnoreLiveness());
Live.addSegment(Start, End);
Live.addSegment(Start, End, Node);
}
void trimLiveRange(InstNumberT Start) { Live.trim(Start); }
void untrimLiveRange() { Live.untrim(); }
......
......@@ -113,7 +113,6 @@ void LinearScan::initForGlobal() {
// register allocation since no overlap opportunities should be available and
// it's more expensive to look for opportunities.
FindOverlap = (Kind != RAK_Phi);
const VarList &Vars = Func->getVariables();
Unhandled.reserve(Vars.size());
UnhandledPrecolored.reserve(Vars.size());
// Gather the live ranges of all variables and add them to the Unhandled set.
......@@ -168,7 +167,6 @@ bool LinearScan::livenessValidateIntervals(
if (!BuildDefs::dump())
return false;
const VarList &Vars = Func->getVariables();
OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrDump();
for (SizeT VarNum : DefsWithoutUses) {
......@@ -212,7 +210,6 @@ void LinearScan::initForInfOnly() {
FindPreference = false;
FindOverlap = false;
SizeT NumVars = 0;
const VarList &Vars = Func->getVariables();
// Iterate across all instructions and record the begin and end of the live
// range for each variable that is pre-colored or infinite weight.
......@@ -325,13 +322,19 @@ void LinearScan::initForSecondChance() {
}
}
void LinearScan::init(RegAllocKind Kind) {
void LinearScan::init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars) {
this->Kind = Kind;
Unhandled.clear();
UnhandledPrecolored.clear();
Handled.clear();
Inactive.clear();
Active.clear();
Vars.clear();
Vars.reserve(Func->getVariables().size() - ExcludeVars.size());
for (auto *Var : Func->getVariables()) {
if (ExcludeVars.find(Var) == ExcludeVars.end())
Vars.emplace_back(Var);
}
SizeT NumRegs = Target->getNumRegisters();
RegAliases.resize(NumRegs);
......
......@@ -32,7 +32,7 @@ class LinearScan {
public:
explicit LinearScan(Cfg *Func);
void init(RegAllocKind Kind);
void init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars = {});
void scan(const SmallBitVector &RegMask, bool Randomized);
// Returns the number of times some variable has been assigned a register but
// later evicted because of a higher-priority allocation. The idea is that we
......@@ -131,9 +131,9 @@ private:
llvm::SmallVector<const SmallBitVector *, REGS_SIZE> RegAliases;
bool FindPreference = false;
bool FindOverlap = false;
const bool Verbose;
const bool UseReserve;
CfgVector<Variable *> Vars;
};
} // end of namespace Ice
......
......@@ -24,6 +24,7 @@
#include "IceGlobalContext.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"
......@@ -511,6 +512,182 @@ void TargetLowering::regAlloc(RegAllocKind Kind) {
// set of Variables that have no register and a non-empty live range, and
// model an infinite number of registers. Maybe use the register aliasing
// mechanism to get better packing of narrower slots.
if (getFlags().getSplitGlobalVars())
postRegallocSplitting(RegMask);
}
namespace {
CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start,
InstNumberT End) {
CfgVector<Inst *> Result;
bool Started = false;
auto Process = [&](InstList &Insts) {
for (auto &Instr : Insts) {
if (Instr.isDeleted()) {
continue;
}
if (Instr.getNumber() == Start) {
Started = true;
}
if (Started) {
Result.emplace_back(&Instr);
}
if (Instr.getNumber() == End) {
break;
}
}
};
Process(Node->getPhis());
Process(Node->getInsts());
// TODO(manasijm): Investigate why checking >= End significantly changes
// output. Should not happen when renumbering produces monotonically
// increasing instruction numbers and live ranges begin and end on non-deleted
// instructions.
return Result;
}
}
void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) {
// Splits the live ranges of global(/multi block) variables and runs the
// register allocator to find registers for as many of the new variables as
// possible.
// TODO(manasijm): Merge the small liveranges back into multi-block ones when
// the variables get the same register. This will reduce the amount of new
// instructions inserted. This might involve a full dataflow analysis.
// Also, modify the preference mechanism in the register allocator to match.
TimerMarker _(TimerStack::TT_splitGlobalVars, Func);
CfgSet<Variable *> SplitCandidates;
// Find variables that do not have registers but are allowed to. Also skip
// variables with single segment live ranges as they are not split further in
// this function.
for (Variable *Var : Func->getVariables()) {
if (!Var->mustNotHaveReg() && !Var->hasReg()) {
if (Var->getLiveRange().getNumSegments() > 1)
SplitCandidates.insert(Var);
}
}
if (SplitCandidates.empty())
return;
CfgSet<Variable *> ExtraVars;
struct UseInfo {
Variable *Replacing = nullptr;
Inst *FirstUse = nullptr;
Inst *LastDef = nullptr;
SizeT UseCount = 0;
};
CfgUnorderedMap<Variable *, UseInfo> VarInfo;
// Split the live ranges of the viable variables by node.
// Compute metadata (UseInfo) for each of the resulting variables.
for (auto *Var : SplitCandidates) {
for (auto &Segment : Var->getLiveRange().getSegments()) {
UseInfo Info;
Info.Replacing = Var;
auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first);
for (auto *Instr :
getInstructionsInRange(Node, Segment.first, Segment.second)) {
for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
// It's safe to iterate over the top-level src operands rather than
// using FOREACH_VAR_IN_INST(), because any variables inside e.g.
// mem operands should already have registers.
if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
if (Var == Info.Replacing) {
if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) {
Info.FirstUse = Instr;
}
Info.UseCount++;
}
}
}
if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) {
Info.LastDef = Instr;
}
}
static constexpr SizeT MinUseThreshold = 3;
// Skip if variable has less than `MinUseThreshold` uses in the segment.
if (Info.UseCount < MinUseThreshold)
continue;
if (!Info.FirstUse && !Info.LastDef) {
continue;
}
LiveRange LR;
LR.addSegment(Segment);
Variable *NewVar = Func->makeVariable(Var->getType());
NewVar->setLiveRange(LR);
VarInfo[NewVar] = Info;
ExtraVars.insert(NewVar);
}
}
// Run the register allocator with all these new variables included
LinearScan RegAlloc(Func);
RegAlloc.init(RAK_Global, SplitCandidates);
RegAlloc.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
// Modify the Cfg to use the new variables that now have registers.
for (auto *ExtraVar : ExtraVars) {
if (!ExtraVar->hasReg()) {
continue;
}
auto &Info = VarInfo[ExtraVar];
assert(ExtraVar->getLiveRange().getSegments().size() == 1);
auto Segment = ExtraVar->getLiveRange().getSegments()[0];
auto *Node =
Info.Replacing->getLiveRange().getNodeForSegment(Segment.first);
auto RelevantInsts =
getInstructionsInRange(Node, Segment.first, Segment.second);
if (RelevantInsts.empty())
continue;
// Replace old variables
for (auto *Instr : RelevantInsts) {
if (llvm::isa<InstPhi>(Instr))
continue;
// TODO(manasijm): Figure out how to safely enable replacing phi dest
// variables. The issue is that we can not insert low level mov
// instructions into the PhiList.
for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
// FOREACH_VAR_IN_INST() not needed. Same logic as above.
if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
if (Var == Info.Replacing) {
Instr->replaceSource(i, ExtraVar);
}
}
}
if (Instr->getDest() == Info.Replacing) {
Instr->replaceDest(ExtraVar);
}
}
assert(Info.FirstUse != Info.LastDef);
assert(Info.FirstUse || Info.LastDef);
// Insert spill code
if (Info.FirstUse != nullptr) {
auto *NewInst =
Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing);
Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst);
}
if (Info.LastDef != nullptr) {
auto *NewInst =
Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar);
Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst);
}
}
}
void TargetLowering::markRedefinitions() {
......
......@@ -23,11 +23,12 @@
#ifndef SUBZERO_SRC_ICETARGETLOWERING_H
#define SUBZERO_SRC_ICETARGETLOWERING_H
#include "IceDefs.h"
#include "IceBitVector.h"
#include "IceCfgNode.h"
#include "IceDefs.h"
#include "IceInst.h" // for the names of the Inst subtypes
#include "IceOperand.h"
#include "IceRegAlloc.h"
#include "IceTypes.h"
#include <utility>
......@@ -290,6 +291,7 @@ public:
virtual const SmallBitVector &getAliasesForRegister(RegNumT) const = 0;
void regAlloc(RegAllocKind Kind);
void postRegallocSplitting(const SmallBitVector &RegMask);
virtual void
makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
......
......@@ -93,6 +93,17 @@ public:
SizeT getNumRegisters() const override {
return Traits::RegisterSet::Reg_NUM;
}
Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override {
if (isVectorType(Dest->getType())) {
return Traits::Insts::Movp::create(Func, Dest, SrcVar);
}
return Traits::Insts::Mov::create(Func, Dest, SrcVar);
(void)Dest;
(void)SrcVar;
return nullptr;
}
Variable *getPhysicalRegister(RegNumT RegNum,
Type Ty = IceType_void) override;
const char *getRegName(RegNumT RegNum, Type Ty) const override;
......@@ -220,8 +231,6 @@ public:
InstructionSetEnum getInstructionSet() const { return InstructionSet; }
Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override;
protected:
const bool NeedSandboxing;
......
......@@ -1353,15 +1353,6 @@ void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
RI->setDeleted();
}
template <typename TraitsType>
Inst *TargetX86Base<TraitsType>::createLoweredMove(Variable *Dest,
Variable *SrcVar) {
if (isVectorType(Dest->getType())) {
return Traits::Insts::Movp::create(Func, Dest, SrcVar);
}
return Traits::Insts::Mov::create(Func, Dest, SrcVar);
}
template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
return Traits::WordType;
}
......
......@@ -63,6 +63,7 @@
X(regAlloc) \
X(renumberInstructions) \
X(shortCircuit) \
X(splitGlobalVars) \
X(splitLocalVars) \
X(szmain) \
X(translate) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment