Commit 87ff3a18 by Jim Stichnoth

Subzero: Simplify the FakeKill instruction.

Even after earlier simplifications, FakeKill was still handled somewhat inefficiently for the register allocator. For x86-32, any function containing call instructions would result in about 11 pre-colored Variables, each with an identical and relatively complex live range consisting of points. They would start out on the UnhandledPrecolored list, then all move to the Inactive list, where they would be repeatedly compared against each register allocation candidate via overlapsRange(). We improve this by keeping around a single copy of that live range and directly masking out the Free[] register set when that live range overlaps the current candidate's live range. This saves ~10 overlaps() calculations per candidate while FakeKills are still pending. Also, slightly rearrange the initialization of the Unhandled etc. sets into a separate init routine, which will make it easier to reuse the register allocator in other situations such as Om1 post-lowering. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/720343003
parent 51596d43
......@@ -661,18 +661,6 @@ void CfgNode::livenessPostprocess(LivenessMode Mode, Liveness *Liveness) {
FirstInstNum = I->getNumber();
assert(I->getNumber() > LastInstNum);
LastInstNum = I->getNumber();
// Create fake live ranges for a Kill instruction, but only if the
// linked instruction is still alive.
if (Mode == Liveness_Intervals) {
if (InstFakeKill *Kill = llvm::dyn_cast<InstFakeKill>(I)) {
if (!Kill->getLinked()->isDeleted()) {
for (Variable *Var : Kill->getKilledRegs()) {
InstNumberT InstNumber = I->getNumber();
Var->addLiveRange(InstNumber, InstNumber, 1);
}
}
}
}
}
if (Mode != Liveness_Intervals)
return;
......
......@@ -114,8 +114,6 @@ bool Inst::isLastUse(const Operand *TestSrc) const {
void Inst::livenessLightweight(Cfg *Func, LivenessBV &Live) {
assert(!isDeleted());
if (llvm::isa<InstFakeKill>(this))
return;
resetLastUses();
VariablesMetadata *VMetadata = Func->getVMetadata();
SizeT VarIndex = 0;
......@@ -139,8 +137,6 @@ bool Inst::liveness(InstNumberT InstNumber, LivenessBV &Live,
Liveness *Liveness, LiveBeginEndMap *LiveBegin,
LiveBeginEndMap *LiveEnd) {
assert(!isDeleted());
if (llvm::isa<InstFakeKill>(this))
return true;
Dead = false;
if (Dest) {
......@@ -181,15 +177,13 @@ bool Inst::liveness(InstNumberT InstNumber, LivenessBV &Live,
// twice. ICE only allows a variable to have a single
// liveness interval in a basic block (except for blocks
// where a variable is live-in and live-out but there is a
// gap in the middle, and except for the special
// InstFakeKill instruction that can appear multiple
// times in the same block). Therefore, this lowered
// sequence needs to represent a single conservative live
// range for t. Since the instructions are being traversed
// backwards, we make sure LiveEnd is only set once by
// setting it only when LiveEnd[VarNum]==0 (sentinel value).
// Note that it's OK to set LiveBegin multiple times because
// of the backwards traversal.
// gap in the middle). Therefore, this lowered sequence
// needs to represent a single conservative live range for
// t. Since the instructions are being traversed backwards,
// we make sure LiveEnd is only set once by setting it only
// when LiveEnd[VarNum]==0 (sentinel value). Note that it's
// OK to set LiveBegin multiple times because of the
// backwards traversal.
if (LiveEnd) {
// Ideally, we would verify that VarNum wasn't already
// added in this block, but this can't be done very
......@@ -452,10 +446,8 @@ InstFakeUse::InstFakeUse(Cfg *Func, Variable *Src)
addSource(Src);
}
InstFakeKill::InstFakeKill(Cfg *Func, const VarList &KilledRegs,
const Inst *Linked)
: InstHighLevel(Func, Inst::FakeKill, 0, NULL), KilledRegs(KilledRegs),
Linked(Linked) {}
InstFakeKill::InstFakeKill(Cfg *Func, const Inst *Linked)
: InstHighLevel(Func, Inst::FakeKill, 0, NULL), Linked(Linked) {}
// ======================== Dump routines ======================== //
......@@ -757,14 +749,7 @@ void InstFakeKill::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
if (Linked->isDeleted())
Str << "// ";
Str << "kill.pseudo ";
bool First = true;
for (Variable *Var : KilledRegs) {
if (!First)
Str << ", ";
First = false;
Var->dump(Func);
}
Str << "kill.pseudo scratch_regs";
}
void InstTarget::dump(const Cfg *Func) const {
......
......@@ -782,11 +782,12 @@ private:
~InstFakeUse() override {}
};
// FakeKill instruction. This "kills" a set of variables by adding a
// trivial live range at this instruction to each variable. The
// primary use is to indicate that scratch registers are killed after
// a call, so that the register allocator won't assign a scratch
// register to a variable whose live range spans a call.
// FakeKill instruction. This "kills" a set of variables by modeling
// a trivial live range at this instruction for each (implicit)
// variable. The primary use is to indicate that scratch registers
// are killed after a call, so that the register allocator won't
// assign a scratch register to a variable whose live range spans a
// call.
//
// The FakeKill instruction also holds a pointer to the instruction
// that kills the set of variables, so that if that linked instruction
......@@ -796,12 +797,9 @@ class InstFakeKill : public InstHighLevel {
InstFakeKill &operator=(const InstFakeKill &) = delete;
public:
static InstFakeKill *create(Cfg *Func, const VarList &KilledRegs,
const Inst *Linked) {
return new (Func->allocateInst<InstFakeKill>())
InstFakeKill(Func, KilledRegs, Linked);
static InstFakeKill *create(Cfg *Func, const Inst *Linked) {
return new (Func->allocateInst<InstFakeKill>()) InstFakeKill(Func, Linked);
}
const VarList &getKilledRegs() const { return KilledRegs; }
const Inst *getLinked() const { return Linked; }
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg * /* Func */) const override {}
......@@ -809,10 +807,9 @@ public:
static bool classof(const Inst *Inst) { return Inst->getKind() == FakeKill; }
private:
InstFakeKill(Cfg *Func, const VarList &KilledRegs, const Inst *Linked);
InstFakeKill(Cfg *Func, const Inst *Linked);
~InstFakeKill() override {}
const VarList &KilledRegs;
// This instruction is ignored if Linked->isDeleted() is true.
const Inst *Linked;
};
......
......@@ -37,8 +37,6 @@ bool operator==(const RegWeight &A, const RegWeight &B) {
}
void LiveRange::addSegment(InstNumberT Start, InstNumberT End) {
if (End > Start)
IsNonpoints = true;
#ifdef USE_SET
RangeElementType Element(Start, End);
RangeType::iterator Next = Range.lower_bound(Element);
......@@ -125,8 +123,6 @@ bool LiveRange::overlaps(const LiveRange &Other, bool UseTrimmed) const {
}
bool LiveRange::overlapsInst(InstNumberT OtherBegin, bool UseTrimmed) const {
if (!IsNonpoints)
return false;
bool Result = false;
for (auto I = (UseTrimmed ? TrimmedBegin : Range.begin()), E = Range.end();
I != E; ++I) {
......
......@@ -309,17 +309,15 @@ bool operator==(const RegWeight &A, const RegWeight &B);
// weight, in case e.g. we want a live range to have higher weight
// inside a loop.
class LiveRange {
// LiveRange(const LiveRange &) = delete;
// LiveRange &operator=(const LiveRange &) = delete;
public:
LiveRange() : Weight(0), IsNonpoints(false) {}
LiveRange() : Weight(0) {}
LiveRange(const LiveRange &) = default;
LiveRange &operator=(const LiveRange &) = default;
void reset() {
Range.clear();
Weight.setWeight(0);
untrim();
IsNonpoints = false;
}
void addSegment(InstNumberT Start, InstNumberT End);
......@@ -328,7 +326,6 @@ public:
bool overlapsInst(InstNumberT OtherBegin, bool UseTrimmed = false) const;
bool containsValue(InstNumberT Value, bool IsDest) const;
bool isEmpty() const { return Range.empty(); }
bool isNonpoints() const { return IsNonpoints; }
InstNumberT getStart() const {
return Range.empty() ? -1 : Range.begin()->first;
}
......@@ -365,11 +362,6 @@ private:
// that linear-scan also has to initialize TrimmedBegin at the
// beginning by calling untrim().
RangeType::const_iterator TrimmedBegin;
// IsNonpoints keeps track of whether the live range contains at
// least one interval where Start!=End. If it is empty or has the
// form [x,x),[y,y),...,[z,z), then overlaps(InstNumberT) is
// trivially false.
bool IsNonpoints;
};
Ostream &operator<<(Ostream &Str, const LiveRange &L);
......
......@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceInst.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"
......@@ -72,35 +73,16 @@ void dumpLiveRange(const Variable *Var, const Cfg *Func) {
} // end of anonymous namespace
// Implements the linear-scan algorithm. Based on "Linear Scan
// Register Allocation in the Context of SSA Form and Register
// Constraints" by Hanspeter Mössenböck and Michael Pfeiffer,
// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF . This
// implementation is modified to take affinity into account and allow
// two interfering variables to share the same register in certain
// cases.
//
// Requires running Cfg::liveness(Liveness_Intervals) in
// preparation. Results are assigned to Variable::RegNum for each
// Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
TimerMarker T(TimerStack::TT_linearScan, Func);
assert(RegMaskFull.any()); // Sanity check
void LinearScan::initForGlobalAlloc() {
TimerMarker T(TimerStack::TT_initUnhandled, Func);
Unhandled.clear();
UnhandledPrecolored.clear();
Handled.clear();
Inactive.clear();
Active.clear();
Ostream &Str = Func->getContext()->getStrDump();
bool Verbose = Func->getContext()->isVerbose(IceV_LinearScan);
Func->resetCurrentNode();
VariablesMetadata *VMetadata = Func->getVMetadata();
// Gather the live ranges of all variables and add them to the
// Unhandled set.
const VarList &Vars = Func->getVariables();
{
TimerMarker T(TimerStack::TT_initUnhandled, Func);
Unhandled.reserve(Vars.size());
for (Variable *Var : Vars) {
// Explicitly don't consider zero-weight variables, which are
......@@ -132,7 +114,46 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges());
std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
CompareRanges());
// Build the (ordered) list of FakeKill instruction numbers.
Kills.clear();
for (CfgNode *Node : Func->getNodes()) {
for (auto I = Node->getInsts().begin(), E = Node->getInsts().end(); I != E;
++I) {
if (I->isDeleted())
continue;
if (auto Kill = llvm::dyn_cast<InstFakeKill>(I)) {
if (!Kill->getLinked()->isDeleted())
Kills.push_back(I->getNumber());
}
}
}
}
// Implements the linear-scan algorithm. Based on "Linear Scan
// Register Allocation in the Context of SSA Form and Register
// Constraints" by Hanspeter Mössenböck and Michael Pfeiffer,
// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF . This
// implementation is modified to take affinity into account and allow
// two interfering variables to share the same register in certain
// cases.
//
// Requires running Cfg::liveness(Liveness_Intervals) in
// preparation. Results are assigned to Variable::RegNum for each
// Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
TimerMarker T(TimerStack::TT_linearScan, Func);
assert(RegMaskFull.any()); // Sanity check
Ostream &Str = Func->getContext()->getStrDump();
bool Verbose = Func->getContext()->isVerbose(IceV_LinearScan);
Func->resetCurrentNode();
VariablesMetadata *VMetadata = Func->getVMetadata();
// Build a LiveRange representing the Kills list.
LiveRange KillsRange;
for (InstNumberT I : Kills)
KillsRange.addSegment(I, I);
KillsRange.untrim();
// RegUses[I] is the number of live ranges (variables) that register
// I is currently assigned to. It can be greater than 1 as a result
......@@ -144,6 +165,11 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
assert(Inactive.empty());
assert(Handled.empty());
UnorderedRanges::iterator Next;
const TargetLowering::RegSetMask RegsInclude =
TargetLowering::RegSet_CallerSave;
const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
const llvm::SmallBitVector KillsMask =
Func->getTarget()->getRegisterSet(RegsInclude, RegsExclude);
while (!Unhandled.empty()) {
Variable *Cur = Unhandled.back();
......@@ -155,6 +181,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
}
const llvm::SmallBitVector RegMask =
RegMaskFull & Func->getTarget()->getRegisterSetForType(Cur->getType());
KillsRange.trim(Cur->getLiveRange().getStart());
// Check for precolored ranges. If Cur is precolored, it
// definitely gets that register. Previously processed live
......@@ -220,15 +247,6 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
++Next;
Variable *Item = *I;
Item->trimLiveRange(Cur->getLiveRange().getStart());
// As an optimization, don't bother checking pure point-valued
// Inactive ranges, because the overlapsStart() test will never
// succeed, and the rangeEndsBefore() test will generally only
// succeed after the last call instruction, which statistically
// happens near the end. TODO(stichnot): Consider suppressing
// this check every N iterations in case calls are only at the
// beginning of the function.
if (!Item->getLiveRange().isNonpoints())
continue;
if (Item->rangeEndsBefore(Cur)) {
// Move Item from Inactive to Handled list.
if (Verbose) {
......@@ -374,6 +392,19 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
}
}
// Remove scratch registers from the Free[] list, and mark their
// Weights[] as infinite, if KillsRange overlaps Cur's live range.
const bool UseTrimmed = true;
if (Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
Free.reset(KillsMask);
for (int i = KillsMask.find_first(); i != -1;
i = KillsMask.find_next(i)) {
Weights[i].setWeight(RegWeight::Inf);
if (PreferReg == i)
AllowOverlap = false;
}
}
// Print info about physical register availability.
if (Verbose) {
for (SizeT i = 0; i < RegMask.size(); ++i) {
......
......@@ -27,6 +27,7 @@ class LinearScan {
public:
LinearScan(Cfg *Func) : Func(Func) {}
void initForGlobalAlloc();
void scan(const llvm::SmallBitVector &RegMask);
void dump(Cfg *Func) const;
......@@ -39,6 +40,7 @@ private:
// for faster processing.
OrderedRanges UnhandledPrecolored;
UnorderedRanges Active, Inactive, Handled;
std::vector<InstNumberT> Kills;
};
} // end of namespace Ice
......
......@@ -234,6 +234,7 @@ void TargetLowering::regAlloc() {
RegInclude |= RegSet_CalleeSave;
if (hasFramePointer())
RegExclude |= RegSet_FramePointer;
LinearScan.initForGlobalAlloc();
llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
LinearScan.scan(RegMask);
}
......
......@@ -316,8 +316,6 @@ TargetX8632::TargetX8632(Cfg *Func)
void TargetX8632::translateO2() {
TimerMarker T(TimerStack::TT_O2, Func);
initFakeKilledScratchRegisters();
if (!Ctx->getFlags().PhiEdgeSplit) {
// Lower Phi instructions.
Func->placePhiLoads();
......@@ -413,8 +411,6 @@ void TargetX8632::translateO2() {
void TargetX8632::translateOm1() {
TimerMarker T(TimerStack::TT_Om1, Func);
initFakeKilledScratchRegisters();
Func->placePhiLoads();
if (Func->hasError())
return;
......@@ -1894,9 +1890,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
}
// Insert a register-kill pseudo instruction.
assert(!FakeKilledScratchRegisters.empty());
Context.insert(
InstFakeKill::create(Func, FakeKilledScratchRegisters, NewCall));
Context.insert(InstFakeKill::create(Func, NewCall));
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
......
......@@ -483,20 +483,10 @@ protected:
llvm::SmallBitVector RegsUsed;
SizeT NextLabelNumber;
VarList PhysicalRegisters[IceType_NUM];
VarList FakeKilledScratchRegisters;
static IceString RegNames[];
private:
~TargetX8632() override {}
// Ideally, this initialization would be done in the constructor,
// but we need to defer it until after the initial CFG is built,
// because some of the bitcode reader tests rely on the order that
// Variables are created and their default printable names.
void initFakeKilledScratchRegisters() {
for (SizeT I = 0; I < ScratchRegs.size(); ++I)
if (ScratchRegs[I])
FakeKilledScratchRegisters.push_back(getPhysicalRegister(I));
}
template <typename T> void emitConstantPool() const;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment