Commit b40595a1 by Jim Stichnoth

Subzero: Make the register allocator more robust with -reg-use and -reg-exclude.

The problem is that if you restrict registers too aggressively with -reg-use or -reg-exclude, you can get failures because of inherently high register pressure, and there are also contributions from the "specialty" register classes. For example, when you combine load optimization, address mode inference, local register availability optimization, and the div instruction, you can end up needing 5 simultaneously live infinite-weight registers.

The fix/enhancement here is to keep track of the "reserve" set of registers for each register class, and allow the register allocator to draw from that as a last resort. This behavior is guarded by the -reg-reserve flag.

This CL also includes two improvements in lowering sequences to reduce register pressure.

BUG= none
R=kschimpf@google.com

Review URL: https://codereview.chromium.org/1641653004 .
parent 029bed9c
...@@ -55,6 +55,30 @@ Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber) ...@@ -55,6 +55,30 @@ Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
Cfg::~Cfg() { assert(ICE_TLS_GET_FIELD(CurrentCfg) == nullptr); } Cfg::~Cfg() { assert(ICE_TLS_GET_FIELD(CurrentCfg) == nullptr); }
/// Produce a label such as "foo(i=123:b=9)": the function name followed by the
/// count of high-level instructions and the count of basic blocks. The label
/// exists purely for dumps and other diagnostics; when several functions
/// reproduce a problem, it makes the smallest or simplest one easy to pick
/// out. The counts can differ somewhat depending on which translation pass has
/// run by the time this is called. When BuildDefs::dump() is false, only the
/// plain function name is returned.
IceString Cfg::getFunctionNameAndSize() const {
  if (!BuildDefs::dump())
    return getFunctionName();
  SizeT NumBlocks = 0;
  SizeT NumInsts = 0;
  for (CfgNode *Block : getNodes()) {
    NumBlocks += 1;
    // Every phi is counted, and deleted instructions are counted as well.
    NumInsts += Block->getPhis().size();
    for (Inst &Instr : Block->getInsts()) {
      // InstTarget instructions are left out of the count.
      if (llvm::isa<InstTarget>(&Instr))
        continue;
      NumInsts += 1;
    }
  }
  const std::string InstText = std::to_string(NumInsts);
  const std::string BlockText = std::to_string(NumBlocks);
  return getFunctionName() + "(i=" + InstText + ":b=" + BlockText + ")";
}
void Cfg::setError(const IceString &Message) { void Cfg::setError(const IceString &Message) {
HasError = true; HasError = true;
ErrorMessage = Message; ErrorMessage = Message;
...@@ -1075,7 +1099,9 @@ void Cfg::dump(const IceString &Message) { ...@@ -1075,7 +1099,9 @@ void Cfg::dump(const IceString &Message) {
Str << Args[i]->getType() << " "; Str << Args[i]->getType() << " ";
Args[i]->dump(this); Args[i]->dump(this);
} }
Str << ") {\n"; // Append an extra copy of the function name here, in order to print its
// size stats but not mess up lit tests.
Str << ") { # " << getFunctionNameAndSize() << "\n";
} }
resetCurrentNode(); resetCurrentNode();
if (isVerbose(IceV_Liveness)) { if (isVerbose(IceV_Liveness)) {
......
...@@ -64,7 +64,8 @@ public: ...@@ -64,7 +64,8 @@ public:
/// \name Manage the name and return type of the function being translated. /// \name Manage the name and return type of the function being translated.
/// @{ /// @{
void setFunctionName(const IceString &Name) { FunctionName = Name; } void setFunctionName(const IceString &Name) { FunctionName = Name; }
IceString getFunctionName() const { return FunctionName; } const IceString &getFunctionName() const { return FunctionName; }
IceString getFunctionNameAndSize() const;
void setReturnType(Type Ty) { ReturnType = Ty; } void setReturnType(Type Ty) { ReturnType = Ty; }
Type getReturnType() const { return ReturnType; } Type getReturnType() const { return ReturnType; }
/// @} /// @}
......
...@@ -185,6 +185,13 @@ cl::opt<bool> ...@@ -185,6 +185,13 @@ cl::opt<bool>
cl::desc("Randomize register allocation"), cl::desc("Randomize register allocation"),
cl::init(false)); cl::init(false));
/// Allow failsafe access to registers that were restricted via -reg-use or
/// -reg-exclude. Exposed on the command line as -reg-reserve; the default is
/// false, so reserve registers are only used when the flag is given
/// explicitly.
cl::opt<bool>
RegAllocReserve("reg-reserve",
cl::desc("Let register allocation use reserve registers"),
cl::init(false));
/// Repeat register allocation until convergence. /// Repeat register allocation until convergence.
cl::opt<bool> cl::opt<bool>
RepeatRegAlloc("regalloc-repeat", RepeatRegAlloc("regalloc-repeat",
...@@ -545,6 +552,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) { ...@@ -545,6 +552,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks); OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks);
OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion); OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion);
OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation); OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation);
OutFlags.setRegAllocReserve(::RegAllocReserve);
OutFlags.setShouldRepeatRegAlloc(::RepeatRegAlloc); OutFlags.setShouldRepeatRegAlloc(::RepeatRegAlloc);
OutFlags.setShouldReorderFunctions(::ReorderFunctions); OutFlags.setShouldReorderFunctions(::ReorderFunctions);
OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables); OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
......
...@@ -169,6 +169,11 @@ public: ...@@ -169,6 +169,11 @@ public:
/// Set ClFlags::RandomRegAlloc to a new value /// Set ClFlags::RandomRegAlloc to a new value
void setShouldRandomizeRegAlloc(bool NewValue) { RandomRegAlloc = NewValue; } void setShouldRandomizeRegAlloc(bool NewValue) { RandomRegAlloc = NewValue; }
/// Get the value of ClFlags::RegAllocReserve (the -reg-reserve option), i.e.
/// whether register allocation may use reserve registers.
bool getRegAllocReserve() const { return RegAllocReserve; }
/// Set ClFlags::RegAllocReserve (the -reg-reserve option) to a new value
void setRegAllocReserve(bool NewValue) { RegAllocReserve = NewValue; }
/// Get the value of ClFlags::RepeatRegAlloc /// Get the value of ClFlags::RepeatRegAlloc
bool shouldRepeatRegAlloc() const { return RepeatRegAlloc; } bool shouldRepeatRegAlloc() const { return RepeatRegAlloc; }
/// Set ClFlags::RepeatRegAlloc to a new value /// Set ClFlags::RepeatRegAlloc to a new value
...@@ -425,6 +430,8 @@ private: ...@@ -425,6 +430,8 @@ private:
bool RandomNopInsertion; bool RandomNopInsertion;
/// see anonymous_namespace{IceClFlags.cpp}::RandomizeRegisterAllocation /// see anonymous_namespace{IceClFlags.cpp}::RandomizeRegisterAllocation
bool RandomRegAlloc; bool RandomRegAlloc;
/// see anonymous_namespace{IceClFlags.cpp}::RegAllocReserve
bool RegAllocReserve;
/// see anonymous_namespace{IceClFlags.cpp}::RepeatRegAlloc /// see anonymous_namespace{IceClFlags.cpp}::RepeatRegAlloc
bool RepeatRegAlloc; bool RepeatRegAlloc;
/// see anonymous_namespace{IceClFlags.cpp}::ReorderBasicBlocks /// see anonymous_namespace{IceClFlags.cpp}::ReorderBasicBlocks
......
...@@ -309,7 +309,8 @@ void GlobalContext::translateFunctions() { ...@@ -309,7 +309,8 @@ void GlobalContext::translateFunctions() {
getErrorStatus()->assign(EC_Translation); getErrorStatus()->assign(EC_Translation);
OstreamLocker L(this); OstreamLocker L(this);
getStrError() << "ICE translation error: " << Func->getFunctionName() getStrError() << "ICE translation error: " << Func->getFunctionName()
<< ": " << Func->getError() << "\n"; << ": " << Func->getError() << ": "
<< Func->getFunctionNameAndSize() << "\n";
Item = new EmitterWorkItem(Func->getSequenceNumber()); Item = new EmitterWorkItem(Func->getSequenceNumber());
} else { } else {
Func->getAssembler<>()->setInternal(Func->getInternal()); Func->getAssembler<>()->setInternal(Func->getInternal());
...@@ -320,7 +321,7 @@ void GlobalContext::translateFunctions() { ...@@ -320,7 +321,7 @@ void GlobalContext::translateFunctions() {
// The Cfg has already emitted into the assembly buffer, so // The Cfg has already emitted into the assembly buffer, so
// stats have been fully collected into this thread's TLS. // stats have been fully collected into this thread's TLS.
// Dump them before TLS is reset for the next Cfg. // Dump them before TLS is reset for the next Cfg.
dumpStats(Func->getFunctionName()); dumpStats(Func->getFunctionNameAndSize());
Assembler *Asm = Func->releaseAssembler(); Assembler *Asm = Func->releaseAssembler();
// Copy relevant fields into Asm before Func is deleted. // Copy relevant fields into Asm before Func is deleted.
Asm->setFunctionName(Func->getFunctionName()); Asm->setFunctionName(Func->getFunctionName());
...@@ -549,7 +550,7 @@ void GlobalContext::emitItems() { ...@@ -549,7 +550,7 @@ void GlobalContext::emitItems() {
Cfg::setCurrentCfg(Func.get()); Cfg::setCurrentCfg(Func.get());
Func->emit(); Func->emit();
Cfg::setCurrentCfg(nullptr); Cfg::setCurrentCfg(nullptr);
dumpStats(Func->getFunctionName()); dumpStats(Func->getFunctionNameAndSize());
} break; } break;
} }
} }
......
...@@ -76,11 +76,25 @@ void dumpLiveRange(const Variable *Var, const Cfg *Func) { ...@@ -76,11 +76,25 @@ void dumpLiveRange(const Variable *Var, const Cfg *Func) {
Str << " Range=" << Var->getLiveRange(); Str << " Range=" << Var->getLiveRange();
} }
/// Return the index of the lowest-weight register among the bits set in
/// RegMask, where Weights[] is indexed by register number. Only a strictly
/// smaller weight replaces the current best, so among equal weights the
/// first set bit visited is kept. RegMask must have at least one bit set.
int32_t findMinWeightIndex(
    const llvm::SmallBitVector &RegMask,
    const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
  int32_t MinWeightIndex = RegMask.find_first();
  assert(MinWeightIndex >= 0);
  // Walk the remaining set bits, starting just after the first one.
  int32_t Candidate = RegMask.find_next(MinWeightIndex);
  while (Candidate != -1) {
    if (Weights[Candidate] < Weights[MinWeightIndex])
      MinWeightIndex = Candidate;
    Candidate = RegMask.find_next(Candidate);
  }
  return MinWeightIndex;
}
} // end of anonymous namespace } // end of anonymous namespace
LinearScan::LinearScan(Cfg *Func) LinearScan::LinearScan(Cfg *Func)
: Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
UseReserve(Ctx->getFlags().getRegAllocReserve()) {}
// Prepare for full register allocation of all variables. We depend on liveness // Prepare for full register allocation of all variables. We depend on liveness
// analysis to have calculated live ranges. // analysis to have calculated live ranges.
...@@ -545,8 +559,8 @@ void LinearScan::findRegisterPreference(IterationState &Iter) { ...@@ -545,8 +559,8 @@ void LinearScan::findRegisterPreference(IterationState &Iter) {
} }
} }
// Remove registers from the Free[] list where an Inactive range overlaps with // Remove registers from the Iter.Free[] list where an Inactive range overlaps
// the current range. // with the current range.
void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) { void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
for (const Variable *Item : Inactive) { for (const Variable *Item : Inactive) {
if (!Item->rangeOverlaps(Iter.Cur)) if (!Item->rangeOverlaps(Iter.Cur))
...@@ -555,10 +569,11 @@ void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) { ...@@ -555,10 +569,11 @@ void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
// TODO(stichnot): Do this with bitvector ops, not a loop, for efficiency. // TODO(stichnot): Do this with bitvector ops, not a loop, for efficiency.
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) { RegAlias = Aliases.find_next(RegAlias)) {
// Don't assert(Free[RegNum]) because in theory (though probably never in // Don't assert(Iter.Free[RegNum]) because in theory (though probably
// practice) there could be two inactive variables that were marked with // never in practice) there could be two inactive variables that were
// AllowOverlap. // marked with AllowOverlap.
Iter.Free[RegAlias] = false; Iter.Free[RegAlias] = false;
Iter.FreeUnfiltered[RegAlias] = false;
// Disable AllowOverlap if an Inactive variable, which is not Prefer, // Disable AllowOverlap if an Inactive variable, which is not Prefer,
// shares Prefer's register, and has a definition within Cur's live range. // shares Prefer's register, and has a definition within Cur's live range.
if (Iter.AllowOverlap && Item != Iter.Prefer && if (Iter.AllowOverlap && Item != Iter.Prefer &&
...@@ -570,11 +585,11 @@ void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) { ...@@ -570,11 +585,11 @@ void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
} }
} }
// Remove registers from the Free[] list where an Unhandled pre-colored range // Remove registers from the Iter.Free[] list where an Unhandled pre-colored
// overlaps with the current range, and set those registers to infinite weight // range overlaps with the current range, and set those registers to infinite
// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is an // weight so that they aren't candidates for eviction.
// early exit check that turns a guaranteed O(N^2) algorithm into expected // Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
// linear complexity. // O(N^2) algorithm into expected linear complexity.
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
// TODO(stichnot): Partition UnhandledPrecolored according to register class, // TODO(stichnot): Partition UnhandledPrecolored according to register class,
// to restrict the number of overlap comparisons needed. // to restrict the number of overlap comparisons needed.
...@@ -590,6 +605,7 @@ void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { ...@@ -590,6 +605,7 @@ void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
RegAlias = Aliases.find_next(RegAlias)) { RegAlias = Aliases.find_next(RegAlias)) {
Iter.Weights[RegAlias].setWeight(RegWeight::Inf); Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
Iter.Free[RegAlias] = false; Iter.Free[RegAlias] = false;
Iter.FreeUnfiltered[RegAlias] = false;
Iter.PrecoloredUnhandledMask[RegAlias] = true; Iter.PrecoloredUnhandledMask[RegAlias] = true;
// Disable Iter.AllowOverlap if the preferred register is one of these // Disable Iter.AllowOverlap if the preferred register is one of these
// pre-colored unhandled overlapping ranges. // pre-colored unhandled overlapping ranges.
...@@ -630,10 +646,14 @@ void LinearScan::allocatePreferredRegister(IterationState &Iter) { ...@@ -630,10 +646,14 @@ void LinearScan::allocatePreferredRegister(IterationState &Iter) {
Active.push_back(Iter.Cur); Active.push_back(Iter.Cur);
} }
void LinearScan::allocateFreeRegister(IterationState &Iter) { void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
int32_t RegNum = Iter.Free.find_first(); const int32_t RegNum =
Filtered ? Iter.Free.find_first() : Iter.FreeUnfiltered.find_first();
Iter.Cur->setRegNumTmp(RegNum); Iter.Cur->setRegNumTmp(RegNum);
if (Filtered)
dumpLiveRangeTrace("Allocating ", Iter.Cur); dumpLiveRangeTrace("Allocating ", Iter.Cur);
else
dumpLiveRangeTrace("Allocating X ", Iter.Cur);
const llvm::SmallBitVector &Aliases = *RegAliases[RegNum]; const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) { RegAlias = Aliases.find_next(RegAlias)) {
...@@ -672,29 +692,50 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) { ...@@ -672,29 +692,50 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
} }
// All the weights are now calculated. Find the register with smallest weight. // All the weights are now calculated. Find the register with smallest weight.
int32_t MinWeightIndex = Iter.RegMask.find_first(); int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
// MinWeightIndex must be valid because of the initial RegMask.any() test.
assert(MinWeightIndex >= 0);
for (SizeT i = MinWeightIndex + 1; i < Iter.Weights.size(); ++i) {
if (Iter.RegMask[i] && Iter.Weights[i] < Iter.Weights[MinWeightIndex])
MinWeightIndex = i;
}
if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) { if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
// Cur doesn't have priority over any other live ranges, so don't allocate if (!Iter.Cur->mustHaveReg()) {
// any register to it, and move it to the Handled state. // Iter.Cur doesn't have priority over any other live ranges, so don't
// allocate any register to it, and move it to the Handled state.
Handled.push_back(Iter.Cur); Handled.push_back(Iter.Cur);
if (Iter.Cur->mustHaveReg()) { return;
}
if (Kind == RAK_Phi) { if (Kind == RAK_Phi) {
// Iter.Cur is infinite-weight but all physical registers are already
// taken, so we need to force one to be temporarily available.
addSpillFill(Iter); addSpillFill(Iter);
} else { Handled.push_back(Iter.Cur);
return;
}
// The remaining portion of the enclosing "if" block should only be
// reachable if we are manually limiting physical registers for testing.
if (UseReserve) {
if (Iter.FreeUnfiltered.any()) {
// There is some available physical register held in reserve, so use it.
constexpr bool NotFiltered = false;
allocateFreeRegister(Iter, NotFiltered);
// Iter.Cur is now on the Active list.
return;
}
// At this point, we need to find some reserve register that is already
// assigned to a non-infinite-weight variable. This could happen if some
// variable was previously assigned an alias of such a register.
MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
}
if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
dumpLiveRangeTrace("Failing ", Iter.Cur); dumpLiveRangeTrace("Failing ", Iter.Cur);
Func->setError("Unable to find a physical register for an " Func->setError("Unable to find a physical register for an "
"infinite-weight live range: " + "infinite-weight live range "
"(consider using -reg-reserve): " +
Iter.Cur->getName(Func)); Iter.Cur->getName(Func));
Handled.push_back(Iter.Cur);
return;
} }
// At this point, MinWeightIndex points to a valid reserve register to
// reallocate to Iter.Cur, so drop into the eviction code.
} }
} else {
// Evict all live ranges in Active that register number MinWeightIndex is // Evict all live ranges in Active that register number MinWeightIndex is
// assigned to. // assigned to.
const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex]; const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex];
...@@ -719,13 +760,13 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) { ...@@ -719,13 +760,13 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
for (SizeT I = Inactive.size(); I > 0; --I) { for (SizeT I = Inactive.size(); I > 0; --I) {
const SizeT Index = I - 1; const SizeT Index = I - 1;
Variable *Item = Inactive[Index]; Variable *Item = Inactive[Index];
// Note: The Item->rangeOverlaps(Cur) clause is not part of the // Note: The Item->rangeOverlaps(Cur) clause is not part of the description
// description of AssignMemLoc() in the original paper. But there doesn't // of AssignMemLoc() in the original paper. But there doesn't seem to be any
// seem to be any need to evict an inactive live range that doesn't // need to evict an inactive live range that doesn't overlap with the live
// overlap with the live range currently being considered. It's especially // range currently being considered. It's especially bad if we would end up
// bad if we would end up evicting an infinite-weight but // evicting an infinite-weight but currently-inactive live range. The most
// currently-inactive live range. The most common situation for this would // common situation for this would be a scratch register kill set for call
// be a scratch register kill set for call instructions. // instructions.
if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) { if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
dumpLiveRangeTrace("Evicting I ", Item); dumpLiveRangeTrace("Evicting I ", Item);
Item->setRegNumTmp(Variable::NoRegister); Item->setRegNumTmp(Variable::NoRegister);
...@@ -742,7 +783,6 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) { ...@@ -742,7 +783,6 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
} }
Active.push_back(Iter.Cur); Active.push_back(Iter.Cur);
dumpLiveRangeTrace("Allocating ", Iter.Cur); dumpLiveRangeTrace("Allocating ", Iter.Cur);
}
} }
void LinearScan::assignFinalRegisters( void LinearScan::assignFinalRegisters(
...@@ -843,6 +883,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -843,6 +883,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
dumpLiveRangeTrace("\nConsidering ", Iter.Cur); dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
assert(Target->getRegistersForVariable(Iter.Cur).any()); assert(Target->getRegistersForVariable(Iter.Cur).any());
Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur); Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
Iter.RegMaskUnfiltered =
RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
KillsRange.trim(Iter.Cur->getLiveRange().getStart()); KillsRange.trim(Iter.Cur->getLiveRange().getStart());
// Check for pre-colored ranges. If Cur is pre-colored, it definitely gets // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
...@@ -857,11 +899,14 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -857,11 +899,14 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
handleActiveRangeExpiredOrInactive(Iter.Cur); handleActiveRangeExpiredOrInactive(Iter.Cur);
handleInactiveRangeExpiredOrReactivated(Iter.Cur); handleInactiveRangeExpiredOrReactivated(Iter.Cur);
// Calculate available registers into Free[]. // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
Iter.Free = Iter.RegMask; Iter.Free = Iter.RegMask;
Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
for (SizeT i = 0; i < Iter.RegMask.size(); ++i) { for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
if (RegUses[i] > 0) if (RegUses[i] > 0) {
Iter.Free[i] = false; Iter.Free[i] = false;
Iter.FreeUnfiltered[i] = false;
}
} }
findRegisterPreference(Iter); findRegisterPreference(Iter);
...@@ -889,11 +934,12 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -889,11 +934,12 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
filterFreeWithPrecoloredRanges(Iter); filterFreeWithPrecoloredRanges(Iter);
// Remove scratch registers from the Free[] list, and mark their Weights[] // Remove scratch registers from the Iter.Free[] list, and mark their
// as infinite, if KillsRange overlaps Cur's live range. // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
constexpr bool UseTrimmed = true; constexpr bool UseTrimmed = true;
if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) { if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
Iter.Free.reset(KillsMask); Iter.Free.reset(KillsMask);
Iter.FreeUnfiltered.reset(KillsMask);
for (int i = KillsMask.find_first(); i != -1; for (int i = KillsMask.find_first(); i != -1;
i = KillsMask.find_next(i)) { i = KillsMask.find_next(i)) {
Iter.Weights[i].setWeight(RegWeight::Inf); Iter.Weights[i].setWeight(RegWeight::Inf);
...@@ -906,7 +952,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -906,7 +952,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
if (BuildDefs::dump() && Verbose) { if (BuildDefs::dump() && Verbose) {
Ostream &Str = Ctx->getStrDump(); Ostream &Str = Ctx->getStrDump();
for (SizeT i = 0; i < Iter.RegMask.size(); ++i) { for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
if (Iter.RegMask[i]) { if (Iter.RegMaskUnfiltered[i]) {
Str << Target->getRegName(i, Iter.Cur->getType()) Str << Target->getRegName(i, Iter.Cur->getType())
<< "(U=" << RegUses[i] << ",F=" << Iter.Free[i] << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
<< ",P=" << Iter.PrecoloredUnhandledMask[i] << ") "; << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
...@@ -921,7 +967,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, ...@@ -921,7 +967,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
allocatePreferredRegister(Iter); allocatePreferredRegister(Iter);
} else if (Iter.Free.any()) { } else if (Iter.Free.any()) {
// Second choice: any free register. // Second choice: any free register.
allocateFreeRegister(Iter); constexpr bool Filtered = true;
allocateFreeRegister(Iter, Filtered);
} else { } else {
// Fallback: there are no free registers, so we look for the lowest-weight // Fallback: there are no free registers, so we look for the lowest-weight
// register and see if Cur has higher weight. // register and see if Cur has higher weight.
......
...@@ -61,7 +61,9 @@ private: ...@@ -61,7 +61,9 @@ private:
int32_t PreferReg = Variable::NoRegister; int32_t PreferReg = Variable::NoRegister;
bool AllowOverlap = false; bool AllowOverlap = false;
llvm::SmallBitVector RegMask; llvm::SmallBitVector RegMask;
llvm::SmallBitVector RegMaskUnfiltered;
llvm::SmallBitVector Free; llvm::SmallBitVector Free;
llvm::SmallBitVector FreeUnfiltered;
llvm::SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping llvm::SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping
llvm::SmallVector<RegWeight, REGS_SIZE> Weights; llvm::SmallVector<RegWeight, REGS_SIZE> Weights;
}; };
...@@ -98,7 +100,7 @@ private: ...@@ -98,7 +100,7 @@ private:
void filterFreeWithPrecoloredRanges(IterationState &Iter); void filterFreeWithPrecoloredRanges(IterationState &Iter);
void allocatePrecoloredRegister(Variable *Cur); void allocatePrecoloredRegister(Variable *Cur);
void allocatePreferredRegister(IterationState &Iter); void allocatePreferredRegister(IterationState &Iter);
void allocateFreeRegister(IterationState &Iter); void allocateFreeRegister(IterationState &Iter, bool Filtered);
void handleNoFreeRegisters(IterationState &Iter); void handleNoFreeRegisters(IterationState &Iter);
void assignFinalRegisters(const llvm::SmallBitVector &RegMaskFull, void assignFinalRegisters(const llvm::SmallBitVector &RegMaskFull,
const llvm::SmallBitVector &PreDefinedRegisters, const llvm::SmallBitVector &PreDefinedRegisters,
...@@ -130,6 +132,7 @@ private: ...@@ -130,6 +132,7 @@ private:
bool FindOverlap = false; bool FindOverlap = false;
const bool Verbose; const bool Verbose;
const bool UseReserve;
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -275,8 +275,15 @@ public: ...@@ -275,8 +275,15 @@ public:
virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include, virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const = 0; RegSetMask Exclude) const = 0;
/// Get the set of physical registers available for the specified Variable's
/// register class, applying register restrictions from the command line.
virtual const llvm::SmallBitVector & virtual const llvm::SmallBitVector &
getRegistersForVariable(const Variable *Var) const = 0; getRegistersForVariable(const Variable *Var) const = 0;
/// Get the set of *all* physical registers available for the specified
/// Variable's register class, *not* applying register restrictions from the
/// command line.
virtual const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const = 0;
virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0; virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0;
void regAlloc(RegAllocKind Kind); void regAlloc(RegAllocKind Kind);
......
...@@ -342,6 +342,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) { ...@@ -342,6 +342,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v4i32] = VectorRegisters; TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters; TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet( filterTypeToRegisterSet(
Ctx, RegARM32::Reg_NUM, TypeToRegisterSet, Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
llvm::array_lengthof(TypeToRegisterSet), [](int32_t RegNum) -> IceString { llvm::array_lengthof(TypeToRegisterSet), [](int32_t RegNum) -> IceString {
...@@ -6514,6 +6517,8 @@ void TargetHeaderARM32::lower() { ...@@ -6514,6 +6517,8 @@ void TargetHeaderARM32::lower() {
} }
llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
llvm::SmallBitVector
TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
} // end of namespace ARM32 } // end of namespace ARM32
......
...@@ -88,6 +88,12 @@ public: ...@@ -88,6 +88,12 @@ public:
assert(RC < RC_Target); assert(RC < RC_Target);
return TypeToRegisterSet[RC]; return TypeToRegisterSet[RC];
} }
/// Return the register set for Var's register class from
/// TypeToRegisterSetUnfiltered, which staticInit() copies from
/// TypeToRegisterSet before filterTypeToRegisterSet() is applied. Asserts that
/// the register class is below RC_Target.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
assert(RC < RC_Target);
return TypeToRegisterSetUnfiltered[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg]; return RegisterAliases[Reg];
} }
...@@ -1020,6 +1026,8 @@ protected: ...@@ -1020,6 +1026,8 @@ protected:
uint32_t MaxOutArgsSizeBytes = 0; uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array. // TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
static llvm::SmallBitVector
TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM]; VarList PhysicalRegisters[IceType_NUM];
......
...@@ -116,6 +116,9 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { ...@@ -116,6 +116,9 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v4i32] = VectorRegisters; TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters; TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
llvm::array_lengthof(TypeToRegisterSet), llvm::array_lengthof(TypeToRegisterSet),
RegMIPS32::getRegName, getRegClassName); RegMIPS32::getRegName, getRegClassName);
...@@ -1126,6 +1129,7 @@ void TargetHeaderMIPS32::lower() { ...@@ -1126,6 +1129,7 @@ void TargetHeaderMIPS32::lower() {
} }
llvm::SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; llvm::SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
llvm::SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
} // end of namespace MIPS32 } // end of namespace MIPS32
......
...@@ -57,6 +57,12 @@ public: ...@@ -57,6 +57,12 @@ public:
assert(RC < RC_Target); assert(RC < RC_Target);
return TypeToRegisterSet[RC]; return TypeToRegisterSet[RC];
} }
/// Return the register set for Var's register class from
/// TypeToRegisterSetUnfiltered, which staticInit() copies from
/// TypeToRegisterSet before filterTypeToRegisterSet() is applied. Asserts that
/// the register class is below RC_Target.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
assert(RC < RC_Target);
return TypeToRegisterSetUnfiltered[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg]; return RegisterAliases[Reg];
} }
...@@ -263,6 +269,7 @@ protected: ...@@ -263,6 +269,7 @@ protected:
bool UsesFramePointer = false; bool UsesFramePointer = false;
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
static llvm::SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM]; VarList PhysicalRegisters[IceType_NUM];
......
...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM> ...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}}; TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};
template <> template <>
std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8632::Traits>::TypeToRegisterSetUnfiltered = {{}};
template <>
std::array<llvm::SmallBitVector, std::array<llvm::SmallBitVector,
TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM> TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
TargetX86Base<X8632::Traits>::RegisterAliases = {{}}; TargetX86Base<X8632::Traits>::RegisterAliases = {{}};
......
...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM> ...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}}; TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};
template <> template <>
std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8664::Traits>::TypeToRegisterSetUnfiltered = {{}};
template <>
std::array<llvm::SmallBitVector, std::array<llvm::SmallBitVector,
TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM> TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
TargetX86Base<X8664::Traits>::RegisterAliases = {{}}; TargetX86Base<X8664::Traits>::RegisterAliases = {{}};
......
...@@ -124,6 +124,13 @@ public: ...@@ -124,6 +124,13 @@ public:
return TypeToRegisterSet[RC]; return TypeToRegisterSet[RC];
} }
/// Returns the unfiltered register set for the register class of \p Var,
/// i.e. the entry of TypeToRegisterSetUnfiltered indexed by that class.
/// The class, viewed as a RegClassX86, must be below RCX86_NUM.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass VarClass = Var->getRegClass();
  // Guard the array index: the table has exactly RCX86_NUM entries.
  assert(static_cast<RegClassX86>(VarClass) < RCX86_NUM);
  return TypeToRegisterSetUnfiltered[VarClass];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
assert(Reg < Traits::RegisterSet::Reg_NUM); assert(Reg < Traits::RegisterSet::Reg_NUM);
return RegisterAliases[Reg]; return RegisterAliases[Reg];
...@@ -974,6 +981,8 @@ protected: ...@@ -974,6 +981,8 @@ protected:
bool PrologEmitsFixedAllocas = false; bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0; uint32_t MaxOutArgsSizeBytes = 0;
static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet; static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, RCX86_NUM>
TypeToRegisterSetUnfiltered;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM> static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases; RegisterAliases;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
......
...@@ -379,6 +379,8 @@ template <typename TraitsType> ...@@ -379,6 +379,8 @@ template <typename TraitsType>
void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) { void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) {
Traits::initRegisterSet(Ctx->getFlags(), &TypeToRegisterSet, Traits::initRegisterSet(Ctx->getFlags(), &TypeToRegisterSet,
&RegisterAliases); &RegisterAliases);
for (size_t i = 0; i < TypeToRegisterSet.size(); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM, filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM,
TypeToRegisterSet.data(), TypeToRegisterSet.size(), TypeToRegisterSet.data(), TypeToRegisterSet.size(),
Traits::getRegName, getRegClassName); Traits::getRegName, getRegClassName);
...@@ -1945,8 +1947,6 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1945,8 +1947,6 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
_mov(T_1, Src0Hi); _mov(T_1, Src0Hi);
_imul(T_1, Src1Lo); _imul(T_1, Src1Lo);
_mov(T_2, Src1Hi);
_imul(T_2, Src0Lo);
_mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
_mul(T_4Lo, T_3, Src1Lo); _mul(T_4Lo, T_3, Src1Lo);
// The mul instruction produces two dest variables, edx:eax. We create a // The mul instruction produces two dest variables, edx:eax. We create a
...@@ -1954,6 +1954,8 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1954,6 +1954,8 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert<InstFakeDef>(T_4Hi, T_4Lo); Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
_mov(DestLo, T_4Lo); _mov(DestLo, T_4Lo);
_add(T_4Hi, T_1); _add(T_4Hi, T_1);
_mov(T_2, Src1Hi);
_imul(T_2, Src0Lo);
_add(T_4Hi, T_2); _add(T_4Hi, T_2);
_mov(DestHi, T_4Hi); _mov(DestHi, T_4Hi);
} break; } break;
...@@ -5801,8 +5803,8 @@ void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) { ...@@ -5801,8 +5803,8 @@ void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) {
if (!Traits::Is64Bit && Ty == IceType_i64) { if (!Traits::Is64Bit && Ty == IceType_i64) {
Value = legalizeUndef(Value); Value = legalizeUndef(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
_store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr))); _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
_store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr))); _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) { } else if (isVectorType(Ty)) {
_storep(legalizeToReg(Value), NewAddr); _storep(legalizeToReg(Value), NewAddr);
......
...@@ -125,7 +125,7 @@ entry: ...@@ -125,7 +125,7 @@ entry:
; OPTM1-LABEL: pass64BitConstArg ; OPTM1-LABEL: pass64BitConstArg
; OPTM1: sub esp ; OPTM1: sub esp
; OPTM1: mov DWORD PTR [esp+0x4] ; OPTM1: mov DWORD PTR [esp+0x4]
; OPTM1-NEXT: mov DWORD PTR [esp] ; OPTM1: mov DWORD PTR [esp]
; OPTM1-NEXT: mov DWORD PTR [esp+0x8],0x7b ; OPTM1-NEXT: mov DWORD PTR [esp+0x8],0x7b
; Bundle padding might be added (so not using -NEXT). ; Bundle padding might be added (so not using -NEXT).
; OPTM1: mov DWORD PTR [esp+0x10],0xdeadbeef ; OPTM1: mov DWORD PTR [esp+0x10],0xdeadbeef
...@@ -277,16 +277,16 @@ entry: ...@@ -277,16 +277,16 @@ entry:
} }
; CHECK-LABEL: mul64BitSigned ; CHECK-LABEL: mul64BitSigned
; CHECK: imul ; CHECK: imul
; CHECK: imul
; CHECK: mul ; CHECK: mul
; CHECK: add ; CHECK: add
; CHECK: imul
; CHECK: add ; CHECK: add
; ;
; OPTM1-LABEL: mul64BitSigned ; OPTM1-LABEL: mul64BitSigned
; OPTM1: imul ; OPTM1: imul
; OPTM1: imul
; OPTM1: mul ; OPTM1: mul
; OPTM1: add ; OPTM1: add
; OPTM1: imul
; OPTM1: add ; OPTM1: add
; ARM32-LABEL: mul64BitSigned ; ARM32-LABEL: mul64BitSigned
...@@ -302,16 +302,16 @@ entry: ...@@ -302,16 +302,16 @@ entry:
} }
; CHECK-LABEL: mul64BitUnsigned ; CHECK-LABEL: mul64BitUnsigned
; CHECK: imul ; CHECK: imul
; CHECK: imul
; CHECK: mul ; CHECK: mul
; CHECK: add ; CHECK: add
; CHECK: imul
; CHECK: add ; CHECK: add
; ;
; OPTM1-LABEL: mul64BitUnsigned ; OPTM1-LABEL: mul64BitUnsigned
; OPTM1: imul ; OPTM1: imul
; OPTM1: imul
; OPTM1: mul ; OPTM1: mul
; OPTM1: add ; OPTM1: add
; OPTM1: imul
; OPTM1: add ; OPTM1: add
; ARM32-LABEL: mul64BitUnsigned ; ARM32-LABEL: mul64BitUnsigned
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment