Commit b40595a1 by Jim Stichnoth

Subzero: Make the register allocator more robust with -reg-use and -reg-exclude.

The problem is that if you use -reg-use or -reg-exclude too aggressively, you can get failures because of inherently high register pressure, and there are also contributions from the "specialty" register classes. For example, when you combine load optimization, address mode inference, local register availability optimization, and the div instruction, you can end up needing 5 simultaneously live infinite-weight registers. The fix/enhancement here is to keep track of the "reserve" set of registers for each register class, and allow the register allocator to draw from that as a last resort. This behavior is guarded by the -reg-reserve flag. This CL also includes two improvements in lowering sequences to reduce register pressure. BUG= none R=kschimpf@google.com Review URL: https://codereview.chromium.org/1641653004 .
parent 029bed9c
......@@ -55,6 +55,30 @@ Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
Cfg::~Cfg() { assert(ICE_TLS_GET_FIELD(CurrentCfg) == nullptr); }
/// Build a diagnostic label of the form "foo(i=123:b=9)": the function name
/// followed by an instruction count and a basic block count. The label is
/// meant for dumps and error messages, so that among several candidate
/// functions the smallest one is easy to pick out for debugging. When
/// BuildDefs::dump() is false, the bare function name is returned instead.
/// The counts reflect the CFG at the moment of the call, so they may differ
/// between translation passes.
IceString Cfg::getFunctionNameAndSize() const {
  if (!BuildDefs::dump())
    return getFunctionName();
  SizeT BlockTotal = 0;
  SizeT InstTotal = 0;
  for (CfgNode *Node : getNodes()) {
    ++BlockTotal;
    // Note: deleted instructions are *not* ignored.
    InstTotal += Node->getPhis().size();
    for (Inst &I : Node->getInsts()) {
      // InstTarget instances are left out of the instruction count.
      if (llvm::isa<InstTarget>(&I))
        continue;
      ++InstTotal;
    }
  }
  const auto InstText = std::to_string(InstTotal);
  const auto BlockText = std::to_string(BlockTotal);
  return getFunctionName() + "(i=" + InstText + ":b=" + BlockText + ")";
}
void Cfg::setError(const IceString &Message) {
HasError = true;
ErrorMessage = Message;
......@@ -1075,7 +1099,9 @@ void Cfg::dump(const IceString &Message) {
Str << Args[i]->getType() << " ";
Args[i]->dump(this);
}
Str << ") {\n";
// Append an extra copy of the function name here, in order to print its
// size stats but not mess up lit tests.
Str << ") { # " << getFunctionNameAndSize() << "\n";
}
resetCurrentNode();
if (isVerbose(IceV_Liveness)) {
......
......@@ -64,7 +64,8 @@ public:
/// \name Manage the name and return type of the function being translated.
/// @{
void setFunctionName(const IceString &Name) { FunctionName = Name; }
IceString getFunctionName() const { return FunctionName; }
const IceString &getFunctionName() const { return FunctionName; }
IceString getFunctionNameAndSize() const;
void setReturnType(Type Ty) { ReturnType = Ty; }
Type getReturnType() const { return ReturnType; }
/// @}
......
......@@ -185,6 +185,13 @@ cl::opt<bool>
cl::desc("Randomize register allocation"),
cl::init(false));
/// Allow failsafe access to registers that were restricted via -reg-use or
/// -reg-exclude.
cl::opt<bool>
RegAllocReserve("reg-reserve",
cl::desc("Let register allocation use reserve registers"),
cl::init(false));
/// Repeat register allocation until convergence.
cl::opt<bool>
RepeatRegAlloc("regalloc-repeat",
......@@ -545,6 +552,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks);
OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion);
OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation);
OutFlags.setRegAllocReserve(::RegAllocReserve);
OutFlags.setShouldRepeatRegAlloc(::RepeatRegAlloc);
OutFlags.setShouldReorderFunctions(::ReorderFunctions);
OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
......
......@@ -169,6 +169,11 @@ public:
/// Set ClFlags::RandomRegAlloc to a new value
void setShouldRandomizeRegAlloc(bool NewValue) { RandomRegAlloc = NewValue; }
/// Get the value of ClFlags::RegAllocReserve
bool getRegAllocReserve() const { return RegAllocReserve; }
/// Set ClFlags::RegAllocReserve to a new value
void setRegAllocReserve(bool NewValue) { RegAllocReserve = NewValue; }
/// Get the value of ClFlags::RepeatRegAlloc
bool shouldRepeatRegAlloc() const { return RepeatRegAlloc; }
/// Set ClFlags::RepeatRegAlloc to a new value
......@@ -425,6 +430,8 @@ private:
bool RandomNopInsertion;
/// see anonymous_namespace{IceClFlags.cpp}::RandomizeRegisterAllocation
bool RandomRegAlloc;
/// see anonymous_namespace{IceClFlags.cpp}::RegAllocReserve
bool RegAllocReserve;
/// see anonymous_namespace{IceClFlags.cpp}::RepeatRegAlloc
bool RepeatRegAlloc;
/// see anonymous_namespace{IceClFlags.cpp}::ReorderBasicBlocks
......
......@@ -309,7 +309,8 @@ void GlobalContext::translateFunctions() {
getErrorStatus()->assign(EC_Translation);
OstreamLocker L(this);
getStrError() << "ICE translation error: " << Func->getFunctionName()
<< ": " << Func->getError() << "\n";
<< ": " << Func->getError() << ": "
<< Func->getFunctionNameAndSize() << "\n";
Item = new EmitterWorkItem(Func->getSequenceNumber());
} else {
Func->getAssembler<>()->setInternal(Func->getInternal());
......@@ -320,7 +321,7 @@ void GlobalContext::translateFunctions() {
// The Cfg has already emitted into the assembly buffer, so
// stats have been fully collected into this thread's TLS.
// Dump them before TLS is reset for the next Cfg.
dumpStats(Func->getFunctionName());
dumpStats(Func->getFunctionNameAndSize());
Assembler *Asm = Func->releaseAssembler();
// Copy relevant fields into Asm before Func is deleted.
Asm->setFunctionName(Func->getFunctionName());
......@@ -549,7 +550,7 @@ void GlobalContext::emitItems() {
Cfg::setCurrentCfg(Func.get());
Func->emit();
Cfg::setCurrentCfg(nullptr);
dumpStats(Func->getFunctionName());
dumpStats(Func->getFunctionNameAndSize());
} break;
}
}
......
......@@ -76,11 +76,25 @@ void dumpLiveRange(const Variable *Var, const Cfg *Func) {
Str << " Range=" << Var->getLiveRange();
}
/// Return the index of the set bit in RegMask whose entry in Weights compares
/// lowest. The comparison is strict, so on a tie the earliest index is kept.
/// RegMask must contain at least one set bit; this is asserted.
int32_t findMinWeightIndex(
    const llvm::SmallBitVector &RegMask,
    const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
  int32_t Best = RegMask.find_first();
  assert(Best >= 0);
  // Walk the remaining set bits, keeping the lightest one seen so far.
  int32_t Candidate = RegMask.find_next(Best);
  while (Candidate != -1) {
    if (Weights[Candidate] < Weights[Best])
      Best = Candidate;
    Candidate = RegMask.find_next(Candidate);
  }
  return Best;
}
} // end of anonymous namespace
LinearScan::LinearScan(Cfg *Func)
: Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {}
Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
UseReserve(Ctx->getFlags().getRegAllocReserve()) {}
// Prepare for full register allocation of all variables. We depend on liveness
// analysis to have calculated live ranges.
......@@ -545,8 +559,8 @@ void LinearScan::findRegisterPreference(IterationState &Iter) {
}
}
// Remove registers from the Free[] list where an Inactive range overlaps with
// the current range.
// Remove registers from the Iter.Free[] list where an Inactive range overlaps
// with the current range.
void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
for (const Variable *Item : Inactive) {
if (!Item->rangeOverlaps(Iter.Cur))
......@@ -555,10 +569,11 @@ void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
// TODO(stichnot): Do this with bitvector ops, not a loop, for efficiency.
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) {
// Don't assert(Free[RegNum]) because in theory (though probably never in
// practice) there could be two inactive variables that were marked with
// AllowOverlap.
// Don't assert(Iter.Free[RegNum]) because in theory (though probably
// never in practice) there could be two inactive variables that were
// marked with AllowOverlap.
Iter.Free[RegAlias] = false;
Iter.FreeUnfiltered[RegAlias] = false;
// Disable AllowOverlap if an Inactive variable, which is not Prefer,
// shares Prefer's register, and has a definition within Cur's live range.
if (Iter.AllowOverlap && Item != Iter.Prefer &&
......@@ -570,11 +585,11 @@ void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
}
}
// Remove registers from the Free[] list where an Unhandled pre-colored range
// overlaps with the current range, and set those registers to infinite weight
// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is an
// early exit check that turns a guaranteed O(N^2) algorithm into expected
// linear complexity.
// Remove registers from the Iter.Free[] list where an Unhandled pre-colored
// range overlaps with the current range, and set those registers to infinite
// weight so that they aren't candidates for eviction.
// Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
// O(N^2) algorithm into expected linear complexity.
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
// TODO(stichnot): Partition UnhandledPrecolored according to register class,
// to restrict the number of overlap comparisons needed.
......@@ -590,6 +605,7 @@ void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
RegAlias = Aliases.find_next(RegAlias)) {
Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
Iter.Free[RegAlias] = false;
Iter.FreeUnfiltered[RegAlias] = false;
Iter.PrecoloredUnhandledMask[RegAlias] = true;
// Disable Iter.AllowOverlap if the preferred register is one of these
// pre-colored unhandled overlapping ranges.
......@@ -630,10 +646,14 @@ void LinearScan::allocatePreferredRegister(IterationState &Iter) {
Active.push_back(Iter.Cur);
}
void LinearScan::allocateFreeRegister(IterationState &Iter) {
int32_t RegNum = Iter.Free.find_first();
void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
const int32_t RegNum =
Filtered ? Iter.Free.find_first() : Iter.FreeUnfiltered.find_first();
Iter.Cur->setRegNumTmp(RegNum);
dumpLiveRangeTrace("Allocating ", Iter.Cur);
if (Filtered)
dumpLiveRangeTrace("Allocating ", Iter.Cur);
else
dumpLiveRangeTrace("Allocating X ", Iter.Cur);
const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) {
......@@ -672,77 +692,97 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
}
// All the weights are now calculated. Find the register with smallest weight.
int32_t MinWeightIndex = Iter.RegMask.find_first();
// MinWeightIndex must be valid because of the initial RegMask.any() test.
assert(MinWeightIndex >= 0);
for (SizeT i = MinWeightIndex + 1; i < Iter.Weights.size(); ++i) {
if (Iter.RegMask[i] && Iter.Weights[i] < Iter.Weights[MinWeightIndex])
MinWeightIndex = i;
}
int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
// Cur doesn't have priority over any other live ranges, so don't allocate
// any register to it, and move it to the Handled state.
Handled.push_back(Iter.Cur);
if (Iter.Cur->mustHaveReg()) {
if (Kind == RAK_Phi) {
addSpillFill(Iter);
} else {
dumpLiveRangeTrace("Failing ", Iter.Cur);
Func->setError("Unable to find a physical register for an "
"infinite-weight live range: " +
Iter.Cur->getName(Func));
}
if (!Iter.Cur->mustHaveReg()) {
// Iter.Cur doesn't have priority over any other live ranges, so don't
// allocate any register to it, and move it to the Handled state.
Handled.push_back(Iter.Cur);
return;
}
} else {
// Evict all live ranges in Active that register number MinWeightIndex is
// assigned to.
const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex];
for (SizeT I = Active.size(); I > 0; --I) {
const SizeT Index = I - 1;
Variable *Item = Active[Index];
int32_t RegNum = Item->getRegNumTmp();
if (Aliases[RegNum]) {
dumpLiveRangeTrace("Evicting A ", Item);
const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) {
--RegUses[RegAlias];
assert(RegUses[RegAlias] >= 0);
}
Item->setRegNumTmp(Variable::NoRegister);
moveItem(Active, Index, Handled);
Evicted.push_back(Item);
if (Kind == RAK_Phi) {
// Iter.Cur is infinite-weight but all physical registers are already
// taken, so we need to force one to be temporarily available.
addSpillFill(Iter);
Handled.push_back(Iter.Cur);
return;
}
// The remaining portion of the enclosing "if" block should only be
// reachable if we are manually limiting physical registers for testing.
if (UseReserve) {
if (Iter.FreeUnfiltered.any()) {
// There is some available physical register held in reserve, so use it.
constexpr bool NotFiltered = false;
allocateFreeRegister(Iter, NotFiltered);
// Iter.Cur is now on the Active list.
return;
}
// At this point, we need to find some reserve register that is already
// assigned to a non-infinite-weight variable. This could happen if some
// variable was previously assigned an alias of such a register.
MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
}
if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
dumpLiveRangeTrace("Failing ", Iter.Cur);
Func->setError("Unable to find a physical register for an "
"infinite-weight live range "
"(consider using -reg-reserve): " +
Iter.Cur->getName(Func));
Handled.push_back(Iter.Cur);
return;
}
// Do the same for Inactive.
for (SizeT I = Inactive.size(); I > 0; --I) {
const SizeT Index = I - 1;
Variable *Item = Inactive[Index];
// Note: The Item->rangeOverlaps(Cur) clause is not part of the
// description of AssignMemLoc() in the original paper. But there doesn't
// seem to be any need to evict an inactive live range that doesn't
// overlap with the live range currently being considered. It's especially
// bad if we would end up evicting an infinite-weight but
// currently-inactive live range. The most common situation for this would
// be a scratch register kill set for call instructions.
if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
dumpLiveRangeTrace("Evicting I ", Item);
Item->setRegNumTmp(Variable::NoRegister);
moveItem(Inactive, Index, Handled);
Evicted.push_back(Item);
// At this point, MinWeightIndex points to a valid reserve register to
// reallocate to Iter.Cur, so drop into the eviction code.
}
// Evict all live ranges in Active that register number MinWeightIndex is
// assigned to.
const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex];
for (SizeT I = Active.size(); I > 0; --I) {
const SizeT Index = I - 1;
Variable *Item = Active[Index];
int32_t RegNum = Item->getRegNumTmp();
if (Aliases[RegNum]) {
dumpLiveRangeTrace("Evicting A ", Item);
const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) {
--RegUses[RegAlias];
assert(RegUses[RegAlias] >= 0);
}
Item->setRegNumTmp(Variable::NoRegister);
moveItem(Active, Index, Handled);
Evicted.push_back(Item);
}
// Assign the register to Cur.
Iter.Cur->setRegNumTmp(MinWeightIndex);
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) {
assert(RegUses[RegAlias] >= 0);
++RegUses[RegAlias];
}
// Do the same for Inactive.
for (SizeT I = Inactive.size(); I > 0; --I) {
const SizeT Index = I - 1;
Variable *Item = Inactive[Index];
// Note: The Item->rangeOverlaps(Cur) clause is not part of the description
// of AssignMemLoc() in the original paper. But there doesn't seem to be any
// need to evict an inactive live range that doesn't overlap with the live
// range currently being considered. It's especially bad if we would end up
// evicting an infinite-weight but currently-inactive live range. The most
// common situation for this would be a scratch register kill set for call
// instructions.
if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
dumpLiveRangeTrace("Evicting I ", Item);
Item->setRegNumTmp(Variable::NoRegister);
moveItem(Inactive, Index, Handled);
Evicted.push_back(Item);
}
Active.push_back(Iter.Cur);
dumpLiveRangeTrace("Allocating ", Iter.Cur);
}
// Assign the register to Cur.
Iter.Cur->setRegNumTmp(MinWeightIndex);
for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
RegAlias = Aliases.find_next(RegAlias)) {
assert(RegUses[RegAlias] >= 0);
++RegUses[RegAlias];
}
Active.push_back(Iter.Cur);
dumpLiveRangeTrace("Allocating ", Iter.Cur);
}
void LinearScan::assignFinalRegisters(
......@@ -843,6 +883,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
assert(Target->getRegistersForVariable(Iter.Cur).any());
Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
Iter.RegMaskUnfiltered =
RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
KillsRange.trim(Iter.Cur->getLiveRange().getStart());
// Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
......@@ -857,11 +899,14 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
handleActiveRangeExpiredOrInactive(Iter.Cur);
handleInactiveRangeExpiredOrReactivated(Iter.Cur);
// Calculate available registers into Free[].
// Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
Iter.Free = Iter.RegMask;
Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
if (RegUses[i] > 0)
if (RegUses[i] > 0) {
Iter.Free[i] = false;
Iter.FreeUnfiltered[i] = false;
}
}
findRegisterPreference(Iter);
......@@ -889,11 +934,12 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
filterFreeWithPrecoloredRanges(Iter);
// Remove scratch registers from the Free[] list, and mark their Weights[]
// as infinite, if KillsRange overlaps Cur's live range.
// Remove scratch registers from the Iter.Free[] list, and mark their
// Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
constexpr bool UseTrimmed = true;
if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
Iter.Free.reset(KillsMask);
Iter.FreeUnfiltered.reset(KillsMask);
for (int i = KillsMask.find_first(); i != -1;
i = KillsMask.find_next(i)) {
Iter.Weights[i].setWeight(RegWeight::Inf);
......@@ -906,7 +952,7 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
if (BuildDefs::dump() && Verbose) {
Ostream &Str = Ctx->getStrDump();
for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
if (Iter.RegMask[i]) {
if (Iter.RegMaskUnfiltered[i]) {
Str << Target->getRegName(i, Iter.Cur->getType())
<< "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
<< ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
......@@ -921,7 +967,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
allocatePreferredRegister(Iter);
} else if (Iter.Free.any()) {
// Second choice: any free register.
allocateFreeRegister(Iter);
constexpr bool Filtered = true;
allocateFreeRegister(Iter, Filtered);
} else {
// Fallback: there are no free registers, so we look for the lowest-weight
// register and see if Cur has higher weight.
......
......@@ -61,7 +61,9 @@ private:
int32_t PreferReg = Variable::NoRegister;
bool AllowOverlap = false;
llvm::SmallBitVector RegMask;
llvm::SmallBitVector RegMaskUnfiltered;
llvm::SmallBitVector Free;
llvm::SmallBitVector FreeUnfiltered;
llvm::SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping
llvm::SmallVector<RegWeight, REGS_SIZE> Weights;
};
......@@ -98,7 +100,7 @@ private:
void filterFreeWithPrecoloredRanges(IterationState &Iter);
void allocatePrecoloredRegister(Variable *Cur);
void allocatePreferredRegister(IterationState &Iter);
void allocateFreeRegister(IterationState &Iter);
void allocateFreeRegister(IterationState &Iter, bool Filtered);
void handleNoFreeRegisters(IterationState &Iter);
void assignFinalRegisters(const llvm::SmallBitVector &RegMaskFull,
const llvm::SmallBitVector &PreDefinedRegisters,
......@@ -130,6 +132,7 @@ private:
bool FindOverlap = false;
const bool Verbose;
const bool UseReserve;
};
} // end of namespace Ice
......
......@@ -275,8 +275,15 @@ public:
virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const = 0;
/// Get the set of physical registers available for the specified Variable's
/// register class, applying register restrictions from the command line.
virtual const llvm::SmallBitVector &
getRegistersForVariable(const Variable *Var) const = 0;
/// Get the set of *all* physical registers available for the specified
/// Variable's register class, *not* applying register restrictions from the
/// command line.
virtual const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const = 0;
virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0;
void regAlloc(RegAllocKind Kind);
......
......@@ -342,6 +342,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(
Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
llvm::array_lengthof(TypeToRegisterSet), [](int32_t RegNum) -> IceString {
......@@ -6514,6 +6517,8 @@ void TargetHeaderARM32::lower() {
}
llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
llvm::SmallBitVector
TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
} // end of namespace ARM32
......
......@@ -88,6 +88,12 @@ public:
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
}
/// Look up the unfiltered register set (TypeToRegisterSetUnfiltered) for the
/// register class of Var.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass VarClass = Var->getRegClass();
  assert(VarClass < RC_Target);
  return TypeToRegisterSetUnfiltered[VarClass];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg];
}
......@@ -1020,6 +1026,8 @@ protected:
uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
static llvm::SmallBitVector
TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM];
......
......@@ -116,6 +116,9 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
llvm::array_lengthof(TypeToRegisterSet),
RegMIPS32::getRegName, getRegClassName);
......@@ -1126,6 +1129,7 @@ void TargetHeaderMIPS32::lower() {
}
llvm::SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
llvm::SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
} // end of namespace MIPS32
......
......@@ -57,6 +57,12 @@ public:
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
}
/// Look up the unfiltered register set (TypeToRegisterSetUnfiltered) for the
/// register class of Var.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass VarClass = Var->getRegClass();
  assert(VarClass < RC_Target);
  return TypeToRegisterSetUnfiltered[VarClass];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg];
}
......@@ -263,6 +269,7 @@ protected:
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
static llvm::SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM];
......
......@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};
template <>
std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8632::Traits>::TypeToRegisterSetUnfiltered = {{}};
template <>
std::array<llvm::SmallBitVector,
TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
TargetX86Base<X8632::Traits>::RegisterAliases = {{}};
......
......@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};
template <>
std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8664::Traits>::TypeToRegisterSetUnfiltered = {{}};
template <>
std::array<llvm::SmallBitVector,
TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
TargetX86Base<X8664::Traits>::RegisterAliases = {{}};
......
......@@ -124,6 +124,13 @@ public:
return TypeToRegisterSet[RC];
}
/// Look up the unfiltered register set (TypeToRegisterSetUnfiltered) for the
/// register class of Var. The class is asserted to be below RCX86_NUM.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass VarClass = Var->getRegClass();
  assert(static_cast<RegClassX86>(VarClass) < RCX86_NUM);
  return TypeToRegisterSetUnfiltered[VarClass];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
assert(Reg < Traits::RegisterSet::Reg_NUM);
return RegisterAliases[Reg];
......@@ -974,6 +981,8 @@ protected:
bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0;
static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, RCX86_NUM>
TypeToRegisterSetUnfiltered;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases;
llvm::SmallBitVector RegsUsed;
......
......@@ -379,6 +379,8 @@ template <typename TraitsType>
void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) {
Traits::initRegisterSet(Ctx->getFlags(), &TypeToRegisterSet,
&RegisterAliases);
for (size_t i = 0; i < TypeToRegisterSet.size(); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM,
TypeToRegisterSet.data(), TypeToRegisterSet.size(),
Traits::getRegName, getRegClassName);
......@@ -1945,8 +1947,6 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
_mov(T_1, Src0Hi);
_imul(T_1, Src1Lo);
_mov(T_2, Src1Hi);
_imul(T_2, Src0Lo);
_mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
_mul(T_4Lo, T_3, Src1Lo);
// The mul instruction produces two dest variables, edx:eax. We create a
......@@ -1954,6 +1954,8 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
_mov(DestLo, T_4Lo);
_add(T_4Hi, T_1);
_mov(T_2, Src1Hi);
_imul(T_2, Src0Lo);
_add(T_4Hi, T_2);
_mov(DestHi, T_4Hi);
} break;
......@@ -5801,8 +5803,8 @@ void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) {
if (!Traits::Is64Bit && Ty == IceType_i64) {
Value = legalizeUndef(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
_store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
_store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) {
_storep(legalizeToReg(Value), NewAddr);
......
......@@ -125,7 +125,7 @@ entry:
; OPTM1-LABEL: pass64BitConstArg
; OPTM1: sub esp
; OPTM1: mov DWORD PTR [esp+0x4]
; OPTM1-NEXT: mov DWORD PTR [esp]
; OPTM1: mov DWORD PTR [esp]
; OPTM1-NEXT: mov DWORD PTR [esp+0x8],0x7b
; Bundle padding might be added (so not using -NEXT).
; OPTM1: mov DWORD PTR [esp+0x10],0xdeadbeef
......@@ -277,16 +277,16 @@ entry:
}
; CHECK-LABEL: mul64BitSigned
; CHECK: imul
; CHECK: imul
; CHECK: mul
; CHECK: add
; CHECK: imul
; CHECK: add
;
; OPTM1-LABEL: mul64BitSigned
; OPTM1: imul
; OPTM1: imul
; OPTM1: mul
; OPTM1: add
; OPTM1: imul
; OPTM1: add
; ARM32-LABEL: mul64BitSigned
......@@ -302,16 +302,16 @@ entry:
}
; CHECK-LABEL: mul64BitUnsigned
; CHECK: imul
; CHECK: imul
; CHECK: mul
; CHECK: add
; CHECK: imul
; CHECK: add
;
; OPTM1-LABEL: mul64BitUnsigned
; OPTM1: imul
; OPTM1: imul
; OPTM1: mul
; OPTM1: add
; OPTM1: imul
; OPTM1: add
; ARM32-LABEL: mul64BitUnsigned
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment