Commit 70d0a054 by Jim Stichnoth

Subzero: Use the linear-scan register allocator for Om1 as well.

This removes the need for Om1's postLower() code which did its own ad-hoc register allocation. And it actually speeds up Om1 translation significantly. This mode of register allocation only allocates for infinite-weight Variables, while respecting live ranges of pre-colored Variables. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/733643005
parent edc115ec
...@@ -102,6 +102,11 @@ enum LivenessMode { ...@@ -102,6 +102,11 @@ enum LivenessMode {
Liveness_Intervals Liveness_Intervals
}; };
// Selects which flavor of linear-scan register allocation to run:
// either a full global allocation over all variables, or a minimal
// pass that only assigns registers to infinite-weight (must-have-reg)
// Variables while respecting pre-colored ones.
enum RegAllocKind {
  RAK_Global, // full, global register allocation
  RAK_InfOnly // allocation only for infinite-weight Variables
};
enum VerboseItem { enum VerboseItem {
IceV_None = 0, IceV_None = 0,
IceV_Instructions = 1 << 0, IceV_Instructions = 1 << 0,
......
...@@ -73,17 +73,16 @@ void dumpLiveRange(const Variable *Var, const Cfg *Func) { ...@@ -73,17 +73,16 @@ void dumpLiveRange(const Variable *Var, const Cfg *Func) {
} // end of anonymous namespace } // end of anonymous namespace
void LinearScan::initForGlobalAlloc() { // Prepare for full register allocation of all variables. We depend
// on liveness analysis to have calculated live ranges.
void LinearScan::initForGlobal() {
TimerMarker T(TimerStack::TT_initUnhandled, Func); TimerMarker T(TimerStack::TT_initUnhandled, Func);
Unhandled.clear(); FindPreference = true;
UnhandledPrecolored.clear(); FindOverlap = true;
Handled.clear();
Inactive.clear();
Active.clear();
// Gather the live ranges of all variables and add them to the
// Unhandled set.
const VarList &Vars = Func->getVariables(); const VarList &Vars = Func->getVariables();
Unhandled.reserve(Vars.size()); Unhandled.reserve(Vars.size());
// Gather the live ranges of all variables and add them to the
// Unhandled set.
for (Variable *Var : Vars) { for (Variable *Var : Vars) {
// Explicitly don't consider zero-weight variables, which are // Explicitly don't consider zero-weight variables, which are
// meant to be spill slots. // meant to be spill slots.
...@@ -101,6 +100,128 @@ void LinearScan::initForGlobalAlloc() { ...@@ -101,6 +100,128 @@ void LinearScan::initForGlobalAlloc() {
UnhandledPrecolored.push_back(Var); UnhandledPrecolored.push_back(Var);
} }
} }
// Build the (ordered) list of FakeKill instruction numbers.
Kills.clear();
for (CfgNode *Node : Func->getNodes()) {
for (auto I = Node->getInsts().begin(), E = Node->getInsts().end(); I != E;
++I) {
if (auto Kill = llvm::dyn_cast<InstFakeKill>(I)) {
if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
Kills.push_back(I->getNumber());
}
}
}
}
// Prepare for very simple register allocation of only infinite-weight
// Variables while respecting pre-colored Variables. Some properties
// we take advantage of:
//
// * Live ranges of interest consist of a single segment.
//
// * Live ranges of interest never span a call instruction.
//
// * Phi instructions are not considered because either phis have
//   already been lowered, or they don't contain any pre-colored or
//   infinite-weight Variables.
//
// * We don't need to renumber instructions before computing live
//   ranges because all the high-level ICE instructions are deleted
//   prior to lowering, and the low-level instructions are added in
//   monotonically increasing order.
//
// * There are no opportunities for register preference or allowing
//   overlap.
//
// Some properties we aren't (yet) taking advantage of:
//
// * Because live ranges are a single segment, the Unhandled set will
//   always be empty, and the live range trimming operation is
//   unnecessary.
//
// * Calculating overlap of single-segment live ranges could be
//   optimized a bit.
void LinearScan::initForInfOnly() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = false;
  FindOverlap = false;
  // Count of variables that will end up in the Unhandled set; used to
  // reserve capacity and to sanity-check the second pass below.
  SizeT NumVars = 0;
  const VarList &Vars = Func->getVariables();
  // Iterate across all instructions and record the begin and end of
  // the live range for each variable that is pre-colored or infinite
  // weight.  A variable's range begins at its first (non-deleted)
  // defining instruction and ends at its last use as a source operand.
  std::vector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
  std::vector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
  for (CfgNode *Node : Func->getNodes()) {
    for (auto Inst = Node->getInsts().begin(), E = Node->getInsts().end();
         Inst != E; ++Inst) {
      if (Inst->isDeleted())
        continue;
      if (const Variable *Var = Inst->getDest()) {
        if (Var->hasReg() || Var->getWeight() == RegWeight::Inf) {
          // Only the first definition opens the live range.
          if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
            LRBegin[Var->getIndex()] = Inst->getNumber();
            ++NumVars;
          }
        }
      }
      for (SizeT I = 0; I < Inst->getSrcSize(); ++I) {
        Operand *Src = Inst->getSrc(I);
        // Renamed from NumVars to avoid shadowing the function-level
        // NumVars counter declared above.
        const SizeT SrcNumVars = Src->getNumVars();
        for (SizeT J = 0; J < SrcNumVars; ++J) {
          const Variable *Var = Src->getVar(J);
          // Each use pushes the live range end later; the last use wins
          // because instruction numbers increase monotonically.
          if (Var->hasReg() || Var->getWeight() == RegWeight::Inf)
            LREnd[Var->getIndex()] = Inst->getNumber();
        }
      }
    }
  }
  Unhandled.reserve(NumVars);
  for (SizeT i = 0; i < Vars.size(); ++i) {
    Variable *Var = Vars[i];
    if (LRBegin[i] != Inst::NumberSentinel) {
      // NOTE(review): a variable that is defined but never used as a
      // source would leave LREnd at the sentinel; the assert assumes
      // every variable of interest has at least one use.
      assert(LREnd[i] != Inst::NumberSentinel);
      Unhandled.push_back(Var);
      Var->resetLiveRange();
      const uint32_t WeightDelta = 1;
      Var->addLiveRange(LRBegin[i], LREnd[i], WeightDelta);
      Var->untrimLiveRange();
      if (Var->hasReg()) {
        // Pre-colored: seed the temporary register assignment and make
        // the range infinite-weight so it can't be evicted.
        Var->setRegNumTmp(Var->getRegNum());
        Var->setLiveRangeInfiniteWeight();
        UnhandledPrecolored.push_back(Var);
      }
      --NumVars;
    }
  }
  // This isn't actually a fatal condition, but it would be nice to
  // know if we somehow pre-calculated Unhandled's size wrong.
  assert(NumVars == 0);
  // Don't build up the list of Kills because we know that no
  // infinite-weight Variable has a live range spanning a call.
  Kills.clear();
}
void LinearScan::init(RegAllocKind Kind) {
Unhandled.clear();
UnhandledPrecolored.clear();
Handled.clear();
Inactive.clear();
Active.clear();
switch (Kind) {
case RAK_Global:
initForGlobal();
break;
case RAK_InfOnly:
initForInfOnly();
break;
}
struct CompareRanges { struct CompareRanges {
bool operator()(const Variable *L, const Variable *R) { bool operator()(const Variable *L, const Variable *R) {
InstNumberT Lstart = L->getLiveRange().getStart(); InstNumberT Lstart = L->getLiveRange().getStart();
...@@ -114,20 +235,6 @@ void LinearScan::initForGlobalAlloc() { ...@@ -114,20 +235,6 @@ void LinearScan::initForGlobalAlloc() {
std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges()); std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges());
std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
CompareRanges()); CompareRanges());
// Build the (ordered) list of FakeKill instruction numbers.
Kills.clear();
for (CfgNode *Node : Func->getNodes()) {
for (auto I = Node->getInsts().begin(), E = Node->getInsts().end(); I != E;
++I) {
if (I->isDeleted())
continue;
if (auto Kill = llvm::dyn_cast<InstFakeKill>(I)) {
if (!Kill->getLinked()->isDeleted())
Kills.push_back(I->getNumber());
}
}
}
} }
// Implements the linear-scan algorithm. Based on "Linear Scan // Implements the linear-scan algorithm. Based on "Linear Scan
...@@ -292,41 +399,41 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -292,41 +399,41 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
Variable *Prefer = NULL; Variable *Prefer = NULL;
int32_t PreferReg = Variable::NoRegister; int32_t PreferReg = Variable::NoRegister;
bool AllowOverlap = false; bool AllowOverlap = false;
if (const Inst *DefInst = VMetadata->getFirstDefinition(Cur)) { if (FindPreference) {
assert(DefInst->getDest() == Cur); if (const Inst *DefInst = VMetadata->getFirstDefinition(Cur)) {
bool IsAssign = DefInst->isSimpleAssign(); assert(DefInst->getDest() == Cur);
bool IsSingleDef = !VMetadata->isMultiDef(Cur); bool IsAssign = DefInst->isSimpleAssign();
for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { bool IsSingleDef = !VMetadata->isMultiDef(Cur);
// TODO(stichnot): Iterate through the actual Variables of the for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) {
// instruction, not just the source operands. This could // TODO(stichnot): Iterate through the actual Variables of the
// capture Load instructions, including address mode // instruction, not just the source operands. This could
// optimization, for Prefer (but not for AllowOverlap). // capture Load instructions, including address mode
if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { // optimization, for Prefer (but not for AllowOverlap).
int32_t SrcReg = SrcVar->getRegNumTmp(); if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
// Only consider source variables that have (so far) been int32_t SrcReg = SrcVar->getRegNumTmp();
// assigned a register. That register must be one in the // Only consider source variables that have (so far) been
// RegMask set, e.g. don't try to prefer the stack pointer // assigned a register. That register must be one in the
// as a result of the stacksave intrinsic. // RegMask set, e.g. don't try to prefer the stack pointer
if (SrcVar->hasRegTmp() && RegMask[SrcReg]) { // as a result of the stacksave intrinsic.
if (!Free[SrcReg]) { if (SrcVar->hasRegTmp() && RegMask[SrcReg]) {
// Don't bother trying to enable AllowOverlap if the if (FindOverlap && !Free[SrcReg]) {
// register is already free. // Don't bother trying to enable AllowOverlap if the
AllowOverlap = // register is already free.
IsSingleDef && IsAssign && !overlapsDefs(Func, Cur, SrcVar); AllowOverlap =
} IsSingleDef && IsAssign && !overlapsDefs(Func, Cur, SrcVar);
if (AllowOverlap || Free[SrcReg]) { }
Prefer = SrcVar; if (AllowOverlap || Free[SrcReg]) {
PreferReg = SrcReg; Prefer = SrcVar;
PreferReg = SrcReg;
}
} }
} }
} }
} if (Verbose && Prefer) {
} Str << "Initial Prefer=" << *Prefer << " R=" << PreferReg
if (Verbose) { << " LIVE=" << Prefer->getLiveRange()
if (Prefer) { << " Overlap=" << AllowOverlap << "\n";
Str << "Initial Prefer=" << *Prefer << " R=" << PreferReg }
<< " LIVE=" << Prefer->getLiveRange() << " Overlap=" << AllowOverlap
<< "\n";
} }
} }
...@@ -353,12 +460,14 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) { ...@@ -353,12 +460,14 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
// Disable AllowOverlap if an Active variable, which is not // Disable AllowOverlap if an Active variable, which is not
// Prefer, shares Prefer's register, and has a definition within // Prefer, shares Prefer's register, and has a definition within
// Cur's live range. // Cur's live range.
for (const Variable *Item : Active) { if (AllowOverlap) {
int32_t RegNum = Item->getRegNumTmp(); for (const Variable *Item : Active) {
if (Item != Prefer && RegNum == PreferReg && int32_t RegNum = Item->getRegNumTmp();
overlapsDefs(Func, Cur, Item)) { if (Item != Prefer && RegNum == PreferReg &&
AllowOverlap = false; overlapsDefs(Func, Cur, Item)) {
dumpDisableOverlap(Func, Item, "Active"); AllowOverlap = false;
dumpDisableOverlap(Func, Item, "Active");
}
} }
} }
......
...@@ -26,12 +26,16 @@ class LinearScan { ...@@ -26,12 +26,16 @@ class LinearScan {
LinearScan &operator=(const LinearScan &) = delete; LinearScan &operator=(const LinearScan &) = delete;
public: public:
LinearScan(Cfg *Func) : Func(Func) {} LinearScan(Cfg *Func)
void initForGlobalAlloc(); : Func(Func), FindPreference(false), FindOverlap(false) {}
void init(RegAllocKind Kind);
void scan(const llvm::SmallBitVector &RegMask); void scan(const llvm::SmallBitVector &RegMask);
void dump(Cfg *Func) const; void dump(Cfg *Func) const;
private: private:
void initForGlobal();
void initForInfOnly();
Cfg *const Func; Cfg *const Func;
typedef std::vector<Variable *> OrderedRanges; typedef std::vector<Variable *> OrderedRanges;
typedef std::list<Variable *> UnorderedRanges; typedef std::list<Variable *> UnorderedRanges;
...@@ -41,6 +45,12 @@ private: ...@@ -41,6 +45,12 @@ private:
OrderedRanges UnhandledPrecolored; OrderedRanges UnhandledPrecolored;
UnorderedRanges Active, Inactive, Handled; UnorderedRanges Active, Inactive, Handled;
std::vector<InstNumberT> Kills; std::vector<InstNumberT> Kills;
bool FindPreference;
bool FindOverlap;
// TODO(stichnot): We're not really using FindOverlap yet, but we
// may want a flavor of register allocation where FindPreference is
// useful but we didn't want to initialize VMetadata with VMK_All
// and therefore we can't safely allow overlap.
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -225,7 +225,7 @@ void TargetLowering::lower() { ...@@ -225,7 +225,7 @@ void TargetLowering::lower() {
// perhaps for the frame pointer) to be allocated. This set of // perhaps for the frame pointer) to be allocated. This set of
// registers could potentially be parameterized if we want to restrict // registers could potentially be parameterized if we want to restrict
// registers e.g. for performance testing. // registers e.g. for performance testing.
void TargetLowering::regAlloc() { void TargetLowering::regAlloc(RegAllocKind Kind) {
TimerMarker T(TimerStack::TT_regAlloc, Func); TimerMarker T(TimerStack::TT_regAlloc, Func);
LinearScan LinearScan(Func); LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None; RegSetMask RegInclude = RegSet_None;
...@@ -234,7 +234,7 @@ void TargetLowering::regAlloc() { ...@@ -234,7 +234,7 @@ void TargetLowering::regAlloc() {
RegInclude |= RegSet_CalleeSave; RegInclude |= RegSet_CalleeSave;
if (hasFramePointer()) if (hasFramePointer())
RegExclude |= RegSet_FramePointer; RegExclude |= RegSet_FramePointer;
LinearScan.initForGlobalAlloc(); LinearScan.init(Kind);
llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude); llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
LinearScan.scan(RegMask); LinearScan.scan(RegMask);
} }
......
...@@ -195,7 +195,7 @@ public: ...@@ -195,7 +195,7 @@ public:
virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include, virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const = 0; RegSetMask Exclude) const = 0;
virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0; virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0;
void regAlloc(); void regAlloc(RegAllocKind Kind);
virtual void emitVariable(const Variable *Var) const = 0; virtual void emitVariable(const Variable *Var) const = 0;
...@@ -236,11 +236,7 @@ protected: ...@@ -236,11 +236,7 @@ protected:
virtual void doAddressOptStore() {} virtual void doAddressOptStore() {}
virtual void randomlyInsertNop(float Probability) = 0; virtual void randomlyInsertNop(float Probability) = 0;
// This gives the target an opportunity to post-process the lowered // This gives the target an opportunity to post-process the lowered
// expansion before returning. The primary intention is to do some // expansion before returning.
// Register Manager activity as necessary, specifically to eagerly
// allocate registers based on affinity and other factors. The
// simplest lowering does nothing here and leaves it all to a
// subsequent global register allocation pass.
virtual void postLower() {} virtual void postLower() {}
Cfg *Func; Cfg *Func;
......
...@@ -9,9 +9,7 @@ ...@@ -9,9 +9,7 @@
// //
// This file implements the TargetLoweringX8632 class, which // This file implements the TargetLoweringX8632 class, which
// consists almost entirely of the lowering sequence for each // consists almost entirely of the lowering sequence for each
// high-level instruction. It also implements // high-level instruction.
// TargetX8632Fast::postLower() which does the simplest possible
// register allocation for the "fast" target.
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
...@@ -375,7 +373,7 @@ void TargetX8632::translateO2() { ...@@ -375,7 +373,7 @@ void TargetX8632::translateO2() {
// associated cleanup, to make the dump cleaner and more useful. // associated cleanup, to make the dump cleaner and more useful.
Func->dump("After initial x8632 codegen"); Func->dump("After initial x8632 codegen");
Func->getVMetadata()->init(VMK_All); Func->getVMetadata()->init(VMK_All);
regAlloc(); regAlloc(RAK_Global);
if (Func->hasError()) if (Func->hasError())
return; return;
Func->dump("After linear scan regalloc"); Func->dump("After linear scan regalloc");
...@@ -429,6 +427,11 @@ void TargetX8632::translateOm1() { ...@@ -429,6 +427,11 @@ void TargetX8632::translateOm1() {
return; return;
Func->dump("After initial x8632 codegen"); Func->dump("After initial x8632 codegen");
regAlloc(RAK_InfOnly);
if (Func->hasError())
return;
Func->dump("After regalloc of infinite-weight variables");
Func->genFrame(); Func->genFrame();
if (Func->hasError()) if (Func->hasError())
return; return;
...@@ -1816,9 +1819,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -1816,9 +1819,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
// stack locations. // stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
// TODO: Consider calling postLower() here to reduce the register
// pressure associated with using too many infinite weight
// temporaries when lowering the call sequence in -Om1 mode.
} }
// Copy arguments to be passed in registers to the appropriate // Copy arguments to be passed in registers to the appropriate
...@@ -4112,8 +4112,6 @@ void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, ...@@ -4112,8 +4112,6 @@ void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
Variable *DestT = Func->makeVariable(Ty); Variable *DestT = Func->makeVariable(Ty);
lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
T = DestT; T = DestT;
// TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of
// infinite weight temporaries.
} }
lowerAssign(InstAssign::create(Func, Dest, T)); lowerAssign(InstAssign::create(Func, Dest, T));
...@@ -4200,7 +4198,7 @@ void TargetX8632::lowerPhiAssignments(CfgNode *Node, ...@@ -4200,7 +4198,7 @@ void TargetX8632::lowerPhiAssignments(CfgNode *Node,
assert(Node->getPhis().empty()); assert(Node->getPhis().empty());
CfgNode *Succ = Node->getOutEdges().front(); CfgNode *Succ = Node->getOutEdges().front();
getContext().init(Node); getContext().init(Node);
// Register set setup similar to regAlloc() and postLower(). // Register set setup similar to regAlloc().
RegSetMask RegInclude = RegSet_All; RegSetMask RegInclude = RegSet_All;
RegSetMask RegExclude = RegSet_StackPointer; RegSetMask RegExclude = RegSet_StackPointer;
if (hasFramePointer()) if (hasFramePointer())
...@@ -4512,115 +4510,20 @@ Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { ...@@ -4512,115 +4510,20 @@ Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
} }
void TargetX8632::postLower() { void TargetX8632::postLower() {
if (Ctx->getOptLevel() != Opt_m1) { if (Ctx->getOptLevel() == Opt_m1)
// Find two-address non-SSA instructions where Dest==Src0, and set
// the DestNonKillable flag to keep liveness analysis consistent.
for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
if (Inst->isDeleted())
continue;
if (Variable *Dest = Inst->getDest()) {
// TODO(stichnot): We may need to consider all source
// operands, not just the first one, if using 3-address
// instructions.
if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
Inst->setDestNonKillable();
}
}
return; return;
} // Find two-address non-SSA instructions where Dest==Src0, and set
// TODO: Avoid recomputing WhiteList every instruction. // the DestNonKillable flag to keep liveness analysis consistent.
RegSetMask RegInclude = RegSet_All;
RegSetMask RegExclude = RegSet_StackPointer;
if (hasFramePointer())
RegExclude |= RegSet_FramePointer;
llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
// Make one pass to black-list pre-colored registers. TODO: If
// there was some prior register allocation pass that made register
// assignments, those registers need to be black-listed here as
// well.
llvm::DenseMap<const Variable *, const Inst *> LastUses;
// The first pass also keeps track of which instruction is the last
// use for each infinite-weight variable. After the last use, the
// variable is released to the free list.
for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
if (Inst->isDeleted())
continue;
// Don't consider a FakeKill instruction, because (currently) it
// is only used to kill all scratch registers at a call site, and
// we don't want to black-list all scratch registers during the
// call lowering. This could become a problem since it relies on
// the lowering sequence not keeping any infinite-weight variables
// live across a call. TODO(stichnot): Consider replacing this
// whole postLower() implementation with a robust local register
// allocator, for example compute live ranges only for pre-colored
// and infinite-weight variables and run the existing linear-scan
// allocator.
assert(!llvm::isa<InstFakeKill>(Inst) || Inst->getSrcSize() == 0);
for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
Operand *Src = Inst->getSrc(SrcNum);
SizeT NumVars = Src->getNumVars();
for (SizeT J = 0; J < NumVars; ++J) {
const Variable *Var = Src->getVar(J);
// Track last uses of all variables, regardless of whether
// they are pre-colored or infinite-weight.
LastUses[Var] = Inst;
if (!Var->hasReg())
continue;
WhiteList[Var->getRegNum()] = false;
}
}
}
// The second pass colors infinite-weight variables.
llvm::SmallBitVector AvailableRegisters = WhiteList;
llvm::SmallBitVector FreedRegisters(WhiteList.size());
for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
FreedRegisters.reset();
if (Inst->isDeleted()) if (Inst->isDeleted())
continue; continue;
// Iterate over all variables referenced in the instruction, if (Variable *Dest = Inst->getDest()) {
// including the Dest variable (if any). If the variable is // TODO(stichnot): We may need to consider all source
// marked as infinite-weight, find it a register. If this // operands, not just the first one, if using 3-address
// instruction is the last use of the variable in the lowered // instructions.
// sequence, release the register to the free list after this if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
// instruction is completely processed. Note that the first pass Inst->setDestNonKillable();
// ignores the Dest operand, under the assumption that a
// pre-colored Dest will appear as a source operand in some
// subsequent instruction in the lowered sequence.
Variable *Dest = Inst->getDest();
SizeT NumSrcs = Inst->getSrcSize();
if (Dest)
++NumSrcs;
if (NumSrcs == 0)
continue;
OperandList Srcs(NumSrcs);
for (SizeT i = 0; i < Inst->getSrcSize(); ++i)
Srcs[i] = Inst->getSrc(i);
if (Dest)
Srcs[NumSrcs - 1] = Dest;
for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) {
Operand *Src = Srcs[SrcNum];
SizeT NumVars = Src->getNumVars();
for (SizeT J = 0; J < NumVars; ++J) {
Variable *Var = Src->getVar(J);
if (!Var->hasReg() && Var->getWeight().isInf()) {
llvm::SmallBitVector AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType());
assert(AvailableTypedRegisters.any());
int32_t RegNum = AvailableTypedRegisters.find_first();
Var->setRegNum(RegNum);
AvailableRegisters[RegNum] = false;
}
if (Var->hasReg()) {
int32_t RegNum = Var->getRegNum();
assert(!AvailableRegisters[RegNum]);
if (LastUses[Var] == Inst) {
if (WhiteList[RegNum])
FreedRegisters[RegNum] = true;
}
}
}
} }
AvailableRegisters |= FreedRegisters;
} }
} }
......
...@@ -83,10 +83,12 @@ void Translator::translateFcn(Cfg *Fcn) { ...@@ -83,10 +83,12 @@ void Translator::translateFcn(Cfg *Fcn) {
ErrorStatus = true; ErrorStatus = true;
} }
if (Ctx->getFlags().UseIntegratedAssembler) { if (!ErrorStatus) {
Func->emitIAS(); if (Ctx->getFlags().UseIntegratedAssembler) {
} else { Func->emitIAS();
Func->emit(); } else {
Func->emit();
}
} }
Ctx->dumpStats(Func->getFunctionName()); Ctx->dumpStats(Func->getFunctionName());
} }
......
...@@ -38,10 +38,10 @@ entry: ...@@ -38,10 +38,10 @@ entry:
; CHECK: movzx eax, byte ptr [ebp - 4] ; CHECK: movzx eax, byte ptr [ebp - 4]
; CHECK: mov dword ptr [ebp - 16], eax ; CHECK: mov dword ptr [ebp - 16], eax
; CHECK: sub esp, 16 ; CHECK: sub esp, 16
; CHECK: mov ecx, dword ptr [ebp + 8] ; CHECK: mov eax, dword ptr [ebp + 8]
; CHECK: mov dword ptr [esp], ecx ; CHECK: mov dword ptr [esp], eax
; CHECK: mov ecx, dword ptr [ebp - 12] ; CHECK: mov eax, dword ptr [ebp - 12]
; CHECK: mov dword ptr [esp + 4], ecx ; CHECK: mov dword ptr [esp + 4], eax
; CHECK: mov ecx, dword ptr [ebp - 16] ; CHECK: mov eax, dword ptr [ebp - 16]
; CHECK: mov dword ptr [esp + 8], ecx ; CHECK: mov dword ptr [esp + 8], eax
; CHECK: call -4 ; CHECK: call -4
...@@ -31,9 +31,9 @@ entry: ...@@ -31,9 +31,9 @@ entry:
; PROB50: pmuludq %xmm2, %xmm1 ; PROB50: pmuludq %xmm2, %xmm1
; PROB50: nop # variant = 0 ; PROB50: nop # variant = 0
; PROB50: shufps $136, %xmm1, %xmm0 ; PROB50: shufps $136, %xmm1, %xmm0
; PROB50: pshufd $216, %xmm0, %xmm1 ; PROB50: pshufd $216, %xmm0, %xmm0
; PROB50: nop # variant = 2 ; PROB50: nop # variant = 2
; PROB50: movups %xmm1, (%esp) ; PROB50: movups %xmm0, (%esp)
; PROB50: movups (%esp), %xmm0 ; PROB50: movups (%esp), %xmm0
; PROB50: addl $60, %esp ; PROB50: addl $60, %esp
; PROB50: nop # variant = 0 ; PROB50: nop # variant = 0
...@@ -59,9 +59,9 @@ entry: ...@@ -59,9 +59,9 @@ entry:
; PROB90: nop # variant = 3 ; PROB90: nop # variant = 3
; PROB90: shufps $136, %xmm1, %xmm0 ; PROB90: shufps $136, %xmm1, %xmm0
; PROB90: nop # variant = 4 ; PROB90: nop # variant = 4
; PROB90: pshufd $216, %xmm0, %xmm1 ; PROB90: pshufd $216, %xmm0, %xmm0
; PROB90: nop # variant = 2 ; PROB90: nop # variant = 2
; PROB90: movups %xmm1, (%esp) ; PROB90: movups %xmm0, (%esp)
; PROB90: nop # variant = 4 ; PROB90: nop # variant = 4
; PROB90: movups (%esp), %xmm0 ; PROB90: movups (%esp), %xmm0
; PROB90: nop # variant = 2 ; PROB90: nop # variant = 2
...@@ -86,9 +86,9 @@ entry: ...@@ -86,9 +86,9 @@ entry:
; MAXNOPS2: nop # variant = 3 ; MAXNOPS2: nop # variant = 3
; MAXNOPS2: pmuludq %xmm2, %xmm1 ; MAXNOPS2: pmuludq %xmm2, %xmm1
; MAXNOPS2: shufps $136, %xmm1, %xmm0 ; MAXNOPS2: shufps $136, %xmm1, %xmm0
; MAXNOPS2: pshufd $216, %xmm0, %xmm1 ; MAXNOPS2: pshufd $216, %xmm0, %xmm0
; MAXNOPS2: nop # variant = 3 ; MAXNOPS2: nop # variant = 3
; MAXNOPS2: movups %xmm1, (%esp) ; MAXNOPS2: movups %xmm0, (%esp)
; MAXNOPS2: nop # variant = 0 ; MAXNOPS2: nop # variant = 0
; MAXNOPS2: movups (%esp), %xmm0 ; MAXNOPS2: movups (%esp), %xmm0
; MAXNOPS2: nop # variant = 2 ; MAXNOPS2: nop # variant = 2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment