Commit 318f4cda by Jim Stichnoth

Subzero: Use register availability during lowering to improve the code.

The problem is that given code like this:

  a = b + c
  d = a + e
  ...
  ... (use of a) ...

Lowering may produce code like this, at least on x86:

  T1 = b
  T1 += c
  a = T1
  T2 = a
  T2 += e
  d = T2
  ...
  ... (use of a) ...

If "a" has a long live range, it may not get a register, resulting in clumsy code in the middle of the sequence like "a=reg; reg=a". Normally one might expect store forwarding to make the clumsy code fast, but it does presumably add an extra instruction-retirement cycle to the critical path in a pointer-chasing loop, and makes a big difference on some benchmarks.

The simple fix here is, at the end of lowering "a=b+c", keep track of the final "a=T1" assignment. Then, when lowering "d=a+e" and we look up "a", we can substitute "T1". This slightly increases the live range of T1, but it does a great job of avoiding the redundant reload of the register from the stack location.

A more general fix (in the future) might be to do live range splitting and let the register allocator handle it.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=kschimpf@google.com

Review URL: https://codereview.chromium.org/1385433002 .
parent ed2c06b2
...@@ -570,6 +570,7 @@ void CfgNode::genCode() { ...@@ -570,6 +570,7 @@ void CfgNode::genCode() {
// Ensure target lowering actually moved the cursor. // Ensure target lowering actually moved the cursor.
assert(Context.getCur() != Orig); assert(Context.getCur() != Orig);
} }
Context.availabilityReset();
// Do preliminary lowering of the Phi instructions. // Do preliminary lowering of the Phi instructions.
Target->prelowerPhis(); Target->prelowerPhis();
} }
...@@ -683,7 +684,7 @@ bool CfgNode::liveness(Liveness *Liveness) { ...@@ -683,7 +684,7 @@ bool CfgNode::liveness(Liveness *Liveness) {
// Validate the integrity of the live ranges in this block. If there are any // Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true. // errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) { bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
if (!BuildDefs::asserts()) if (!BuildDefs::asserts())
return true; return true;
......
...@@ -110,7 +110,7 @@ public: ...@@ -110,7 +110,7 @@ public:
private: private:
CfgNode(Cfg *Func, SizeT LabelIndex); CfgNode(Cfg *Func, SizeT LabelIndex);
bool livenessValidateIntervals(Liveness *Liveness); bool livenessValidateIntervals(Liveness *Liveness) const;
Cfg *const Func; Cfg *const Func;
SizeT Number; /// invariant: Func->Nodes[Number]==this SizeT Number; /// invariant: Func->Nodes[Number]==this
const SizeT LabelNumber; /// persistent number for label generation const SizeT LabelNumber; /// persistent number for label generation
......
...@@ -137,7 +137,7 @@ bool LinearScan::livenessValidateIntervals( ...@@ -137,7 +137,7 @@ bool LinearScan::livenessValidateIntervals(
const DefUseErrorList &DefsWithoutUses, const DefUseErrorList &DefsWithoutUses,
const DefUseErrorList &UsesBeforeDefs, const DefUseErrorList &UsesBeforeDefs,
const CfgVector<InstNumberT> &LRBegin, const CfgVector<InstNumberT> &LRBegin,
const CfgVector<InstNumberT> &LREnd) { const CfgVector<InstNumberT> &LREnd) const {
if (DefsWithoutUses.empty() && UsesBeforeDefs.empty()) if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
return true; return true;
......
...@@ -62,7 +62,7 @@ private: ...@@ -62,7 +62,7 @@ private:
bool livenessValidateIntervals(const DefUseErrorList &DefsWithoutUses, bool livenessValidateIntervals(const DefUseErrorList &DefsWithoutUses,
const DefUseErrorList &UsesBeforeDefs, const DefUseErrorList &UsesBeforeDefs,
const CfgVector<InstNumberT> &LRBegin, const CfgVector<InstNumberT> &LRBegin,
const CfgVector<InstNumberT> &LREnd); const CfgVector<InstNumberT> &LREnd) const;
void initForGlobal(); void initForGlobal();
void initForInfOnly(); void initForInfOnly();
/// Move an item from the From set to the To set. From[Index] is pushed onto /// Move an item from the From set to the To set. From[Index] is pushed onto
......
...@@ -46,6 +46,7 @@ void LoweringContext::rewind() { ...@@ -46,6 +46,7 @@ void LoweringContext::rewind() {
Cur = Begin; Cur = Begin;
skipDeleted(Cur); skipDeleted(Cur);
Next = Cur; Next = Cur;
availabilityReset();
} }
void LoweringContext::insert(Inst *Inst) { void LoweringContext::insert(Inst *Inst) {
...@@ -70,6 +71,31 @@ Inst *LoweringContext::getLastInserted() const { ...@@ -70,6 +71,31 @@ Inst *LoweringContext::getLastInserted() const {
return LastInserted; return LastInserted;
} }
/// Forgets any recorded "Dest=Src" assignment by clearing both halves of the
/// pair, so a subsequent availabilityGet() on a non-null operand finds no
/// substitution until availabilityUpdate() records a new pair.
void LoweringContext::availabilityReset() {
  LastSrc = nullptr;
  LastDest = nullptr;
}
void LoweringContext::availabilityUpdate() {
availabilityReset();
Inst *Instr = LastInserted;
if (Instr == nullptr)
return;
if (!Instr->isSimpleAssign())
return;
if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(0))) {
LastDest = Instr->getDest();
LastSrc = SrcVar;
}
}
/// Looks up a substitute for Src. Returns the recorded source variable when
/// Src is the destination of the recorded "Dest=Src" assignment, and nullptr
/// otherwise. Src must be non-null (checked by assert).
Variable *LoweringContext::availabilityGet(Operand *Src) const {
  assert(Src);
  return Src == LastDest ? LastSrc : nullptr;
}
TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) { TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
#define SUBZERO_TARGET(X) \ #define SUBZERO_TARGET(X) \
if (Target == Target_##X) \ if (Target == Target_##X) \
......
...@@ -65,6 +65,9 @@ public: ...@@ -65,6 +65,9 @@ public:
void setNext(InstList::iterator N) { Next = N; } void setNext(InstList::iterator N) { Next = N; }
void rewind(); void rewind();
void setInsertPoint(const InstList::iterator &Position) { Next = Position; } void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
void availabilityReset();
void availabilityUpdate();
Variable *availabilityGet(Operand *Src) const;
private: private:
/// Node is the argument to Inst::updateVars(). /// Node is the argument to Inst::updateVars().
...@@ -85,6 +88,11 @@ private: ...@@ -85,6 +88,11 @@ private:
InstList::iterator Begin; InstList::iterator Begin;
/// End is a copy of Insts.end(), used if Next needs to be advanced. /// End is a copy of Insts.end(), used if Next needs to be advanced.
InstList::iterator End; InstList::iterator End;
/// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
/// assignment inserted (provided Src is a variable). This is used for simple
/// availability analysis.
Variable *LastDest = nullptr;
Variable *LastSrc = nullptr;
void skipDeleted(InstList::iterator &I) const; void skipDeleted(InstList::iterator &I) const;
void advanceForward(InstList::iterator &I) const; void advanceForward(InstList::iterator &I) const;
......
...@@ -5016,6 +5016,24 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, ...@@ -5016,6 +5016,24 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// the shl shift amount to be either an immediate or in ecx.) // the shl shift amount to be either an immediate or in ecx.)
assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
// Substitute with an available infinite-weight variable if possible. Only do
// this when we are not asking for a specific register, and when the
// substitution is not locked to a specific register, and when the types
// match, in order to capture the vast majority of opportunities and avoid
// corner cases in the lowering.
if (RegNum == Variable::NoRegister) {
if (Variable *Subst = getContext().availabilityGet(From)) {
// At this point we know there is a potential substitution available.
if (Subst->mustHaveReg() && !Subst->hasReg()) {
// At this point we know the substitution will have a register.
if (From->getType() == Subst->getType()) {
// At this point we know the substitution's register is compatible.
return Subst;
}
}
}
}
if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
// Before doing anything with a Mem operand, we need to ensure that the // Before doing anything with a Mem operand, we need to ensure that the
// Base and Index components are in physical registers. // Base and Index components are in physical registers.
...@@ -5239,6 +5257,7 @@ template <class Machine> void TargetX86Base<Machine>::postLower() { ...@@ -5239,6 +5257,7 @@ template <class Machine> void TargetX86Base<Machine>::postLower() {
if (Ctx->getFlags().getOptLevel() == Opt_m1) if (Ctx->getFlags().getOptLevel() == Opt_m1)
return; return;
markRedefinitions(); markRedefinitions();
Context.availabilityUpdate();
} }
template <class Machine> template <class Machine>
......
...@@ -77,10 +77,12 @@ entry: ...@@ -77,10 +77,12 @@ entry:
ret void ret void
} }
; CHECK-LABEL: CallIndirectGlobal ; CHECK-LABEL: CallIndirectGlobal
; Allow the first call to be to a different register because of simple
; availability optimization.
; CHECK: call
; CHECK: call [[REGISTER:[a-z]+]] ; CHECK: call [[REGISTER:[a-z]+]]
; CHECK: call [[REGISTER]] ; CHECK: call [[REGISTER]]
; CHECK: call [[REGISTER]] ; CHECK: call [[REGISTER]]
; CHECK: call [[REGISTER]]
; ;
; OPTM1-LABEL: CallIndirectGlobal ; OPTM1-LABEL: CallIndirectGlobal
; OPTM1: call [[TARGET:.+]] ; OPTM1: call [[TARGET:.+]]
......
...@@ -92,12 +92,15 @@ exit: ...@@ -92,12 +92,15 @@ exit:
; CHECK-LABEL: testPhi3 ; CHECK-LABEL: testPhi3
; CHECK: push [[EBX:.*]] ; CHECK: push [[EBX:.*]]
; CHECK: mov {{.*}},DWORD PTR [esp ; CHECK: mov [[EAX:.*]],DWORD PTR [esp
; CHECK: mov ; CHECK: mov [[ECX:.*]],[[EAX]]
; CHECK: mov {{.*}},DWORD PTR [[ADDR:.*0x3e8]] ;;; start of loop body
; CHECK: mov [[EDX:.*]],[[ECX]]
; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0x3e8]
; CHECK: cmp {{.*}},0x0 ; CHECK: cmp {{.*}},0x0
; CHECK: jne ; CHECK: jne
; CHECK: mov DWORD PTR [[ADDR]] ;;; start of epilog
; CHECK: mov DWORD PTR {{.}}[[EDX]]+0x3e8],
; CHECK: pop [[EBX]] ; CHECK: pop [[EBX]]
; Test of "advanced phi lowering" with undef phi arg (integer vector). ; Test of "advanced phi lowering" with undef phi arg (integer vector).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment