Commit 318f4cda by Jim Stichnoth

Subzero: Use register availability during lowering to improve the code.

The problem is that given code like this: a = b + c d = a + e ... ... (use of a) ... Lowering may produce code like this, at least on x86: T1 = b T1 += c a = T1 T2 = a T2 += e d = T2 ... ... (use of a) ... If "a" has a long live range, it may not get a register, resulting in clumsy code in the middle of the sequence like "a=reg; reg=a". Normally one might expect store forwarding to make the clumsy code fast, but it does presumably add an extra instruction-retirement cycle to the critical path in a pointer-chasing loop, and makes a big difference on some benchmarks. The simple fix here is, at the end of lowering "a=b+c", keep track of the final "a=T1" assignment. Then, when lowering "d=a+e" and we look up "a", we can substitute "T1". This slightly increases the live range of T1, but it does a great job of avoiding the redundant reload of the register from the stack location. A more general fix (in the future) might be to do live range splitting and let the register allocator handle it. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095 R=kschimpf@google.com Review URL: https://codereview.chromium.org/1385433002 .
parent ed2c06b2
......@@ -570,6 +570,7 @@ void CfgNode::genCode() {
// Ensure target lowering actually moved the cursor.
assert(Context.getCur() != Orig);
}
Context.availabilityReset();
// Do preliminary lowering of the Phi instructions.
Target->prelowerPhis();
}
......@@ -683,7 +684,7 @@ bool CfgNode::liveness(Liveness *Liveness) {
// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) {
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
if (!BuildDefs::asserts())
return true;
......
......@@ -110,7 +110,7 @@ public:
private:
CfgNode(Cfg *Func, SizeT LabelIndex);
bool livenessValidateIntervals(Liveness *Liveness);
bool livenessValidateIntervals(Liveness *Liveness) const;
Cfg *const Func;
SizeT Number; /// invariant: Func->Nodes[Number]==this
const SizeT LabelNumber; /// persistent number for label generation
......
......@@ -137,7 +137,7 @@ bool LinearScan::livenessValidateIntervals(
const DefUseErrorList &DefsWithoutUses,
const DefUseErrorList &UsesBeforeDefs,
const CfgVector<InstNumberT> &LRBegin,
const CfgVector<InstNumberT> &LREnd) {
const CfgVector<InstNumberT> &LREnd) const {
if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
return true;
......
......@@ -62,7 +62,7 @@ private:
bool livenessValidateIntervals(const DefUseErrorList &DefsWithoutUses,
const DefUseErrorList &UsesBeforeDefs,
const CfgVector<InstNumberT> &LRBegin,
const CfgVector<InstNumberT> &LREnd);
const CfgVector<InstNumberT> &LREnd) const;
void initForGlobal();
void initForInfOnly();
/// Move an item from the From set to the To set. From[Index] is pushed onto
......
......@@ -46,6 +46,7 @@ void LoweringContext::rewind() {
Cur = Begin;
skipDeleted(Cur);
Next = Cur;
availabilityReset();
}
void LoweringContext::insert(Inst *Inst) {
......@@ -70,6 +71,31 @@ Inst *LoweringContext::getLastInserted() const {
return LastInserted;
}
void LoweringContext::availabilityReset() {
LastDest = nullptr;
LastSrc = nullptr;
}
void LoweringContext::availabilityUpdate() {
availabilityReset();
Inst *Instr = LastInserted;
if (Instr == nullptr)
return;
if (!Instr->isSimpleAssign())
return;
if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(0))) {
LastDest = Instr->getDest();
LastSrc = SrcVar;
}
}
Variable *LoweringContext::availabilityGet(Operand *Src) const {
assert(Src);
if (Src == LastDest)
return LastSrc;
return nullptr;
}
TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
#define SUBZERO_TARGET(X) \
if (Target == Target_##X) \
......
......@@ -65,6 +65,9 @@ public:
void setNext(InstList::iterator N) { Next = N; }
void rewind();
void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
void availabilityReset();
void availabilityUpdate();
Variable *availabilityGet(Operand *Src) const;
private:
/// Node is the argument to Inst::updateVars().
......@@ -85,6 +88,11 @@ private:
InstList::iterator Begin;
/// End is a copy of Insts.end(), used if Next needs to be advanced.
InstList::iterator End;
/// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
/// assignment inserted (provided Src is a variable). This is used for simple
/// availability analysis.
Variable *LastDest = nullptr;
Variable *LastSrc = nullptr;
void skipDeleted(InstList::iterator &I) const;
void advanceForward(InstList::iterator &I) const;
......
......@@ -5016,6 +5016,24 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// the shl shift amount to be either an immediate or in ecx.)
assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
// Substitute with an available infinite-weight variable if possible. Only do
// this when we are not asking for a specific register, and when the
// substitution is not locked to a specific register, and when the types
// match, in order to capture the vast majority of opportunities and avoid
// corner cases in the lowering.
if (RegNum == Variable::NoRegister) {
if (Variable *Subst = getContext().availabilityGet(From)) {
// At this point we know there is a potential substitution available.
if (Subst->mustHaveReg() && !Subst->hasReg()) {
// At this point we know the substitution will have a register.
if (From->getType() == Subst->getType()) {
// At this point we know the substitution's register is compatible.
return Subst;
}
}
}
}
if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
// Before doing anything with a Mem operand, we need to ensure that the
// Base and Index components are in physical registers.
......@@ -5239,6 +5257,7 @@ template <class Machine> void TargetX86Base<Machine>::postLower() {
if (Ctx->getFlags().getOptLevel() == Opt_m1)
return;
markRedefinitions();
Context.availabilityUpdate();
}
template <class Machine>
......
......@@ -77,10 +77,12 @@ entry:
ret void
}
; CHECK-LABEL: CallIndirectGlobal
; Allow the first call to be to a different register because of simple
; availability optimization.
; CHECK: call
; CHECK: call [[REGISTER:[a-z]+]]
; CHECK: call [[REGISTER]]
; CHECK: call [[REGISTER]]
; CHECK: call [[REGISTER]]
;
; OPTM1-LABEL: CallIndirectGlobal
; OPTM1: call [[TARGET:.+]]
......
......@@ -92,12 +92,15 @@ exit:
; CHECK-LABEL: testPhi3
; CHECK: push [[EBX:.*]]
; CHECK: mov {{.*}},DWORD PTR [esp
; CHECK: mov
; CHECK: mov {{.*}},DWORD PTR [[ADDR:.*0x3e8]]
; CHECK: mov [[EAX:.*]],DWORD PTR [esp
; CHECK: mov [[ECX:.*]],[[EAX]]
;;; start of loop body
; CHECK: mov [[EDX:.*]],[[ECX]]
; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0x3e8]
; CHECK: cmp {{.*}},0x0
; CHECK: jne
; CHECK: mov DWORD PTR [[ADDR]]
;;; start of epilog
; CHECK: mov DWORD PTR {{.}}[[EDX]]+0x3e8],
; CHECK: pop [[EBX]]
; Test of "advanced phi lowering" with undef phi arg (integer vector).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment