Commit 89d7956d by Jim Stichnoth

Subzero: Fix address mode optimization involving phi temporaries.

Also adds much-needed logging of the decision process that goes into the address mode optimization. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/490333003
parent 14c3f417
......@@ -101,6 +101,7 @@ enum VerboseItem {
IceV_LinearScan = 1 << 8,
IceV_Frame = 1 << 9,
IceV_Timing = 1 << 10,
IceV_AddrOpt = 1 << 11,
IceV_All = ~IceV_None
};
typedef uint32_t VerboseMask;
......
......@@ -146,9 +146,16 @@ void Variable::setUse(const Inst *Inst, const CfgNode *Node) {
}
void Variable::setDefinition(Inst *Inst, const CfgNode *Node) {
if (DefInst && !DefInst->isDeleted() && DefInst != Inst) {
// Detect when a variable is being defined multiple times,
// particularly for Phi instruction lowering. If this happens, we
// need to lock DefInst to NULL.
DefInst = NULL;
DefNode = NULL;
return;
}
if (DefNode == NULL)
return;
// Can first check preexisting DefInst if we care about multi-def vars.
DefInst = Inst;
if (Node != DefNode)
DefNode = NULL;
......
......@@ -136,6 +136,10 @@ void TargetLowering::doNopInsertion() {
void TargetLowering::lower() {
assert(!Context.atEnd());
Inst *Inst = *Context.getCur();
// Mark the current instruction as deleted before lowering,
// otherwise the Dest variable will likely get marked as non-SSA.
// See Variable::setDefinition().
Inst->setDeleted();
switch (Inst->getKind()) {
case Inst::Alloca:
lowerAlloca(llvm::dyn_cast<InstAlloca>(Inst));
......@@ -200,7 +204,6 @@ void TargetLowering::lower() {
Func->setError("Can't lower unsupported instruction type");
break;
}
Inst->setDeleted();
postLower();
......
......@@ -2607,13 +2607,13 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
NextBr->setDeleted();
Operand *Src0RM = legalize(
Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg, true);
_cmp(Src0RM, Src1);
_br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
NextBr->getTargetFalse());
// Skip over the following branch instruction.
NextBr->setDeleted();
Context.advanceNext();
return;
}
......@@ -3194,8 +3194,8 @@ bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
// Lower the phi assignments now, before the branch (same placement
// as before).
InstAssign *PhiAssign = PhiAssigns[i];
lowerAssign(PhiAssign);
PhiAssign->setDeleted();
lowerAssign(PhiAssign);
Context.advanceNext();
}
_br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
......@@ -3482,8 +3482,35 @@ bool isAdd(const Inst *Inst) {
return false;
}
void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
int32_t &Offset) {
void dumpAddressOpt(const Cfg *Func, const Variable *Base,
const Variable *Index, uint16_t Shift, int32_t Offset,
const Inst *Reason) {
if (!Func->getContext()->isVerbose(IceV_AddrOpt))
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "Instruction: ";
Reason->dumpDecorated(Func);
Str << " results in Base=";
if (Base)
Base->dump(Func);
else
Str << "<null>";
Str << ", Index=";
if (Index)
Index->dump(Func);
else
Str << "<null>";
Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
}
void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
Variable *&Index, uint16_t &Shift, int32_t &Offset) {
Func->setCurrentNode(NULL);
if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
Ostream &Str = Func->getContext()->getStrDump();
Str << "\nStarting computeAddressOpt for instruction:\n ";
Instr->dumpDecorated(Func);
}
(void)Offset; // TODO: pattern-match for non-zero offsets.
if (Base == NULL)
return;
......@@ -3506,6 +3533,7 @@ void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
// TODO: ensure BaseVariable0 stays single-BB
true) {
Base = BaseVariable0;
dumpAddressOpt(Func, Base, Index, Shift, Offset, BaseInst);
continue;
}
......@@ -3523,6 +3551,7 @@ void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
Base = BaseVariable0;
Index = BaseVariable1;
Shift = 0; // should already have been 0
dumpAddressOpt(Func, Base, Index, Shift, Offset, BaseInst);
continue;
}
......@@ -3560,6 +3589,7 @@ void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
if (Shift + LogMult <= 3) {
Index = IndexVariable0;
Shift += LogMult;
dumpAddressOpt(Func, Base, Index, Shift, Offset, IndexInst);
continue;
}
}
......@@ -3589,6 +3619,7 @@ void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
}
Base = Var;
Offset += IsAdd ? Const->getValue() : -Const->getValue();
dumpAddressOpt(Func, Base, Index, Shift, Offset, BaseInst);
continue;
}
......@@ -3684,12 +3715,12 @@ void TargetX8632::doAddressOptLoad() {
const OperandX8632Mem::SegmentRegisters SegmentReg =
OperandX8632Mem::DefaultSegment;
Variable *Base = llvm::dyn_cast<Variable>(Addr);
computeAddressOpt(Base, Index, Shift, Offset);
computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
if (Base && Addr != Base) {
Inst->setDeleted();
Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Shift, SegmentReg);
Inst->setDeleted();
Context.insert(InstLoad::create(Func, Dest, Addr));
}
}
......@@ -3866,12 +3897,12 @@ void TargetX8632::doAddressOptStore() {
// registers there either.
const OperandX8632Mem::SegmentRegisters SegmentReg =
OperandX8632Mem::DefaultSegment;
computeAddressOpt(Base, Index, Shift, Offset);
computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
if (Base && Addr != Base) {
Inst->setDeleted();
Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Shift, SegmentReg);
Inst->setDeleted();
Context.insert(InstStore::create(Func, Data, Addr));
}
}
......@@ -3943,9 +3974,9 @@ TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
if (NextCast->getCastKind() == InstCast::Sext &&
NextCast->getSrc(0) == SignExtendedResult) {
NextCast->setDeleted();
_movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
// Skip over the instruction.
NextCast->setDeleted();
Context.advanceNext();
}
}
......
......@@ -45,6 +45,7 @@ static cl::list<Ice::VerboseItem> VerboseList(
clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"),
clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"),
clEnumValN(Ice::IceV_Timing, "time", "Pass timing details"),
clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_None, "none", "No verbosity"), clEnumValEnd));
static cl::opt<Ice::TargetArch> TargetArch(
......
; This is distilled from a real function that exposed a bug in the
; address mode optimization code. The optimization followed assignment
; chains through non-SSA temporaries created by Phi instruction lowering.
;
; This test depends to some degree on the stability of "--verbose
; addropt" output format.
; RUN: %llvm2ice -O2 --verbose addropt %s | FileCheck %s
declare i32 @_calloc_r(i32, i32, i32)
; @_Balloc reads the i32 stored at %ptr+76. If that value is zero it
; calls @_calloc_r(%ptr, 4, 33), stores the result back at %ptr+76, and
; returns 0 when the call itself returned zero. On every other path it
; returns the i32 loaded from (value + %k*4), where "value" is either
; the originally loaded word or the call result.
define internal i32 @_Balloc(i32 %ptr, i32 %k) {
entry:
; %gep is computed once here and reused by the store in if.then.
%gep = add i32 %ptr, 76
%gep.asptr = inttoptr i32 %gep to i32*
%0 = load i32* %gep.asptr, align 1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end5
if.then: ; preds = %entry
%call = tail call i32 @_calloc_r(i32 %ptr, i32 4, i32 33)
%gep.asptr2 = inttoptr i32 %gep to i32*
store i32 %call, i32* %gep.asptr2, align 1
%cmp3 = icmp eq i32 %call, 0
br i1 %cmp3, label %return, label %if.end5
if.end5: ; preds = %if.then, %entry
; %1 is the only Phi in the function: it takes %call when arriving from
; if.then and %0 when arriving from entry. It is the base of the address
; computed below, so it is the value whose definition chain the address
; mode optimization walks.
%1 = phi i32 [ %call, %if.then ], [ %0, %entry ]
; Address arithmetic for the load below: %1 + %k*4.
%gep_array = mul i32 %k, 4
%gep2 = add i32 %1, %gep_array
%gep2.asptr = inttoptr i32 %gep2 to i32*
%2 = load i32* %gep2.asptr, align 1
; The above load instruction is a good target for address mode
; optimization. Correct analysis would lead to dump output like:
; Starting computeAddressOpt for instruction:
; [ 15] %__13 = load i32* %gep2.asptr, align 1
; Instruction: [ 14] %gep2.asptr = i32 %gep2
; results in Base=%gep2, Index=<null>, Shift=0, Offset=0
; Instruction: [ 13] %gep2 = add i32 %__9, %gep_array
; results in Base=%__9, Index=%gep_array, Shift=0, Offset=0
; Instruction: [ 18] %__9 = i32 %__9_phi
; results in Base=%__9_phi, Index=%gep_array, Shift=0, Offset=0
; Instruction: [ 12] %gep_array = mul i32 %k, 4
; results in Base=%__9_phi, Index=%k, Shift=2, Offset=0
;
; Incorrect, overly-aggressive analysis would lead to output like:
; Starting computeAddressOpt for instruction:
; [ 15] %__13 = load i32* %gep2.asptr, align 1
; Instruction: [ 14] %gep2.asptr = i32 %gep2
; results in Base=%gep2, Index=<null>, Shift=0, Offset=0
; Instruction: [ 13] %gep2 = add i32 %__9, %gep_array
; results in Base=%__9, Index=%gep_array, Shift=0, Offset=0
; Instruction: [ 18] %__9 = i32 %__9_phi
; results in Base=%__9_phi, Index=%gep_array, Shift=0, Offset=0
; Instruction: [ 19] %__9_phi = i32 %__4
; results in Base=%__4, Index=%gep_array, Shift=0, Offset=0
; Instruction: [ 12] %gep_array = mul i32 %k, 4
; results in Base=%__4, Index=%k, Shift=2, Offset=0
;
; The single directive below only rules out the overly-aggressive
; result (a Base of %__4); it does not pin the rest of the dump.
; CHECK-NOT: results in Base=%__4,
;
ret i32 %2
return: ; preds = %if.then
ret i32 0
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment