Commit 3607b6c9 by Jim Stichnoth

Subzero: Find rematerializable variables transitively.

There are situations where a variable is assigned as the result of a rematerializable alloca instruction, and then another variable is assigned as essentially a known-offset interior pointer into the alloca space. In this case, the secondary variable is also rematerializable. We add a pass, after alloca analysis, to find these derived variables and mark them transitively as rematerializable. Because we lack use-def chains (or in fact any map to variable use locations), we need to iterate over the CFG until convergence. Fortunately, this is pretty cheap, and not even done unless the alloca analysis seeds it with an initial set of rematerializable variables. This analysis is only really needed for arithmetic instructions, but we also need to apply it to assignments and pointer-type bitcasts that are added when the IceConverter directly parses a .ll file rather than a .pexe file. BUG= none R=jpp@chromium.org, sehr@chromium.org Review URL: https://codereview.chromium.org/1441793002 .
parent 99165667
......@@ -630,6 +630,108 @@ void Cfg::processAllocas(bool SortAndCombine) {
AllocaBaseVariableType BasePointerType =
(HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
if (!FixedAllocas.empty() || !AlignedAllocas.empty())
// No use calling findRematerializable() unless there is some
// rematerializable alloca instruction to seed it.
findRematerializable();
}
namespace {
// Helpers for findRematerializable(). For each of them, if a suitable
// rematerialization is found, the instruction's Dest variable is set to be
// rematerializable and it returns true, otherwise it returns false.
/// Handles the "Dest = add <rematerializable variable>, <32-bit immediate>"
/// pattern. On a match, Dest is marked rematerializable using the source
/// variable's register number and its stack offset plus the immediate, and
/// true is returned. Any other instruction yields false and is left alone.
bool rematerializeArithmetic(const Inst *Instr) {
  // Only arithmetic instructions are candidates...
  auto *AddInstr = llvm::dyn_cast<InstArithmetic>(Instr);
  if (!AddInstr)
    return false;
  // ...and of those, only the Add operation.
  if (AddInstr->getOp() != InstArithmetic::Add)
    return false;

  // The first operand has to be a variable already known to be
  // rematerializable.
  auto *BaseVar = llvm::dyn_cast<Variable>(AddInstr->getSrc(0));
  if (!BaseVar || !BaseVar->isRematerializable())
    return false;

  // The second operand has to be an immediate.
  auto *Delta = llvm::dyn_cast<ConstantInteger32>(AddInstr->getSrc(1));
  if (!Delta)
    return false;

  // Dest refers to the same base register, displaced by the immediate.
  const auto DerivedOffset = BaseVar->getStackOffset() + Delta->getValue();
  AddInstr->getDest()->setRematerializable(BaseVar->getRegNum(), DerivedOffset);
  return true;
}
/// Handles "Dest = <rematerializable variable>" assignments. On a match, Dest
/// is marked rematerializable with the same register number and stack offset
/// as the source variable, and true is returned; otherwise returns false.
bool rematerializeAssign(const Inst *Instr) {
  // An InstAssign only originates from an inttoptr or ptrtoint instruction,
  // which never occurs in a MINIMAL build, so bail out immediately there.
  // Otherwise the instruction must actually be an InstAssign.
  if (BuildDefs::minimal() || !llvm::isa<InstAssign>(Instr))
    return false;

  // The assigned value has to be a variable that is itself rematerializable.
  if (auto *Source = llvm::dyn_cast<Variable>(Instr->getSrc(0))) {
    if (Source->isRematerializable()) {
      Instr->getDest()->setRematerializable(Source->getRegNum(),
                                            Source->getStackOffset());
      return true;
    }
  }
  return false;
}
bool rematerializeCast(const Inst *Instr) {
// An pointer-type bitcast never occurs in a MINIMAL build.
if (BuildDefs::minimal())
return false;
// Check that it's a Cast instruction with a Bitcast operation.
auto *Cast = llvm::dyn_cast<InstCast>(Instr);
if (Cast == nullptr || Cast->getCastKind() != InstCast::Bitcast)
return false;
// Check that Src(0) is rematerializable.
auto *Src0Var = llvm::dyn_cast<Variable>(Cast->getSrc(0));
if (Src0Var == nullptr || !Src0Var->isRematerializable())
return false;
// Check that Dest and Src(0) have the same type.
Variable *Dest = Cast->getDest();
if (Dest->getType() != Src0Var->getType())
return false;
Dest->setRematerializable(Src0Var->getRegNum(), Src0Var->getStackOffset());
return true;
}
} // end of anonymous namespace
/// Propagate rematerializability from already-rematerializable variables to
/// variables derived from them. A destination qualifies when its defining
/// instruction is an InstAssign whose source is a rematerializable variable,
/// a type-preserving InstCast::Bitcast of such a variable, or an
/// InstArithmetic::Add of such a variable and an immediate. InstAssign and
/// pointer-type InstCast::Bitcast instructions generally only come about from
/// the IceConverter's treatment of inttoptr, ptrtoint, and bitcast
/// instructions. TODO(stichnot): Consider other possibilities, however
/// unlikely, such as InstArithmetic::Sub, or commutativity.
void Cfg::findRematerializable() {
  // Sweep over every instruction in node order, and keep sweeping for as long
  // as the previous sweep marked at least one new variable. Several sweeps can
  // be required because, depending on the CfgNode linearization order, a
  // variable's defining block may come after a block where it is used.
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (CfgNode *Node : getNodes()) {
      // Phi instructions are deliberately not visited; only the regular
      // instruction list is.
      for (Inst &Instr : Node->getInsts()) {
        if (Instr.isDeleted())
          continue;
        // Skip instructions with no destination, and destinations that are
        // already marked.
        Variable *Dest = Instr.getDest();
        const bool IsCandidate =
            Dest != nullptr && !Dest->isRematerializable();
        if (!IsCandidate)
          continue;
        // The first helper that matches marks Dest; the rest are not tried.
        const Inst *Candidate = &Instr;
        if (rematerializeArithmetic(Candidate) ||
            rematerializeAssign(Candidate) || rematerializeCast(Candidate))
          Changed = true;
      }
    }
  }
}
void Cfg::doAddressOpt() {
......@@ -907,7 +1009,7 @@ void Cfg::emit() {
deleteJumpTableInsts();
if (Ctx->getFlags().getDecorateAsm()) {
for (Variable *Var : getVariables()) {
if (Var->getStackOffset()) {
if (Var->getStackOffset() && !Var->isRematerializable()) {
Str << "\t" << Var->getSymbolicStackOffset(this) << " = "
<< Var->getStackOffset() << "\n";
}
......
......@@ -191,14 +191,6 @@ public:
void reorderNodes();
void shuffleNodes();
enum AllocaBaseVariableType {
BVT_StackPointer,
BVT_FramePointer,
BVT_UserPointer
};
void sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
uint32_t CombinedAlignment, InstList &Insts,
AllocaBaseVariableType BaseVariableType);
/// Scan allocas to determine whether we need to use a frame pointer.
/// If SortAndCombine == true, merge all the fixed-size allocas in the
/// entry block and emit stack or frame pointer-relative addressing.
......@@ -267,6 +259,16 @@ private:
/// Iterate through the registered jump tables and emit them.
void emitJumpTables();
enum AllocaBaseVariableType {
BVT_StackPointer,
BVT_FramePointer,
BVT_UserPointer
};
void sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
uint32_t CombinedAlignment, InstList &Insts,
AllocaBaseVariableType BaseVariableType);
void findRematerializable();
GlobalContext *Ctx;
uint32_t SequenceNumber; /// output order for emission
uint32_t ConstantBlindingCookie = 0; /// cookie for constant blinding
......
......@@ -1318,6 +1318,10 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
template <class Machine>
void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
Type Ty = Dest->getType();
Operand *Src0 = legalize(Inst->getSrc(0));
Operand *Src1 = legalize(Inst->getSrc(1));
......@@ -1898,6 +1902,10 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
template <class Machine>
void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
......
......@@ -55,3 +55,38 @@ entry:
; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
; Test that an interior pointer into a rematerializable variable is also
; rematerializable, and test that it is detected even when the use appears
; syntactically before the definition. Test that it is folded into mem
; operands, and also rematerializable through an lea instruction for direct use.
; Control flow is entry -> block2 -> block1, but block1 is written first, so
; %derived and %retval are used textually before the instructions that define
; them.
define internal i32 @fused_derived(i32 %arg) {
entry:
; Three fixed-size 128-byte allocas; only %a2 is referenced below.
%a1 = alloca i8, i32 128, align 4
%a2 = alloca i8, i32 128, align 4
%a3 = alloca i8, i32 128, align 4
br label %block2
block1:
%a2_i32 = bitcast i8* %a2 to i32*
; Store through %a2 itself, then through %derived (%a2 + 12, see block2).
store i32 %arg, i32* %a2_i32, align 1
store i32 %arg, i32* %derived, align 1
; %retval is %a2 + 1 and is used directly as a value rather than an address.
ret i32 %retval
block2:
; The following are all rematerializable variables deriving from %a2.
%p2 = ptrtoint i8* %a2 to i32
; %d is %a2 + 12 and %retval is %a2 + 1.
%d = add i32 %p2, 12
%retval = add i32 %p2, 1
; %derived is %d reinterpreted as an i32 pointer.
%derived = inttoptr i32 %d to i32*
br label %block1
}
; CHECK-LABEL: fused_derived
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x180
; CHECK-NEXT: mov {{.*}},esp
; CHECK-NEXT: jmp
; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]]
; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]]
; CHECK-NEXT: lea eax,[esp+0x81]
; CHECK-NEXT: add esp,0x18c
; CHECK-NEXT: ret
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment