Commit e39d0ca2 by David Sehr

Sort allocas, compute frame pointer in Cfg pass

Split allocas in the entry block into two categories. The first has alignment <= stack alignment and constant size. The second violates one or both of those conditions. Sort both of these lists in descending alignment order and emit. Also, compute the need for a frame pointer during the pass. BUG= R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1414343010 .
parent 6cab5619
...@@ -201,26 +201,7 @@ void Cfg::translate() { ...@@ -201,26 +201,7 @@ void Cfg::translate() {
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
Var64On32->initHiLo(this); Var64On32->initHiLo(this);
// Figure out which alloca instructions result in storage at known stack frame processAllocas();
// offsets. If this is true for all alloca instructions, then a stack pointer
// can still be used instead of a frame pointer, freeing up the frame pointer
// for normal register allocation. Additionally, for each such alloca, its
// address could be rematerialized at each use in terms of the stack/frame
// pointer, saving a stack slot and a load from that stack slot.
//
// This simple implementation is limited to alloca instructions at the start
// of the entry node.
for (Inst &Instr : getEntryNode()->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
if (llvm::isa<Constant>(Alloca->getSizeInBytes())) {
Alloca->setKnownFrameOffset();
continue;
}
}
// The first instruction that is not an alloca with a constant size stops
// the search.
break;
}
// The set of translation passes and their order are determined by the // The set of translation passes and their order are determined by the
// target. // target.
...@@ -470,6 +451,91 @@ void Cfg::doArgLowering() { ...@@ -470,6 +451,91 @@ void Cfg::doArgLowering() {
getTarget()->lowerArguments(); getTarget()->lowerArguments();
} }
void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
bool IsKnownFrameOffset) {
if (Allocas.empty())
return;
// Sort by decreasing alignment. This does not really matter at the moment,
// but will allow compacting stack allocation when we fuse to one alloca.
std::sort(Allocas.begin(), Allocas.end(),
[](Inst *I1, Inst *I2) {
auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
return A1->getAlignInBytes() > A2->getAlignInBytes();
});
for (Inst *Instr: Allocas) {
auto *Alloca = llvm::cast<InstAlloca>(Instr);
// Move the alloca to its sorted position.
InstAlloca *NewAlloca = InstAlloca::create(this,
Alloca->getSizeInBytes(),
Alloca->getAlignInBytes(),
Alloca->getDest());
if (IsKnownFrameOffset)
NewAlloca->setKnownFrameOffset();
Insts.push_front(NewAlloca);
Alloca->setDeleted();
}
}
void Cfg::processAllocas() {
const uint32_t StackAlignment = getTarget()->getStackAlignment();
CfgNode *EntryNode = getEntryNode();
// Allocas in the entry block that have constant size and alignment less
// than or equal to the function's stack alignment.
CfgVector<Inst *> FixedAllocas;
// Allocas in the entry block that have constant size and alignment greater
// than the function's stack alignment.
CfgVector<Inst *> AlignedAllocas;
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(StackAlignment));
// Collect the Allocas into the two vectors.
bool RequiresFramePointer = false;
for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
// Variable-sized allocations require a frame pointer.
RequiresFramePointer = true;
continue;
}
uint32_t AlignmentParam = Alloca->getAlignInBytes();
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
AlignmentParam = std::max(AlignmentParam, 1u);
assert(llvm::isPowerOf2_32(AlignmentParam));
if (AlignmentParam > StackAlignment) {
// Allocations aligned more than the stack require a frame pointer.
RequiresFramePointer = true;
AlignedAllocas.push_back(Alloca);
}
else
FixedAllocas.push_back(Alloca);
}
}
// Look for alloca instructions in other blocks
for (CfgNode *Node : Nodes) {
if (Node == EntryNode)
continue;
for (Inst &Instr : Node->getInsts()) {
if (llvm::isa<InstAlloca>(&Instr)) {
// Allocations outside the entry block require a frame pointer.
RequiresFramePointer = true;
break;
}
}
if (RequiresFramePointer)
break;
}
// Mark the target as requiring a frame pointer.
if (RequiresFramePointer)
getTarget()->setHasFramePointer();
// Add instructions to the head of the entry block in reverse order.
InstList &Insts = getEntryNode()->getInsts();
// Fixed, large alignment alloca addresses do not have known offset.
sortAllocas(AlignedAllocas, Insts, false);
// Fixed, small alignment alloca addresses have known offset.
sortAllocas(FixedAllocas, Insts, true);
}
void Cfg::doAddressOpt() { void Cfg::doAddressOpt() {
TimerMarker T(TimerStack::TT_doAddressOpt, this); TimerMarker T(TimerStack::TT_doAddressOpt, this);
for (CfgNode *Node : Nodes) for (CfgNode *Node : Nodes)
......
...@@ -185,6 +185,10 @@ public: ...@@ -185,6 +185,10 @@ public:
void advancedPhiLowering(); void advancedPhiLowering();
void reorderNodes(); void reorderNodes();
void shuffleNodes(); void shuffleNodes();
void sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
bool IsKnownFrameOffset);
/// Merge all the fixed-size allocas in the entry block.
void processAllocas();
void doAddressOpt(); void doAddressOpt();
void doArgLowering(); void doArgLowering();
void doNopInsertion(); void doNopInsertion();
......
...@@ -201,9 +201,11 @@ public: ...@@ -201,9 +201,11 @@ public:
virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0; virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0;
virtual bool hasFramePointer() const { return false; } virtual bool hasFramePointer() const { return false; }
virtual void setHasFramePointer() = 0;
virtual SizeT getStackReg() const = 0; virtual SizeT getStackReg() const = 0;
virtual SizeT getFrameOrStackReg() const = 0; virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0; virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
virtual uint32_t getStackAlignment() const = 0;
/// Return whether a 64-bit Variable should be split into a Variable64On32. /// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0; virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
......
...@@ -380,6 +380,10 @@ void TargetARM32::translateOm1() { ...@@ -380,6 +380,10 @@ void TargetARM32::translateOm1() {
} }
} }
uint32_t TargetARM32::getStackAlignment() const {
return ARM32_STACK_ALIGNMENT_BYTES;
}
bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
return Br->optimizeBranch(NextNode); return Br->optimizeBranch(NextNode);
......
...@@ -81,6 +81,7 @@ public: ...@@ -81,6 +81,7 @@ public:
return RegisterAliases[Reg]; return RegisterAliases[Reg];
} }
bool hasFramePointer() const override { return UsesFramePointer; } bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegARM32::Reg_sp; } SizeT getStackReg() const override { return RegARM32::Reg_sp; }
SizeT getFrameOrStackReg() const override { SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegARM32::Reg_fp : RegARM32::Reg_sp; return UsesFramePointer ? RegARM32::Reg_fp : RegARM32::Reg_sp;
...@@ -92,6 +93,7 @@ public: ...@@ -92,6 +93,7 @@ public:
// are rounded up to 4 bytes. // are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3; return (typeWidthInBytes(Ty) + 3) & ~3;
} }
uint32_t getStackAlignment() const override;
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
......
...@@ -49,6 +49,7 @@ public: ...@@ -49,6 +49,7 @@ public:
return RegisterAliases[Reg]; return RegisterAliases[Reg];
} }
bool hasFramePointer() const override { return UsesFramePointer; } bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegMIPS32::Reg_SP; } SizeT getStackReg() const override { return RegMIPS32::Reg_SP; }
SizeT getFrameOrStackReg() const override { SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP; return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP;
...@@ -58,6 +59,10 @@ public: ...@@ -58,6 +59,10 @@ public:
// are rounded up to 4 bytes. // are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3; return (typeWidthInBytes(Ty) + 3) & ~3;
} }
uint32_t getStackAlignment() const override {
// TODO(sehr): what is the stack alignment?
return 1;
}
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
......
...@@ -83,6 +83,7 @@ public: ...@@ -83,6 +83,7 @@ public:
} }
bool hasFramePointer() const override { return IsEbpBasedFrame; } bool hasFramePointer() const override { return IsEbpBasedFrame; }
void setHasFramePointer() override { IsEbpBasedFrame = true; }
SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; } SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; }
SizeT getFrameOrStackReg() const override { SizeT getFrameOrStackReg() const override {
return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp
...@@ -93,6 +94,9 @@ public: ...@@ -93,6 +94,9 @@ public:
const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType); const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType);
return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes); return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
} }
uint32_t getStackAlignment() const override {
return Traits::X86_STACK_ALIGNMENT_BYTES;
}
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Traits::Is64Bit ? false : Ty == IceType_i64; return Traits::Is64Bit ? false : Ty == IceType_i64;
......
...@@ -957,7 +957,6 @@ void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { ...@@ -957,7 +957,6 @@ void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
uint32_t Alignment = uint32_t Alignment =
std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
IsEbpBasedFrame = true;
_and(esp, Ctx->getConstantInt32(-Alignment)); _and(esp, Ctx->getConstantInt32(-Alignment));
} }
if (const auto *ConstantTotalSize = if (const auto *ConstantTotalSize =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment