Commit 2f3b8ec8 by David Sehr

Merge fixed alloca stack adjustments into the prolog

Also removes reliance on lowerAlloca entirely for the fixed allocations. BUG= R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1435363002 .
parent a1da6ff9
...@@ -494,53 +494,44 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, ...@@ -494,53 +494,44 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment); uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
// Ensure every alloca was assigned an offset. // Ensure every alloca was assigned an offset.
assert(Allocas.size() == Offsets.size()); assert(Allocas.size() == Offsets.size());
Variable *BaseVariable = makeVariable(IceType_i32);
Variable *AllocaDest = BaseVariable; switch (BaseVariableType) {
// Emit one addition for each alloca after the first. case BVT_UserPointer: {
for (size_t i = 0; i < Allocas.size(); ++i) { Variable *BaseVariable = makeVariable(IceType_i32);
auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]); for (SizeT i = 0; i < Allocas.size(); ++i) {
switch (BaseVariableType) { auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
case BVT_FramePointer:
case BVT_UserPointer: {
// Emit a new addition operation to replace the alloca. // Emit a new addition operation to replace the alloca.
Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]); Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
InstArithmetic *Add = InstArithmetic *Add =
InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(), InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
BaseVariable, AllocaOffset); BaseVariable, AllocaOffset);
Insts.push_front(Add); Insts.push_front(Add);
} break; Alloca->setDeleted();
case BVT_StackPointer: { }
Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
InstAlloca *CombinedAlloca =
InstAlloca::create(this, BaseVariable, AllocaSize, CombinedAlignment);
CombinedAlloca->setKnownFrameOffset();
Insts.push_front(CombinedAlloca);
} break;
case BVT_StackPointer:
case BVT_FramePointer: {
for (SizeT i = 0; i < Allocas.size(); ++i) {
auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
// Emit a fake definition of the rematerializable variable. // Emit a fake definition of the rematerializable variable.
Variable *Dest = Alloca->getDest(); Variable *Dest = Alloca->getDest();
InstFakeDef *Def = InstFakeDef::create(this, Dest); InstFakeDef *Def = InstFakeDef::create(this, Dest);
Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]); if (BaseVariableType == BVT_StackPointer)
Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
else
Dest->setRematerializable(getTarget()->getFrameReg(), Offsets[i]);
Insts.push_front(Def); Insts.push_front(Def);
} break; Alloca->setDeleted();
} }
Alloca->setDeleted(); // Allocate the fixed area in the function prolog.
} getTarget()->reserveFixedAllocaArea(TotalSize, CombinedAlignment);
Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
switch (BaseVariableType) {
case BVT_FramePointer: {
// Adjust the return of the alloca to the top of the returned region.
AllocaDest = makeVariable(IceType_i32);
InstArithmetic *Add = InstArithmetic::create(
this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
Insts.push_front(Add);
} break; } break;
case BVT_StackPointer: {
// Emit a fake use to keep the Alloca live.
InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
Insts.push_front(Use);
} break;
case BVT_UserPointer:
break;
} }
// And insert the fused alloca.
InstAlloca *CombinedAlloca =
InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
CombinedAlloca->setKnownFrameOffset();
Insts.push_front(CombinedAlloca);
} }
void Cfg::processAllocas(bool SortAndCombine) { void Cfg::processAllocas(bool SortAndCombine) {
...@@ -595,7 +586,7 @@ void Cfg::processAllocas(bool SortAndCombine) { ...@@ -595,7 +586,7 @@ void Cfg::processAllocas(bool SortAndCombine) {
// Allocas in the entry block that have constant size and alignment greater // Allocas in the entry block that have constant size and alignment greater
// than the function's stack alignment. // than the function's stack alignment.
CfgVector<Inst *> AlignedAllocas; CfgVector<Inst *> AlignedAllocas;
// Maximum alignment used for the dynamic/aligned allocas. // Maximum alignment used by any alloca.
uint32_t MaxAlignment = StackAlignment; uint32_t MaxAlignment = StackAlignment;
for (Inst &Instr : EntryNode->getInsts()) { for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
...@@ -623,14 +614,16 @@ void Cfg::processAllocas(bool SortAndCombine) { ...@@ -623,14 +614,16 @@ void Cfg::processAllocas(bool SortAndCombine) {
// do not have a known offset from either the stack or frame pointer. // do not have a known offset from either the stack or frame pointer.
// They grow up from a user pointer from an alloca. // They grow up from a user pointer from an alloca.
sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer); sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
// Fixed size allocas are addressed relative to the frame pointer.
sortAndCombineAllocas(FixedAllocas, StackAlignment, Insts,
BVT_FramePointer);
} else {
// Otherwise, fixed size allocas are addressed relative to the stack unless
// there are dynamic allocas.
const AllocaBaseVariableType BasePointerType =
(HasDynamicAllocation ? BVT_FramePointer : BVT_StackPointer);
sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
} }
// Otherwise, fixed size allocas are always addressed relative to the stack
// unless there are dynamic allocas.
// TODO(sehr): re-enable frame pointer and decrementing addressing.
AllocaBaseVariableType BasePointerType =
(HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
if (!FixedAllocas.empty() || !AlignedAllocas.empty()) if (!FixedAllocas.empty() || !AlignedAllocas.empty())
// No use calling findRematerializable() unless there is some // No use calling findRematerializable() unless there is some
// rematerializable alloca instruction to seed it. // rematerializable alloca instruction to seed it.
......
...@@ -586,7 +586,7 @@ private: ...@@ -586,7 +586,7 @@ private:
uint32_t Align = Inst->getAlignment(); uint32_t Align = Inst->getAlignment();
Ice::Variable *Dest = mapValueToIceVar(Inst, Ice::getPointerType()); Ice::Variable *Dest = mapValueToIceVar(Inst, Ice::getPointerType());
return Ice::InstAlloca::create(Func.get(), ByteCount, Align, Dest); return Ice::InstAlloca::create(Func.get(), Dest, ByteCount, Align);
} }
Ice::Inst *convertUnreachableInstruction(const UnreachableInst * /*Inst*/) { Ice::Inst *convertUnreachableInstruction(const UnreachableInst * /*Inst*/) {
......
...@@ -218,8 +218,8 @@ bool Inst::liveness(InstNumberT InstNumber, LivenessBV &Live, ...@@ -218,8 +218,8 @@ bool Inst::liveness(InstNumberT InstNumber, LivenessBV &Live,
return true; return true;
} }
InstAlloca::InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes, InstAlloca::InstAlloca(Cfg *Func, Variable *Dest, Operand *ByteCount,
Variable *Dest) uint32_t AlignInBytes)
: InstHighLevel(Func, Inst::Alloca, 1, Dest), AlignInBytes(AlignInBytes) { : InstHighLevel(Func, Inst::Alloca, 1, Dest), AlignInBytes(AlignInBytes) {
// Verify AlignInBytes is 0 or a power of 2. // Verify AlignInBytes is 0 or a power of 2.
assert(AlignInBytes == 0 || llvm::isPowerOf2_32(AlignInBytes)); assert(AlignInBytes == 0 || llvm::isPowerOf2_32(AlignInBytes));
......
...@@ -246,10 +246,10 @@ class InstAlloca : public InstHighLevel { ...@@ -246,10 +246,10 @@ class InstAlloca : public InstHighLevel {
InstAlloca &operator=(const InstAlloca &) = delete; InstAlloca &operator=(const InstAlloca &) = delete;
public: public:
static InstAlloca *create(Cfg *Func, Operand *ByteCount, static InstAlloca *create(Cfg *Func, Variable *Dest, Operand *ByteCount,
uint32_t AlignInBytes, Variable *Dest) { uint32_t AlignInBytes) {
return new (Func->allocate<InstAlloca>()) return new (Func->allocate<InstAlloca>())
InstAlloca(Func, ByteCount, AlignInBytes, Dest); InstAlloca(Func, Dest, ByteCount, AlignInBytes);
} }
uint32_t getAlignInBytes() const { return AlignInBytes; } uint32_t getAlignInBytes() const { return AlignInBytes; }
Operand *getSizeInBytes() const { return getSrc(0); } Operand *getSizeInBytes() const { return getSrc(0); }
...@@ -259,8 +259,8 @@ public: ...@@ -259,8 +259,8 @@ public:
static bool classof(const Inst *Inst) { return Inst->getKind() == Alloca; } static bool classof(const Inst *Inst) { return Inst->getKind() == Alloca; }
private: private:
InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes, InstAlloca(Cfg *Func, Variable *Dest, Operand *ByteCount,
Variable *Dest); uint32_t AlignInBytes);
const uint32_t AlignInBytes; const uint32_t AlignInBytes;
bool KnownFrameOffset = false; bool KnownFrameOffset = false;
......
...@@ -98,17 +98,34 @@ MachineTraits<TargetX8632>::X86OperandMem::X86OperandMem( ...@@ -98,17 +98,34 @@ MachineTraits<TargetX8632>::X86OperandMem::X86OperandMem(
} }
} }
namespace {
/// Computes the displacement contributed by a rematerializable base variable:
/// its recorded stack offset plus a register-dependent correction.
/// A rematerializable variable is pinned to either the stack register or the
/// frame register; any other register indicates a lowering bug.
/// Note: `static` dropped — it is redundant inside an anonymous namespace,
/// which already gives the function internal linkage.
int32_t GetRematerializableOffset(Variable *Var, bool IgnoreStackAdjust,
                                  const Ice::TargetX8632 *Target) {
  int32_t Disp = Var->getStackOffset();
  const SizeT RegNum = static_cast<SizeT>(Var->getRegNum());
  if (RegNum == Target->getStackReg()) {
    // esp-relative: fold in the current stack adjustment unless the caller
    // explicitly asked to ignore it.
    if (!IgnoreStackAdjust)
      Disp += Target->getStackAdjustment();
  } else if (RegNum == Target->getFrameReg()) {
    // ebp-relative: fixed allocas live at a fixed offset from the frame
    // pointer, reserved by the prolog.
    Disp += Target->getFrameFixedAllocaOffset();
  } else {
    llvm::report_fatal_error("Unexpected rematerializable register type");
  }
  return Disp;
}
} // end of anonymous namespace
void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const { void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
const ::Ice::TargetLowering *Target = Func->getTarget(); const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
// If the base is rematerializable, we need to replace it with the correct // If the base is rematerializable, we need to replace it with the correct
// physical register (esp or ebp), and update the Offset. // physical register (esp or ebp), and update the Offset.
int32_t Disp = 0; int32_t Disp = 0;
if (getBase() && getBase()->isRematerializable()) { if (getBase() && getBase()->isRematerializable()) {
Disp += getBase()->getStackOffset(); Disp +=
if (!getIgnoreStackAdjust()) GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
Disp += Target->getStackAdjustment();
} }
// The index should never be rematerializable. But if we ever allow it, then // The index should never be rematerializable. But if we ever allow it, then
// we should make sure the rematerialization offset is shifted by the Shift // we should make sure the rematerialization offset is shifted by the Shift
...@@ -135,7 +152,7 @@ void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const { ...@@ -135,7 +152,7 @@ void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
// TODO(sehr): ConstantRelocatable still needs updating for // TODO(sehr): ConstantRelocatable still needs updating for
// rematerializable base/index and Disp. // rematerializable base/index and Disp.
assert(Disp == 0); assert(Disp == 0);
CR->emitWithoutPrefix(Func->getTarget()); CR->emitWithoutPrefix(Target);
} else { } else {
llvm_unreachable("Invalid offset type for x86 mem operand"); llvm_unreachable("Invalid offset type for x86 mem operand");
} }
...@@ -165,10 +182,10 @@ void MachineTraits<TargetX8632>::X86OperandMem::dump(const Cfg *Func, ...@@ -165,10 +182,10 @@ void MachineTraits<TargetX8632>::X86OperandMem::dump(const Cfg *Func,
bool Dumped = false; bool Dumped = false;
Str << "["; Str << "[";
int32_t Disp = 0; int32_t Disp = 0;
const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
if (getBase() && getBase()->isRematerializable()) { if (getBase() && getBase()->isRematerializable()) {
Disp += getBase()->getStackOffset(); Disp +=
if (!getIgnoreStackAdjust()) GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
Disp += Func->getTarget()->getStackAdjustment();
} }
if (getBase()) { if (getBase()) {
if (Func) if (Func)
...@@ -230,13 +247,12 @@ void MachineTraits<TargetX8632>::X86OperandMem::emitSegmentOverride( ...@@ -230,13 +247,12 @@ void MachineTraits<TargetX8632>::X86OperandMem::emitSegmentOverride(
MachineTraits<TargetX8632>::Address MachineTraits<TargetX8632>::Address
MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress( MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
MachineTraits<TargetX8632>::Assembler *Asm, MachineTraits<TargetX8632>::Assembler *Asm,
const Ice::TargetLowering *Target) const { const Ice::TargetLowering *TargetLowering) const {
int32_t Disp = 0; int32_t Disp = 0;
const auto *Target = static_cast<const Ice::TargetX8632 *>(TargetLowering);
if (getBase() && getBase()->isRematerializable()) { if (getBase() && getBase()->isRematerializable()) {
Disp += getBase()->getStackOffset(); Disp +=
if (!getIgnoreStackAdjust()) { GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
Disp += Target->getStackAdjustment();
}
} }
// The index should never be rematerializable. But if we ever allow it, then // The index should never be rematerializable. But if we ever allow it, then
// we should make sure the rematerialization offset is shifted by the Shift // we should make sure the rematerialization offset is shifted by the Shift
......
...@@ -487,7 +487,8 @@ void ConstantRelocatable::emit(TargetLowering *Target) const { ...@@ -487,7 +487,8 @@ void ConstantRelocatable::emit(TargetLowering *Target) const {
Target->emit(this); Target->emit(this);
} }
void ConstantRelocatable::emitWithoutPrefix(TargetLowering *Target) const { void ConstantRelocatable::emitWithoutPrefix(
const TargetLowering *Target) const {
Target->emitWithoutPrefix(this); Target->emitWithoutPrefix(this);
} }
......
...@@ -290,7 +290,7 @@ public: ...@@ -290,7 +290,7 @@ public:
bool getSuppressMangling() const { return SuppressMangling; } bool getSuppressMangling() const { return SuppressMangling; }
using Constant::emit; using Constant::emit;
void emit(TargetLowering *Target) const final; void emit(TargetLowering *Target) const final;
void emitWithoutPrefix(TargetLowering *Target) const; void emitWithoutPrefix(const TargetLowering *Target) const;
using Constant::dump; using Constant::dump;
void dump(const Cfg *Func, Ostream &Str) const override; void dump(const Cfg *Func, Ostream &Str) const override;
......
...@@ -368,7 +368,13 @@ void TargetLowering::getVarStackSlotParams( ...@@ -368,7 +368,13 @@ void TargetLowering::getVarStackSlotParams(
VarList SpilledVariables; VarList SpilledVariables;
for (Variable *Var : Variables) { for (Variable *Var : Variables) {
if (Var->hasReg()) { if (Var->hasReg()) {
RegsUsed[Var->getRegNum()] = true; // Don't consider a rematerializable variable to be an actual register use
// (specifically of the frame pointer). Otherwise, the prolog may decide
// to save the frame pointer twice - once because of the explicit need for
// a frame pointer, and once because of an active use of a callee-save
// register.
if (!Var->isRematerializable())
RegsUsed[Var->getRegNum()] = true;
continue; continue;
} }
// An argument either does not need a stack slot (if passed in a register) // An argument either does not need a stack slot (if passed in a register)
......
...@@ -203,9 +203,12 @@ public: ...@@ -203,9 +203,12 @@ public:
virtual bool hasFramePointer() const { return false; } virtual bool hasFramePointer() const { return false; }
virtual void setHasFramePointer() = 0; virtual void setHasFramePointer() = 0;
virtual SizeT getStackReg() const = 0; virtual SizeT getStackReg() const = 0;
virtual SizeT getFrameReg() const = 0;
virtual SizeT getFrameOrStackReg() const = 0; virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0; virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
virtual uint32_t getStackAlignment() const = 0; virtual uint32_t getStackAlignment() const = 0;
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0;
/// Return whether a 64-bit Variable should be split into a Variable64On32. /// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0; virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
......
...@@ -86,8 +86,9 @@ public: ...@@ -86,8 +86,9 @@ public:
bool hasFramePointer() const override { return UsesFramePointer; } bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; } void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegARM32::Reg_sp; } SizeT getStackReg() const override { return RegARM32::Reg_sp; }
SizeT getFrameReg() const override { return RegARM32::Reg_fp; }
SizeT getFrameOrStackReg() const override { SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegARM32::Reg_fp : RegARM32::Reg_sp; return UsesFramePointer ? getFrameReg() : getStackReg();
} }
SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; } SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; }
...@@ -97,6 +98,17 @@ public: ...@@ -97,6 +98,17 @@ public:
return (typeWidthInBytes(Ty) + 3) & ~3; return (typeWidthInBytes(Ty) + 3) & ~3;
} }
uint32_t getStackAlignment() const override; uint32_t getStackAlignment() const override;
// Records the combined fixed-alloca area (size and alignment) so the prolog
// can allocate it in one adjustment. Fixed stack layout is not implemented
// for ARM32 yet, so abort loudly rather than silently miscompile.
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr,jpp): Implement fixed stack layout.
(void)Size;
(void)Align;
llvm::report_fatal_error("Not yet implemented");
}
// Offset from the frame pointer where the fixed-alloca area starts. Not
// implemented for ARM32; the unreachable `return 0` only quiets compilers
// that do not know report_fatal_error is noreturn.
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr,jpp): Implement fixed stack layout.
llvm::report_fatal_error("Not yet implemented");
return 0;
}
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
......
...@@ -54,8 +54,9 @@ public: ...@@ -54,8 +54,9 @@ public:
bool hasFramePointer() const override { return UsesFramePointer; } bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; } void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegMIPS32::Reg_SP; } SizeT getStackReg() const override { return RegMIPS32::Reg_SP; }
SizeT getFrameReg() const override { return RegMIPS32::Reg_FP; }
SizeT getFrameOrStackReg() const override { SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP; return UsesFramePointer ? getFrameReg() : getStackReg();
} }
size_t typeWidthInBytesOnStack(Type Ty) const override { size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16 // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
...@@ -66,6 +67,17 @@ public: ...@@ -66,6 +67,17 @@ public:
// TODO(sehr): what is the stack alignment? // TODO(sehr): what is the stack alignment?
return 1; return 1;
} }
// Records the combined fixed-alloca area (size and alignment) so the prolog
// can allocate it in one adjustment. Fixed stack layout is not implemented
// for MIPS32 yet, so abort loudly rather than silently miscompile.
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr): Implement fixed stack layout.
(void)Size;
(void)Align;
llvm::report_fatal_error("Not yet implemented");
}
// Offset from the frame pointer where the fixed-alloca area starts. Not
// implemented for MIPS32; the unreachable `return 0` only quiets compilers
// that do not know report_fatal_error is noreturn.
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr): Implement fixed stack layout.
llvm::report_fatal_error("Not yet implemented");
return 0;
}
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
......
...@@ -506,20 +506,36 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -506,20 +506,36 @@ void TargetX8632::addProlog(CfgNode *Node) {
SpillAreaSizeBytes = StackSize - StackOffset; SpillAreaSizeBytes = StackSize - StackOffset;
} }
// Generate "sub esp, SpillAreaSizeBytes" // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
if (SpillAreaSizeBytes) // fixed allocations in the prolog.
if (PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
if (SpillAreaSizeBytes) {
// Generate "sub esp, SpillAreaSizeBytes"
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes)); Ctx->getConstantInt32(SpillAreaSizeBytes));
// If the fixed allocas are aligned more than the stack frame, align the
// stack pointer accordingly.
if (PrologEmitsFixedAllocas &&
FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
assert(IsEbpBasedFrame);
_and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(-FixedAllocaAlignBytes));
}
}
// Account for alloca instructions with known frame offsets. // Account for known-frame-offset alloca instructions that were not already
SpillAreaSizeBytes += FixedAllocaSizeBytes; // combined into the prolog.
if (!PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Initialize the stack adjustment so that after all the known-frame-offset // Initialize the stack adjustment so that after all the known-frame-offset
// alloca instructions are emitted, the stack adjustment will reach zero. // alloca instructions are emitted, the stack adjustment will reach zero.
resetStackAdjustment(); resetStackAdjustment();
updateStackAdjustment(-FixedAllocaSizeBytes); if (!PrologEmitsFixedAllocas)
updateStackAdjustment(-FixedAllocaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for // Fill in stack offsets for stack args, and copy args into registers for
// those that were register-allocated. Args are pushed right to left, so // those that were register-allocated. Args are pushed right to left, so
...@@ -539,11 +555,14 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -539,11 +555,14 @@ void TargetX8632::addProlog(CfgNode *Node) {
++NumXmmArgs; ++NumXmmArgs;
continue; continue;
} }
// For esp-based frames, the esp value may not stabilize to its home value // For esp-based frames where the allocas are done outside the prolog, the
// until after all the fixed-size alloca instructions have executed. In // esp value may not stabilize to its home value until after all the
// this case, a stack adjustment is needed when accessing in-args in order // fixed-size alloca instructions have executed. In this case, a stack
// to copy them into registers. // adjustment is needed when accessing in-args in order to copy them into
size_t StackAdjBytes = IsEbpBasedFrame ? 0 : -FixedAllocaSizeBytes; // registers.
size_t StackAdjBytes = 0;
if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
StackAdjBytes -= FixedAllocaSizeBytes;
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes, finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
InArgsSizeBytes); InArgsSizeBytes);
} }
......
...@@ -89,9 +89,9 @@ public: ...@@ -89,9 +89,9 @@ public:
bool hasFramePointer() const override { return IsEbpBasedFrame; } bool hasFramePointer() const override { return IsEbpBasedFrame; }
void setHasFramePointer() override { IsEbpBasedFrame = true; } void setHasFramePointer() override { IsEbpBasedFrame = true; }
SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; } SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; }
SizeT getFrameReg() const override { return Traits::RegisterSet::Reg_ebp; }
SizeT getFrameOrStackReg() const override { SizeT getFrameOrStackReg() const override {
return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp return IsEbpBasedFrame ? getFrameReg() : getStackReg();
: Traits::RegisterSet::Reg_esp;
} }
size_t typeWidthInBytesOnStack(Type Ty) const override { size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of WordType bytes. // Round up to the next multiple of WordType bytes.
...@@ -101,6 +101,16 @@ public: ...@@ -101,6 +101,16 @@ public:
uint32_t getStackAlignment() const override { uint32_t getStackAlignment() const override {
return Traits::X86_STACK_ALIGNMENT_BYTES; return Traits::X86_STACK_ALIGNMENT_BYTES;
} }
// Records the combined fixed-alloca area so addProlog() can fold it into the
// frame allocation (see PrologEmitsFixedAllocas uses in addProlog).
// NOTE(review): Align is size_t but llvm::isPowerOf2_32 checks only the low
// 32 bits -- fine for realistic alignments, but confirm no caller passes >4GB.
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
FixedAllocaSizeBytes = Size;
// Alignment must be a power of two for the `and esp, -Align` rounding.
assert(llvm::isPowerOf2_32(Align));
FixedAllocaAlignBytes = Align;
PrologEmitsFixedAllocas = true;
}
/// Returns the (negative) offset from ebp/rbp where the fixed Allocas start.
/// Both counters are size_t; subtract in signed arithmetic so the common
/// case SpillAreaSizeBytes > FixedAllocaSizeBytes does not wrap around in
/// unsigned arithmetic before the narrowing conversion to int32_t.
/// NOTE(review): meaningful only after addProlog() has finalized
/// SpillAreaSizeBytes -- confirm call ordering.
int32_t getFrameFixedAllocaOffset() const override {
  return static_cast<int32_t>(FixedAllocaSizeBytes) -
         static_cast<int32_t>(SpillAreaSizeBytes);
}
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Traits::Is64Bit ? false : Ty == IceType_i64; return Traits::Is64Bit ? false : Ty == IceType_i64;
...@@ -691,6 +701,8 @@ protected: ...@@ -691,6 +701,8 @@ protected:
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0; size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
bool PrologEmitsFixedAllocas = false;
static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet; static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM> static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases; RegisterAliases;
......
...@@ -2626,7 +2626,7 @@ void FunctionParser::ProcessRecord() { ...@@ -2626,7 +2626,7 @@ void FunctionParser::ProcessRecord() {
return; return;
} }
CurrentNode->appendInst(Ice::InstAlloca::create( CurrentNode->appendInst(Ice::InstAlloca::create(
Func.get(), ByteCount, Alignment, getNextInstVar(PtrTy))); Func.get(), getNextInstVar(PtrTy), ByteCount, Alignment));
return; return;
} }
case naclbitc::FUNC_CODE_INST_LOAD: { case naclbitc::FUNC_CODE_INST_LOAD: {
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \ ; RUN: --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
; RUN: | %if --need=target_X8632 --command FileCheck %s ; RUN: | %if --need=target_X8632 --command FileCheck \
; RUN: --check-prefix CHECK-OPTM1 %s
; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented) ; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
; once enough infrastructure is in. Also, switch to --filetype=obj ; once enough infrastructure is in. Also, switch to --filetype=obj
...@@ -33,11 +34,18 @@ entry: ...@@ -33,11 +34,18 @@ entry:
ret void ret void
} }
; CHECK-LABEL: fixed_416_align_16 ; CHECK-LABEL: fixed_416_align_16
; CHECK: sub esp,0x1a0 ; CHECK: sub esp,0x1ac
; CHECK: sub esp,0x10 ; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax ; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
; CHECK-OPTM1-LABEL: fixed_416_align_16
; CHECK-OPTM1: sub esp,0xc
; CHECK-OPTM1: sub esp,0x1a0
; CHECK-OPTM1: sub esp,0x10
; CHECK-OPTM1: mov DWORD PTR [esp],eax
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16 ; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416 ; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bl {{.*}} R_{{.*}} f1
...@@ -52,8 +60,8 @@ entry: ...@@ -52,8 +60,8 @@ entry:
; CHECK-LABEL: fixed_416_align_32 ; CHECK-LABEL: fixed_416_align_32
; CHECK: push ebp ; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp ; CHECK-NEXT: mov ebp,esp
; CHECK: sub esp,0x1a8
; CHECK: and esp,0xffffffe0 ; CHECK: and esp,0xffffffe0
; CHECK: sub esp,0x1a0
; CHECK: sub esp,0x10 ; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax ; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
...@@ -72,11 +80,16 @@ entry: ...@@ -72,11 +80,16 @@ entry:
ret void ret void
} }
; CHECK-LABEL: fixed_351_align_16 ; CHECK-LABEL: fixed_351_align_16
; CHECK: sub esp,0x160 ; CHECK: sub esp,0x16c
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax ; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
; CHECK-OPTM1-LABEL: fixed_351_align_16
; CHECK-OPTM1: sub esp,0xc
; CHECK-OPTM1: sub esp,0x160
; CHECK-OPTM1: mov DWORD PTR [esp],eax
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16 ; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352 ; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bl {{.*}} R_{{.*}} f1
...@@ -91,8 +104,8 @@ entry: ...@@ -91,8 +104,8 @@ entry:
; CHECK-LABEL: fixed_351_align_32 ; CHECK-LABEL: fixed_351_align_32
; CHECK: push ebp ; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp ; CHECK-NEXT: mov ebp,esp
; CHECK: sub esp,0x168
; CHECK: and esp,0xffffffe0 ; CHECK: and esp,0xffffffe0
; CHECK: sub esp,0x160
; CHECK: sub esp,0x10 ; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax ; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
......
...@@ -17,10 +17,8 @@ entry: ...@@ -17,10 +17,8 @@ entry:
} }
; CHECK-LABEL: caller1 ; CHECK-LABEL: caller1
; CHECK-NEXT: sub esp,0xc ; CHECK-NEXT: sub esp,0x2c
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10] ; CHECK-NEXT: mov eax,DWORD PTR [esp+0x30]
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov DWORD PTR [esp],eax ; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: sub esp,0x20 ; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov DWORD PTR [esp],eax ; CHECK-NEXT: mov DWORD PTR [esp],eax
...@@ -51,10 +49,8 @@ entry: ...@@ -51,10 +49,8 @@ entry:
} }
; CHECK-LABEL: caller2 ; CHECK-LABEL: caller2
; CHECK-NEXT: sub esp,0xc ; CHECK-NEXT: sub esp,0x4c
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10] ; CHECK-NEXT: mov eax,DWORD PTR [esp+0x50]
; CHECK-NEXT: sub esp,0x40
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov DWORD PTR [esp],eax ; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x20],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x20],eax
; CHECK-NEXT: sub esp,0x20 ; CHECK-NEXT: sub esp,0x20
......
...@@ -19,14 +19,12 @@ entry: ...@@ -19,14 +19,12 @@ entry:
ret void ret void
} }
; CHECK-LABEL: fused_small_align ; CHECK-LABEL: fused_small_align
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x30 ; CHECK-NEXT: sub esp,0x30
; CHECK-NEXT: mov {{.*}},esp ; CHECK-NEXT: mov eax,DWORD PTR [esp+0x34]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax ; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: add esp,0x3c ; CHECK-NEXT: add esp,0x30
; Test that a sequence of allocas with greater than stack alignment get fused. ; Test that a sequence of allocas with greater than stack alignment get fused.
define internal void @fused_large_align(i32 %arg) { define internal void @fused_large_align(i32 %arg) {
...@@ -45,11 +43,9 @@ entry: ...@@ -45,11 +43,9 @@ entry:
; CHECK-LABEL: fused_large_align ; CHECK-LABEL: fused_large_align
; CHECK-NEXT: push ebp ; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp ; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x8
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: sub esp,0x80 ; CHECK-NEXT: sub esp,0x80
; CHECK-NEXT: mov ecx,esp ; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax ; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax
...@@ -80,13 +76,88 @@ block2: ...@@ -80,13 +76,88 @@ block2:
br label %block1 br label %block1
} }
; CHECK-LABEL: fused_derived ; CHECK-LABEL: fused_derived
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x180 ; CHECK-NEXT: sub esp,0x180
; CHECK-NEXT: mov {{.*}},esp ; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x184]
; CHECK-NEXT: jmp ; CHECK-NEXT: jmp
; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]] ; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]]
; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]] ; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]]
; CHECK-NEXT: lea eax,[esp+0x81] ; CHECK-NEXT: lea eax,[esp+0x81]
; CHECK-NEXT: add esp,0x18c ; CHECK-NEXT: add esp,0x180
; CHECK-NEXT: ret ; CHECK-NEXT: ret
; Test that a fixed alloca gets referenced by the frame pointer.
define internal void @fused_small_align_with_dynamic(i32 %arg) {
entry:
%a1 = alloca i8, i32 8, align 16
br label %next
next:
%a2 = alloca i8, i32 12, align 1
%a3 = alloca i8, i32 16, align 1
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
%p3 = bitcast i8* %a3 to i32*
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
store i32 %arg, i32* %p3, align 1
ret void
}
; CHECK-LABEL: fused_small_align_with_dynamic
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x18
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov edx,esp
; CHECK-NEXT: mov DWORD PTR [ebp-0x18],eax
; CHECK-NEXT: mov DWORD PTR [ecx],eax
; CHECK-NEXT: mov DWORD PTR [edx],eax
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
; Test that a sequence with greater than stack alignment and dynamic size
; gets folded and referenced correctly.
define internal void @fused_large_align_with_dynamic(i32 %arg) {
entry:
%a1 = alloca i8, i32 8, align 32
%a2 = alloca i8, i32 12, align 32
%a3 = alloca i8, i32 16, align 1
%a4 = alloca i8, i32 16, align 1
br label %next
next:
%a5 = alloca i8, i32 16, align 1
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
%p3 = bitcast i8* %a3 to i32*
%p4 = bitcast i8* %a4 to i32*
%p5 = bitcast i8* %a5 to i32*
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
store i32 %arg, i32* %p3, align 1
store i32 %arg, i32* %p4, align 1
store i32 %arg, i32* %p5, align 1
ret void
}
; CHECK-LABEL: fused_large_align_with_dynamic
; CHECK-NEXT: push ebx
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x64
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0xc]
; CHECK-NEXT: and esp,0xffffffe0
; CHECK-NEXT: sub esp,0x40
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov edx,ecx
; CHECK-NEXT: add edx,0x20
; CHECK-NEXT: add ecx,0x0
; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov ebx,esp
; CHECK-NEXT: mov DWORD PTR [ecx],eax
; CHECK-NEXT: mov DWORD PTR [edx],eax
; CHECK-NEXT: mov DWORD PTR [ebp-0x14],eax
; CHECK-NEXT: mov DWORD PTR [ebp-0x24],eax
; CHECK-NEXT: mov DWORD PTR [ebx],eax
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment