Commit 2f3b8ec8 by David Sehr

Merge fixed alloca stack adjustments into the prolog

Also removes reliance on lowerAlloca entirely for the fixed allocations. BUG= R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1435363002 .
parent a1da6ff9
......@@ -494,53 +494,44 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
// Ensure every alloca was assigned an offset.
assert(Allocas.size() == Offsets.size());
Variable *BaseVariable = makeVariable(IceType_i32);
Variable *AllocaDest = BaseVariable;
// Emit one addition for each alloca after the first.
for (size_t i = 0; i < Allocas.size(); ++i) {
auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
switch (BaseVariableType) {
case BVT_FramePointer:
case BVT_UserPointer: {
switch (BaseVariableType) {
case BVT_UserPointer: {
Variable *BaseVariable = makeVariable(IceType_i32);
for (SizeT i = 0; i < Allocas.size(); ++i) {
auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
// Emit a new addition operation to replace the alloca.
Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
InstArithmetic *Add =
InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
BaseVariable, AllocaOffset);
Insts.push_front(Add);
} break;
case BVT_StackPointer: {
Alloca->setDeleted();
}
Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
InstAlloca *CombinedAlloca =
InstAlloca::create(this, BaseVariable, AllocaSize, CombinedAlignment);
CombinedAlloca->setKnownFrameOffset();
Insts.push_front(CombinedAlloca);
} break;
case BVT_StackPointer:
case BVT_FramePointer: {
for (SizeT i = 0; i < Allocas.size(); ++i) {
auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
// Emit a fake definition of the rematerializable variable.
Variable *Dest = Alloca->getDest();
InstFakeDef *Def = InstFakeDef::create(this, Dest);
Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
if (BaseVariableType == BVT_StackPointer)
Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
else
Dest->setRematerializable(getTarget()->getFrameReg(), Offsets[i]);
Insts.push_front(Def);
} break;
Alloca->setDeleted();
}
Alloca->setDeleted();
}
Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
switch (BaseVariableType) {
case BVT_FramePointer: {
// Adjust the return of the alloca to the top of the returned region.
AllocaDest = makeVariable(IceType_i32);
InstArithmetic *Add = InstArithmetic::create(
this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
Insts.push_front(Add);
// Allocate the fixed area in the function prolog.
getTarget()->reserveFixedAllocaArea(TotalSize, CombinedAlignment);
} break;
case BVT_StackPointer: {
// Emit a fake use to keep the Alloca live.
InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
Insts.push_front(Use);
} break;
case BVT_UserPointer:
break;
}
// And insert the fused alloca.
InstAlloca *CombinedAlloca =
InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
CombinedAlloca->setKnownFrameOffset();
Insts.push_front(CombinedAlloca);
}
void Cfg::processAllocas(bool SortAndCombine) {
......@@ -595,7 +586,7 @@ void Cfg::processAllocas(bool SortAndCombine) {
// Allocas in the entry block that have constant size and alignment greater
// than the function's stack alignment.
CfgVector<Inst *> AlignedAllocas;
// Maximum alignment used for the dynamic/aligned allocas.
// Maximum alignment used by any alloca.
uint32_t MaxAlignment = StackAlignment;
for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
......@@ -623,14 +614,16 @@ void Cfg::processAllocas(bool SortAndCombine) {
// do not have a known offset from either the stack or frame pointer.
// They grow up from a user pointer from an alloca.
sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
// Fixed size allocas are addressed relative to the frame pointer.
sortAndCombineAllocas(FixedAllocas, StackAlignment, Insts,
BVT_FramePointer);
} else {
// Otherwise, fixed size allocas are addressed relative to the stack unless
// there are dynamic allocas.
const AllocaBaseVariableType BasePointerType =
(HasDynamicAllocation ? BVT_FramePointer : BVT_StackPointer);
sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
}
// Otherwise, fixed size allocas are always addressed relative to the stack
// unless there are dynamic allocas.
// TODO(sehr): re-enable frame pointer and decrementing addressing.
AllocaBaseVariableType BasePointerType =
(HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
if (!FixedAllocas.empty() || !AlignedAllocas.empty())
// No use calling findRematerializable() unless there is some
// rematerializable alloca instruction to seed it.
......
......@@ -586,7 +586,7 @@ private:
uint32_t Align = Inst->getAlignment();
Ice::Variable *Dest = mapValueToIceVar(Inst, Ice::getPointerType());
return Ice::InstAlloca::create(Func.get(), ByteCount, Align, Dest);
return Ice::InstAlloca::create(Func.get(), Dest, ByteCount, Align);
}
Ice::Inst *convertUnreachableInstruction(const UnreachableInst * /*Inst*/) {
......
......@@ -218,8 +218,8 @@ bool Inst::liveness(InstNumberT InstNumber, LivenessBV &Live,
return true;
}
InstAlloca::InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes,
Variable *Dest)
InstAlloca::InstAlloca(Cfg *Func, Variable *Dest, Operand *ByteCount,
uint32_t AlignInBytes)
: InstHighLevel(Func, Inst::Alloca, 1, Dest), AlignInBytes(AlignInBytes) {
// Verify AlignInBytes is 0 or a power of 2.
assert(AlignInBytes == 0 || llvm::isPowerOf2_32(AlignInBytes));
......
......@@ -246,10 +246,10 @@ class InstAlloca : public InstHighLevel {
InstAlloca &operator=(const InstAlloca &) = delete;
public:
static InstAlloca *create(Cfg *Func, Operand *ByteCount,
uint32_t AlignInBytes, Variable *Dest) {
static InstAlloca *create(Cfg *Func, Variable *Dest, Operand *ByteCount,
uint32_t AlignInBytes) {
return new (Func->allocate<InstAlloca>())
InstAlloca(Func, ByteCount, AlignInBytes, Dest);
InstAlloca(Func, Dest, ByteCount, AlignInBytes);
}
uint32_t getAlignInBytes() const { return AlignInBytes; }
Operand *getSizeInBytes() const { return getSrc(0); }
......@@ -259,8 +259,8 @@ public:
static bool classof(const Inst *Inst) { return Inst->getKind() == Alloca; }
private:
InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes,
Variable *Dest);
InstAlloca(Cfg *Func, Variable *Dest, Operand *ByteCount,
uint32_t AlignInBytes);
const uint32_t AlignInBytes;
bool KnownFrameOffset = false;
......
......@@ -98,17 +98,34 @@ MachineTraits<TargetX8632>::X86OperandMem::X86OperandMem(
}
}
namespace {
/// Returns the displacement to use when the rematerializable variable \p Var
/// is replaced by its physical base register.
///
/// The result always includes the variable's stack offset. When the base is the
/// target's stack register, the target's current stack adjustment is added
/// unless \p IgnoreStackAdjust is set. When the base is the target's frame
/// register, the target's frame fixed-alloca offset is added. Any other base
/// register is reported as a fatal error.
static int32_t GetRematerializableOffset(Variable *Var, bool IgnoreStackAdjust,
                                         const Ice::TargetX8632 *Target) {
  int32_t Offset = Var->getStackOffset();
  const SizeT BaseReg = static_cast<SizeT>(Var->getRegNum());

  // Stack-register base: optionally fold in the running stack adjustment.
  if (BaseReg == Target->getStackReg()) {
    if (IgnoreStackAdjust)
      return Offset;
    Offset += Target->getStackAdjustment();
    return Offset;
  }

  // Frame-register base: shift by where the fixed alloca area sits.
  if (BaseReg == Target->getFrameReg()) {
    Offset += Target->getFrameFixedAllocaOffset();
    return Offset;
  }

  llvm::report_fatal_error("Unexpected rematerializable register type");
  return Offset;
}
} // end of anonymous namespace
void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
const ::Ice::TargetLowering *Target = Func->getTarget();
const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
// If the base is rematerializable, we need to replace it with the correct
// physical register (esp or ebp), and update the Offset.
int32_t Disp = 0;
if (getBase() && getBase()->isRematerializable()) {
Disp += getBase()->getStackOffset();
if (!getIgnoreStackAdjust())
Disp += Target->getStackAdjustment();
Disp +=
GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
}
// The index should never be rematerializable. But if we ever allow it, then
// we should make sure the rematerialization offset is shifted by the Shift
......@@ -135,7 +152,7 @@ void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
// TODO(sehr): ConstantRelocatable still needs updating for
// rematerializable base/index and Disp.
assert(Disp == 0);
CR->emitWithoutPrefix(Func->getTarget());
CR->emitWithoutPrefix(Target);
} else {
llvm_unreachable("Invalid offset type for x86 mem operand");
}
......@@ -165,10 +182,10 @@ void MachineTraits<TargetX8632>::X86OperandMem::dump(const Cfg *Func,
bool Dumped = false;
Str << "[";
int32_t Disp = 0;
const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
if (getBase() && getBase()->isRematerializable()) {
Disp += getBase()->getStackOffset();
if (!getIgnoreStackAdjust())
Disp += Func->getTarget()->getStackAdjustment();
Disp +=
GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
}
if (getBase()) {
if (Func)
......@@ -230,13 +247,12 @@ void MachineTraits<TargetX8632>::X86OperandMem::emitSegmentOverride(
MachineTraits<TargetX8632>::Address
MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
MachineTraits<TargetX8632>::Assembler *Asm,
const Ice::TargetLowering *Target) const {
const Ice::TargetLowering *TargetLowering) const {
int32_t Disp = 0;
const auto *Target = static_cast<const Ice::TargetX8632 *>(TargetLowering);
if (getBase() && getBase()->isRematerializable()) {
Disp += getBase()->getStackOffset();
if (!getIgnoreStackAdjust()) {
Disp += Target->getStackAdjustment();
}
Disp +=
GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
}
// The index should never be rematerializable. But if we ever allow it, then
// we should make sure the rematerialization offset is shifted by the Shift
......
......@@ -487,7 +487,8 @@ void ConstantRelocatable::emit(TargetLowering *Target) const {
Target->emit(this);
}
void ConstantRelocatable::emitWithoutPrefix(TargetLowering *Target) const {
void ConstantRelocatable::emitWithoutPrefix(
const TargetLowering *Target) const {
Target->emitWithoutPrefix(this);
}
......
......@@ -290,7 +290,7 @@ public:
bool getSuppressMangling() const { return SuppressMangling; }
using Constant::emit;
void emit(TargetLowering *Target) const final;
void emitWithoutPrefix(TargetLowering *Target) const;
void emitWithoutPrefix(const TargetLowering *Target) const;
using Constant::dump;
void dump(const Cfg *Func, Ostream &Str) const override;
......
......@@ -368,7 +368,13 @@ void TargetLowering::getVarStackSlotParams(
VarList SpilledVariables;
for (Variable *Var : Variables) {
if (Var->hasReg()) {
RegsUsed[Var->getRegNum()] = true;
// Don't consider a rematerializable variable to be an actual register use
// (specifically of the frame pointer). Otherwise, the prolog may decide
// to save the frame pointer twice - once because of the explicit need for
// a frame pointer, and once because of an active use of a callee-save
// register.
if (!Var->isRematerializable())
RegsUsed[Var->getRegNum()] = true;
continue;
}
// An argument either does not need a stack slot (if passed in a register)
......
......@@ -203,9 +203,12 @@ public:
virtual bool hasFramePointer() const { return false; }
virtual void setHasFramePointer() = 0;
virtual SizeT getStackReg() const = 0;
virtual SizeT getFrameReg() const = 0;
virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
virtual uint32_t getStackAlignment() const = 0;
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0;
/// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
......
......@@ -86,8 +86,9 @@ public:
bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegARM32::Reg_sp; }
SizeT getFrameReg() const override { return RegARM32::Reg_fp; }
SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegARM32::Reg_fp : RegARM32::Reg_sp;
return UsesFramePointer ? getFrameReg() : getStackReg();
}
SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; }
......@@ -97,6 +98,17 @@ public:
return (typeWidthInBytes(Ty) + 3) & ~3;
}
uint32_t getStackAlignment() const override;
/// Fixed alloca area reservation is not available for ARM32 yet; calling this
/// reports a fatal error. Both arguments are ignored.
void reserveFixedAllocaArea(size_t /*Size*/, size_t /*Align*/) override {
  // TODO(sehr,jpp): Implement fixed stack layout.
  llvm::report_fatal_error("Not yet implemented");
}
/// Frame-relative offset of the fixed alloca area. Not available for ARM32
/// yet: calling this reports a fatal error.
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr,jpp): Implement fixed stack layout.
llvm::report_fatal_error("Not yet implemented");
// NOTE(review): presumably unreachable and kept only so the function has a
// return statement -- confirm report_fatal_error never returns.
return 0;
}
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
......
......@@ -54,8 +54,9 @@ public:
bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegMIPS32::Reg_SP; }
SizeT getFrameReg() const override { return RegMIPS32::Reg_FP; }
SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP;
return UsesFramePointer ? getFrameReg() : getStackReg();
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
......@@ -66,6 +67,17 @@ public:
// TODO(sehr): what is the stack alignment?
return 1;
}
/// Fixed alloca area reservation is not available for MIPS32 yet; calling this
/// reports a fatal error. Both arguments are ignored.
void reserveFixedAllocaArea(size_t /*Size*/, size_t /*Align*/) override {
  // TODO(sehr): Implement fixed stack layout.
  llvm::report_fatal_error("Not yet implemented");
}
/// Frame-relative offset of the fixed alloca area. Not available for MIPS32
/// yet: calling this reports a fatal error.
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr): Implement fixed stack layout.
llvm::report_fatal_error("Not yet implemented");
// NOTE(review): presumably unreachable and kept only so the function has a
// return statement -- confirm report_fatal_error never returns.
return 0;
}
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
......
......@@ -506,20 +506,36 @@ void TargetX8632::addProlog(CfgNode *Node) {
SpillAreaSizeBytes = StackSize - StackOffset;
}
// Generate "sub esp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes)
// Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
// fixed allocations in the prolog.
if (PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
if (SpillAreaSizeBytes) {
// Generate "sub esp, SpillAreaSizeBytes"
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes));
// If the fixed allocas are aligned more than the stack frame, align the
// stack pointer accordingly.
if (PrologEmitsFixedAllocas &&
FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
assert(IsEbpBasedFrame);
_and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(-FixedAllocaAlignBytes));
}
}
// Account for alloca instructions with known frame offsets.
SpillAreaSizeBytes += FixedAllocaSizeBytes;
// Account for known-frame-offset alloca instructions that were not already
// combined into the prolog.
if (!PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Initialize the stack adjustment so that after all the known-frame-offset
// alloca instructions are emitted, the stack adjustment will reach zero.
resetStackAdjustment();
updateStackAdjustment(-FixedAllocaSizeBytes);
if (!PrologEmitsFixedAllocas)
updateStackAdjustment(-FixedAllocaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for
// those that were register-allocated. Args are pushed right to left, so
......@@ -539,11 +555,14 @@ void TargetX8632::addProlog(CfgNode *Node) {
++NumXmmArgs;
continue;
}
// For esp-based frames, the esp value may not stabilize to its home value
// until after all the fixed-size alloca instructions have executed. In
// this case, a stack adjustment is needed when accessing in-args in order
// to copy them into registers.
size_t StackAdjBytes = IsEbpBasedFrame ? 0 : -FixedAllocaSizeBytes;
// For esp-based frames where the allocas are done outside the prolog, the
// esp value may not stabilize to its home value until after all the
// fixed-size alloca instructions have executed. In this case, a stack
// adjustment is needed when accessing in-args in order to copy them into
// registers.
size_t StackAdjBytes = 0;
if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
StackAdjBytes -= FixedAllocaSizeBytes;
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
InArgsSizeBytes);
}
......
......@@ -89,9 +89,9 @@ public:
bool hasFramePointer() const override { return IsEbpBasedFrame; }
void setHasFramePointer() override { IsEbpBasedFrame = true; }
SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; }
SizeT getFrameReg() const override { return Traits::RegisterSet::Reg_ebp; }
SizeT getFrameOrStackReg() const override {
return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp
: Traits::RegisterSet::Reg_esp;
return IsEbpBasedFrame ? getFrameReg() : getStackReg();
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of WordType bytes.
......@@ -101,6 +101,16 @@ public:
uint32_t getStackAlignment() const override {
return Traits::X86_STACK_ALIGNMENT_BYTES;
}
/// Records the size and alignment of the fixed alloca area and flags that the
/// prolog is responsible for emitting it. \p Align must be a power of two
/// (checked by assertion).
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
  assert(llvm::isPowerOf2_32(Align));
  PrologEmitsFixedAllocas = true;
  FixedAllocaAlignBytes = Align;
  FixedAllocaSizeBytes = Size;
}
/// Returns the (negative) offset from ebp/rbp where the fixed Allocas start,
/// computed as the fixed alloca size minus the spill area size.
int32_t getFrameFixedAllocaOffset() const override {
  // The subtraction is done on the size_t members and then narrowed to the
  // signed return type, exactly as the implicit conversion would do.
  const size_t Difference = FixedAllocaSizeBytes - SpillAreaSizeBytes;
  return static_cast<int32_t>(Difference);
}
bool shouldSplitToVariable64On32(Type Ty) const override {
return Traits::Is64Bit ? false : Ty == IceType_i64;
......@@ -691,6 +701,8 @@ protected:
bool NeedsStackAlignment = false;
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
bool PrologEmitsFixedAllocas = false;
static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases;
......
......@@ -2626,7 +2626,7 @@ void FunctionParser::ProcessRecord() {
return;
}
CurrentNode->appendInst(Ice::InstAlloca::create(
Func.get(), ByteCount, Alignment, getNextInstVar(PtrTy)));
Func.get(), getNextInstVar(PtrTy), ByteCount, Alignment));
return;
}
case naclbitc::FUNC_CODE_INST_LOAD: {
......
......@@ -6,7 +6,8 @@
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: | %if --need=target_X8632 --command FileCheck \
; RUN: --check-prefix CHECK-OPTM1 %s
; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
; once enough infrastructure is in. Also, switch to --filetype=obj
......@@ -33,11 +34,18 @@ entry:
ret void
}
; CHECK-LABEL: fixed_416_align_16
; CHECK: sub esp,0x1a0
; CHECK: sub esp,0x1ac
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
; CHECK-OPTM1-LABEL: fixed_416_align_16
; CHECK-OPTM1: sub esp,0xc
; CHECK-OPTM1: sub esp,0x1a0
; CHECK-OPTM1: sub esp,0x10
; CHECK-OPTM1: mov DWORD PTR [esp],eax
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
......@@ -52,8 +60,8 @@ entry:
; CHECK-LABEL: fixed_416_align_32
; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK: sub esp,0x1a8
; CHECK: and esp,0xffffffe0
; CHECK: sub esp,0x1a0
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
......@@ -72,11 +80,16 @@ entry:
ret void
}
; CHECK-LABEL: fixed_351_align_16
; CHECK: sub esp,0x160
; CHECK: sub esp,0x10
; CHECK: sub esp,0x16c
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
; CHECK-OPTM1-LABEL: fixed_351_align_16
; CHECK-OPTM1: sub esp,0xc
; CHECK-OPTM1: sub esp,0x160
; CHECK-OPTM1: mov DWORD PTR [esp],eax
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
......@@ -91,8 +104,8 @@ entry:
; CHECK-LABEL: fixed_351_align_32
; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK: sub esp,0x168
; CHECK: and esp,0xffffffe0
; CHECK: sub esp,0x160
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
......
......@@ -17,10 +17,8 @@ entry:
}
; CHECK-LABEL: caller1
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: sub esp,0x2c
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x30]
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov DWORD PTR [esp],eax
......@@ -51,10 +49,8 @@ entry:
}
; CHECK-LABEL: caller2
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x40
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: sub esp,0x4c
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x50]
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x20],eax
; CHECK-NEXT: sub esp,0x20
......
......@@ -19,14 +19,12 @@ entry:
ret void
}
; CHECK-LABEL: fused_small_align
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x30
; CHECK-NEXT: mov {{.*}},esp
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x34]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: add esp,0x3c
; CHECK-NEXT: add esp,0x30
; Test that a sequence of allocas with greater than stack alignment get fused.
define internal void @fused_large_align(i32 %arg) {
......@@ -45,11 +43,9 @@ entry:
; CHECK-LABEL: fused_large_align
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x8
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: sub esp,0x80
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax
......@@ -80,13 +76,88 @@ block2:
br label %block1
}
; CHECK-LABEL: fused_derived
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x180
; CHECK-NEXT: mov {{.*}},esp
; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x184]
; CHECK-NEXT: jmp
; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]]
; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]]
; CHECK-NEXT: lea eax,[esp+0x81]
; CHECK-NEXT: add esp,0x18c
; CHECK-NEXT: add esp,0x180
; CHECK-NEXT: ret
; Test that a fixed alloca gets referenced by the frame pointer.
; Input: one constant-size alloca in the entry block (%a1, 8 bytes, align 16)
; followed by two constant-size allocas in a later block (%a2 and %a3). The
; incoming argument is stored through each of the three pointers.
define internal void @fused_small_align_with_dynamic(i32 %arg) {
entry:
; Entry-block alloca.
%a1 = alloca i8, i32 8, align 16
br label %next
next:
; Allocas outside the entry block.
%a2 = alloca i8, i32 12, align 1
%a3 = alloca i8, i32 16, align 1
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
%p3 = bitcast i8* %a3 to i32*
; One store per alloca so that every allocation is referenced.
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
store i32 %arg, i32* %p3, align 1
ret void
}
; CHECK-LABEL: fused_small_align_with_dynamic
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x18
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov edx,esp
; CHECK-NEXT: mov DWORD PTR [ebp-0x18],eax
; CHECK-NEXT: mov DWORD PTR [ecx],eax
; CHECK-NEXT: mov DWORD PTR [edx],eax
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
; Test that a sequence with greater than stack alignment and dynamic size
; gets folded and referenced correctly.
; Input: four constant-size allocas in the entry block -- two with align 32
; (%a1, %a2) and two with align 1 (%a3, %a4) -- plus one constant-size alloca
; in a later block (%a5). The incoming argument is stored through each of the
; five pointers.
define internal void @fused_large_align_with_dynamic(i32 %arg) {
entry:
; Entry-block allocas requesting 32-byte alignment.
%a1 = alloca i8, i32 8, align 32
%a2 = alloca i8, i32 12, align 32
; Entry-block allocas with byte alignment.
%a3 = alloca i8, i32 16, align 1
%a4 = alloca i8, i32 16, align 1
br label %next
next:
; Alloca outside the entry block.
%a5 = alloca i8, i32 16, align 1
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
%p3 = bitcast i8* %a3 to i32*
%p4 = bitcast i8* %a4 to i32*
%p5 = bitcast i8* %a5 to i32*
; One store per alloca so that every allocation is referenced.
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
store i32 %arg, i32* %p3, align 1
store i32 %arg, i32* %p4, align 1
store i32 %arg, i32* %p5, align 1
ret void
}
; CHECK-LABEL: fused_large_align_with_dynamic
; CHECK-NEXT: push ebx
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x64
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0xc]
; CHECK-NEXT: and esp,0xffffffe0
; CHECK-NEXT: sub esp,0x40
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov edx,ecx
; CHECK-NEXT: add edx,0x20
; CHECK-NEXT: add ecx,0x0
; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov ebx,esp
; CHECK-NEXT: mov DWORD PTR [ecx],eax
; CHECK-NEXT: mov DWORD PTR [edx],eax
; CHECK-NEXT: mov DWORD PTR [ebp-0x14],eax
; CHECK-NEXT: mov DWORD PTR [ebp-0x24],eax
; CHECK-NEXT: mov DWORD PTR [ebx],eax
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment