Commit 1afb4836 by Sagar Thakur Committed by Jim Stichnoth

[Subzero][MIPS32] Implements lowering of alloca instruction

BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/2067183002 . Patch from Sagar Thakur <sagar.thakur@imgtec.com>.
parent bdb912f4
......@@ -163,10 +163,28 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {
RegMIPS32::getRegName, getRegClassName);
}
void TargetMIPS32::unsetIfNonLeafFunc() {
for (CfgNode *Node : Func->getNodes()) {
for (Inst &Instr : Node->getInsts()) {
if (llvm::isa<InstCall>(&Instr)) {
// Unset MaybeLeafFunc if call instruction exists.
MaybeLeafFunc = false;
return;
}
}
}
}
// Returns the stack alignment used by this target, i.e. the value of
// MIPS32_STACK_ALIGNMENT_BYTES.
uint32_t TargetMIPS32::getStackAlignment() const {
return MIPS32_STACK_ALIGNMENT_BYTES;
}
void TargetMIPS32::findMaxStackOutArgsSize() {
// MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes.
constexpr size_t MinNeededOutArgsBytes = 16;
size_t MinNeededOutArgsBytes = 0;
if (!MaybeLeafFunc)
MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
......@@ -188,10 +206,12 @@ void TargetMIPS32::translateO2() {
// https://code.google.com/p/nativeclient/issues/detail?id=4094
genTargetHelperCalls();
unsetIfNonLeafFunc();
findMaxStackOutArgsSize();
// Merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false;
static constexpr bool SortAndCombineAllocas = true;
Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing");
......@@ -291,6 +311,8 @@ void TargetMIPS32::translateOm1() {
// TODO: share passes with X86?
genTargetHelperCalls();
unsetIfNonLeafFunc();
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack.
......@@ -441,8 +463,8 @@ OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
// hold the operand.
auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg));
return OperandMIPS32Mem::create(
Func, Ty, Base,
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
Func, Ty, Base, llvm::cast<ConstantInteger32>(
Ctx->getConstantInt32(Base->getStackOffset())));
}
void TargetMIPS32::emitVariable(const Variable *Var) const {
......@@ -808,15 +830,9 @@ void TargetMIPS32::addProlog(CfgNode *Node) {
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
if (MaybeLeafFunc)
MaxOutArgsSizeBytes = 0;
// Adds the out args space to the stack, and align SP if necessary.
uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
// TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
// TotalStackSizeBytes once lowerAlloca is implemented and leaf function
// information is generated by lowerCall.
uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes +
FixedAllocaSizeBytes + MaxOutArgsSizeBytes;
// Generate "addiu sp, sp, -TotalStackSizeBytes"
if (TotalStackSizeBytes) {
......@@ -854,7 +870,7 @@ void TargetMIPS32::addProlog(CfgNode *Node) {
// those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer.
const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0;
size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
TargetMIPS32::CallingConv CC;
uint32_t ArgNo = 0;
......@@ -1002,14 +1018,64 @@ SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
}
// Lowers an alloca instruction. As listed, only allocas whose size operand is
// a ConstantInteger32 are handled; any other size operand is reported through
// UnimplementedLoweringError. For the handled case the aligned size is added
// to FixedAllocaSizeBytes and Dest is either fake-defined (no frame pointer
// needed) or set to SP plus the out-args area size.
void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
// NOTE(review): UsesFramePointer is set unconditionally here, while the
// frame-pointer decision is also computed conditionally below
// (UseFramePointer / setHasFramePointer) - confirm both are intended.
UsesFramePointer = true;
// Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved
// after the alloca. The stack alignment restriction can be relaxed in some
// cases.
NeedsStackAlignment = true;
// NOTE(review): this unconditional UnimplementedLoweringError precedes the
// actual lowering below; it looks like a leftover from the earlier
// unimplemented stub - confirm whether it should still be here.
UnimplementedLoweringError(this, Instr);
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(AlignmentParam));
assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
// The effective alignment is never smaller than the target stack alignment.
const uint32_t Alignment =
std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
const bool OptM1 = getFlags().getOptLevel() == Opt_m1;
const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
// A frame pointer is requested when one is already in use, the alloca is
// over-aligned, its frame offset is not known, or the opt level is Om1.
const bool UseFramePointer =
hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
if (UseFramePointer)
setHasFramePointer();
Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
Variable *Dest = Instr->getDest();
Operand *TotalSize = Instr->getSizeInBytes();
if (const auto *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
// Apply Alignment to the requested size (via Utils::applyAlignment) and
// accumulate the result into FixedAllocaSizeBytes.
const uint32_t Value =
Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
FixedAllocaSizeBytes += Value;
// Constant size alloca.
if (!UseFramePointer) {
// If we don't need a Frame Pointer, this alloca has a known offset to the
// stack pointer. We don't need adjust the stack pointer, nor assign any
// value to Dest, as Dest is rematerializable.
assert(Dest->isRematerializable());
Context.insert<InstFakeDef>(Dest);
return;
}
} else {
// Non-constant (variable-size) allocas are not lowered here.
UnimplementedLoweringError(this, Instr);
return;
}
// Add enough to the returned address to account for the out args area.
if (MaxOutArgsSizeBytes > 0) {
Variable *T = makeReg(getPointerType());
_addiu(T, SP, MaxOutArgsSizeBytes);
_mov(Dest, T);
} else {
// No out-args area: Dest is simply the current SP.
_mov(Dest, SP);
}
}
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
......
......@@ -89,15 +89,12 @@ public:
// are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
// Inline placeholder for the stack alignment query: always reports 1, with
// the real value left open by the TODO below.
uint32_t getStackAlignment() const override {
// TODO(sehr): what is the stack alignment?
return 1;
}
uint32_t getStackAlignment() const override;
// Records the size and alignment of the fixed alloca area in
// FixedAllocaSizeBytes / FixedAllocaAlignBytes and sets
// PrologEmitsFixedAllocas. Align is asserted to be a power of two.
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr): Implement fixed stack layout.
(void)Size;
(void)Align;
// NOTE(review): llvm::report_fatal_error does not return, so as listed the
// assignments below would be unreachable; this call (and the (void) casts
// above) look like leftovers from the earlier stub - confirm.
llvm::report_fatal_error("Not yet implemented");
FixedAllocaSizeBytes = Size;
assert(llvm::isPowerOf2_32(Align));
FixedAllocaAlignBytes = Align;
PrologEmitsFixedAllocas = true;
}
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr): Implement fixed stack layout.
......@@ -105,6 +102,8 @@ public:
return 0;
}
// Accessor returning the current value of MaxOutArgsSizeBytes.
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
// Returns true exactly when Ty is IceType_i64, and false for every other type.
bool shouldSplitToVariable64On32(Type Ty) const override {
  const bool IsInt64 = (Ty == IceType_i64);
  return IsInt64;
}
......@@ -447,6 +446,8 @@ public:
static Type stackSlotType();
Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
void unsetIfNonLeafFunc();
// Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog() to pre-allocate
// the outargs area
......@@ -563,6 +564,8 @@ protected:
static constexpr uint32_t CHAR_BITS = 8;
static constexpr uint32_t INT32_BITS = 32;
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
private:
ENABLE_MAKE_UNIQUE;
......
......@@ -26,6 +26,20 @@
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target mips32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPT2 %s
; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target mips32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPTM1 %s
define internal void @fixed_416_align_16(i32 %n) {
entry:
%array = alloca i8, i32 416, align 16
......@@ -50,6 +64,10 @@ entry:
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_416_align_16
; MIPS32-OPT2: addiu sp,sp,-440
; MIPS32-OPTM1: addiu sp,sp,-448
define internal void @fixed_416_align_32(i32 %n) {
entry:
%array = alloca i8, i32 400, align 32
......@@ -72,6 +90,10 @@ entry:
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_416_align_32
; MIPS32-OPT2: addiu sp,sp,-440
; MIPS32-OPTM1: addiu sp,sp,-448
; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) {
entry:
......@@ -97,6 +119,10 @@ entry:
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_351_align_16
; MIPS32-OPT2: addiu sp,sp,-376
; MIPS32-OPTM1: addiu sp,sp,-384
define internal void @fixed_351_align_32(i32 %n) {
entry:
%array = alloca i8, i32 351, align 32
......@@ -119,6 +145,10 @@ entry:
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_351_align_32
; MIPS32-OPT2: addiu sp,sp,-376
; MIPS32-OPTM1: addiu sp,sp,-384
declare void @f1(i32 %ignored)
declare void @f2(i32 %ignored)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment