Commit 1afb4836 by Sagar Thakur Committed by Jim Stichnoth

[Subzero][MIPS32] Implements lowering of alloca instruction

BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/2067183002 . Patch from Sagar Thakur <sagar.thakur@imgtec.com>.
parent bdb912f4
...@@ -163,10 +163,28 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { ...@@ -163,10 +163,28 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {
RegMIPS32::getRegName, getRegClassName); RegMIPS32::getRegName, getRegClassName);
} }
void TargetMIPS32::unsetIfNonLeafFunc() {
for (CfgNode *Node : Func->getNodes()) {
for (Inst &Instr : Node->getInsts()) {
if (llvm::isa<InstCall>(&Instr)) {
// Unset MaybeLeafFunc if call instruction exists.
MaybeLeafFunc = false;
return;
}
}
}
}
// Returns the stack alignment used by this target, which is the value of the
// MIPS32_STACK_ALIGNMENT_BYTES constant.
uint32_t TargetMIPS32::getStackAlignment() const {
return MIPS32_STACK_ALIGNMENT_BYTES;
}
void TargetMIPS32::findMaxStackOutArgsSize() { void TargetMIPS32::findMaxStackOutArgsSize() {
// MinNeededOutArgsBytes should be updated if the Target ever creates a // MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes. // high-level InstCall that requires more stack bytes.
constexpr size_t MinNeededOutArgsBytes = 16; size_t MinNeededOutArgsBytes = 0;
if (!MaybeLeafFunc)
MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes; MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
for (CfgNode *Node : Func->getNodes()) { for (CfgNode *Node : Func->getNodes()) {
Context.init(Node); Context.init(Node);
...@@ -188,10 +206,12 @@ void TargetMIPS32::translateO2() { ...@@ -188,10 +206,12 @@ void TargetMIPS32::translateO2() {
// https://code.google.com/p/nativeclient/issues/detail?id=4094 // https://code.google.com/p/nativeclient/issues/detail?id=4094
genTargetHelperCalls(); genTargetHelperCalls();
unsetIfNonLeafFunc();
findMaxStackOutArgsSize(); findMaxStackOutArgsSize();
// Merge Alloca instructions, and lay out the stack. // Merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false; static constexpr bool SortAndCombineAllocas = true;
Func->processAllocas(SortAndCombineAllocas); Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing"); Func->dump("After Alloca processing");
...@@ -291,6 +311,8 @@ void TargetMIPS32::translateOm1() { ...@@ -291,6 +311,8 @@ void TargetMIPS32::translateOm1() {
// TODO: share passes with X86? // TODO: share passes with X86?
genTargetHelperCalls(); genTargetHelperCalls();
unsetIfNonLeafFunc();
findMaxStackOutArgsSize(); findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack. // Do not merge Alloca instructions, and lay out the stack.
...@@ -441,8 +463,8 @@ OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) { ...@@ -441,8 +463,8 @@ OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
// hold the operand. // hold the operand.
auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg));
return OperandMIPS32Mem::create( return OperandMIPS32Mem::create(
Func, Ty, Base, Func, Ty, Base, llvm::cast<ConstantInteger32>(
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); Ctx->getConstantInt32(Base->getStackOffset())));
} }
void TargetMIPS32::emitVariable(const Variable *Var) const { void TargetMIPS32::emitVariable(const Variable *Var) const {
...@@ -808,15 +830,9 @@ void TargetMIPS32::addProlog(CfgNode *Node) { ...@@ -808,15 +830,9 @@ void TargetMIPS32::addProlog(CfgNode *Node) {
uint32_t GlobalsAndSubsequentPaddingSize = uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes; GlobalsSize + LocalsSlotsPaddingBytes;
if (MaybeLeafFunc)
MaxOutArgsSizeBytes = 0;
// Adds the out args space to the stack, and align SP if necessary. // Adds the out args space to the stack, and align SP if necessary.
uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes +
FixedAllocaSizeBytes + MaxOutArgsSizeBytes;
// TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
// TotalStackSizeBytes once lowerAlloca is implemented and leaf function
// information is generated by lowerCall.
// Generate "addiu sp, sp, -TotalStackSizeBytes" // Generate "addiu sp, sp, -TotalStackSizeBytes"
if (TotalStackSizeBytes) { if (TotalStackSizeBytes) {
...@@ -854,7 +870,7 @@ void TargetMIPS32::addProlog(CfgNode *Node) { ...@@ -854,7 +870,7 @@ void TargetMIPS32::addProlog(CfgNode *Node) {
// those that were register-allocated. Args are pushed right to left, so // those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer. // Arg[0] is closest to the stack/frame pointer.
const VarList &Args = Func->getArgs(); const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0; size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
TargetMIPS32::CallingConv CC; TargetMIPS32::CallingConv CC;
uint32_t ArgNo = 0; uint32_t ArgNo = 0;
...@@ -1002,14 +1018,64 @@ SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include, ...@@ -1002,14 +1018,64 @@ SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
} }
void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
UsesFramePointer = true;
// Conservatively require the stack to be aligned. Some stack adjustment // Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the // operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved // alloca. All the alloca code ensures that the stack alignment is preserved
// after the alloca. The stack alignment restriction can be relaxed in some // after the alloca. The stack alignment restriction can be relaxed in some
// cases. // cases.
NeedsStackAlignment = true; NeedsStackAlignment = true;
UnimplementedLoweringError(this, Instr);
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(AlignmentParam));
assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
const uint32_t Alignment =
std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
const bool OptM1 = getFlags().getOptLevel() == Opt_m1;
const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
const bool UseFramePointer =
hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
if (UseFramePointer)
setHasFramePointer();
Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
Variable *Dest = Instr->getDest();
Operand *TotalSize = Instr->getSizeInBytes();
if (const auto *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
const uint32_t Value =
Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
FixedAllocaSizeBytes += Value;
// Constant size alloca.
if (!UseFramePointer) {
// If we don't need a Frame Pointer, this alloca has a known offset to the
// stack pointer. We don't need to adjust the stack pointer, nor assign any
// value to Dest, as Dest is rematerializable.
assert(Dest->isRematerializable());
Context.insert<InstFakeDef>(Dest);
return;
}
} else {
UnimplementedLoweringError(this, Instr);
return;
}
// Add enough to the returned address to account for the out args area.
if (MaxOutArgsSizeBytes > 0) {
Variable *T = makeReg(getPointerType());
_addiu(T, SP, MaxOutArgsSizeBytes);
_mov(Dest, T);
} else {
_mov(Dest, SP);
}
} }
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
......
...@@ -89,15 +89,12 @@ public: ...@@ -89,15 +89,12 @@ public:
// are rounded up to 4 bytes. // are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3; return (typeWidthInBytes(Ty) + 3) & ~3;
} }
uint32_t getStackAlignment() const override { uint32_t getStackAlignment() const override;
// TODO(sehr): what is the stack alignment?
return 1;
}
void reserveFixedAllocaArea(size_t Size, size_t Align) override { void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr): Implement fixed stack layout. FixedAllocaSizeBytes = Size;
(void)Size; assert(llvm::isPowerOf2_32(Align));
(void)Align; FixedAllocaAlignBytes = Align;
llvm::report_fatal_error("Not yet implemented"); PrologEmitsFixedAllocas = true;
} }
int32_t getFrameFixedAllocaOffset() const override { int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr): Implement fixed stack layout. // TODO(sehr): Implement fixed stack layout.
...@@ -105,6 +102,8 @@ public: ...@@ -105,6 +102,8 @@ public:
return 0; return 0;
} }
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
} }
...@@ -447,6 +446,8 @@ public: ...@@ -447,6 +446,8 @@ public:
static Type stackSlotType(); static Type stackSlotType();
Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT()); Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
void unsetIfNonLeafFunc();
// Iterates over the CFG and determines the maximum outgoing stack arguments // Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog() to pre-allocate // bytes. This information is later used during addProlog() to pre-allocate
// the outargs area // the outargs area
...@@ -563,6 +564,8 @@ protected: ...@@ -563,6 +564,8 @@ protected:
static constexpr uint32_t CHAR_BITS = 8; static constexpr uint32_t CHAR_BITS = 8;
static constexpr uint32_t INT32_BITS = 32; static constexpr uint32_t INT32_BITS = 32;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
private: private:
ENABLE_MAKE_UNIQUE; ENABLE_MAKE_UNIQUE;
......
...@@ -26,6 +26,20 @@ ...@@ -26,6 +26,20 @@
; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s ; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target mips32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPT2 %s
; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target mips32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPTM1 %s
define internal void @fixed_416_align_16(i32 %n) { define internal void @fixed_416_align_16(i32 %n) {
entry: entry:
%array = alloca i8, i32 416, align 16 %array = alloca i8, i32 416, align 16
...@@ -50,6 +64,10 @@ entry: ...@@ -50,6 +64,10 @@ entry:
; ARM32-OPTM1: sub sp, sp, #416 ; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_416_align_16
; MIPS32-OPT2: addiu sp,sp,-440
; MIPS32-OPTM1: addiu sp,sp,-448
define internal void @fixed_416_align_32(i32 %n) { define internal void @fixed_416_align_32(i32 %n) {
entry: entry:
%array = alloca i8, i32 400, align 32 %array = alloca i8, i32 400, align 32
...@@ -72,6 +90,10 @@ entry: ...@@ -72,6 +90,10 @@ entry:
; ARM32: bic sp, sp, #31 ; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_416_align_32
; MIPS32-OPT2: addiu sp,sp,-440
; MIPS32-OPTM1: addiu sp,sp,-448
; Show that the amount to allocate will be rounded up. ; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) { define internal void @fixed_351_align_16(i32 %n) {
entry: entry:
...@@ -97,6 +119,10 @@ entry: ...@@ -97,6 +119,10 @@ entry:
; ARM32-OPTM1: sub sp, sp, #352 ; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_351_align_16
; MIPS32-OPT2: addiu sp,sp,-376
; MIPS32-OPTM1: addiu sp,sp,-384
define internal void @fixed_351_align_32(i32 %n) { define internal void @fixed_351_align_32(i32 %n) {
entry: entry:
%array = alloca i8, i32 351, align 32 %array = alloca i8, i32 351, align 32
...@@ -119,6 +145,10 @@ entry: ...@@ -119,6 +145,10 @@ entry:
; ARM32: bic sp, sp, #31 ; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bl {{.*}} R_{{.*}} f1
; MIPS32-LABEL: fixed_351_align_32
; MIPS32-OPT2: addiu sp,sp,-376
; MIPS32-OPTM1: addiu sp,sp,-384
declare void @f1(i32 %ignored) declare void @f1(i32 %ignored)
declare void @f2(i32 %ignored) declare void @f2(i32 %ignored)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment