Commit 614140e2 by John Porto

Subzero. ARM32. Combine allocas.

parent fc22f770
...@@ -484,8 +484,14 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, ...@@ -484,8 +484,14 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
} else { } else {
// Addressing is relative to the stack pointer or to a user pointer. Add // Addressing is relative to the stack pointer or to a user pointer. Add
// the offset before adding the size of the object, because it grows // the offset before adding the size of the object, because it grows
// upwards from the stack pointer. // upwards from the stack pointer. In addition, if the addressing is
Offsets.push_back(CurrentOffset); // relative to the stack pointer, we need to add the pre-computed max out
// args size bytes.
const uint32_t OutArgsOffsetOrZero =
(BaseVariableType == BVT_StackPointer)
? getTarget()->maxOutArgsSizeBytes()
: 0;
Offsets.push_back(CurrentOffset + OutArgsOffsetOrZero);
} }
// Update the running offset of the fused alloca region. // Update the running offset of the fused alloca region.
CurrentOffset += Size; CurrentOffset += Size;
......
...@@ -211,6 +211,7 @@ public: ...@@ -211,6 +211,7 @@ public:
virtual uint32_t getStackAlignment() const = 0; virtual uint32_t getStackAlignment() const = 0;
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0; virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0; virtual int32_t getFrameFixedAllocaOffset() const = 0;
virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
/// Return whether a 64-bit Variable should be split into a Variable64On32. /// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0; virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
......
...@@ -99,16 +99,15 @@ public: ...@@ -99,16 +99,15 @@ public:
} }
uint32_t getStackAlignment() const override; uint32_t getStackAlignment() const override;
void reserveFixedAllocaArea(size_t Size, size_t Align) override { void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr,jpp): Implement fixed stack layout. FixedAllocaSizeBytes = Size;
(void)Size; assert(llvm::isPowerOf2_32(Align));
(void)Align; FixedAllocaAlignBytes = Align;
llvm::report_fatal_error("Not yet implemented"); PrologEmitsFixedAllocas = true;
} }
int32_t getFrameFixedAllocaOffset() const override { int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr,jpp): Implement fixed stack layout. return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
llvm::report_fatal_error("Not yet implemented");
return 0;
} }
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
...@@ -250,7 +249,8 @@ protected: ...@@ -250,7 +249,8 @@ protected:
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister); Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType(); static Type stackSlotType();
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister); Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
void alignRegisterPow2(Variable *Reg, uint32_t Align); void alignRegisterPow2(Variable *Reg, uint32_t Align,
int32_t TmpRegNum = Variable::NoRegister);
/// Returns a vector in a register with the given constant entries. /// Returns a vector in a register with the given constant entries.
Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister); Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
...@@ -811,7 +811,7 @@ protected: ...@@ -811,7 +811,7 @@ protected:
} }
// Iterates over the CFG and determines the maximum outgoing stack arguments // Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog() do pre-allocate // bytes. This information is later used during addProlog() to pre-allocate
// the outargs area. // the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific // TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call. // method that the Parser could call.
...@@ -852,6 +852,9 @@ protected: ...@@ -852,6 +852,9 @@ protected:
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true; bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0; uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array. // TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
...@@ -970,6 +973,29 @@ private: ...@@ -970,6 +973,29 @@ private:
}; };
BoolComputationTracker BoolComputations; BoolComputationTracker BoolComputations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
// temporaries during Ice -> ARM lowering, but before register allocation.
// This is a safeguard ensuring that, during the legalization post-passes, no
// unbound temporaries are created.
bool AllowTemporaryWithNoReg = true;
// ForbidTemporaryWithoutReg is a RAII class that manages
// AllowTemporaryWithNoReg.
class ForbidTemporaryWithoutReg {
// Scoped guard: while an instance is alive, Target->AllowTemporaryWithNoReg is
// false. Default construction, copy construction, and copy assignment are
// deleted, so a guard is always tied to exactly one target.
ForbidTemporaryWithoutReg() = delete;
ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg&) = delete;
ForbidTemporaryWithoutReg &operator=(const ForbidTemporaryWithoutReg&) = delete;
public:
// Clears the flag on Target for the lifetime of this object.
explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
Target->AllowTemporaryWithNoReg = false;
}
// Sets the flag back to true unconditionally; the value the flag held before
// construction is not saved. NOTE(review): nesting two guards would therefore
// re-enable the flag when the inner one is destroyed -- confirm guards are
// never nested.
~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
private:
// The target whose flag this guard toggles; the pointer itself never changes.
TargetARM32 *const Target;
};
}; };
class TargetDataARM32 final : public TargetDataLowering { class TargetDataARM32 final : public TargetDataLowering {
......
...@@ -28,70 +28,65 @@ define internal i32 @AllocBigAlign() { ...@@ -28,70 +28,65 @@ define internal i32 @AllocBigAlign() {
; ASM-LABEL:AllocBigAlign: ; ASM-LABEL:AllocBigAlign:
; ASM-NEXT:.LAllocBigAlign$__0: ; ASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; ASM-NEXT: mov fp, sp
; ASM-NEXT: sub sp, sp, #12
; ASM-NEXT: bic sp, sp, #31
; ASM-NEXT: sub sp, sp, #32
; ASM-NEXT: mov r0, sp
; ASM-NEXT: mov sp, fp
; ASM-NEXT: pop {fp}
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-LABEL:00000000 <AllocBigAlign>: ; DIS-LABEL:00000000 <AllocBigAlign>:
; DIS-NEXT: 0: e52db004
; DIS-NEXT: 4: e1a0b00d
; DIS-NEXT: 8: e24dd00c
; DIS-NEXT: c: e3cdd01f
; DIS-NEXT: 10: e24dd020
; DIS-NEXT: 14: e1a0000d
; DIS-NEXT: 18: e1a0d00b
; DIS-NEXT: 1c: e49db004
; DIS-NEXT: 20: e12fff1e
; IASM-LABEL:AllocBigAlign: ; IASM-LABEL:AllocBigAlign:
; IASM-NEXT:.LAllocBigAlign$__0: ; IASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; DIS-NEXT: 0: e52db004
; IASM-NEXT: .byte 0x4 ; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x2d ; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov fp, sp
; DIS-NEXT: 4: e1a0b00d
; IASM: .byte 0xd ; IASM: .byte 0xd
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; IASM: .byte 0xc ; ASM-NEXT: sub sp, sp, #32
; DIS-NEXT: 8: e24dd020
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d ; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2 ; IASM-NEXT: .byte 0xe2
; ASM-NEXT: bic sp, sp, #31
; DIS-NEXT: c: e3cdd01f
; IASM: .byte 0x1f ; IASM: .byte 0x1f
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xcd ; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; IASM: .byte 0x20 ; ASM-NEXT: # sp = def.pseudo
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; IASM: .byte 0xd ; ASM-NEXT: add r0, sp, #0
; DIS-NEXT: 10: e28d0000
; IASM: .byte 0x0
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe2
; ASM-NEXT: mov sp, fp
; DIS-NEXT: 14: e1a0d00b
; IASM: .byte 0xb ; IASM: .byte 0xb
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; ASM-NEXT: pop {fp}
; DIS-NEXT: 18: e49db004
; IASM-NEXT: .byte 0x4 ; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4 ; IASM-NEXT: .byte 0xe4
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e12fff1e
; IASM: .byte 0x1e ; IASM: .byte 0x1e
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x2f
......
...@@ -17,14 +17,14 @@ ...@@ -17,14 +17,14 @@
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \ ; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \ ; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s ; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPT2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \ ; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \ ; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \ ; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \ ; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s ; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
define internal void @fixed_416_align_16(i32 %n) { define internal void @fixed_416_align_16(i32 %n) {
entry: entry:
...@@ -47,8 +47,9 @@ entry: ...@@ -47,8 +47,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1 ; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16 ; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416 ; ARM32-OPT2: sub sp, sp, #428
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_416_align_32(i32 %n) { define internal void @fixed_416_align_32(i32 %n) {
entry: entry:
...@@ -67,9 +68,10 @@ entry: ...@@ -67,9 +68,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_32 ; ARM32-LABEL: fixed_416_align_32
; ARM32: bic sp, sp, #31 ; ARM32-OPT2: sub sp, sp, #424
; ARM32: sub sp, sp, #416 ; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
; Show that the amount to allocate will be rounded up. ; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) { define internal void @fixed_351_align_16(i32 %n) {
...@@ -91,8 +93,9 @@ entry: ...@@ -91,8 +93,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1 ; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16 ; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352 ; ARM32-OPT2: sub sp, sp, #364
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_351_align_32(i32 %n) { define internal void @fixed_351_align_32(i32 %n) {
entry: entry:
...@@ -111,9 +114,10 @@ entry: ...@@ -111,9 +114,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_32 ; ARM32-LABEL: fixed_351_align_32
; ARM32: bic sp, sp, #31 ; ARM32-OPT2: sub sp, sp, #360
; ARM32: sub sp, sp, #352 ; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
declare void @f1(i32 %ignored) declare void @f1(i32 %ignored)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment