Subzero: Align spill locations to natural alignment.

This requires sorting the spilled variables based on alignment and introducing additional padding around the spill location areas.

These changes allow vector instructions to accept memory operands.

Old stack frame layout:         New stack frame layout:

+---------------------+         +---------------------+
| return address      |         | return address      |
+---------------------+         +---------------------+
| preserved registers |         | preserved registers |
+---------------------+         +---------------------+
| global spill area   |         | padding             |
+---------------------+         +---------------------+
| local spill area    |         | global spill area   |
+---------------------+         +---------------------+
| padding             |         | padding             |
+---------------------+         +---------------------+
| local variables     |         | local spill area    |
+---------------------+         +---------------------+
                                | padding             |
                                +---------------------+
                                | local variables     |
                                +---------------------+

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/465413003

Subzero: Align spill locations to natural alignment.
d4799f47 · Matt Wala · f820da5e · d4799f47 · d4799f47 · d4799f47
Commit d4799f47 authored Aug 14, 2014 by Matt Wala
4 changed files
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -134,7 +134,7 @@ public:
  virtual bool hasFramePointer() const { return false; }
  virtual SizeT getFrameOrStackReg() const = 0;
-  virtual size_t typeWidthInBytesOnStack(Type Ty) = 0;
+  virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
  bool hasComputedFrame() const { return HasComputedFrame; }
  int32_t getStackAdjustment() const { return StackAdjustment; }
  void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }

--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -40,7 +40,7 @@ public:
  virtual SizeT getFrameOrStackReg() const {
    return IsEbpBasedFrame ? Reg_ebp : Reg_esp;
  }
-  virtual size_t typeWidthInBytesOnStack(Type Ty) {
+  virtual size_t typeWidthInBytesOnStack(Type Ty) const {
    // Round up to the next multiple of 4 bytes.  In particular, i1,
    // i8, and i16 are rounded up to 4 bytes.
    return (typeWidthInBytes(Ty) + 3) & ~3;
@@ -125,6 +125,8 @@ protected:
  void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                           Operand *Src0, Operand *Src1);
+  void sortByAlignment(VarList &Dest, const VarList &Source) const;
  // Operand legalization helpers.  To deal with address mode
  // constraints, the helpers will create a new Operand and emit
  // instructions that guarantee that the Operand kind is one of those
@@ -458,7 +460,7 @@ protected:
  bool IsEbpBasedFrame;
  bool NeedsStackAlignment;
  size_t FrameSizeLocals;
-  size_t LocalsSizeBytes;
+  size_t SpillAreaSizeBytes;
  llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
  llvm::SmallBitVector ScratchRegs;
  llvm::SmallBitVector RegsUsed;

--- a/tests_lit/llvm2ice_tests/align-spill-locations.ll
+++ b/tests_lit/llvm2ice_tests/align-spill-locations.ll
+; This checks to ensure that Subzero aligns spill slots.
+; RUN: %llvm2ice --verbose none %s | FileCheck  %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck  %s
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+; The location of the stack slot for a variable is inferred from the
+; return sequence.
+; In this file, "global" refers to a variable with a live range across
+; multiple basic blocks (not an LLVM global variable) and "local"
+; refers to a variable that is live in only a single basic block.
+define <4 x i32> @align_global_vector(i32 %arg) {
+entry:
+  %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0
+  br label %block
+block:
+  call void @ForceXmmSpills()
+  ret <4 x i32> %vec.global
+; CHECK-LABEL: align_global_vector:
+; CHECK: movups xmm0, xmmword ptr [esp]
+; CHECK-NEXT: add esp, 28
+; CHECK-NEXT: ret
+}
+define <4 x i32> @align_local_vector(i32 %arg) {
+entry:
+  br label %block
+block:
+  %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0
+  call void @ForceXmmSpills()
+  ret <4 x i32> %vec.local
+; CHECK-LABEL: align_local_vector:
+; CHECK: movups xmm0, xmmword ptr [esp]
+; CHECK-NEXT: add esp, 28
+; CHECK-NEXT: ret
+}
+declare void @ForceXmmSpills()
+define <4 x i32> @align_global_vector_ebp_based(i32 %arg) {
+entry:
+  %alloc = alloca i8, i32 1, align 1
+  %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0
+  br label %block
+block:
+  call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
+  ret <4 x i32> %vec.global
+; CHECK-LABEL: align_global_vector_ebp_based:
+; CHECK: movups xmm0, xmmword ptr [ebp-24]
+; CHECK-NEXT: mov esp, ebp
+; CHECK-NEXT: pop ebp
+; CHECK: ret
+}
+define <4 x i32> @align_local_vector_ebp_based(i32 %arg) {
+entry:
+  %alloc = alloca i8, i32 1, align 1
+  %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0
+  call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
+  ret <4 x i32> %vec.local
+; CHECK-LABEL: align_local_vector_ebp_based:
+; CHECK: movups xmm0, xmmword ptr [ebp-24]
+; CHECK-NEXT: mov esp, ebp
+; CHECK-NEXT: pop ebp
+; CHECK: ret
+}
+define <4 x i32> @align_local_vector_and_global_float(i32 %arg) {
+entry:
+  %float.global = sitofp i32 %arg to float
+  call void @ForceXmmSpillsAndUseFloat(float %float.global)
+  br label %block
+block:
+  %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0
+  call void @ForceXmmSpillsAndUseFloat(float %float.global)
+  ret <4 x i32> %vec.local
+; CHECK-LABEL: align_local_vector_and_global_float:
+; CHECK: cvtsi2ss xmm0, eax
+; CHECK-NEXT: movss dword ptr [esp+28], xmm0
+; CHECK: movups xmm0, xmmword ptr [esp]
+; CHECK-NEXT: add esp, 44
+; CHECK-NEXT: ret
+}
+declare void @ForceXmmSpillsAndUseAlloca(i8*)
+declare void @ForceXmmSpillsAndUseFloat(float)
+; ERRORS-NOT: ICE translation error