Commit a551dfce by Nicolas Capens Committed by Nicolas Capens

Always align the stack to the fixed alloca requirements.

Local variables that use a fixed alloca stack slot are assigned offsets starting at 0, before the prolog is written. Therefore the stack pointer needs to be aligned to the alloca's maximum alignment requirement. This required the following changes:
- Add FixedAllocaSizeBytes to SpillAreaSizeBytes before aligning it.
- Compute the maximum alignment requirement from FixedAllocaAlignBytes and SpillAreaAlignmentBytes, and prior NeedsStackAlignment uses.
- Always align the stack pointer to this maximum.
- Affected lit tests have been rebased.

Note that in some cases the frame size is now bigger than necessary. This is due to FixedAllocaSizeBytes being padded to be a multiple of the alignment. This isn't strictly necessary since the spill areas take care of their own alignment.

BUG=swiftshader:29
Change-Id: Ief30acda91c958d072528b8b59c2e933f68adbb1
Reviewed-on: https://chromium-review.googlesource.com/419816
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent 298d14e2
...@@ -1061,7 +1061,7 @@ protected: ...@@ -1061,7 +1061,7 @@ protected:
InstructionSetEnum InstructionSet = Traits::InstructionSet::Begin; InstructionSetEnum InstructionSet = Traits::InstructionSet::Begin;
bool IsEbpBasedFrame = false; bool IsEbpBasedFrame = false;
bool NeedsStackAlignment = false; size_t RequiredStackAlignment = sizeof(Traits::WordType);
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0; size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0; size_t FixedAllocaAlignBytes = 0;
......
...@@ -1124,40 +1124,47 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) { ...@@ -1124,40 +1124,47 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
const Type ReturnType = Func->getReturnType(); const Type ReturnType = Func->getReturnType();
if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) { if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
if (isScalarFloatingType(ReturnType)) { if (isScalarFloatingType(ReturnType)) {
// Avoid misaligned double-precicion load/store. // Avoid misaligned double-precision load/store.
NeedsStackAlignment = true; RequiredStackAlignment = std::max<size_t>(
RequiredStackAlignment, Traits::X86_STACK_ALIGNMENT_BYTES);
SpillAreaSizeBytes = SpillAreaSizeBytes =
std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
} }
} }
// Align esp if necessary. RequiredStackAlignment =
if (NeedsStackAlignment) { std::max<size_t>(RequiredStackAlignment, SpillAreaAlignmentBytes);
uint32_t StackOffset =
Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; if (PrologEmitsFixedAllocas) {
uint32_t StackSize = RequiredStackAlignment =
Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); std::max(RequiredStackAlignment, FixedAllocaAlignBytes);
StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
SpillAreaSizeBytes = StackSize - StackOffset;
} else {
SpillAreaSizeBytes += maxOutArgsSizeBytes();
} }
// Combine fixed allocations into SpillAreaSizeBytes if we are emitting the // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
// fixed allocations in the prolog. // fixed allocations in the prolog.
if (PrologEmitsFixedAllocas) if (PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes; SpillAreaSizeBytes += FixedAllocaSizeBytes;
// Entering the function has made the stack pointer unaligned. Re-align it by
// adjusting the stack size.
uint32_t StackOffset = Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t StackSize = Utils::applyAlignment(StackOffset + SpillAreaSizeBytes,
RequiredStackAlignment);
StackSize = Utils::applyAlignment(StackSize + maxOutArgsSizeBytes(),
RequiredStackAlignment);
SpillAreaSizeBytes = StackSize - StackOffset;
if (SpillAreaSizeBytes) { if (SpillAreaSizeBytes) {
// Generate "sub stackptr, SpillAreaSizeBytes" // Generate "sub stackptr, SpillAreaSizeBytes"
_sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes)); _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
// If the fixed allocas are aligned more than the stack frame, align the }
// stack pointer accordingly.
if (PrologEmitsFixedAllocas && // If the required alignment is greater than the stack pointer's guaranteed
FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { // alignment, align the stack pointer accordingly.
assert(IsEbpBasedFrame); if (RequiredStackAlignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
_and(getPhysicalRegister(getStackReg(), Traits::WordType), assert(IsEbpBasedFrame);
Ctx->getConstantInt32(-FixedAllocaAlignBytes)); _and(getPhysicalRegister(getStackReg(), Traits::WordType),
} Ctx->getConstantInt32(-RequiredStackAlignment));
} }
// Account for known-frame-offset alloca instructions that were not already // Account for known-frame-offset alloca instructions that were not already
...@@ -1449,7 +1456,8 @@ void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) { ...@@ -1449,7 +1456,8 @@ void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) {
// alloca. All the alloca code ensures that the stack alignment is preserved // alloca. All the alloca code ensures that the stack alignment is preserved
// after the alloca. The stack alignment restriction can be relaxed in some // after the alloca. The stack alignment restriction can be relaxed in some
// cases. // cases.
NeedsStackAlignment = true; RequiredStackAlignment = std::max<size_t>(RequiredStackAlignment,
Traits::X86_STACK_ALIGNMENT_BYTES);
// For default align=0, set it to the real value 1, to avoid any // For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below. // bit-manipulation problems below.
...@@ -2603,7 +2611,8 @@ void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) { ...@@ -2603,7 +2611,8 @@ void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
// * Stack arguments of vector type are aligned to start at the next highest // * Stack arguments of vector type are aligned to start at the next highest
// multiple of 16 bytes. Other stack arguments are aligned to the next word // multiple of 16 bytes. Other stack arguments are aligned to the next word
// size boundary (4 or 8 bytes, respectively). // size boundary (4 or 8 bytes, respectively).
NeedsStackAlignment = true; RequiredStackAlignment = std::max<size_t>(RequiredStackAlignment,
Traits::X86_STACK_ALIGNMENT_BYTES);
using OperandList = using OperandList =
llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
......
...@@ -13,7 +13,7 @@ define internal i32 @func(i32 %arg1, i32 %arg2) { ...@@ -13,7 +13,7 @@ define internal i32 @func(i32 %arg1, i32 %arg2) {
} }
; CHECK: func ; CHECK: func
; CHECK-NEXT: sub esp,0xa0 ; CHECK-NEXT: sub esp,0xac
; CHECK-NEXT: lea eax,[esp] ; CHECK-NEXT: lea eax,[esp]
; CHECK-NEXT: shr eax,0x3 ; CHECK-NEXT: shr eax,0x3
; CHECK-NEXT: mov DWORD PTR [eax+0x20000000],0xffffffff ; CHECK-NEXT: mov DWORD PTR [eax+0x20000000],0xffffffff
...@@ -27,5 +27,5 @@ define internal i32 @func(i32 %arg1, i32 %arg2) { ...@@ -27,5 +27,5 @@ define internal i32 @func(i32 %arg1, i32 %arg2) {
; CHECK-NEXT: mov DWORD PTR [eax+0x2000000c],0x0 ; CHECK-NEXT: mov DWORD PTR [eax+0x2000000c],0x0
; CHECK-NEXT: mov DWORD PTR [eax+0x20000010],0x0 ; CHECK-NEXT: mov DWORD PTR [eax+0x20000010],0x0
; CHECK-NEXT: mov eax,0x2a ; CHECK-NEXT: mov eax,0x2a
; CHECK-NEXT: add esp,0xa0 ; CHECK-NEXT: add esp,0xac
; CHECK-NEXT: ret ; CHECK-NEXT: ret
...@@ -80,7 +80,7 @@ entry: ...@@ -80,7 +80,7 @@ entry:
; CHECK-LABEL: fixed_416_align_32 ; CHECK-LABEL: fixed_416_align_32
; CHECK: push ebp ; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp ; CHECK-NEXT: mov ebp,esp
; CHECK: sub esp,0x1b8 ; CHECK: sub esp,0x1d8
; CHECK: and esp,0xffffffe0 ; CHECK: and esp,0xffffffe0
; CHECK: lea eax,[esp+0x10] ; CHECK: lea eax,[esp+0x10]
; CHECK: mov DWORD PTR [esp],eax ; CHECK: mov DWORD PTR [esp],eax
...@@ -145,7 +145,7 @@ entry: ...@@ -145,7 +145,7 @@ entry:
; CHECK-LABEL: fixed_351_align_32 ; CHECK-LABEL: fixed_351_align_32
; CHECK: push ebp ; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp ; CHECK-NEXT: mov ebp,esp
; CHECK: sub esp,0x178 ; CHECK: sub esp,0x198
; CHECK: and esp,0xffffffe0 ; CHECK: and esp,0xffffffe0
; CHECK: lea eax,[esp+0x10] ; CHECK: lea eax,[esp+0x10]
; CHECK: mov DWORD PTR [esp],eax ; CHECK: mov DWORD PTR [esp],eax
......
...@@ -25,12 +25,12 @@ entry: ...@@ -25,12 +25,12 @@ entry:
ret void ret void
} }
; CHECK-LABEL: fused_small_align ; CHECK-LABEL: fused_small_align
; CHECK-NEXT: sub esp,0x30 ; CHECK-NEXT: sub esp,0x3c
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x34] ; CHECK-NEXT: mov eax,DWORD PTR [esp+0x40]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax ; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: add esp,0x30 ; CHECK-NEXT: add esp,0x3c
; MIPS32-LABEL: fused_small_align ; MIPS32-LABEL: fused_small_align
; MIPS32: addiu sp,sp,{{.*}} ; MIPS32: addiu sp,sp,{{.*}}
; MIPS32: move v0,a0 ; MIPS32: move v0,a0
...@@ -57,7 +57,7 @@ entry: ...@@ -57,7 +57,7 @@ entry:
; CHECK-LABEL: fused_large_align ; CHECK-LABEL: fused_large_align
; CHECK-NEXT: push ebp ; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp ; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x80 ; CHECK-NEXT: sub esp,0xb8
; CHECK-NEXT: and esp,0xffffffc0 ; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8] ; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax ; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
...@@ -102,13 +102,13 @@ block2: ...@@ -102,13 +102,13 @@ block2:
br label %block1 br label %block1
} }
; CHECK-LABEL: fused_derived ; CHECK-LABEL: fused_derived
; CHECK-NEXT: sub esp,0x180 ; CHECK-NEXT: sub esp,0x18c
; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x184] ; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x190]
; CHECK-NEXT: jmp ; CHECK-NEXT: jmp
; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]] ; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]]
; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]] ; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]]
; CHECK-NEXT: lea eax,[esp+0x81] ; CHECK-NEXT: lea eax,[esp+0x81]
; CHECK-NEXT: add esp,0x180 ; CHECK-NEXT: add esp,0x18c
; CHECK-NEXT: ret ; CHECK-NEXT: ret
; MIPS32-LABEL: fused_derived ; MIPS32-LABEL: fused_derived
; MIPS32: addiu sp,sp,{{.*}} ; MIPS32: addiu sp,sp,{{.*}}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment