Commit 4e679e51 by Nicolas Capens Committed by Nicolas Capens

Support 16-byte aligned stack on 32-bit Windows.

On Windows x86-32, the ABI only guarantees the stack to be 4-byte aligned. We therefore need the stack pointer to be explicitly aligned when using vectors. This demands using a frame pointer (to access function arguments). Also, spilled variables are now accessed relative to the stack pointer instead of the frame pointer, so that they are aligned as well. This change does not affect PNaCl. Projects using the Microsoft ABI should define SUBZERO_USE_MICROSOFT_ABI.

BUG=swiftshader:29
Change-Id: I186ce9435244d6fa9494ec514a91122b6be130b3
Reviewed-on: https://chromium-review.googlesource.com/427348
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent a551dfce
......@@ -1006,6 +1006,13 @@ void Cfg::processAllocas(bool SortAndCombine) {
assert(EntryNode);
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(StackAlignment));
// If the ABI's stack alignment is smaller than the vector size (16 bytes),
// conservatively use a frame pointer to allow for explicit alignment of the
// stack pointer. This needs to happen before register allocation so the frame
// pointer can be reserved.
if (getTarget()->needsStackPointerAlignment()) {
getTarget()->setHasFramePointer();
}
// Determine if there are large alignment allocations in the entry block or
// dynamic allocations (variable size in the entry block).
bool HasLargeAlignment = false;
......@@ -1083,7 +1090,7 @@ void Cfg::processAllocas(bool SortAndCombine) {
// Add instructions to the head of the entry block in reverse order.
InstList &Insts = getEntryNode()->getInsts();
if (HasDynamicAllocation && HasLargeAlignment) {
// We are using a frame pointer, but fixed large-alignment alloca addresses,
// We are using a frame pointer, but fixed large-alignment alloca addresses
// do not have a known offset from either the stack or frame pointer.
// They grow up from a user pointer from an alloca.
sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
......
......@@ -211,7 +211,7 @@
// sboxres, isGPR, is64, is32, is16, is8, isXmm, is64To8, is32To8,
// is16To8, isTrunc8Rcvr, isAhRcvr, aliases)
#if defined(_WIN32) && defined(SUBZERO_USE_MICROSOFT_ABI) // Microsoft x86-64 ABI
#if defined(SUBZERO_USE_MICROSOFT_ABI) // Microsoft x86-64 ABI
#define REGX8664_BYTEREG_TABLE REGX8664_BYTEREG_TABLE2(0, 1)
#define REGX8664_GPR_TABLE REGX8664_GPR_TABLE2(0, 1)
#else // System V AMD64 ABI
......
......@@ -250,6 +250,7 @@ public:
virtual RegNumT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
virtual uint32_t getStackAlignment() const = 0;
/// Reports whether this target requires the stack pointer to be explicitly
/// aligned. The default is false; a target overrides this to opt in.
/// Cfg::processAllocas() forces the use of a frame pointer when this
/// returns true.
virtual bool needsStackPointerAlignment() const { return false; }
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0;
virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
......
......@@ -1391,7 +1391,7 @@ void TargetMIPS32::addProlog(CfgNode *Node) {
// +------------------------+
// | 8. padding |
// +------------------------+
// | 9. out args |
// | 9. out args |
// +------------------------+ <--- StackPointer
//
// The following variables record the size in bytes of the given areas:
......
......@@ -116,7 +116,12 @@ const TargetX8632Traits::TableTypeX8632AttributesType
const size_t TargetX8632Traits::TableTypeX8632AttributesSize =
llvm::array_lengthof(TableTypeX8632Attributes);
#if defined(SUBZERO_USE_MICROSOFT_ABI)
// Under the Microsoft ABI, 32-bit Windows only guarantees a 4-byte aligned
// stack, which is smaller than the 16-byte vector size.
const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 4;
#else
// Without the Microsoft ABI, the stack alignment is taken to be 16 bytes.
const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 16;
#endif
const char *TargetX8632Traits::TargetName = "X8632";
template <>
......
......@@ -702,7 +702,7 @@ public:
/// Returns the register number of rdx (RegisterSet::Reg_rdx).
static RegNumT getRdxOrDie() { return RegisterSet::Reg_rdx; }
#if defined(_WIN32) && defined(SUBZERO_USE_MICROSOFT_ABI)
#if defined(SUBZERO_USE_MICROSOFT_ABI)
// Microsoft x86-64 calling convention:
//
// * The first four arguments of vector/fp type, regardless of their
......
......@@ -153,6 +153,10 @@ public:
/// Returns the register the target traits designate as the stack pointer.
RegNumT getStackReg() const override { return Traits::StackPtr; }
/// Returns the register the target traits designate as the frame pointer.
RegNumT getFrameReg() const override { return Traits::FramePtr; }
RegNumT getFrameOrStackReg() const override {
  // The frame pointer is only usable as the base register when the stack
  // pointer is not being realigned (realignment leaves the frame pointer
  // unaligned) and the frame is ebp-based. In every other case the stack
  // pointer is the base register.
  const bool UseFramePointer =
      !needsStackPointerAlignment() && IsEbpBasedFrame;
  return UseFramePointer ? getFrameReg() : getStackReg();
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
......@@ -163,6 +167,11 @@ public:
/// Returns the stack alignment, in bytes, defined by the target traits
/// (Traits::X86_STACK_ALIGNMENT_BYTES).
uint32_t getStackAlignment() const override {
return Traits::X86_STACK_ALIGNMENT_BYTES;
}
bool needsStackPointerAlignment() const override {
  // Vectors are 16 bytes wide. When the ABI guarantees less stack alignment
  // than that, the stack pointer has to be realigned and is then used for
  // addressing any stack variables.
  constexpr uint32_t VectorSizeBytes = 16;
  return Traits::X86_STACK_ALIGNMENT_BYTES < VectorSizeBytes;
}
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
FixedAllocaSizeBytes = Size;
assert(llvm::isPowerOf2_32(Align));
......
......@@ -998,7 +998,7 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// +------------------------+ <--- BasePointer (if used)
// | 3. padding |
// +------------------------+
// | 4. global spill area |
......@@ -1017,14 +1017,16 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// +------------------------+ <--- StackPointer
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * LocalsSlotsPaddingBytes: area 5
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 10
// * maxOutArgsSizeBytes(): area 10
// * LocalsSpillAreaSize: area 6
// * FixedAllocaSizeBytes: areas 7 - 8
// * SpillAreaSizeBytes: areas 3 - 10
// * maxOutArgsSizeBytes(): areas 9 - 10
// Determine stack frame offsets for each Variable without a register
// assignment. This can be done as one variable per stack slot. Or, do
......@@ -1105,7 +1107,6 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals and
// locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
......@@ -1177,8 +1178,8 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// Fill in stack offsets for stack args, and copy args into registers for
// those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr =
getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
RegNumT FrameOrStackReg = IsEbpBasedFrame ? getFrameReg() : getStackReg();
Variable *FramePtr = getPhysicalRegister(FrameOrStackReg, Traits::WordType);
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
......@@ -1226,7 +1227,7 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// Fill in stack offsets for locals.
assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
IsEbpBasedFrame);
IsEbpBasedFrame && !needsStackPointerAlignment());
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (Variable *Var : VariablesLinkedToSpillSlots) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment