Commit d4799f47 by Matt Wala

Subzero: Align spill locations to natural alignment.

This requires sorting the spilled variables based on alignment and
introducing additional padding around the spill location areas.

These changes allow vector instructions to accept memory operands.

Old stack frame layout:        New stack frame layout:

+---------------------+        +---------------------+
| return address      |        | return address      |
+---------------------+        +---------------------+
| preserved registers |        | preserved registers |
+---------------------+        +---------------------+
| global spill area   |        | padding             |
+---------------------+        +---------------------+
| local spill area    |        | global spill area   |
+---------------------+        +---------------------+
| padding             |        | padding             |
+---------------------+        +---------------------+
| local variables     |        | local spill area    |
+---------------------+        +---------------------+
                               | padding             |
                               +---------------------+
                               | local variables     |
                               +---------------------+

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/465413003
parent f820da5e
...@@ -134,7 +134,7 @@ public: ...@@ -134,7 +134,7 @@ public:
virtual bool hasFramePointer() const { return false; } virtual bool hasFramePointer() const { return false; }
virtual SizeT getFrameOrStackReg() const = 0; virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) = 0; virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
bool hasComputedFrame() const { return HasComputedFrame; } bool hasComputedFrame() const { return HasComputedFrame; }
int32_t getStackAdjustment() const { return StackAdjustment; } int32_t getStackAdjustment() const { return StackAdjustment; }
void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; } void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }
......
...@@ -24,6 +24,8 @@ ...@@ -24,6 +24,8 @@
#include "IceTargetLoweringX8632.h" #include "IceTargetLoweringX8632.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include <strings.h>
namespace Ice { namespace Ice {
namespace { namespace {
...@@ -128,13 +130,23 @@ const uint32_t X86_CHAR_BIT = 8; ...@@ -128,13 +130,23 @@ const uint32_t X86_CHAR_BIT = 8;
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size of the return address on the stack // Size of the return address on the stack
const uint32_t X86_RET_IP_SIZE_BYTES = 4; const uint32_t X86_RET_IP_SIZE_BYTES = 4;
// The base 2 logarithm of the width in bytes of the smallest stack slot
const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
// The base 2 logarithm of the width in bytes of the largest stack slot
const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
// Rounds Value up to the nearest multiple of Alignment and returns the
// result. A Value that is already a multiple of Alignment is returned
// unchanged. Value and Alignment are both in bytes.
//
// Alignment must be a nonzero power of 2. The arithmetic is done in
// 32 bits, so the result wraps if Value + Alignment - 1 overflows.
uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
  // Zero passes the (x & (x - 1)) == 0 power-of-2 test, so reject it
  // explicitly; a zero alignment would otherwise silently yield 0.
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0);
  // For a power of 2, ~(Alignment - 1) clears exactly the low bits
  // that make a value misaligned.
  const uint32_t Mask = ~(Alignment - 1);
  return (Value + Alignment - 1) & Mask;
}
// Value is a size in bytes. Return Value adjusted to the next highest // Value is in bytes. Return Value adjusted to the next highest multiple
// multiple of the stack alignment. // of the stack alignment.
uint32_t applyStackAlignment(uint32_t Value) { uint32_t applyStackAlignment(uint32_t Value) {
// power of 2 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
} }
// Instruction set options // Instruction set options
...@@ -248,7 +260,7 @@ void __attribute__((unused)) xMacroIntegrityCheck() { ...@@ -248,7 +260,7 @@ void __attribute__((unused)) xMacroIntegrityCheck() {
TargetX8632::TargetX8632(Cfg *Func) TargetX8632::TargetX8632(Cfg *Func)
: TargetLowering(Func), InstructionSet(CLInstructionSet), : TargetLowering(Func), InstructionSet(CLInstructionSet),
IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
PhysicalRegisters(VarList(Reg_NUM)) { PhysicalRegisters(VarList(Reg_NUM)) {
// TODO: Don't initialize IntegerRegisters and friends every time. // TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the // Instead, initialize in some sort of static initializer for the
...@@ -520,6 +532,30 @@ void TargetX8632::lowerArguments() { ...@@ -520,6 +532,30 @@ void TargetX8632::lowerArguments() {
} }
} }
// Appends the variables of Source to Dest, ordered from the widest
// on-stack type to the narrowest. Variables whose widths fall in the
// same bucket keep their relative order from Source.
void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
  // One bucket per supported slot width, indexed by the log2 of the
  // width relative to the smallest stack slot.
  const SizeT BucketCount =
      X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
  VarList Buckets[BucketCount];

  // Distribution pass: drop each variable into the bucket for its width.
  VarList::const_iterator SrcIt = Source.begin();
  const VarList::const_iterator SrcEnd = Source.end();
  while (SrcIt != SrcEnd) {
    Variable *Var = *SrcIt;
    uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
    // ffs() returns the 1-based index of the lowest set bit, which is
    // log2 + 1 when the width is a power of 2.
    SizeT LogNaturalAlignment = ffs(NaturalAlignment) - 1;
    assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
    assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
    Buckets[LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE].push_back(
        Var);
    ++SrcIt;
  }

  // Collection pass: walk the buckets from the widest down to the
  // narrowest, appending each one to Dest.
  for (SizeT Remaining = BucketCount; Remaining > 0; --Remaining) {
    VarList &Bucket = Buckets[Remaining - 1];
    Dest.insert(Dest.end(), Bucket.begin(), Bucket.end());
  }
}
// Helper function for addProlog(). // Helper function for addProlog().
// //
// This assumes Arg is an argument passed on the stack. This sets the // This assumes Arg is an argument passed on the stack. This sets the
...@@ -563,6 +599,35 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, ...@@ -563,6 +599,35 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
Type TargetX8632::stackSlotType() { return IceType_i32; } Type TargetX8632::stackSlotType() { return IceType_i32; }
void TargetX8632::addProlog(CfgNode *Node) { void TargetX8632::addProlog(CfgNode *Node) {
// Stack frame layout:
//
// +------------------------+
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// | 3. padding |
// +------------------------+
// | 4. global spill area |
// +------------------------+
// | 5. padding |
// +------------------------+
// | 6. local spill area |
// +------------------------+
// | 7. padding |
// +------------------------+
// | 8. allocas |
// +------------------------+
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 7
// If SimpleCoalescing is false, each variable without a register // If SimpleCoalescing is false, each variable without a register
// gets its own unique stack slot, which leads to large stack // gets its own unique stack slot, which leads to large stack
// frames. If SimpleCoalescing is true, then each "global" variable // frames. If SimpleCoalescing is true, then each "global" variable
...@@ -573,7 +638,7 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -573,7 +638,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
const bool SimpleCoalescing = true; const bool SimpleCoalescing = true;
size_t InArgsSizeBytes = 0; size_t InArgsSizeBytes = 0;
size_t PreservedRegsSizeBytes = 0; size_t PreservedRegsSizeBytes = 0;
LocalsSizeBytes = 0; SpillAreaSizeBytes = 0;
Context.init(Node); Context.init(Node);
Context.setInsertPoint(Context.getCur()); Context.setInsertPoint(Context.getCur());
...@@ -595,10 +660,19 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -595,10 +660,19 @@ void TargetX8632::addProlog(CfgNode *Node) {
std::vector<size_t> LocalsSize(Func->getNumNodes()); std::vector<size_t> LocalsSize(Func->getNumNodes());
// Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
// LocalsSizeBytes. // SpillAreaSizeBytes.
RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
const VarList &Variables = Func->getVariables(); const VarList &Variables = Func->getVariables();
const VarList &Args = Func->getArgs(); const VarList &Args = Func->getArgs();
VarList SpilledVariables, SortedSpilledVariables,
VariablesLinkedToSpillSplots;
// If there is a separate locals area, this specifies the alignment
// for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to the largest natural
// alignment of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
I != E; ++I) { I != E; ++I) {
Variable *Var = *I; Variable *Var = *I;
...@@ -617,25 +691,42 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -617,25 +691,42 @@ void TargetX8632::addProlog(CfgNode *Node) {
// that stack slot. // that stack slot.
if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
if (Variable *Linked = Var->getPreferredRegister()) { if (Variable *Linked = Var->getPreferredRegister()) {
if (!Linked->hasReg()) if (!Linked->hasReg()) {
VariablesLinkedToSpillSplots.push_back(Var);
continue; continue;
}
} }
} }
SpilledVariables.push_back(Var);
}
SortedSpilledVariables.reserve(SpilledVariables.size());
sortByAlignment(SortedSpilledVariables, SpilledVariables);
for (VarList::const_iterator I = SortedSpilledVariables.begin(),
E = SortedSpilledVariables.end();
I != E; ++I) {
Variable *Var = *I;
size_t Increment = typeWidthInBytesOnStack(Var->getType()); size_t Increment = typeWidthInBytesOnStack(Var->getType());
if (!SpillAreaAlignmentBytes)
SpillAreaAlignmentBytes = Increment;
if (SimpleCoalescing) { if (SimpleCoalescing) {
if (Var->isMultiblockLife()) { if (Var->isMultiblockLife()) {
GlobalsSize += Increment; GlobalsSize += Increment;
} else { } else {
SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
LocalsSize[NodeIndex] += Increment; LocalsSize[NodeIndex] += Increment;
if (LocalsSize[NodeIndex] > LocalsSizeBytes) if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
LocalsSizeBytes = LocalsSize[NodeIndex]; SpillAreaSizeBytes = LocalsSize[NodeIndex];
if (!LocalsSlotsAlignmentBytes)
LocalsSlotsAlignmentBytes = Increment;
} }
} else { } else {
LocalsSizeBytes += Increment; SpillAreaSizeBytes += Increment;
} }
} }
LocalsSizeBytes += GlobalsSize; uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers. // Add push instructions for preserved registers.
for (SizeT i = 0; i < CalleeSaves.size(); ++i) { for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
...@@ -658,17 +749,40 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -658,17 +749,40 @@ void TargetX8632::addProlog(CfgNode *Node) {
_mov(ebp, esp); _mov(ebp, esp);
} }
// Align the variables area. SpillAreaPaddingBytes is the size of
// the region after the preserved registers and before the spill
// areas.
uint32_t SpillAreaPaddingBytes = 0;
if (SpillAreaAlignmentBytes) {
assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t SpillAreaStart =
applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
SpillAreaSizeBytes += SpillAreaPaddingBytes;
}
// If there are separate globals and locals areas, make sure the
// locals area is aligned by padding the end of the globals area.
uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
if (LocalsSlotsAlignmentBytes) {
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
GlobalsAndSubsequentPaddingSize =
applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
}
// Align esp if necessary.
if (NeedsStackAlignment) { if (NeedsStackAlignment) {
uint32_t StackSize = applyStackAlignment( uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
LocalsSizeBytes = SpillAreaSizeBytes = StackSize - StackOffset;
StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
} }
// Generate "sub esp, LocalsSizeBytes" // Generate "sub esp, SpillAreaSizeBytes"
if (LocalsSizeBytes) if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Reg_esp), _sub(getPhysicalRegister(Reg_esp),
Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes));
resetStackAdjustment(); resetStackAdjustment();
...@@ -678,7 +792,7 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -678,7 +792,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame) if (!IsEbpBasedFrame)
BasicFrameOffset += LocalsSizeBytes; BasicFrameOffset += SpillAreaSizeBytes;
unsigned NumXmmArgs = 0; unsigned NumXmmArgs = 0;
for (SizeT i = 0; i < Args.size(); ++i) { for (SizeT i = 0; i < Args.size(); ++i) {
...@@ -692,40 +806,24 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -692,40 +806,24 @@ void TargetX8632::addProlog(CfgNode *Node) {
} }
// Fill in stack offsets for locals. // Fill in stack offsets for locals.
size_t TotalGlobalsSize = GlobalsSize; size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
GlobalsSize = 0;
LocalsSize.assign(LocalsSize.size(), 0); LocalsSize.assign(LocalsSize.size(), 0);
size_t NextStackOffset = 0; size_t NextStackOffset = GlobalsSpaceUsed;
for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); for (VarList::const_iterator I = SortedSpilledVariables.begin(),
E = SortedSpilledVariables.end();
I != E; ++I) { I != E; ++I) {
Variable *Var = *I; Variable *Var = *I;
if (Var->hasReg()) {
RegsUsed[Var->getRegNum()] = true;
continue;
}
if (Var->getIsArg())
continue;
if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
continue;
if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
if (Variable *Linked = Var->getPreferredRegister()) {
if (!Linked->hasReg()) {
// TODO: Make sure Linked has already been assigned a stack
// slot.
Var->setStackOffset(Linked->getStackOffset());
continue;
}
}
}
size_t Increment = typeWidthInBytesOnStack(Var->getType()); size_t Increment = typeWidthInBytesOnStack(Var->getType());
if (SimpleCoalescing) { if (SimpleCoalescing) {
if (Var->isMultiblockLife()) { if (Var->isMultiblockLife()) {
GlobalsSize += Increment; GlobalsSpaceUsed += Increment;
NextStackOffset = GlobalsSize; NextStackOffset = GlobalsSpaceUsed;
} else { } else {
SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
LocalsSize[NodeIndex] += Increment; LocalsSize[NodeIndex] += Increment;
NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; NextStackOffset = SpillAreaPaddingBytes +
GlobalsAndSubsequentPaddingSize +
LocalsSize[NodeIndex];
} }
} else { } else {
NextStackOffset += Increment; NextStackOffset += Increment;
...@@ -733,18 +831,45 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -733,18 +831,45 @@ void TargetX8632::addProlog(CfgNode *Node) {
if (IsEbpBasedFrame) if (IsEbpBasedFrame)
Var->setStackOffset(-NextStackOffset); Var->setStackOffset(-NextStackOffset);
else else
Var->setStackOffset(LocalsSizeBytes - NextStackOffset); Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
} }
this->FrameSizeLocals = NextStackOffset; this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
this->HasComputedFrame = true; this->HasComputedFrame = true;
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(),
E = VariablesLinkedToSpillSplots.end();
I != E; ++I) {
Variable *Var = *I;
Variable *Linked = Var->getPreferredRegister();
Var->setStackOffset(Linked->getStackOffset());
}
if (Func->getContext()->isVerbose(IceV_Frame)) { if (Func->getContext()->isVerbose(IceV_Frame)) {
Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes Ostream &Str = Func->getContext()->getStrDump();
<< "\n"
<< "InArgsSizeBytes=" << InArgsSizeBytes Str << "Stack layout:\n";
<< "\n" uint32_t EspAdjustmentPaddingSize =
<< "PreservedRegsSizeBytes=" SpillAreaSizeBytes - LocalsSpillAreaSize -
<< PreservedRegsSizeBytes << "\n"; GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
<< " globals-locals spill areas intermediate padding = "
<< GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
<< " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
<< " esp alignment padding = " << EspAdjustmentPaddingSize
<< " bytes\n";
Str << "Stack details:\n"
<< " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n"
<< " is ebp based = " << IsEbpBasedFrame << "\n";
} }
} }
...@@ -771,9 +896,9 @@ void TargetX8632::addEpilog(CfgNode *Node) { ...@@ -771,9 +896,9 @@ void TargetX8632::addEpilog(CfgNode *Node) {
_mov(esp, ebp); _mov(esp, ebp);
_pop(ebp); _pop(ebp);
} else { } else {
// add esp, LocalsSizeBytes // add esp, SpillAreaSizeBytes
if (LocalsSizeBytes) if (SpillAreaSizeBytes)
_add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); _add(esp, Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes));
} }
// Add pop instructions for preserved registers. // Add pop instructions for preserved registers.
...@@ -991,8 +1116,7 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) { ...@@ -991,8 +1116,7 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
if (ConstantInteger *ConstantTotalSize = if (ConstantInteger *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger>(TotalSize)) { llvm::dyn_cast<ConstantInteger>(TotalSize)) {
uint32_t Value = ConstantTotalSize->getValue(); uint32_t Value = ConstantTotalSize->getValue();
// Round Value up to the next highest multiple of the alignment. Value = applyAlignment(Value, Alignment);
Value = (Value + Alignment - 1) & -Alignment;
_sub(esp, Ctx->getConstantInt(IceType_i32, Value)); _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
} else { } else {
// Non-constant sizes need to be adjusted to the next highest // Non-constant sizes need to be adjusted to the next highest
...@@ -1239,12 +1363,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1239,12 +1363,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
} else if (isVectorType(Dest->getType())) { } else if (isVectorType(Dest->getType())) {
// TODO: Trap on integer divide and integer modulo by zero. // TODO: Trap on integer divide and integer modulo by zero.
// See: https://code.google.com/p/nativeclient/issues/detail?id=3899 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
//
// TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
// registers. This is a workaround of the fact that there is no
// support for aligning stack operands. Once there is support,
// remove LEGAL_HACK.
#define LEGAL_HACK(s) legalizeToVar((s))
switch (Inst->getOp()) { switch (Inst->getOp()) {
case InstArithmetic::_num: case InstArithmetic::_num:
llvm_unreachable("Unknown arithmetic operator"); llvm_unreachable("Unknown arithmetic operator");
...@@ -1252,31 +1370,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1252,31 +1370,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Add: { case InstArithmetic::Add: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_padd(T, LEGAL_HACK(Src1)); _padd(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::And: { case InstArithmetic::And: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_pand(T, LEGAL_HACK(Src1)); _pand(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Or: { case InstArithmetic::Or: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_por(T, LEGAL_HACK(Src1)); _por(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Xor: { case InstArithmetic::Xor: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_pxor(T, LEGAL_HACK(Src1)); _pxor(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Sub: { case InstArithmetic::Sub: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_psub(T, LEGAL_HACK(Src1)); _psub(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Mul: { case InstArithmetic::Mul: {
...@@ -1287,7 +1405,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1287,7 +1405,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_pmull(T, LEGAL_HACK(Src1)); _pmull(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} else if (Dest->getType() == IceType_v4i32) { } else if (Dest->getType() == IceType_v4i32) {
// Lowering sequence: // Lowering sequence:
...@@ -1320,14 +1438,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1320,14 +1438,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
Variable *T3 = makeReg(IceType_v4i32); Variable *T3 = makeReg(IceType_v4i32);
Variable *T4 = makeReg(IceType_v4i32); Variable *T4 = makeReg(IceType_v4i32);
_movp(T1, Src0); _movp(T1, Src0);
// TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R _pshufd(T2, Src0, Mask1030);
// with Src1 after stack operand alignment support is _pshufd(T3, Src1, Mask1030);
// implemented. _pmuludq(T1, Src1);
Variable *Src0R = LEGAL_HACK(Src0);
Variable *Src1R = LEGAL_HACK(Src1);
_pshufd(T2, Src0R, Mask1030);
_pshufd(T3, Src1R, Mask1030);
_pmuludq(T1, Src1R);
_pmuludq(T2, T3); _pmuludq(T2, T3);
_shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
_pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
...@@ -1349,32 +1462,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1349,32 +1462,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Fadd: { case InstArithmetic::Fadd: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_addps(T, LEGAL_HACK(Src1)); _addps(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Fsub: { case InstArithmetic::Fsub: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_subps(T, LEGAL_HACK(Src1)); _subps(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Fmul: { case InstArithmetic::Fmul: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_mulps(T, LEGAL_HACK(Src1)); _mulps(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Fdiv: { case InstArithmetic::Fdiv: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_divps(T, LEGAL_HACK(Src1)); _divps(T, Src1);
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Frem: case InstArithmetic::Frem:
scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
break; break;
} }
#undef LEGAL_HACK
} else { // Dest->getType() is non-i64 scalar } else { // Dest->getType() is non-i64 scalar
Variable *T_edx = NULL; Variable *T_edx = NULL;
Variable *T = NULL; Variable *T = NULL;
...@@ -2199,22 +2311,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { ...@@ -2199,22 +2311,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
_pextr(ExtractedElementR, SourceVectR, Mask); _pextr(ExtractedElementR, SourceVectR, Mask);
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use pshufd and movd/movss. // Use pshufd and movd/movss.
//
// ALIGNHACK: Force vector operands to registers in instructions
// that require aligned memory operands until support for data
// alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Variable *T = NULL; Variable *T = NULL;
if (Index) { if (Index) {
// The shuffle only needs to occur if the element to be extracted // The shuffle only needs to occur if the element to be extracted
// is not at the lowest index. // is not at the lowest index.
Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
T = makeReg(Ty); T = makeReg(Ty);
_pshufd(T, ALIGN_HACK(SourceVectRM), Mask); _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
} else { } else {
T = ALIGN_HACK(SourceVectRM); T = legalizeToVar(SourceVectNotLegalized);
} }
if (InVectorElementTy == IceType_i32) { if (InVectorElementTy == IceType_i32) {
...@@ -2228,7 +2333,6 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { ...@@ -2228,7 +2333,6 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
Context.insert(InstFakeDef::create(Func, ExtractedElementR)); Context.insert(InstFakeDef::create(Func, ExtractedElementR));
_movss(ExtractedElementR, T); _movss(ExtractedElementR, T);
} }
#undef ALIGN_HACK
} else { } else {
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
// Spill the value to a stack slot and do the extraction in memory. // Spill the value to a stack slot and do the extraction in memory.
...@@ -2287,23 +2391,18 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { ...@@ -2287,23 +2391,18 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// ALIGNHACK: Without support for data alignment, both operands to
// cmpps need to be forced into registers. Once support for data
// alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
switch (Condition) { switch (Condition) {
default: { default: {
InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
T = makeReg(Src0RM->getType()); T = makeReg(Src0RM->getType());
_movp(T, Src0RM); _movp(T, Src0RM);
_cmpps(T, LEGAL_HACK(Src1RM), Predicate); _cmpps(T, Src1RM, Predicate);
} break; } break;
case InstFcmp::One: { case InstFcmp::One: {
// Check both unequal and ordered. // Check both unequal and ordered.
T = makeReg(Src0RM->getType()); T = makeReg(Src0RM->getType());
Variable *T2 = makeReg(Src0RM->getType()); Variable *T2 = makeReg(Src0RM->getType());
Src1RM = LEGAL_HACK(Src1RM);
_movp(T, Src0RM); _movp(T, Src0RM);
_cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
_movp(T2, Src0RM); _movp(T2, Src0RM);
...@@ -2314,7 +2413,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { ...@@ -2314,7 +2413,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
// Check both equal or unordered. // Check both equal or unordered.
T = makeReg(Src0RM->getType()); T = makeReg(Src0RM->getType());
Variable *T2 = makeReg(Src0RM->getType()); Variable *T2 = makeReg(Src0RM->getType());
Src1RM = LEGAL_HACK(Src1RM);
_movp(T, Src0RM); _movp(T, Src0RM);
_cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
_movp(T2, Src0RM); _movp(T2, Src0RM);
...@@ -2322,7 +2420,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { ...@@ -2322,7 +2420,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
_por(T, T2); _por(T, T2);
} break; } break;
} }
#undef LEGAL_HACK
} }
_movp(Dest, T); _movp(Dest, T);
...@@ -2427,10 +2524,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2427,10 +2524,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
Src1RM = T1; Src1RM = T1;
} }
// TODO: ALIGNHACK: Both operands to compare instructions need to be
// in registers until data alignment support is implemented. Once
// there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
Variable *T = makeReg(Ty); Variable *T = makeReg(Ty);
switch (Condition) { switch (Condition) {
default: default:
...@@ -2438,42 +2531,41 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2438,42 +2531,41 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
break; break;
case InstIcmp::Eq: { case InstIcmp::Eq: {
_movp(T, Src0RM); _movp(T, Src0RM);
_pcmpeq(T, LEGAL_HACK(Src1RM)); _pcmpeq(T, Src1RM);
} break; } break;
case InstIcmp::Ne: { case InstIcmp::Ne: {
_movp(T, Src0RM); _movp(T, Src0RM);
_pcmpeq(T, LEGAL_HACK(Src1RM)); _pcmpeq(T, Src1RM);
Variable *MinusOne = makeVectorOfMinusOnes(Ty); Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_pxor(T, MinusOne); _pxor(T, MinusOne);
} break; } break;
case InstIcmp::Ugt: case InstIcmp::Ugt:
case InstIcmp::Sgt: { case InstIcmp::Sgt: {
_movp(T, Src0RM); _movp(T, Src0RM);
_pcmpgt(T, LEGAL_HACK(Src1RM)); _pcmpgt(T, Src1RM);
} break; } break;
case InstIcmp::Uge: case InstIcmp::Uge:
case InstIcmp::Sge: { case InstIcmp::Sge: {
// !(Src1RM > Src0RM) // !(Src1RM > Src0RM)
_movp(T, Src1RM); _movp(T, Src1RM);
_pcmpgt(T, LEGAL_HACK(Src0RM)); _pcmpgt(T, Src0RM);
Variable *MinusOne = makeVectorOfMinusOnes(Ty); Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_pxor(T, MinusOne); _pxor(T, MinusOne);
} break; } break;
case InstIcmp::Ult: case InstIcmp::Ult:
case InstIcmp::Slt: { case InstIcmp::Slt: {
_movp(T, Src1RM); _movp(T, Src1RM);
_pcmpgt(T, LEGAL_HACK(Src0RM)); _pcmpgt(T, Src0RM);
} break; } break;
case InstIcmp::Ule: case InstIcmp::Ule:
case InstIcmp::Sle: { case InstIcmp::Sle: {
// !(Src0RM > Src1RM) // !(Src0RM > Src1RM)
_movp(T, Src0RM); _movp(T, Src0RM);
_pcmpgt(T, LEGAL_HACK(Src1RM)); _pcmpgt(T, Src1RM);
Variable *MinusOne = makeVectorOfMinusOnes(Ty); Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_pxor(T, MinusOne); _pxor(T, MinusOne);
} break; } break;
} }
#undef LEGAL_HACK
_movp(Dest, T); _movp(Dest, T);
eliminateNextVectorSextInstruction(Dest); eliminateNextVectorSextInstruction(Dest);
...@@ -2649,12 +2741,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2649,12 +2741,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
// ALIGNHACK: Force vector operands to registers in instructions
// that require aligned memory operands until support for data
// alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
if (Index == 1) { if (Index == 1) {
SourceVectRM = ALIGN_HACK(SourceVectRM);
_shufps(ElementR, SourceVectRM, Mask1Constant); _shufps(ElementR, SourceVectRM, Mask1Constant);
_shufps(ElementR, SourceVectRM, Mask2Constant); _shufps(ElementR, SourceVectRM, Mask2Constant);
_movp(Inst->getDest(), ElementR); _movp(Inst->getDest(), ElementR);
...@@ -2665,7 +2752,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2665,7 +2752,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
_shufps(T, ElementR, Mask2Constant); _shufps(T, ElementR, Mask2Constant);
_movp(Inst->getDest(), T); _movp(Inst->getDest(), T);
} }
#undef ALIGN_HACK
} else { } else {
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
// Spill the value to a stack slot and perform the insertion in // Spill the value to a stack slot and perform the insertion in
...@@ -3627,10 +3713,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3627,10 +3713,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *T = makeReg(SrcTy); Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
// ALIGNHACK: Until data alignment support is implemented, vector
// instructions need to have vector operands in registers. Once
// there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
if (InstructionSet >= SSE4_1) { if (InstructionSet >= SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps // TODO(wala): If the condition operand is a constant, use blendps
// or pblendw. // or pblendw.
...@@ -3643,7 +3725,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3643,7 +3725,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
_movp(xmm0, ConditionRM); _movp(xmm0, ConditionRM);
_psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
_movp(T, SrcFRM); _movp(T, SrcFRM);
_blendvps(T, LEGAL_HACK(SrcTRM), xmm0); _blendvps(T, SrcTRM, xmm0);
_movp(Dest, T); _movp(Dest, T);
} else { } else {
assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
...@@ -3652,7 +3734,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3652,7 +3734,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
_movp(T, SrcFRM); _movp(T, SrcFRM);
_pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); _pblendvb(T, SrcTRM, xmm0);
_movp(Dest, T); _movp(Dest, T);
} }
return; return;
...@@ -3676,11 +3758,10 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3676,11 +3758,10 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
_movp(T, ConditionRM); _movp(T, ConditionRM);
} }
_movp(T2, T); _movp(T2, T);
_pand(T, LEGAL_HACK(SrcTRM)); _pand(T, SrcTRM);
_pandn(T2, LEGAL_HACK(SrcFRM)); _pandn(T2, SrcFRM);
_por(T, T2); _por(T, T2);
_movp(Dest, T); _movp(Dest, T);
#undef LEGAL_HACK
return; return;
} }
......
...@@ -40,7 +40,7 @@ public: ...@@ -40,7 +40,7 @@ public:
virtual SizeT getFrameOrStackReg() const { virtual SizeT getFrameOrStackReg() const {
return IsEbpBasedFrame ? Reg_ebp : Reg_esp; return IsEbpBasedFrame ? Reg_ebp : Reg_esp;
} }
virtual size_t typeWidthInBytesOnStack(Type Ty) { virtual size_t typeWidthInBytesOnStack(Type Ty) const {
// Round up to the next multiple of 4 bytes. In particular, i1, // Round up to the next multiple of 4 bytes. In particular, i1,
// i8, and i16 are rounded up to 4 bytes. // i8, and i16 are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3; return (typeWidthInBytes(Ty) + 3) & ~3;
...@@ -125,6 +125,8 @@ protected: ...@@ -125,6 +125,8 @@ protected:
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1); Operand *Src0, Operand *Src1);
void sortByAlignment(VarList &Dest, const VarList &Source) const;
// Operand legalization helpers. To deal with address mode // Operand legalization helpers. To deal with address mode
// constraints, the helpers will create a new Operand and emit // constraints, the helpers will create a new Operand and emit
// instructions that guarantee that the Operand kind is one of those // instructions that guarantee that the Operand kind is one of those
...@@ -458,7 +460,7 @@ protected: ...@@ -458,7 +460,7 @@ protected:
bool IsEbpBasedFrame; bool IsEbpBasedFrame;
bool NeedsStackAlignment; bool NeedsStackAlignment;
size_t FrameSizeLocals; size_t FrameSizeLocals;
size_t LocalsSizeBytes; size_t SpillAreaSizeBytes;
llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
llvm::SmallBitVector ScratchRegs; llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
......
; This checks to ensure that Subzero aligns spill slots.
; RUN: %llvm2ice --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; The location of the stack slot for a variable is inferred from the
; return sequence.
; In this file, "global" refers to a variable with a live range across
; multiple basic blocks (not an LLVM global variable) and "local"
; refers to a variable that is live in only a single basic block.
; Case: a vector whose live range crosses a basic-block boundary (a
; "global" in the sense described at the top of this file).  The value
; is defined in %entry and returned from %block, with a call to
; @ForceXmmSpills in between.
define <4 x i32> @align_global_vector(i32 %arg) {
entry:
%vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0
br label %block
block:
call void @ForceXmmSpills()
ret <4 x i32> %vec.global
; Expected return sequence: the vector is reloaded from the bottom of
; the frame ([esp]) and then a 28-byte frame is released.
; NOTE(review): 28 bytes of frame plus a 4-byte return address is 32,
; which would make [esp] 16-byte aligned if esp is 16-byte aligned at
; the call site -- confirm this is the property being pinned.
; CHECK-LABEL: align_global_vector:
; CHECK: movups xmm0, xmmword ptr [esp]
; CHECK-NEXT: add esp, 28
; CHECK-NEXT: ret
}
; Case: a vector that is live in only one basic block (a "local" in the
; sense described at the top of this file).  The value is defined and
; returned inside %block, with a call to @ForceXmmSpills between the
; definition and the return.
define <4 x i32> @align_local_vector(i32 %arg) {
entry:
br label %block
block:
%vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0
call void @ForceXmmSpills()
ret <4 x i32> %vec.local
; Expected return sequence: same shape as the global-vector case, i.e.
; a reload from [esp] followed by release of a 28-byte frame.
; CHECK-LABEL: align_local_vector:
; CHECK: movups xmm0, xmmword ptr [esp]
; CHECK-NEXT: add esp, 28
; CHECK-NEXT: ret
}
; External helper, declared only (no body in this file).  Going by its
; name, a call to it is presumably what forces live xmm values onto the
; stack -- verify against the calling-convention handling in the target.
declare void @ForceXmmSpills()
; Case: a vector that is live across basic blocks, in a function that
; also contains an alloca.  The expected output addresses the spill slot
; relative to ebp rather than esp.
; NOTE(review): the alloca is presumably what makes the frame ebp-based;
; confirm against the target lowering.
define <4 x i32> @align_global_vector_ebp_based(i32 %arg) {
entry:
%alloc = alloca i8, i32 1, align 1
%vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0
br label %block
block:
call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
ret <4 x i32> %vec.global
; Expected return sequence: reload the vector from [ebp-24], then tear
; the frame down through ebp (mov esp, ebp / pop ebp).  The final ret is
; matched loosely, so other instructions may appear before it.
; CHECK-LABEL: align_global_vector_ebp_based:
; CHECK: movups xmm0, xmmword ptr [ebp-24]
; CHECK-NEXT: mov esp, ebp
; CHECK-NEXT: pop ebp
; CHECK: ret
}
; Case: a vector that is live in a single basic block (everything here
; is in %entry), in a function that also contains an alloca.  As in the
; other ebp-based case, the expected spill slot is addressed relative
; to ebp.
define <4 x i32> @align_local_vector_ebp_based(i32 %arg) {
entry:
%alloc = alloca i8, i32 1, align 1
%vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0
call void @ForceXmmSpillsAndUseAlloca(i8* %alloc)
ret <4 x i32> %vec.local
; Expected return sequence: reload the vector from [ebp-24], then
; restore esp from ebp and pop ebp.  The final ret is matched loosely.
; CHECK-LABEL: align_local_vector_ebp_based:
; CHECK: movups xmm0, xmmword ptr [ebp-24]
; CHECK-NEXT: mov esp, ebp
; CHECK-NEXT: pop ebp
; CHECK: ret
}
; Case: a function with both kinds of spilled variable.  %float.global
; is defined in %entry and used again in %block, so it is live across
; blocks; %vec.local is defined and returned within %block only.
define <4 x i32> @align_local_vector_and_global_float(i32 %arg) {
entry:
%float.global = sitofp i32 %arg to float
call void @ForceXmmSpillsAndUseFloat(float %float.global)
br label %block
block:
%vec.local = insertelement <4 x i32> undef, i32 undef, i32 0
call void @ForceXmmSpillsAndUseFloat(float %float.global)
ret <4 x i32> %vec.local
; Expected layout, read off the instructions below: the converted float
; is stored at [esp+28], the vector is reloaded from [esp], and the
; frame released on return is 44 bytes.
; CHECK-LABEL: align_local_vector_and_global_float:
; CHECK: cvtsi2ss xmm0, eax
; CHECK-NEXT: movss dword ptr [esp+28], xmm0
; CHECK: movups xmm0, xmmword ptr [esp]
; CHECK-NEXT: add esp, 44
; CHECK-NEXT: ret
}
; External helpers, declared only (no bodies in this file).  Each takes
; the value the test needs to keep in use: a pointer to the alloca'd
; byte, or the float that is live across blocks.  Going by their names,
; the calls presumably also force xmm values to be spilled -- verify
; against the target's call lowering.
declare void @ForceXmmSpillsAndUseAlloca(i8*)
declare void @ForceXmmSpillsAndUseFloat(float)
; ERRORS-NOT: ICE translation error
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment