Commit 105b7044 by Matt Wala

Subzero: Align the stack at the point of function calls.

Be compatible with the x86-32 calling convention by ensuring that the stack is aligned to 16 bytes at the point of the call instruction. Also ensure that vector arguments passed on the stack are 16-byte aligned. Also, make alloca instructions respect alignment.
BUG=none
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/444443002
parent 8835b89b
...@@ -134,7 +134,6 @@ if __name__ == '__main__': ...@@ -134,7 +134,6 @@ if __name__ == '__main__':
objs.append(bitcode) objs.append(bitcode)
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++' linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
# TODO: Remove -mstackrealign after Subzero supports stack alignment. shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', objs +
'-mstackrealign', args.driver] + objs +
['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)]) ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
...@@ -54,6 +54,14 @@ for optlevel in ${OPTLEVELS} ; do ...@@ -54,6 +54,14 @@ for optlevel in ${OPTLEVELS} ; do
--output=test_bitmanip_O${optlevel}_${attribute} --output=test_bitmanip_O${optlevel}_${attribute}
./crosstest.py -O${optlevel} --mattr ${attribute} \ ./crosstest.py -O${optlevel} --mattr ${attribute} \
--prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_calling_conv.cpp \
--driver=test_calling_conv_main.cpp \
--output=test_calling_conv_O${optlevel}_${attribute}
./crosstest.py -O${optlevel} --mattr ${attribute} \
--prefix=Subzero_ \ --prefix=Subzero_ \
--target=x8632 \ --target=x8632 \
--dir="${OUTDIR}" \ --dir="${OUTDIR}" \
...@@ -137,6 +145,7 @@ for optlevel in ${OPTLEVELS} ; do ...@@ -137,6 +145,7 @@ for optlevel in ${OPTLEVELS} ; do
"${OUTDIR}"/mem_intrin_O${optlevel}_${attribute} "${OUTDIR}"/mem_intrin_O${optlevel}_${attribute}
"${OUTDIR}"/test_arith_O${optlevel}_${attribute} "${OUTDIR}"/test_arith_O${optlevel}_${attribute}
"${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute} "${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute}
"${OUTDIR}"/test_calling_conv_O${optlevel}_${attribute}
"${OUTDIR}"/test_cast_O${optlevel}_${attribute} "${OUTDIR}"/test_cast_O${optlevel}_${attribute}
"${OUTDIR}"/test_fcmp_O${optlevel}_${attribute} "${OUTDIR}"/test_fcmp_O${optlevel}_${attribute}
"${OUTDIR}"/test_global_O${optlevel}_${attribute} "${OUTDIR}"/test_global_O${optlevel}_${attribute}
......
//===- subzero/crosstest/test_calling_conv.cpp - Implementation for tests -===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the test functions used to check that Subzero
// generates code compatible with the calling convention used by
// llc. "Caller" functions test the handling of out-args, and "callee"
// functions test the handling of in-args.
//
//===----------------------------------------------------------------------===//
#include <cstring>
#include "test_calling_conv.h"
// Reinterprets the function pointer Func as a pointer to function type Ty,
// so the result can be called with Ty's argument list.
#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
// Caller-side test: invokes the function stored in the global Callee through
// the callee_i_Ty signature, passing a single int argument.
void caller_i(void) {
int arg1 = 0x12345678;
CALL_AS_TYPE(callee_i_Ty, Callee)(arg1);
}
// Caller-side test: invokes the function stored in the global Callee through
// the callee_vvvvv_Ty signature, passing five v4si32 arguments whose elements
// are the distinct values 0..19 so each argument is identifiable.
void caller_vvvvv(void) {
v4si32 arg1 = {0, 1, 2, 3};
v4si32 arg2 = {4, 5, 6, 7};
v4si32 arg3 = {8, 9, 10, 11};
v4si32 arg4 = {12, 13, 14, 15};
v4si32 arg5 = {16, 17, 18, 19};
CALL_AS_TYPE(callee_vvvvv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5);
}
// Caller-side test: invokes the function stored in the global Callee through
// the callee_vlvlivfvdviv_Ty signature, passing twelve arguments that
// interleave v4f32 vectors with int64_t, int, float and double scalars.
// Every argument carries distinct values so it can be told apart.
void caller_vlvlivfvdviv(void) {
v4f32 arg1 = {0, 1, 2, 3};
int64_t arg2 = 4;
v4f32 arg3 = {6, 7, 8, 9};
int64_t arg4 = 10;
int arg5 = 11;
v4f32 arg6 = {12, 13, 14, 15};
float arg7 = 16;
v4f32 arg8 = {17, 18, 19, 20};
double arg9 = 21;
v4f32 arg10 = {22, 23, 24, 25};
int arg11 = 26;
v4f32 arg12 = {27, 28, 29, 30};
CALL_AS_TYPE(callee_vlvlivfvdviv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9, arg10,
arg11, arg12);
}
// Expands to a switch case labelled ARGNUM that copies the parameter named
// arg<ARGNUM> (sizeof(arg<ARGNUM>) bytes) to the start of the global Buf and
// then breaks out of the switch. Argument numbering therefore starts at 1.
#define HANDLE_ARG(ARGNUM) \
case ARGNUM: \
memcpy(&Buf[0], &arg##ARGNUM, sizeof(arg##ARGNUM)); \
break;
// Callee-side test: when the global ArgNum is 1, copies arg1 into Buf. Any
// other ArgNum matches no case and leaves Buf untouched.
void __attribute__((noinline)) callee_i(int arg1) {
switch (ArgNum) { HANDLE_ARG(1); }
}
// Callee-side test for five v4si32 parameters: copies the parameter selected
// by the global ArgNum (1..5) into Buf. Other ArgNum values write nothing.
void __attribute__((noinline))
callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
switch (ArgNum) {
HANDLE_ARG(1);
HANDLE_ARG(2);
HANDLE_ARG(3);
HANDLE_ARG(4);
HANDLE_ARG(5);
}
}
// Callee-side test for twelve mixed vector/scalar parameters: copies the
// parameter selected by the global ArgNum (1..12) into Buf. Other ArgNum
// values write nothing.
void __attribute__((noinline))
callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4, int arg5,
v4f32 arg6, float arg7, v4f32 arg8, double arg9, v4f32 arg10,
int arg11, v4f32 arg12) {
switch (ArgNum) {
HANDLE_ARG(1);
HANDLE_ARG(2);
HANDLE_ARG(3);
HANDLE_ARG(4);
HANDLE_ARG(5);
HANDLE_ARG(6);
HANDLE_ARG(7);
HANDLE_ARG(8);
HANDLE_ARG(9);
HANDLE_ARG(10);
HANDLE_ARG(11);
HANDLE_ARG(12);
}
}
//===- subzero/crosstest/test_calling_conv.def - testing macros -*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines macros for testing the calling convention.
//
//===----------------------------------------------------------------------===//
#ifndef TEST_CALLING_CONV_DEF
#define TEST_CALLING_CONV_DEF
// Turns its argument into a (parenthesized) string literal.
#define STR(x) (#x)
// X-macro table listing each caller/callee pair under test. Define
// X(caller, callee, argc) before expanding TEST_FUNC_TABLE and #undef it
// afterwards; argc is the number of arguments the pair passes. The last row
// ends with a line continuation, so the trailing "// #define X(...)" comment
// line is spliced onto the macro definition and then discarded as a comment.
#define TEST_FUNC_TABLE \
/* caller, callee, argc */ \
X(caller_i, callee_i, 1) \
X(caller_vvvvv, callee_vvvvv, 5) \
X(caller_vlvlivfvdviv, callee_vlvlivfvdviv, 12) \
// #define X(caller, callee, argc)
#endif // TEST_CALLING_CONV_DEF
//===- subzero/crosstest/test_calling_conv.h - Test prototypes --*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the function prototypes for crosstesting the calling
// convention.
//
//===----------------------------------------------------------------------===//
#include "test_calling_conv.def"
#include "vectors.h"
// NOTE(review): these declarations carry no include guard; the driver includes
// this header twice (once at global scope and once inside namespace Subzero_),
// which looks intentional - confirm before adding a guard.

// Signature-agnostic function pointer type used to store any callee.
typedef void (*CalleePtrTy)();
// Function that the caller_* routines invoke after casting it to the
// signature they expect.
extern CalleePtrTy Callee;
// Selects which argument a callee_* routine copies into Buf.
extern size_t ArgNum;
// Destination that the callee_* routines copy the selected argument into.
extern char *Buf;
// Single int argument.
// NOTE(review): caller_alloca_i / callee_alloca_i are declared here but no
// definition is visible alongside the other test functions - confirm they are
// defined elsewhere or drop the declarations.
void caller_i();
void caller_alloca_i();
typedef void callee_i_Ty(int);
callee_i_Ty callee_i;
callee_i_Ty callee_alloca_i;
// Five v4si32 vector arguments.
void caller_vvvvv();
typedef void (callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
callee_vvvvv_Ty callee_vvvvv;
// Twelve arguments mixing v4f32 vectors with int64_t, int, float and double.
void caller_vlvlivfvdviv();
typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32,
float, v4f32, double, v4f32, int, v4f32);
callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
//===- subzero/crosstest/test_calling_conv_main.cpp - Driver for tests ----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the driver for cross testing the compatibility of
// calling conventions.
//
//===----------------------------------------------------------------------===//
/* crosstest.py --test=test_calling_conv.cpp \
--driver=test_calling_conv_main.cpp --prefix=Subzero_ \
--output=test_calling_conv */
#include <cstring>
#include <iostream>
#include <sstream>
#include "test_calling_conv.h"
namespace Subzero_ {
#include "test_calling_conv.h"
}
// The crosstest code consists of caller / callee function pairs.
//
// The caller function initializes a list of arguments and calls the
// function located at Callee.
//
// The callee function writes the argument numbered ArgNum into the
// location pointed to by Buf.
//
// testCaller() tests that caller functions, as compiled by Subzero and
// llc, pass arguments to the callee in the same way. The Caller() and
// Subzero_Caller() functions both call the same callee (which has been
// compiled by llc). The result in the global buffer is compared to
// check that it is the same value after the calls by both callers.
//
// testCallee() runs the same kind of test, except that the functions
// Callee() and Subzero_Callee() are being tested to ensure that both
// functions receive arguments from the caller in the same way. The
// caller is compiled by llc.
// Shared state between the driver and the test functions. Each variable has a
// Subzero_-prefixed twin; presumably the twin is what the Subzero-compiled
// copy of the test code refers to after --prefix=Subzero_ is applied (verify
// against crosstest.py).
// Number of the argument that the callee copies out.
size_t ArgNum, Subzero_ArgNum;
// Function invoked by the caller_* routines.
CalleePtrTy Callee, Subzero_Callee;
// Buffer that receives the copied argument.
char *Buf, *Subzero_Buf;
const static size_t BUF_SIZE = 16;
// Formats the BUF_SIZE bytes of Buf as decimal values in the range 0..255,
// separated by single spaces, for use in failure messages.
std::string bufAsString(const char Buf[BUF_SIZE]) {
  std::ostringstream OS;
  for (size_t i = 0; i < BUF_SIZE; ++i) {
    if (i > 0)
      OS << " ";
    // Convert through unsigned char first. Where plain char is signed, casting
    // a byte such as 0xff straight to unsigned sign-extends it and prints
    // 4294967295 instead of 255.
    OS << static_cast<unsigned>(static_cast<unsigned char>(Buf[i]));
  }
  return OS.str();
}
// Checks that the llc-compiled and Subzero-compiled caller functions pass
// arguments identically. For every table entry and every argument number,
// both callers are run against the same callee, which copies the selected
// argument into the buffer pointed to by Buf. The two captured buffers must
// match byte for byte.
//
// TotalTests, Passes and Failures are running counters that are incremented
// in place; mismatches are also reported on std::cout.
void testCaller(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  static struct {
    const char *CallerName, *CalleeName;
    size_t Args;
    void (*Caller)(void);
    void (*Subzero_Caller)(void);
    CalleePtrTy Callee;
  } Funcs[] = {
#define X(caller, callee, argc) \
{ \
STR(caller), STR(callee), argc, &caller, &Subzero_::caller, \
reinterpret_cast<CalleePtrTy>(&callee), \
} \
,
TEST_FUNC_TABLE
#undef X
  };
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  for (size_t f = 0; f < NumFuncs; ++f) {
    char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
    Callee = Subzero_Callee = Funcs[f].Callee;
    // The callee functions number their parameters arg1..argN and switch on
    // cases 1..N, so the selector must run from 1 to Args inclusive. Starting
    // at 0 matches no case (both buffers keep their 0xff fill and compare
    // equal trivially) and never checks the last argument.
    for (size_t i = 1; i <= Funcs[f].Args; ++i) {
      // Pre-fill both buffers so bytes the callee does not write still match.
      memset(BufLlc, 0xff, sizeof(BufLlc));
      memset(BufSz, 0xff, sizeof(BufSz));
      ArgNum = Subzero_ArgNum = i;
      // Capture what the llc-compiled caller passed ...
      Buf = BufLlc;
      Funcs[f].Caller();
      // ... and what the Subzero-compiled caller passed.
      Buf = BufSz;
      Funcs[f].Subzero_Caller();
      ++TotalTests;
      if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
        ++Passes;
      } else {
        ++Failures;
        std::cout << "testCaller(Caller=" << Funcs[f].CallerName
                  << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
                  << ")\nsz =" << bufAsString(BufSz)
                  << "\nllc=" << bufAsString(BufLlc) << "\n";
      }
    }
  }
}
// Checks that the llc-compiled and Subzero-compiled callee functions receive
// arguments identically. For every table entry and every argument number, the
// same caller is run once against each callee, and the buffers BufLlc (via
// Buf) and BufSz (via Subzero_Buf) are compared byte for byte.
//
// TotalTests, Passes and Failures are running counters that are incremented
// in place; mismatches are also reported on std::cout.
void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  static struct {
    const char *CallerName, *CalleeName;
    size_t Args;
    void (*Caller)(void);
    CalleePtrTy Callee, Subzero_Callee;
  } Funcs[] = {
#define X(caller, callee, argc) \
{ \
STR(caller), STR(callee), argc, &caller, \
reinterpret_cast<CalleePtrTy>(&callee), \
reinterpret_cast<CalleePtrTy>(&Subzero_::callee) \
} \
,
TEST_FUNC_TABLE
#undef X
  };
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  for (size_t f = 0; f < NumFuncs; ++f) {
    char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
    Buf = BufLlc;
    Subzero_Buf = BufSz;
    // The callee functions number their parameters arg1..argN and switch on
    // cases 1..N, so the selector must run from 1 to Args inclusive. Starting
    // at 0 matches no case (both buffers keep their 0xff fill and compare
    // equal trivially) and never checks the last argument.
    for (size_t i = 1; i <= Funcs[f].Args; ++i) {
      // Pre-fill both buffers so bytes the callee does not write still match.
      memset(BufLlc, 0xff, sizeof(BufLlc));
      memset(BufSz, 0xff, sizeof(BufSz));
      ArgNum = Subzero_ArgNum = i;
      // Run the caller against the llc-compiled callee ...
      Callee = Funcs[f].Callee;
      Funcs[f].Caller();
      // ... and then against the Subzero-compiled callee.
      Callee = Funcs[f].Subzero_Callee;
      Funcs[f].Caller();
      ++TotalTests;
      if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
        ++Passes;
      } else {
        ++Failures;
        std::cout << "testCallee(Caller=" << Funcs[f].CallerName
                  << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
                  << ")\nsz =" << bufAsString(BufSz)
                  << "\nllc=" << bufAsString(BufLlc) << "\n";
      }
    }
  }
}
// Runs the caller and callee test suites, prints a one-line summary, and
// returns the number of failed comparisons as the process exit status.
int main(int argc, char *argv[]) {
  size_t NumTests = 0, NumPassed = 0, NumFailed = 0;

  testCaller(NumTests, NumPassed, NumFailed);
  testCallee(NumTests, NumPassed, NumFailed);

  std::cout << "TotalTests=" << NumTests;
  std::cout << " Passes=" << NumPassed;
  std::cout << " Failures=" << NumFailed << "\n";
  return NumFailed;
}
...@@ -92,6 +92,9 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base, ...@@ -92,6 +92,9 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
} }
} }
// Builds an Adjuststack instruction with no destination (NULL) and records
// Amount, the value later printed by emit() and dump(). No source operands are
// added; the 0 passed to the base constructor is presumably the source count.
InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount)
: InstX8632(Func, InstX8632::Adjuststack, 0, NULL), Amount(Amount) {}
InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1, InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
Operand *Source2) Operand *Source2)
: InstX8632(Func, InstX8632::Mul, 2, Dest) { : InstX8632(Func, InstX8632::Mul, 2, Dest) {
...@@ -226,6 +229,12 @@ InstX8632Movp::InstX8632Movp(Cfg *Func, Variable *Dest, Operand *Source) ...@@ -226,6 +229,12 @@ InstX8632Movp::InstX8632Movp(Cfg *Func, Variable *Dest, Operand *Source)
addSource(Source); addSource(Source);
} }
// Builds a StoreP instruction with no destination (NULL). Operand order
// matters: Value becomes source 0 and Mem becomes source 1, which is the order
// emit() and dump() rely on.
InstX8632StoreP::InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem)
: InstX8632(Func, InstX8632::StoreP, 2, NULL) {
addSource(Value);
addSource(Mem);
}
InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem) InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem)
: InstX8632(Func, InstX8632::StoreQ, 2, NULL) { : InstX8632(Func, InstX8632::StoreQ, 2, NULL) {
addSource(Value); addSource(Value);
...@@ -933,6 +942,24 @@ void InstX8632Store::dump(const Cfg *Func) const { ...@@ -933,6 +942,24 @@ void InstX8632Store::dump(const Cfg *Func) const {
getSrc(0)->dump(Func); getSrc(0)->dump(Func);
} }
void InstX8632StoreP::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\tmovups\t";
getSrc(1)->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << "\n";
}
void InstX8632StoreP::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "storep." << getSrc(0)->getType() << " ";
getSrc(1)->dump(Func);
Str << ", ";
getSrc(0)->dump(Func);
}
void InstX8632StoreQ::emit(const Cfg *Func) const { void InstX8632StoreQ::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2); assert(getSrcSize() == 2);
...@@ -1236,6 +1263,17 @@ void InstX8632Pop::dump(const Cfg *Func) const { ...@@ -1236,6 +1263,17 @@ void InstX8632Pop::dump(const Cfg *Func) const {
Str << " = pop." << getDest()->getType() << " "; Str << " = pop." << getDest()->getType() << " ";
} }
void InstX8632AdjustStack::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\tsub\tesp, " << Amount << "\n";
Func->getTarget()->updateStackAdjustment(Amount);
}
void InstX8632AdjustStack::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "esp = sub.i32 esp, " << Amount;
}
void InstX8632Push::emit(const Cfg *Func) const { void InstX8632Push::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
......
...@@ -137,6 +137,7 @@ public: ...@@ -137,6 +137,7 @@ public:
Add, Add,
Addps, Addps,
Addss, Addss,
Adjuststack,
And, And,
Blendvps, Blendvps,
Br, Br,
...@@ -204,6 +205,7 @@ public: ...@@ -204,6 +205,7 @@ public:
Shufps, Shufps,
Sqrtss, Sqrtss,
Store, Store,
StoreP,
StoreQ, StoreQ,
Sub, Sub,
Subps, Subps,
...@@ -340,6 +342,26 @@ private: ...@@ -340,6 +342,26 @@ private:
InstX8632Label *Label; // Intra-block branch target InstX8632Label *Label; // Intra-block branch target
}; };
// AdjustStack instruction - subtracts esp by the given amount and
// updates the stack offset during code emission.
class InstX8632AdjustStack : public InstX8632 {
public:
// Creates the instruction in storage obtained from Func->allocate<>().
// Amount is the value subtracted from esp.
static InstX8632AdjustStack *create(Cfg *Func, SizeT Amount) {
return new (Func->allocate<InstX8632AdjustStack>())
InstX8632AdjustStack(Func, Amount);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
// Kind test: true when Inst is an Adjuststack instruction.
static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
private:
// Construction goes through create(); copying is disabled.
InstX8632AdjustStack(Cfg *Func, SizeT Amount);
InstX8632AdjustStack(const InstX8632AdjustStack &) LLVM_DELETED_FUNCTION;
InstX8632AdjustStack &operator=(const InstX8632AdjustStack &)
LLVM_DELETED_FUNCTION;
// Amount subtracted from esp, as printed by emit() and dump().
SizeT Amount;
};
// Call instruction. Arguments should have already been pushed. // Call instruction. Arguments should have already been pushed.
class InstX8632Call : public InstX8632 { class InstX8632Call : public InstX8632 {
public: public:
...@@ -960,6 +982,23 @@ private: ...@@ -960,6 +982,23 @@ private:
virtual ~InstX8632Movp() {} virtual ~InstX8632Movp() {}
}; };
// StoreP instruction - stores Value to the memory operand Mem; emitted as
// "movups". Like the other store instructions it has no Dest operand: Value
// and Mem are both sources.
class InstX8632StoreP : public InstX8632 {
public:
// Creates the instruction in storage obtained from Func->allocate<>().
static InstX8632StoreP *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
return new (Func->allocate<InstX8632StoreP>())
InstX8632StoreP(Func, Value, Mem);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
// Kind test: true when Inst is a StoreP instruction.
static bool classof(const Inst *Inst) { return isClassof(Inst, StoreP); }
private:
// Construction goes through create(); copying is disabled.
InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem);
InstX8632StoreP(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
InstX8632StoreP &operator=(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632StoreP() {}
};
// This is essentially a "movq" instruction with an OperandX8632Mem // This is essentially a "movq" instruction with an OperandX8632Mem
// operand instead of Variable as the destination. It's important // operand instead of Variable as the destination. It's important
// for liveness that there is no Dest operand. // for liveness that there is no Dest operand.
......
...@@ -121,9 +121,21 @@ Type getInVectorElementType(Type Ty) { ...@@ -121,9 +121,21 @@ Type getInVectorElementType(Type Ty) {
} }
// The maximum number of arguments to pass in XMM registers // The maximum number of arguments to pass in XMM registers
const unsigned X86_MAX_XMM_ARGS = 4; const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte // The number of bits in a byte
const unsigned X86_CHAR_BIT = 8; const uint32_t X86_CHAR_BIT = 8;
// Required stack alignment, in bytes.
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size in bytes of the return address on the stack.
const uint32_t X86_RET_IP_SIZE_BYTES = 4;
// Rounds Value, a size in bytes, up to the nearest multiple of
// X86_STACK_ALIGNMENT_BYTES. A Value that is already a multiple is returned
// unchanged.
uint32_t applyStackAlignment(uint32_t Value) {
  const uint32_t LowBits = X86_STACK_ALIGNMENT_BYTES - 1;
  // The masking below is only valid when the alignment is a power of 2.
  assert((X86_STACK_ALIGNMENT_BYTES & LowBits) == 0);
  const uint32_t Bumped = Value + LowBits;
  return Bumped & ~LowBits;
}
// Instruction set options // Instruction set options
namespace cl = ::llvm::cl; namespace cl = ::llvm::cl;
...@@ -248,8 +260,8 @@ void __attribute__((unused)) xMacroIntegrityCheck() { ...@@ -248,8 +260,8 @@ void __attribute__((unused)) xMacroIntegrityCheck() {
TargetX8632::TargetX8632(Cfg *Func) TargetX8632::TargetX8632(Cfg *Func)
: TargetLowering(Func), InstructionSet(CLInstructionSet), : TargetLowering(Func), InstructionSet(CLInstructionSet),
IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0), IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
NextLabelNumber(0), ComputedLiveRanges(false), LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
PhysicalRegisters(VarList(Reg_NUM)) { PhysicalRegisters(VarList(Reg_NUM)) {
// TODO: Don't initialize IntegerRegisters and friends every time. // TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the // Instead, initialize in some sort of static initializer for the
...@@ -543,6 +555,9 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, ...@@ -543,6 +555,9 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
return; return;
} }
if (isVectorType(Ty)) {
InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
}
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty); InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
if (Arg->hasReg()) { if (Arg->hasReg()) {
...@@ -570,7 +585,6 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -570,7 +585,6 @@ void TargetX8632::addProlog(CfgNode *Node) {
// or B. // or B.
const bool SimpleCoalescing = true; const bool SimpleCoalescing = true;
size_t InArgsSizeBytes = 0; size_t InArgsSizeBytes = 0;
size_t RetIpSizeBytes = 4;
size_t PreservedRegsSizeBytes = 0; size_t PreservedRegsSizeBytes = 0;
LocalsSizeBytes = 0; LocalsSizeBytes = 0;
Context.init(Node); Context.init(Node);
...@@ -657,6 +671,13 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -657,6 +671,13 @@ void TargetX8632::addProlog(CfgNode *Node) {
_mov(ebp, esp); _mov(ebp, esp);
} }
if (NeedsStackAlignment) {
uint32_t StackSize = applyStackAlignment(
X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes);
LocalsSizeBytes =
StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
}
// Generate "sub esp, LocalsSizeBytes" // Generate "sub esp, LocalsSizeBytes"
if (LocalsSizeBytes) if (LocalsSizeBytes)
_sub(getPhysicalRegister(Reg_esp), _sub(getPhysicalRegister(Reg_esp),
...@@ -668,7 +689,7 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -668,7 +689,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
// for those that were register-allocated. Args are pushed right to // for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer. // left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame) if (!IsEbpBasedFrame)
BasicFrameOffset += LocalsSizeBytes; BasicFrameOffset += LocalsSizeBytes;
...@@ -959,12 +980,42 @@ llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, ...@@ -959,12 +980,42 @@ llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
void TargetX8632::lowerAlloca(const InstAlloca *Inst) { void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
IsEbpBasedFrame = true; IsEbpBasedFrame = true;
// TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize // Conservatively require the stack to be aligned. Some stack
// the number of adjustments of esp, etc. // adjustment operations implemented below assume that the stack is
// aligned before the alloca. All the alloca code ensures that the
// stack alignment is preserved after the alloca. The stack alignment
// restriction can be relaxed in some cases.
NeedsStackAlignment = true;
// TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
Variable *esp = getPhysicalRegister(Reg_esp); Variable *esp = getPhysicalRegister(Reg_esp);
Operand *TotalSize = legalize(Inst->getSizeInBytes()); Operand *TotalSize = legalize(Inst->getSizeInBytes());
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
_sub(esp, TotalSize); uint32_t AlignmentParam = Inst->getAlignInBytes();
// LLVM enforces power of 2 alignment.
assert((AlignmentParam & (AlignmentParam - 1)) == 0);
assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
_and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
}
if (ConstantInteger *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger>(TotalSize)) {
uint32_t Value = ConstantTotalSize->getValue();
// Round Value up to the next highest multiple of the alignment.
Value = (Value + Alignment - 1) & -Alignment;
_sub(esp, Ctx->getConstantInt(IceType_i32, Value));
} else {
// Non-constant sizes need to be adjusted to the next highest
// multiple of the required alignment at runtime.
Variable *T = makeReg(IceType_i32);
_mov(T, TotalSize);
_add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
_and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
_sub(esp, T);
}
_mov(Dest, esp); _mov(Dest, esp);
} }
...@@ -1544,51 +1595,78 @@ void TargetX8632::lowerBr(const InstBr *Inst) { ...@@ -1544,51 +1595,78 @@ void TargetX8632::lowerBr(const InstBr *Inst) {
} }
void TargetX8632::lowerCall(const InstCall *Instr) { void TargetX8632::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
// bytes.
//
// * The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers xmm0 - xmm3.
//
// * Other arguments are pushed onto the stack in right-to-left order,
// such that the left-most argument ends up on the top of the stack at
// the lowest memory address.
//
// * Stack arguments of vector type are aligned to start at the next
// highest multiple of 16 bytes. Other stack arguments are aligned to
// 4 bytes.
//
// This intends to match the section "IA-32 Function Calling
// Convention" of the document "OS X ABI Function Call Guide" by
// Apple.
NeedsStackAlignment = true;
OperandList XmmArgs;
OperandList StackArgs, StackArgLocations;
uint32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the // Classify each argument operand according to the location where the
// argument is passed. // argument is passed.
OperandList XmmArgs;
OperandList StackArgs;
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i); Operand *Arg = Instr->getArg(i);
if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) { Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
Ty == IceType_f64 || isVectorType(Ty));
if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg); XmmArgs.push_back(Arg);
} else { } else {
StackArgs.push_back(Arg); StackArgs.push_back(Arg);
if (isVectorType(Arg->getType())) {
ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes);
StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
} }
} }
// For stack arguments, generate a sequence of push instructions,
// pushing right to left, keeping track of stack offsets in case a // Adjust the parameter area so that the stack is aligned. It is
// push involves a stack operand and we are using an esp-based frame. // assumed that the stack is already aligned at the start of the
uint32_t StackOffset = 0; // calling sequence.
// TODO: Consolidate the stack adjustment for function calls by ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
// reserving enough space for the arguments only once.
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
// //
// TODO: If for some reason the call instruction gets dead-code // TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the // eliminated after lowering, we would need to ensure that the
// pre-call push instructions and the post-call esp adjustment get // pre-call and the post-call esp adjustment get eliminated as well.
// eliminated as well. if (ParameterAreaSizeBytes) {
for (OperandList::reverse_iterator I = StackArgs.rbegin(), _adjust_stack(ParameterAreaSizeBytes);
E = StackArgs.rend(); I != E; ++I) {
Operand *Arg = legalize(*I);
if (Arg->getType() == IceType_i64) {
_push(hiOperand(Arg));
_push(loOperand(Arg));
} else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
// If the Arg turns out to be a memory operand, more than one push
// instruction is required. This ends up being somewhat clumsy in
// the current IR, so we use a workaround. Force the operand into
// a (xmm) register, and then push the register. An xmm register
// push is actually not possible in x86, but the Push instruction
// emitter handles this by decrementing the stack pointer and
// directly writing the xmm register value.
_push(legalize(Arg, Legal_Reg));
} else {
// Otherwise PNaCl requires parameter types to be at least 32-bits.
assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
_push(Arg);
} }
StackOffset += typeWidthInBytesOnStack(Arg->getType());
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
// TODO: Consider calling postLower() here to reduce the register
// pressure associated with using too many infinite weight
// temporaries when lowering the call sequence in -Om1 mode.
} }
// Copy arguments to be passed in registers to the appropriate // Copy arguments to be passed in registers to the appropriate
// registers. // registers.
// TODO: Investigate the impact of lowering arguments passed in // TODO: Investigate the impact of lowering arguments passed in
...@@ -1652,10 +1730,11 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -1652,10 +1730,11 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
if (ReturnRegHi) if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi)); Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. // Add the appropriate offset to esp. The call instruction takes care
if (StackOffset) { // of resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
_add(esp, Ctx->getConstantInt(IceType_i32, StackOffset)); _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes));
} }
// Insert a register-kill pseudo instruction. // Insert a register-kill pseudo instruction.
...@@ -2134,9 +2213,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { ...@@ -2134,9 +2213,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use pshufd and movd/movss. // Use pshufd and movd/movss.
// //
// ALIGNHACK: Force vector operands to registers in instructions that // ALIGNHACK: Force vector operands to registers in instructions
// require aligned memory operands until support for stack alignment // that require aligned memory operands until support for data
// is implemented. // alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) #define ALIGN_HACK(Vect) legalizeToVar((Vect))
Operand *SourceVectRM = Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
...@@ -2221,8 +2300,8 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { ...@@ -2221,8 +2300,8 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// ALIGNHACK: Without support for stack alignment, both operands to // ALIGNHACK: Without support for data alignment, both operands to
// cmpps need to be forced into registers. Once support for stack // cmpps need to be forced into registers. Once support for data
// alignment is implemented, remove LEGAL_HACK. // alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect)) #define LEGAL_HACK(Vect) legalizeToVar((Vect))
switch (Condition) { switch (Condition) {
...@@ -2362,8 +2441,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -2362,8 +2441,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
} }
// TODO: ALIGNHACK: Both operands to compare instructions need to be // TODO: ALIGNHACK: Both operands to compare instructions need to be
// in registers until stack alignment support is implemented. Once // in registers until data alignment support is implemented. Once
// there is support for stack alignment, LEGAL_HACK can be removed. // there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect)) #define LEGAL_HACK(Vect) legalizeToVar((Vect))
Variable *T = makeReg(Ty); Variable *T = makeReg(Ty);
switch (Condition) { switch (Condition) {
...@@ -2583,9 +2662,9 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { ...@@ -2583,9 +2662,9 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
// ALIGNHACK: Force vector operands to registers in instructions that // ALIGNHACK: Force vector operands to registers in instructions
// require aligned memory operands until support for stack alignment // that require aligned memory operands until support for data
// is implemented. // alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) #define ALIGN_HACK(Vect) legalizeToVar((Vect))
if (Index == 1) { if (Index == 1) {
SourceVectRM = ALIGN_HACK(SourceVectRM); SourceVectRM = ALIGN_HACK(SourceVectRM);
...@@ -2873,7 +2952,8 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2873,7 +2952,8 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
} }
case Intrinsics::Memset: { case Intrinsics::Memset: {
// The value operand needs to be extended to a stack slot size // The value operand needs to be extended to a stack slot size
// because "push" only works for a specific operand size. // because the PNaCl ABI requires arguments to be at least 32 bits
// wide.
Operand *ValOp = Instr->getArg(1); Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8); assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
...@@ -3560,9 +3640,9 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { ...@@ -3560,9 +3640,9 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *T = makeReg(SrcTy); Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
// ALIGNHACK: Until stack alignment support is implemented, vector // ALIGNHACK: Until data alignment support is implemented, vector
// instructions need to have vector operands in registers. Once // instructions need to have vector operands in registers. Once
// there is support for stack alignment, LEGAL_HACK can be removed. // there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect)) #define LEGAL_HACK(Vect) legalizeToVar((Vect))
if (InstructionSet >= SSE4_1) { if (InstructionSet >= SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps // TODO(wala): If the condition operand is a constant, use blendps
...@@ -3657,13 +3737,16 @@ void TargetX8632::lowerStore(const InstStore *Inst) { ...@@ -3657,13 +3737,16 @@ void TargetX8632::lowerStore(const InstStore *Inst) {
Operand *Value = Inst->getData(); Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr(); Operand *Addr = Inst->getAddr();
OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
Type Ty = NewAddr->getType();
if (NewAddr->getType() == IceType_i64) { if (Ty == IceType_i64) {
Value = legalize(Value); Value = legalize(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
_store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
_store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) {
_storep(legalizeToVar(Value), NewAddr);
} else { } else {
Value = legalize(Value, Legal_Reg | Legal_Imm, true); Value = legalize(Value, Legal_Reg | Legal_Imm, true);
_store(Value, NewAddr); _store(Value, NewAddr);
...@@ -4039,9 +4122,9 @@ void TargetX8632::postLower() { ...@@ -4039,9 +4122,9 @@ void TargetX8632::postLower() {
llvm::SmallBitVector AvailableTypedRegisters = llvm::SmallBitVector AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType()); AvailableRegisters & getRegisterSetForType(Var->getType());
if (!AvailableTypedRegisters.any()) { if (!AvailableTypedRegisters.any()) {
// This is a hack in case we run out of physical registers // This is a hack in case we run out of physical registers due
// due to an excessive number of "push" instructions from // to an excessively long code sequence, as might happen when
// lowering a call. // lowering arguments in lowerCall().
AvailableRegisters = WhiteList; AvailableRegisters = WhiteList;
AvailableTypedRegisters = AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType()); AvailableRegisters & getRegisterSetForType(Var->getType());
......
...@@ -188,6 +188,9 @@ protected: ...@@ -188,6 +188,9 @@ protected:
void _add(Variable *Dest, Operand *Src0) { void _add(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Add::create(Func, Dest, Src0)); Context.insert(InstX8632Add::create(Func, Dest, Src0));
} }
// Creates an InstX8632AdjustStack instruction for Func with the given
// Amount and inserts it through Context.
// NOTE(review): Amount is presumably a byte count applied to esp --
// confirm against the InstX8632AdjustStack definition.
void _adjust_stack(int32_t Amount) {
Context.insert(InstX8632AdjustStack::create(Func, Amount));
}
void _addps(Variable *Dest, Operand *Src0) { void _addps(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Addps::create(Func, Dest, Src0)); Context.insert(InstX8632Addps::create(Func, Dest, Src0));
} }
...@@ -412,6 +415,9 @@ protected: ...@@ -412,6 +415,9 @@ protected:
void _store(Operand *Value, OperandX8632 *Mem) { void _store(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632Store::create(Func, Value, Mem)); Context.insert(InstX8632Store::create(Func, Value, Mem));
} }
// Creates an InstX8632StoreP instruction for Func from Value and Mem and
// inserts it through Context. lowerStore() calls this for vector-typed
// values, passing the value already legalized to a variable.
void _storep(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632StoreP::create(Func, Value, Mem));
}
void _storeq(Operand *Value, OperandX8632 *Mem) { void _storeq(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632StoreQ::create(Func, Value, Mem)); Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
} }
...@@ -450,6 +456,7 @@ protected: ...@@ -450,6 +456,7 @@ protected:
const X86InstructionSet InstructionSet; const X86InstructionSet InstructionSet;
bool IsEbpBasedFrame; bool IsEbpBasedFrame;
bool NeedsStackAlignment;
size_t FrameSizeLocals; size_t FrameSizeLocals;
size_t LocalsSizeBytes; size_t LocalsSizeBytes;
llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
......
...@@ -33,40 +33,50 @@ entry: ...@@ -33,40 +33,50 @@ entry:
ret i32 %add3 ret i32 %add3
} }
; CHECK: pass64BitArg: ; CHECK: pass64BitArg:
; CHECK: push 123 ; CHECK: sub esp
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+4]
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp]
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK: mov dword ptr [esp+8], 123
; CHECK: push ; CHECK: mov dword ptr [esp+16]
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+12]
; CHECK-NEXT: push 123 ; CHECK: call ignore64BitArgNoInline
; CHECK-NEXT: push ; CHECK: sub esp
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+4]
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK: mov dword ptr [esp]
; CHECK: push ; CHECK: mov dword ptr [esp+8], 123
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+16]
; CHECK-NEXT: push 123 ; CHECK: mov dword ptr [esp+12]
; CHECK-NEXT: push ; CHECK: call ignore64BitArgNoInline
; CHECK-NEXT: push ; CHECK: sub esp
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK: mov dword ptr [esp+4]
; CHECK: mov dword ptr [esp]
; CHECK: mov dword ptr [esp+8], 123
; CHECK: mov dword ptr [esp+16]
; CHECK: mov dword ptr [esp+12]
; CHECK: call ignore64BitArgNoInline
; ;
; OPTM1: pass64BitArg: ; OPTM1: pass64BitArg:
; OPTM1: push 123 ; OPTM1: sub esp
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+4]
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp]
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1: mov dword ptr [esp+8], 123
; OPTM1: push ; OPTM1: mov dword ptr [esp+16]
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+12]
; OPTM1-NEXT: push 123 ; OPTM1: call ignore64BitArgNoInline
; OPTM1-NEXT: push ; OPTM1: sub esp
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+4]
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1: mov dword ptr [esp]
; OPTM1: push ; OPTM1: mov dword ptr [esp+8], 123
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+16]
; OPTM1-NEXT: push 123 ; OPTM1: mov dword ptr [esp+12]
; OPTM1-NEXT: push ; OPTM1: call ignore64BitArgNoInline
; OPTM1-NEXT: push ; OPTM1: sub esp
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1: mov dword ptr [esp+4]
; OPTM1: mov dword ptr [esp]
; OPTM1: mov dword ptr [esp+8], 123
; OPTM1: mov dword ptr [esp+16]
; OPTM1: mov dword ptr [esp+12]
; OPTM1: call ignore64BitArgNoInline
declare i32 @ignore64BitArgNoInline(i64, i32, i64) declare i32 @ignore64BitArgNoInline(i64, i32, i64)
...@@ -76,19 +86,21 @@ entry: ...@@ -76,19 +86,21 @@ entry:
ret i32 %call ret i32 %call
} }
; CHECK: pass64BitConstArg: ; CHECK: pass64BitConstArg:
; CHECK: push 3735928559 ; CHECK: sub esp
; CHECK-NEXT: push 305419896 ; CHECK: mov dword ptr [esp+4]
; CHECK-NEXT: push 123 ; CHECK-NEXT: mov dword ptr [esp]
; CHECK-NEXT: push ecx ; CHECK-NEXT: mov dword ptr [esp+8], 123
; CHECK-NEXT: push eax ; CHECK-NEXT: mov dword ptr [esp+16], 3735928559
; CHECK-NEXT: mov dword ptr [esp+12], 305419896
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK-NEXT: call ignore64BitArgNoInline
; ;
; OPTM1: pass64BitConstArg: ; OPTM1: pass64BitConstArg:
; OPTM1: push 3735928559 ; OPTM1: sub esp
; OPTM1-NEXT: push 305419896 ; OPTM1: mov dword ptr [esp+4]
; OPTM1-NEXT: push 123 ; OPTM1-NEXT: mov dword ptr [esp]
; OPTM1-NEXT: push dword ptr [ ; OPTM1-NEXT: mov dword ptr [esp+8], 123
; OPTM1-NEXT: push dword ptr [ ; OPTM1-NEXT: mov dword ptr [esp+16], 3735928559
; OPTM1-NEXT: mov dword ptr [esp+12], 305419896
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1-NEXT: call ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %a) { define internal i64 @return64BitArg(i64 %a) {
...@@ -240,14 +252,14 @@ entry: ...@@ -240,14 +252,14 @@ entry:
ret i64 %div ret i64 %div
} }
; CHECK-LABEL: div64BitSignedConst: ; CHECK-LABEL: div64BitSignedConst:
; CHECK: push 2874 ; CHECK: mov dword ptr [esp+12], 2874
; CHECK: push 1942892530 ; CHECK: mov dword ptr [esp+8], 1942892530
; CHECK: call __divdi3 ; CHECK: call __divdi3
; CHECK: ret ; CHECK: ret
; ;
; OPTM1-LABEL: div64BitSignedConst: ; OPTM1-LABEL: div64BitSignedConst:
; OPTM1: push 2874 ; OPTM1: mov dword ptr [esp+12], 2874
; OPTM1: push 1942892530 ; OPTM1: mov dword ptr [esp+8], 1942892530
; OPTM1: call __divdi3 ; OPTM1: call __divdi3
; OPTM1: ret ; OPTM1: ret
......
; This is a basic test of the alloca instruction - one test for alloca ; This is a basic test of the alloca instruction.
; of a fixed size, and one test for variable size.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s \ ; RUN: %llvm2ice -O2 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice -Om1 --verbose none %s \ ; RUN: %llvm2ice -Om1 --verbose none %s \
...@@ -12,45 +11,95 @@ ...@@ -12,45 +11,95 @@
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s ; RUN: | FileCheck --check-prefix=DUMP %s
define void @fixed_400(i32 %n) { define void @fixed_416_align_16(i32 %n) {
entry: entry:
%array = alloca i8, i32 400, align 16 %array = alloca i8, i32 416, align 16
%__2 = ptrtoint i8* %array to i32 %__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2) call void @f1(i32 %__2)
ret void ret void
} }
; CHECK: fixed_400: ; CHECK-LABEL: fixed_416_align_16:
; CHECK: sub esp, 400 ; CHECK: sub esp, 416
; CHECK-NEXT: mov eax, esp ; CHECK: sub esp, 16
; CHECK-NEXT: push eax ; CHECK: mov dword ptr [esp], eax
; CHECK-NEXT: call f1 ; CHECK: call f1
;
; OPTM1: fixed_400: define void @fixed_416_align_32(i32 %n) {
; OPTM1: sub esp, 400 entry:
; OPTM1-NEXT: mov {{.*}}, esp %array = alloca i8, i32 400, align 32
; OPTM1: push %__2 = ptrtoint i8* %array to i32
; OPTM1-NEXT: call f1 call void @f1(i32 %__2)
ret void
}
; CHECK-LABEL: fixed_416_align_32:
; CHECK: and esp, 4294967264
; CHECK: sub esp, 416
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f1
; Fixed-size alloca whose byte count (351) is not a multiple of its
; requested 16-byte alignment. The resulting address is converted to an
; i32 and passed to @f1; the %n parameter is unused.
define void @fixed_351_align_16(i32 %n) {
entry:
%array = alloca i8, i32 351, align 16
%__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2)
ret void
}
; CHECK-LABEL: fixed_351_align_16:
; CHECK: sub esp, 352
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f1
; Fixed-size alloca whose byte count (351) is not a multiple of its
; requested 32-byte alignment. The resulting address is converted to an
; i32 and passed to @f1; the %n parameter is unused.
define void @fixed_351_align_32(i32 %n) {
entry:
%array = alloca i8, i32 351, align 32
%__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2)
ret void
}
; CHECK-LABEL: fixed_351_align_32:
; CHECK: and esp, 4294967264
; CHECK: sub esp, 352
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f1
declare void @f1(i32) declare void @f1(i32)
define void @variable_n(i32 %n) { define void @variable_n_align_16(i32 %n) {
entry: entry:
%array = alloca i8, i32 %n, align 16 %array = alloca i8, i32 %n, align 16
%__2 = ptrtoint i8* %array to i32 %__2 = ptrtoint i8* %array to i32
call void @f2(i32 %__2) call void @f2(i32 %__2)
ret void ret void
} }
; CHECK: variable_n: ; CHECK-LABEL: variable_n_align_16:
; CHECK: mov eax, dword ptr [ebp+8] ; CHECK: mov eax, dword ptr [ebp+8]
; CHECK-NEXT: sub esp, eax ; CHECK: add eax, 15
; CHECK-NEXT: mov eax, esp ; CHECK: and eax, 4294967280
; CHECK-NEXT: push eax ; CHECK: sub esp, eax
; CHECK-NEXT: call f2 ; CHECK: sub esp, 16
; ; CHECK: mov dword ptr [esp], eax
; OPTM1: variable_n: ; CHECK: call f2
; OPTM1: mov {{.*}}, esp
; OPTM1: push define void @variable_n_align_32(i32 %n) {
; OPTM1-NEXT: call f2 entry:
%array = alloca i8, i32 %n, align 32
%__2 = ptrtoint i8* %array to i32
call void @f2(i32 %__2)
ret void
}
; In -O2, the order of the CHECK-DAG lines in the output is switched.
; CHECK-LABEL: variable_n_align_32:
; CHECK-DAG: and esp, 4294967264
; CHECK-DAG: mov eax, dword ptr [ebp+8]
; CHECK: add eax, 31
; CHECK: and eax, 4294967264
; CHECK: sub esp, eax
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f2
declare void @f2(i32) declare void @f2(i32)
......
...@@ -22,10 +22,11 @@ entry: ...@@ -22,10 +22,11 @@ entry:
; lowering code changes. ; lowering code changes.
; CHECK: memcpy_helper: ; CHECK: memcpy_helper:
; CHECK: push ebx
; CHECK: push ebp ; CHECK: push ebp
; CHECK: mov ebp, esp ; CHECK: mov ebp, esp
; CHECK: sub esp, 20 ; CHECK: sub esp, 20
; CHECK: mov eax, dword ptr [ebp+12] ; CHECK: mov eax, dword ptr [ebp+16]
; CHECK: mov dword ptr [ebp-4], eax ; CHECK: mov dword ptr [ebp-4], eax
; CHECK: sub esp, 128 ; CHECK: sub esp, 128
; CHECK: mov dword ptr [ebp-8], esp ; CHECK: mov dword ptr [ebp-8], esp
...@@ -33,7 +34,11 @@ entry: ...@@ -33,7 +34,11 @@ entry:
; CHECK: mov dword ptr [ebp-12], eax ; CHECK: mov dword ptr [ebp-12], eax
; CHECK: movzx eax, byte ptr [ebp-4] ; CHECK: movzx eax, byte ptr [ebp-4]
; CHECK: mov dword ptr [ebp-16], eax ; CHECK: mov dword ptr [ebp-16], eax
; CHECK: push dword ptr [ebp-16] ; CHECK: sub esp, 16
; CHECK: push dword ptr [ebp-12] ; CHECK: mov ecx, dword ptr [ebp+12]
; CHECK: push dword ptr [ebp+8] ; CHECK: mov dword ptr [esp], ecx
; CHECK: mov edx, dword ptr [ebp-12]
; CHECK: mov dword ptr [esp+4], edx
; CHECK: mov ebx, dword ptr [ebp-16]
; CHECK: mov dword ptr [esp+8], ebx
; CHECK: call memcpy_helper2 ; CHECK: call memcpy_helper2
...@@ -45,11 +45,11 @@ entry: ...@@ -45,11 +45,11 @@ entry:
ret i32 %add3 ret i32 %add3
} }
; CHECK-LABEL: passFpArgs ; CHECK-LABEL: passFpArgs
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
declare i32 @ignoreFpArgsNoInline(float, i32, double) declare i32 @ignoreFpArgsNoInline(float, i32, double)
...@@ -60,7 +60,7 @@ entry: ...@@ -60,7 +60,7 @@ entry:
ret i32 %call ret i32 %call
} }
; CHECK-LABEL: passFpConstArg ; CHECK-LABEL: passFpConstArg
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
define internal i32 @passFp32ConstArg(float %a) { define internal i32 @passFp32ConstArg(float %a) {
...@@ -69,8 +69,8 @@ entry: ...@@ -69,8 +69,8 @@ entry:
ret i32 %call ret i32 %call
} }
; CHECK-LABEL: passFp32ConstArg ; CHECK-LABEL: passFp32ConstArg
; CHECK: push dword ; CHECK: mov dword ptr [esp+4], 123
; CHECK: push 123 ; CHECK: movss dword ptr [esp+8]
; CHECK: call ignoreFp32ArgsNoInline ; CHECK: call ignoreFp32ArgsNoInline
declare i32 @ignoreFp32ArgsNoInline(float, i32, float) declare i32 @ignoreFp32ArgsNoInline(float, i32, float)
...@@ -415,8 +415,8 @@ entry: ...@@ -415,8 +415,8 @@ entry:
ret double %conv ret double %conv
} }
; CHECK-LABEL: unsigned64ToDouble ; CHECK-LABEL: unsigned64ToDouble
; CHECK: push 2874 ; CHECK: mov dword ptr [esp+4], 2874
; CHECK: push 1942892530 ; CHECK: mov dword ptr [esp], 1942892530
; CHECK: call cvtui64tod ; CHECK: call cvtui64tod
; CHECK: fstp ; CHECK: fstp
......
...@@ -37,8 +37,7 @@ define float @undef_float() { ...@@ -37,8 +37,7 @@ define float @undef_float() {
entry: entry:
ret float undef ret float undef
; CHECK-LABEL: undef_float: ; CHECK-LABEL: undef_float:
; CHECK-NOT: sub esp ; CHECK: [L$float$
; CHECK: fld
} }
define <4 x i1> @undef_v4i1() { define <4 x i1> @undef_v4i1() {
......
; This file checks that Subzero generates code in accordance with the ; This file checks that Subzero generates code in accordance with the
; calling convention for vectors. ; calling convention for vectors.
; NOTE: CHECK / OPTM1 lines containing the following strings may be
; subject to change:
;
; * movups: The movups instruction may be changed to movaps when the
; load / store operation is 16 byte aligned.
;
; * stack offsets: These may need to be changed if stack alignment
; support is implemented.
;
; * stack adjustment operations
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
; RUN: %llvm2ice -O2 --verbose none %s \ ; RUN: %llvm2ice -O2 --verbose none %s \
...@@ -150,7 +139,7 @@ define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doubl ...@@ -150,7 +139,7 @@ define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doubl
entry: entry:
ret <4 x float> %arg4 ret <4 x float> %arg4
; CHECK-LABEL: test_returning_interspersed_arg4: ; CHECK-LABEL: test_returning_interspersed_arg4:
; CHECK: movups xmm0, xmmword ptr [esp+44] ; CHECK: movups xmm0, xmmword ptr [esp+52]
; CHECK: ret ; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg4: ; OPTM1-LABEL: test_returning_interspersed_arg4:
...@@ -172,33 +161,69 @@ entry: ...@@ -172,33 +161,69 @@ entry:
call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4) call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4)
ret void ret void
; CHECK-LABEL: test_passing_vectors: ; CHECK-LABEL: test_passing_vectors:
; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+4] ; CHECK: sub esp, 32
; CHECK: sub esp, 16 ; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+64]
; CHECK-NEXT: movups xmmword ptr [esp], [[ARG6]] ; CHECK: movups xmmword ptr [esp], [[ARG5]]
; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+36] ; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+48]
; CHECK: sub esp, 16 ; CHECK: movups xmmword ptr [esp+16], [[ARG6]]
; CHECK-NEXT: movups xmmword ptr [esp], [[ARG5]] ; CHECK: movups xmm0, xmmword ptr [esp+128]
; CHECK: movups xmm0, xmmword ptr [esp+116] ; CHECK: movups xmm1, xmmword ptr [esp+112]
; CHECK: movups xmm1, xmmword ptr [esp+100] ; CHECK: movups xmm2, xmmword ptr [esp+96]
; CHECK: movups xmm2, xmmword ptr [esp+84] ; CHECK: movups xmm3, xmmword ptr [esp+80]
; CHECK: movups xmm3, xmmword ptr [esp+68]
; CHECK: call VectorArgs ; CHECK: call VectorArgs
; CHECK-NEXT: add esp, 32 ; CHECK-NEXT: add esp, 32
; CHECK: ret ; CHECK: ret
; OPTM1-LABEL: test_passing_vectors: ; OPTM1-LABEL: test_passing_vectors:
; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}} ; OPTM1: sub esp, 32
; OPTM1: sub esp, 16
; OPTM1: movups xmmword ptr [esp], [[ARG6]]
; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}} ; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}}
; OPTM1: sub esp, 16 ; OPTM1: movups xmmword ptr [esp], [[ARG5]]
; OPTM1-NEXT: movups xmmword ptr [esp], [[ARG5]] ; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}}
; OPTM1: movups xmmword ptr [esp+16], [[ARG6]]
; OPTM1: movups xmm0, xmmword ptr {{.*}} ; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: movups xmm1, xmmword ptr {{.*}} ; OPTM1: movups xmm1, xmmword ptr {{.*}}
; OPTM1: movups xmm2, xmmword ptr {{.*}} ; OPTM1: movups xmm2, xmmword ptr {{.*}}
; OPTM1: movups xmm3, xmmword ptr {{.*}} ; OPTM1: movups xmm3, xmmword ptr {{.*}}
; OPTM1: call VectorArgs ; OPTM1: call VectorArgs
; OPTM1: add esp, 32 ; OPTM1-NEXT: add esp, 32
; OPTM1: ret
}
declare void @InterspersedVectorArgs(<4 x float>, i64, <4 x float>, i64, <4 x float>, float, <4 x float>, double, <4 x float>, i32, <4 x float>)
define void @test_passing_vectors_interspersed(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5, <4 x float> %arg6, <4 x float> %arg7, <4 x float> %arg8, <4 x float> %arg9) {
entry:
; Kills XMM registers so that no in-arg lowering code interferes
; with the test.
call void @killXmmRegisters()
call void @InterspersedVectorArgs(<4 x float> %arg9, i64 0, <4 x float> %arg8, i64 1, <4 x float> %arg7, float 2.000000e+00, <4 x float> %arg6, double 3.000000e+00, <4 x float> %arg5, i32 4, <4 x float> %arg4)
ret void
; CHECK-LABEL: test_passing_vectors_interspersed:
; CHECK: sub esp, 80
; CHECK: movups [[ARG9:.*]], xmmword ptr [esp+112]
; CHECK: movups xmmword ptr [esp+32], [[ARG9]]
; CHECK: movups [[ARG11:.*]], xmmword ptr [esp+96]
; CHECK: movups xmmword ptr [esp+64], [[ARG11]]
; CHECK: movups xmm0, xmmword ptr [esp+176]
; CHECK: movups xmm1, xmmword ptr [esp+160]
; CHECK: movups xmm2, xmmword ptr [esp+144]
; CHECK: movups xmm3, xmmword ptr [esp+128]
; CHECK: call InterspersedVectorArgs
; CHECK-NEXT: add esp, 80
; CHECK: ret
; OPTM1-LABEL: test_passing_vectors_interspersed:
; OPTM1: sub esp, 80
; OPTM1: movups [[ARG9:.*]], xmmword ptr {{.*}}
; OPTM1: movups xmmword ptr [esp+32], [[ARG9]]
; OPTM1: movups [[ARG11:.*]], xmmword ptr {{.*}}
; OPTM1: movups xmmword ptr [esp+64], [[ARG11]]
; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: movups xmm1, xmmword ptr {{.*}}
; OPTM1: movups xmm2, xmmword ptr {{.*}}
; OPTM1: movups xmm3, xmmword ptr {{.*}}
; OPTM1: call InterspersedVectorArgs
; OPTM1-NEXT: add esp, 80
; OPTM1: ret ; OPTM1: ret
} }
...@@ -220,8 +245,8 @@ entry: ...@@ -220,8 +245,8 @@ entry:
; OPTM1-LABEL: test_receiving_vectors: ; OPTM1-LABEL: test_receiving_vectors:
; OPTM1: call VectorReturn ; OPTM1: call VectorReturn
; OPTM1: movups [[LOC:.*]], xmm0 ; OPTM1: movups {{.*}}, xmm0
; OPTM1: movups xmm0, [[LOC]] ; OPTM1: movups xmm0, {{.*}}
; OPTM1: call VectorReturn ; OPTM1: call VectorReturn
; OPTM1: ret ; OPTM1: ret
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment