Commit 105b7044 by Matt Wala

Subzero: Align the stack at the point of function calls.

Be compatible with the x86-32 calling convention by ensuring that the stack is aligned to 16 bytes at the point of the call instruction. Also ensure that vector arguments passed on the stack are 16 byte aligned. Also, make alloca instructions respect alignment. BUG=none R=jvoung@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/444443002
parent 8835b89b
...@@ -134,7 +134,6 @@ if __name__ == '__main__': ...@@ -134,7 +134,6 @@ if __name__ == '__main__':
objs.append(bitcode) objs.append(bitcode)
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++' linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
# TODO: Remove -mstackrealign after Subzero supports stack alignment. shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', objs +
'-mstackrealign', args.driver] + objs +
['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)]) ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
...@@ -54,6 +54,14 @@ for optlevel in ${OPTLEVELS} ; do ...@@ -54,6 +54,14 @@ for optlevel in ${OPTLEVELS} ; do
--output=test_bitmanip_O${optlevel}_${attribute} --output=test_bitmanip_O${optlevel}_${attribute}
./crosstest.py -O${optlevel} --mattr ${attribute} \ ./crosstest.py -O${optlevel} --mattr ${attribute} \
--prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_calling_conv.cpp \
--driver=test_calling_conv_main.cpp \
--output=test_calling_conv_O${optlevel}_${attribute}
./crosstest.py -O${optlevel} --mattr ${attribute} \
--prefix=Subzero_ \ --prefix=Subzero_ \
--target=x8632 \ --target=x8632 \
--dir="${OUTDIR}" \ --dir="${OUTDIR}" \
...@@ -137,6 +145,7 @@ for optlevel in ${OPTLEVELS} ; do ...@@ -137,6 +145,7 @@ for optlevel in ${OPTLEVELS} ; do
"${OUTDIR}"/mem_intrin_O${optlevel}_${attribute} "${OUTDIR}"/mem_intrin_O${optlevel}_${attribute}
"${OUTDIR}"/test_arith_O${optlevel}_${attribute} "${OUTDIR}"/test_arith_O${optlevel}_${attribute}
"${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute} "${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute}
"${OUTDIR}"/test_calling_conv_O${optlevel}_${attribute}
"${OUTDIR}"/test_cast_O${optlevel}_${attribute} "${OUTDIR}"/test_cast_O${optlevel}_${attribute}
"${OUTDIR}"/test_fcmp_O${optlevel}_${attribute} "${OUTDIR}"/test_fcmp_O${optlevel}_${attribute}
"${OUTDIR}"/test_global_O${optlevel}_${attribute} "${OUTDIR}"/test_global_O${optlevel}_${attribute}
......
//===- subzero/crosstest/test_calling_conv.cpp - Implementation for tests -===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the test functions used to check that Subzero
// generates code compatible with the calling convention used by
// llc. "Caller" functions test the handling of out-args, and "callee"
// functions test the handling of in-args.
//
//===----------------------------------------------------------------------===//
#include <cstring>
#include "test_calling_conv.h"
// Reinterprets the function pointer Func as a pointer to the function type Ty
// so that it can be called with that signature.
#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
void caller_i(void) {
int arg1 = 0x12345678;
CALL_AS_TYPE(callee_i_Ty, Callee)(arg1);
}
void caller_vvvvv(void) {
v4si32 arg1 = {0, 1, 2, 3};
v4si32 arg2 = {4, 5, 6, 7};
v4si32 arg3 = {8, 9, 10, 11};
v4si32 arg4 = {12, 13, 14, 15};
v4si32 arg5 = {16, 17, 18, 19};
CALL_AS_TYPE(callee_vvvvv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5);
}
void caller_vlvlivfvdviv(void) {
v4f32 arg1 = {0, 1, 2, 3};
int64_t arg2 = 4;
v4f32 arg3 = {6, 7, 8, 9};
int64_t arg4 = 10;
int arg5 = 11;
v4f32 arg6 = {12, 13, 14, 15};
float arg7 = 16;
v4f32 arg8 = {17, 18, 19, 20};
double arg9 = 21;
v4f32 arg10 = {22, 23, 24, 25};
int arg11 = 26;
v4f32 arg12 = {27, 28, 29, 30};
CALL_AS_TYPE(callee_vlvlivfvdviv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9, arg10,
arg11, arg12);
}
// Expands to a switch case for the value ARGNUM that copies the parameter
// named arg<ARGNUM> (formed by token pasting) to the start of the global
// buffer Buf. It must be expanded inside a switch in a function that has a
// parameter with that name.
#define HANDLE_ARG(ARGNUM) \
case ARGNUM: \
memcpy(&Buf[0], &arg##ARGNUM, sizeof(arg##ARGNUM)); \
break;
// Callee under test for a single int argument. When the global ArgNum is 1 it
// copies arg1 into Buf; for any other ArgNum it leaves Buf untouched.
// Marked noinline, presumably so the argument really travels through a call.
void __attribute__((noinline)) callee_i(int arg1) {
switch (ArgNum) { HANDLE_ARG(1); }
}
// Callee under test for five v4si32 arguments. Copies the argument selected
// by the global ArgNum (numbered 1 through 5) into Buf; any other ArgNum
// leaves Buf untouched.
void __attribute__((noinline))
callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
switch (ArgNum) {
HANDLE_ARG(1);
HANDLE_ARG(2);
HANDLE_ARG(3);
HANDLE_ARG(4);
HANDLE_ARG(5);
}
}
// Callee under test for twelve arguments that interleave v4f32 vectors with
// int64_t, int, float and double scalars. Copies the argument selected by the
// global ArgNum (numbered 1 through 12) into Buf; each copy is sizeof that
// argument, so scalar arguments overwrite only the leading bytes of Buf. Any
// other ArgNum leaves Buf untouched.
void __attribute__((noinline))
callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4, int arg5,
v4f32 arg6, float arg7, v4f32 arg8, double arg9, v4f32 arg10,
int arg11, v4f32 arg12) {
switch (ArgNum) {
HANDLE_ARG(1);
HANDLE_ARG(2);
HANDLE_ARG(3);
HANDLE_ARG(4);
HANDLE_ARG(5);
HANDLE_ARG(6);
HANDLE_ARG(7);
HANDLE_ARG(8);
HANDLE_ARG(9);
HANDLE_ARG(10);
HANDLE_ARG(11);
HANDLE_ARG(12);
}
}
//===- subzero/crosstest/test_calling_conv.def - testing macros -*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines macros for testing the calling convention.
//
//===----------------------------------------------------------------------===//
#ifndef TEST_CALLING_CONV_DEF
#define TEST_CALLING_CONV_DEF
// Turns its argument into a (parenthesized) string literal.
#define STR(x) (#x)
// X-macro table with one row per caller / callee pair: the caller function,
// the callee function, and the number of arguments the callee takes. A user
// defines X(caller, callee, argc) before expanding TEST_FUNC_TABLE and
// undefines it afterwards.
#define TEST_FUNC_TABLE \
/* caller, callee, argc */ \
X(caller_i, callee_i, 1) \
X(caller_vvvvv, callee_vvvvv, 5) \
X(caller_vlvlivfvdviv, callee_vlvlivfvdviv, 12) \
// #define X(caller, callee, argc)
#endif // TEST_CALLING_CONV_DEF
//===- subzero/crosstest/test_calling_conv.h - Test prototypes --*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the function prototypes for crosstesting the calling
// convention.
//
//===----------------------------------------------------------------------===//
#include "test_calling_conv.def"
#include "vectors.h"
// Generic function pointer type used to store any callee; callers cast it
// back to the concrete callee type before calling.
typedef void (*CalleePtrTy)();
// Function the caller functions invoke.
extern CalleePtrTy Callee;
// Number of the argument the callee should record.
extern size_t ArgNum;
// Destination the callee copies the selected argument into.
extern char *Buf;
// Single int argument.
void caller_i();
// NOTE(review): no definition of caller_alloca_i / callee_alloca_i is visible
// alongside the other test functions, and neither appears in
// TEST_FUNC_TABLE - confirm whether they are still needed.
void caller_alloca_i();
typedef void callee_i_Ty(int);
callee_i_Ty callee_i;
callee_i_Ty callee_alloca_i;
// Five vector arguments.
void caller_vvvvv();
typedef void (callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
callee_vvvvv_Ty callee_vvvvv;
// Twelve arguments mixing vectors with 64-bit, 32-bit and floating-point
// scalars.
void caller_vlvlivfvdviv();
typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32,
float, v4f32, double, v4f32, int, v4f32);
callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
//===- subzero/crosstest/test_calling_conv_main.cpp - Driver for tests ----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the driver for cross testing the compatibility of
// calling conventions.
//
//===----------------------------------------------------------------------===//
/* crosstest.py --test=test_calling_conv.cpp \
--driver=test_calling_conv_main.cpp --prefix=Subzero_ \
--output=test_calling_conv */
#include <cstring>
#include <iostream>
#include <sstream>
#include "test_calling_conv.h"
namespace Subzero_ {
#include "test_calling_conv.h"
}
// The crosstest code consists of caller / callee function pairs.
//
// The caller function initializes a list of arguments and calls the
// function located at Callee.
//
// The callee function writes the argument numbered ArgNum into the
// location pointed to by Buf.
//
// testCaller() tests that caller functions, as compiled by Subzero and
// llc, pass arguments to the callee in the same way. The Caller() and
// Subzero_Caller() functions both call the same callee (which has been
// compiled by llc). The result in the global buffer is compared to
// check that it is the same value after the calls by both callers.
//
// testCallee() runs the same kind of test, except that the functions
// Callee() and Subzero_Callee() are being tested to ensure that both
// functions receive arguments from the caller in the same way. The
// caller is compiled by llc.
// Shared state between the driver and the test functions. The Subzero_
// variants are presumably the ones referenced by the Subzero-compiled copy of
// the test code (via the --prefix=Subzero_ option) - confirm.
// Number of the argument the callee records.
size_t ArgNum, Subzero_ArgNum;
// Function invoked by the caller functions.
CalleePtrTy Callee, Subzero_Callee;
// Buffer the callee copies the selected argument into.
char *Buf, *Subzero_Buf;
// Size in bytes of the buffers that receive a copied argument.
const static size_t BUF_SIZE = 16;
// Formats the first BUF_SIZE bytes of Buf as decimal byte values (0..255)
// separated by single spaces, for use in failure messages.
std::string bufAsString(const char Buf[BUF_SIZE]) {
  std::ostringstream OS;
  for (size_t i = 0; i < BUF_SIZE; ++i) {
    if (i > 0)
      OS << " ";
    // Convert through unsigned char first: where plain char is signed, a
    // direct cast to unsigned would sign-extend bytes >= 0x80 and print
    // e.g. 4294967295 instead of 255.
    OS << static_cast<unsigned>(static_cast<unsigned char>(Buf[i]));
  }
  return OS.str();
}
// Checks that each caller compiled by Subzero passes its arguments the same
// way as the same caller compiled by llc. For every argument of every entry
// in TEST_FUNC_TABLE, both callers call the same callee, which copies the
// selected argument into a buffer; the two buffers are then compared.
//
// TotalTests, Passes and Failures are incremented in place; a mismatch also
// prints both buffers to std::cout.
void testCaller(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  static struct {
    const char *CallerName, *CalleeName;
    size_t Args;
    void (*Caller)(void);
    void (*Subzero_Caller)(void);
    CalleePtrTy Callee;
  } Funcs[] = {
#define X(caller, callee, argc) \
  { \
    STR(caller), STR(callee), argc, &caller, &Subzero_::caller, \
        reinterpret_cast<CalleePtrTy>(&callee), \
  } \
  ,
      TEST_FUNC_TABLE
#undef X
  };
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  for (size_t f = 0; f < NumFuncs; ++f) {
    char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
    Callee = Subzero_Callee = Funcs[f].Callee;
    // The callees number their arguments 1..Args (arg1, arg2, ...), so the
    // selector must run over the same range. Starting at 0 would compare two
    // untouched buffers on the first iteration and never check the last
    // argument.
    for (size_t i = 1; i <= Funcs[f].Args; ++i) {
      memset(BufLlc, 0xff, sizeof(BufLlc));
      memset(BufSz, 0xff, sizeof(BufSz));
      ArgNum = Subzero_ArgNum = i;
      // Both callers reach the same callee, which writes through Buf, so Buf
      // is re-pointed between the two calls.
      Buf = BufLlc;
      Funcs[f].Caller();
      Buf = BufSz;
      Funcs[f].Subzero_Caller();
      ++TotalTests;
      if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
        ++Passes;
      } else {
        ++Failures;
        std::cout << "testCaller(Caller=" << Funcs[f].CallerName
                  << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
                  << ")\nsz =" << bufAsString(BufSz)
                  << "\nllc=" << bufAsString(BufLlc) << "\n";
      }
    }
  }
}
// Checks that each callee compiled by Subzero receives its arguments the same
// way as the same callee compiled by llc. For every argument of every entry
// in TEST_FUNC_TABLE, the same caller is run twice, once against each callee,
// and the buffers the two callees filled are compared.
//
// TotalTests, Passes and Failures are incremented in place; a mismatch also
// prints both buffers to std::cout.
void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  static struct {
    const char *CallerName, *CalleeName;
    size_t Args;
    void (*Caller)(void);
    CalleePtrTy Callee, Subzero_Callee;
  } Funcs[] = {
#define X(caller, callee, argc) \
  { \
    STR(caller), STR(callee), argc, &caller, \
        reinterpret_cast<CalleePtrTy>(&callee), \
        reinterpret_cast<CalleePtrTy>(&Subzero_::callee) \
  } \
  ,
      TEST_FUNC_TABLE
#undef X
  };
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  for (size_t f = 0; f < NumFuncs; ++f) {
    char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
    // Each callee gets its own destination buffer (the Subzero-compiled
    // callee is assumed to write through Subzero_Buf - confirm).
    Buf = BufLlc;
    Subzero_Buf = BufSz;
    // The callees number their arguments 1..Args (arg1, arg2, ...), so the
    // selector must run over the same range. Starting at 0 would compare two
    // untouched buffers on the first iteration and never check the last
    // argument.
    for (size_t i = 1; i <= Funcs[f].Args; ++i) {
      memset(BufLlc, 0xff, sizeof(BufLlc));
      memset(BufSz, 0xff, sizeof(BufSz));
      ArgNum = Subzero_ArgNum = i;
      // The same caller is used for both runs; only its target changes.
      Callee = Funcs[f].Callee;
      Funcs[f].Caller();
      Callee = Funcs[f].Subzero_Callee;
      Funcs[f].Caller();
      ++TotalTests;
      if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
        ++Passes;
      } else {
        ++Failures;
        std::cout << "testCallee(Caller=" << Funcs[f].CallerName
                  << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
                  << ")\nsz =" << bufAsString(BufSz)
                  << "\nllc=" << bufAsString(BufLlc) << "\n";
      }
    }
  }
}
// Runs the caller and callee cross tests, prints a one-line summary, and
// returns 0 when nothing failed and 1 otherwise.
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;
  size_t TotalTests = 0;
  size_t Passes = 0;
  size_t Failures = 0;
  testCaller(TotalTests, Passes, Failures);
  testCallee(TotalTests, Passes, Failures);
  std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
            << " Failures=" << Failures << "\n";
  // Do not return the raw count: it is narrowed from size_t to int, and the
  // process exit status keeps only its low 8 bits, so a multiple of 256
  // failures would be reported as success.
  return Failures == 0 ? 0 : 1;
}
...@@ -92,6 +92,9 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base, ...@@ -92,6 +92,9 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
} }
} }
// Constructs a stack-adjustment instruction of kind Adjuststack. It is
// created with a source count of 0 and a NULL Dest; Amount is stored for use
// when the instruction is emitted or dumped.
InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount)
: InstX8632(Func, InstX8632::Adjuststack, 0, NULL), Amount(Amount) {}
InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1, InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
Operand *Source2) Operand *Source2)
: InstX8632(Func, InstX8632::Mul, 2, Dest) { : InstX8632(Func, InstX8632::Mul, 2, Dest) {
...@@ -226,6 +229,12 @@ InstX8632Movp::InstX8632Movp(Cfg *Func, Variable *Dest, Operand *Source) ...@@ -226,6 +229,12 @@ InstX8632Movp::InstX8632Movp(Cfg *Func, Variable *Dest, Operand *Source)
addSource(Source); addSource(Source);
} }
// Constructs a StoreP instruction with a NULL Dest and two sources: source 0
// is the value to store and source 1 is the memory operand it is stored to.
InstX8632StoreP::InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem)
: InstX8632(Func, InstX8632::StoreP, 2, NULL) {
addSource(Value);
addSource(Mem);
}
InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem) InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem)
: InstX8632(Func, InstX8632::StoreQ, 2, NULL) { : InstX8632(Func, InstX8632::StoreQ, 2, NULL) {
addSource(Value); addSource(Value);
...@@ -933,6 +942,24 @@ void InstX8632Store::dump(const Cfg *Func) const { ...@@ -933,6 +942,24 @@ void InstX8632Store::dump(const Cfg *Func) const {
getSrc(0)->dump(Func); getSrc(0)->dump(Func);
} }
void InstX8632StoreP::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\tmovups\t";
getSrc(1)->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << "\n";
}
void InstX8632StoreP::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "storep." << getSrc(0)->getType() << " ";
getSrc(1)->dump(Func);
Str << ", ";
getSrc(0)->dump(Func);
}
void InstX8632StoreQ::emit(const Cfg *Func) const { void InstX8632StoreQ::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2); assert(getSrcSize() == 2);
...@@ -1236,6 +1263,17 @@ void InstX8632Pop::dump(const Cfg *Func) const { ...@@ -1236,6 +1263,17 @@ void InstX8632Pop::dump(const Cfg *Func) const {
Str << " = pop." << getDest()->getType() << " "; Str << " = pop." << getDest()->getType() << " ";
} }
void InstX8632AdjustStack::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\tsub\tesp, " << Amount << "\n";
Func->getTarget()->updateStackAdjustment(Amount);
}
void InstX8632AdjustStack::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "esp = sub.i32 esp, " << Amount;
}
void InstX8632Push::emit(const Cfg *Func) const { void InstX8632Push::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
......
...@@ -137,6 +137,7 @@ public: ...@@ -137,6 +137,7 @@ public:
Add, Add,
Addps, Addps,
Addss, Addss,
Adjuststack,
And, And,
Blendvps, Blendvps,
Br, Br,
...@@ -204,6 +205,7 @@ public: ...@@ -204,6 +205,7 @@ public:
Shufps, Shufps,
Sqrtss, Sqrtss,
Store, Store,
StoreP,
StoreQ, StoreQ,
Sub, Sub,
Subps, Subps,
...@@ -340,6 +342,26 @@ private: ...@@ -340,6 +342,26 @@ private:
InstX8632Label *Label; // Intra-block branch target InstX8632Label *Label; // Intra-block branch target
}; };
// AdjustStack instruction - subtracts esp by the given amount and
// updates the stack offset during code emission.
class InstX8632AdjustStack : public InstX8632 {
public:
  // Constructs the instruction in storage obtained from Func->allocate().
  // Amount is the value subtracted from esp.
  static InstX8632AdjustStack *create(Cfg *Func, SizeT Amount) {
    return new (Func->allocate<InstX8632AdjustStack>())
        InstX8632AdjustStack(Func, Amount);
  }
  virtual void emit(const Cfg *Func) const;
  virtual void dump(const Cfg *Func) const;
  static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }

private:
  InstX8632AdjustStack(Cfg *Func, SizeT Amount);
  InstX8632AdjustStack(const InstX8632AdjustStack &) LLVM_DELETED_FUNCTION;
  InstX8632AdjustStack &operator=(const InstX8632AdjustStack &)
      LLVM_DELETED_FUNCTION;
  // Private destructor, matching the sibling instruction classes (e.g.
  // InstX8632StoreP, InstX8632Movp).
  virtual ~InstX8632AdjustStack() {}
  // Value subtracted from esp.
  SizeT Amount;
};
// Call instruction. Arguments should have already been pushed. // Call instruction. Arguments should have already been pushed.
class InstX8632Call : public InstX8632 { class InstX8632Call : public InstX8632 {
public: public:
...@@ -960,6 +982,23 @@ private: ...@@ -960,6 +982,23 @@ private:
virtual ~InstX8632Movp() {} virtual ~InstX8632Movp() {}
}; };
// Store of a value to an OperandX8632 memory operand, emitted as "movups".
// Like InstX8632StoreQ, it has no Dest operand: both the value and the memory
// operand are sources.
class InstX8632StoreP : public InstX8632 {
public:
// Constructs the instruction in storage obtained from Func->allocate().
static InstX8632StoreP *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
return new (Func->allocate<InstX8632StoreP>())
InstX8632StoreP(Func, Value, Mem);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, StoreP); }
private:
InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem);
InstX8632StoreP(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
InstX8632StoreP &operator=(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632StoreP() {}
};
// This is essentially a "movq" instruction with an OperandX8632Mem // This is essentially a "movq" instruction with an OperandX8632Mem
// operand instead of Variable as the destination. It's important // operand instead of Variable as the destination. It's important
// for liveness that there is no Dest operand. // for liveness that there is no Dest operand.
......
...@@ -188,6 +188,9 @@ protected: ...@@ -188,6 +188,9 @@ protected:
void _add(Variable *Dest, Operand *Src0) { void _add(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Add::create(Func, Dest, Src0)); Context.insert(InstX8632Add::create(Func, Dest, Src0));
} }
// Creates an InstX8632AdjustStack for Amount and inserts it at the current
// lowering position.
void _adjust_stack(int32_t Amount) {
  InstX8632AdjustStack *Adjust = InstX8632AdjustStack::create(Func, Amount);
  Context.insert(Adjust);
}
void _addps(Variable *Dest, Operand *Src0) { void _addps(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Addps::create(Func, Dest, Src0)); Context.insert(InstX8632Addps::create(Func, Dest, Src0));
} }
...@@ -412,6 +415,9 @@ protected: ...@@ -412,6 +415,9 @@ protected:
void _store(Operand *Value, OperandX8632 *Mem) { void _store(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632Store::create(Func, Value, Mem)); Context.insert(InstX8632Store::create(Func, Value, Mem));
} }
// Creates an InstX8632StoreP that stores Value to Mem and inserts it at the
// current lowering position.
void _storep(Operand *Value, OperandX8632 *Mem) {
  InstX8632StoreP *Store = InstX8632StoreP::create(Func, Value, Mem);
  Context.insert(Store);
}
void _storeq(Operand *Value, OperandX8632 *Mem) { void _storeq(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632StoreQ::create(Func, Value, Mem)); Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
} }
...@@ -450,6 +456,7 @@ protected: ...@@ -450,6 +456,7 @@ protected:
const X86InstructionSet InstructionSet; const X86InstructionSet InstructionSet;
bool IsEbpBasedFrame; bool IsEbpBasedFrame;
bool NeedsStackAlignment;
size_t FrameSizeLocals; size_t FrameSizeLocals;
size_t LocalsSizeBytes; size_t LocalsSizeBytes;
llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
......
...@@ -33,40 +33,50 @@ entry: ...@@ -33,40 +33,50 @@ entry:
ret i32 %add3 ret i32 %add3
} }
; CHECK: pass64BitArg: ; CHECK: pass64BitArg:
; CHECK: push 123 ; CHECK: sub esp
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+4]
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp]
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK: mov dword ptr [esp+8], 123
; CHECK: push ; CHECK: mov dword ptr [esp+16]
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+12]
; CHECK-NEXT: push 123 ; CHECK: call ignore64BitArgNoInline
; CHECK-NEXT: push ; CHECK: sub esp
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+4]
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK: mov dword ptr [esp]
; CHECK: push ; CHECK: mov dword ptr [esp+8], 123
; CHECK-NEXT: push ; CHECK: mov dword ptr [esp+16]
; CHECK-NEXT: push 123 ; CHECK: mov dword ptr [esp+12]
; CHECK-NEXT: push ; CHECK: call ignore64BitArgNoInline
; CHECK-NEXT: push ; CHECK: sub esp
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK: mov dword ptr [esp+4]
; CHECK: mov dword ptr [esp]
; CHECK: mov dword ptr [esp+8], 123
; CHECK: mov dword ptr [esp+16]
; CHECK: mov dword ptr [esp+12]
; CHECK: call ignore64BitArgNoInline
; ;
; OPTM1: pass64BitArg: ; OPTM1: pass64BitArg:
; OPTM1: push 123 ; OPTM1: sub esp
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+4]
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp]
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1: mov dword ptr [esp+8], 123
; OPTM1: push ; OPTM1: mov dword ptr [esp+16]
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+12]
; OPTM1-NEXT: push 123 ; OPTM1: call ignore64BitArgNoInline
; OPTM1-NEXT: push ; OPTM1: sub esp
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+4]
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1: mov dword ptr [esp]
; OPTM1: push ; OPTM1: mov dword ptr [esp+8], 123
; OPTM1-NEXT: push ; OPTM1: mov dword ptr [esp+16]
; OPTM1-NEXT: push 123 ; OPTM1: mov dword ptr [esp+12]
; OPTM1-NEXT: push ; OPTM1: call ignore64BitArgNoInline
; OPTM1-NEXT: push ; OPTM1: sub esp
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1: mov dword ptr [esp+4]
; OPTM1: mov dword ptr [esp]
; OPTM1: mov dword ptr [esp+8], 123
; OPTM1: mov dword ptr [esp+16]
; OPTM1: mov dword ptr [esp+12]
; OPTM1: call ignore64BitArgNoInline
declare i32 @ignore64BitArgNoInline(i64, i32, i64) declare i32 @ignore64BitArgNoInline(i64, i32, i64)
...@@ -76,19 +86,21 @@ entry: ...@@ -76,19 +86,21 @@ entry:
ret i32 %call ret i32 %call
} }
; CHECK: pass64BitConstArg: ; CHECK: pass64BitConstArg:
; CHECK: push 3735928559 ; CHECK: sub esp
; CHECK-NEXT: push 305419896 ; CHECK: mov dword ptr [esp+4]
; CHECK-NEXT: push 123 ; CHECK-NEXT: mov dword ptr [esp]
; CHECK-NEXT: push ecx ; CHECK-NEXT: mov dword ptr [esp+8], 123
; CHECK-NEXT: push eax ; CHECK-NEXT: mov dword ptr [esp+16], 3735928559
; CHECK-NEXT: mov dword ptr [esp+12], 305419896
; CHECK-NEXT: call ignore64BitArgNoInline ; CHECK-NEXT: call ignore64BitArgNoInline
; ;
; OPTM1: pass64BitConstArg: ; OPTM1: pass64BitConstArg:
; OPTM1: push 3735928559 ; OPTM1: sub esp
; OPTM1-NEXT: push 305419896 ; OPTM1: mov dword ptr [esp+4]
; OPTM1-NEXT: push 123 ; OPTM1-NEXT: mov dword ptr [esp]
; OPTM1-NEXT: push dword ptr [ ; OPTM1-NEXT: mov dword ptr [esp+8], 123
; OPTM1-NEXT: push dword ptr [ ; OPTM1-NEXT: mov dword ptr [esp+16], 3735928559
; OPTM1-NEXT: mov dword ptr [esp+12], 305419896
; OPTM1-NEXT: call ignore64BitArgNoInline ; OPTM1-NEXT: call ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %a) { define internal i64 @return64BitArg(i64 %a) {
...@@ -240,14 +252,14 @@ entry: ...@@ -240,14 +252,14 @@ entry:
ret i64 %div ret i64 %div
} }
; CHECK-LABEL: div64BitSignedConst: ; CHECK-LABEL: div64BitSignedConst:
; CHECK: push 2874 ; CHECK: mov dword ptr [esp+12], 2874
; CHECK: push 1942892530 ; CHECK: mov dword ptr [esp+8], 1942892530
; CHECK: call __divdi3 ; CHECK: call __divdi3
; CHECK: ret ; CHECK: ret
; ;
; OPTM1-LABEL: div64BitSignedConst: ; OPTM1-LABEL: div64BitSignedConst:
; OPTM1: push 2874 ; OPTM1: mov dword ptr [esp+12], 2874
; OPTM1: push 1942892530 ; OPTM1: mov dword ptr [esp+8], 1942892530
; OPTM1: call __divdi3 ; OPTM1: call __divdi3
; OPTM1: ret ; OPTM1: ret
......
; This is a basic test of the alloca instruction - one test for alloca ; This is a basic test of the alloca instruction.
; of a fixed size, and one test for variable size.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s \ ; RUN: %llvm2ice -O2 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice -Om1 --verbose none %s \ ; RUN: %llvm2ice -Om1 --verbose none %s \
...@@ -12,45 +11,95 @@ ...@@ -12,45 +11,95 @@
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s ; RUN: | FileCheck --check-prefix=DUMP %s
define void @fixed_400(i32 %n) { define void @fixed_416_align_16(i32 %n) {
entry: entry:
%array = alloca i8, i32 400, align 16 %array = alloca i8, i32 416, align 16
%__2 = ptrtoint i8* %array to i32 %__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2) call void @f1(i32 %__2)
ret void ret void
} }
; CHECK: fixed_400: ; CHECK-LABEL: fixed_416_align_16:
; CHECK: sub esp, 400 ; CHECK: sub esp, 416
; CHECK-NEXT: mov eax, esp ; CHECK: sub esp, 16
; CHECK-NEXT: push eax ; CHECK: mov dword ptr [esp], eax
; CHECK-NEXT: call f1 ; CHECK: call f1
;
; OPTM1: fixed_400: define void @fixed_416_align_32(i32 %n) {
; OPTM1: sub esp, 400 entry:
; OPTM1-NEXT: mov {{.*}}, esp %array = alloca i8, i32 400, align 32
; OPTM1: push %__2 = ptrtoint i8* %array to i32
; OPTM1-NEXT: call f1 call void @f1(i32 %__2)
ret void
}
; CHECK-LABEL: fixed_416_align_32:
; CHECK: and esp, 4294967264
; CHECK: sub esp, 416
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f1
define void @fixed_351_align_16(i32 %n) {
entry:
%array = alloca i8, i32 351, align 16
%__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2)
ret void
}
; CHECK-LABEL: fixed_351_align_16:
; CHECK: sub esp, 352
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f1
define void @fixed_351_align_32(i32 %n) {
entry:
%array = alloca i8, i32 351, align 32
%__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2)
ret void
}
; CHECK-LABEL: fixed_351_align_32:
; CHECK: and esp, 4294967264
; CHECK: sub esp, 352
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f1
declare void @f1(i32) declare void @f1(i32)
define void @variable_n(i32 %n) { define void @variable_n_align_16(i32 %n) {
entry: entry:
%array = alloca i8, i32 %n, align 16 %array = alloca i8, i32 %n, align 16
%__2 = ptrtoint i8* %array to i32 %__2 = ptrtoint i8* %array to i32
call void @f2(i32 %__2) call void @f2(i32 %__2)
ret void ret void
} }
; CHECK: variable_n: ; CHECK-LABEL: variable_n_align_16:
; CHECK: mov eax, dword ptr [ebp+8] ; CHECK: mov eax, dword ptr [ebp+8]
; CHECK-NEXT: sub esp, eax ; CHECK: add eax, 15
; CHECK-NEXT: mov eax, esp ; CHECK: and eax, 4294967280
; CHECK-NEXT: push eax ; CHECK: sub esp, eax
; CHECK-NEXT: call f2 ; CHECK: sub esp, 16
; ; CHECK: mov dword ptr [esp], eax
; OPTM1: variable_n: ; CHECK: call f2
; OPTM1: mov {{.*}}, esp
; OPTM1: push define void @variable_n_align_32(i32 %n) {
; OPTM1-NEXT: call f2 entry:
%array = alloca i8, i32 %n, align 32
%__2 = ptrtoint i8* %array to i32
call void @f2(i32 %__2)
ret void
}
; In -O2, the order of the CHECK-DAG lines in the output is switched.
; CHECK-LABEL: variable_n_align_32:
; CHECK-DAG: and esp, 4294967264
; CHECK-DAG: mov eax, dword ptr [ebp+8]
; CHECK: add eax, 31
; CHECK: and eax, 4294967264
; CHECK: sub esp, eax
; CHECK: sub esp, 16
; CHECK: mov dword ptr [esp], eax
; CHECK: call f2
declare void @f2(i32) declare void @f2(i32)
......
...@@ -22,18 +22,23 @@ entry: ...@@ -22,18 +22,23 @@ entry:
; lowering code changes. ; lowering code changes.
; CHECK: memcpy_helper: ; CHECK: memcpy_helper:
; CHECK: push ebp ; CHECK: push ebx
; CHECK: mov ebp, esp ; CHECK: push ebp
; CHECK: sub esp, 20 ; CHECK: mov ebp, esp
; CHECK: mov eax, dword ptr [ebp+12] ; CHECK: sub esp, 20
; CHECK: mov dword ptr [ebp-4], eax ; CHECK: mov eax, dword ptr [ebp+16]
; CHECK: sub esp, 128 ; CHECK: mov dword ptr [ebp-4], eax
; CHECK: mov dword ptr [ebp-8], esp ; CHECK: sub esp, 128
; CHECK: mov eax, dword ptr [ebp-8] ; CHECK: mov dword ptr [ebp-8], esp
; CHECK: mov dword ptr [ebp-12], eax ; CHECK: mov eax, dword ptr [ebp-8]
; CHECK: movzx eax, byte ptr [ebp-4] ; CHECK: mov dword ptr [ebp-12], eax
; CHECK: mov dword ptr [ebp-16], eax ; CHECK: movzx eax, byte ptr [ebp-4]
; CHECK: push dword ptr [ebp-16] ; CHECK: mov dword ptr [ebp-16], eax
; CHECK: push dword ptr [ebp-12] ; CHECK: sub esp, 16
; CHECK: push dword ptr [ebp+8] ; CHECK: mov ecx, dword ptr [ebp+12]
; CHECK: call memcpy_helper2 ; CHECK: mov dword ptr [esp], ecx
; CHECK: mov edx, dword ptr [ebp-12]
; CHECK: mov dword ptr [esp+4], edx
; CHECK: mov ebx, dword ptr [ebp-16]
; CHECK: mov dword ptr [esp+8], ebx
; CHECK: call memcpy_helper2
...@@ -45,11 +45,11 @@ entry: ...@@ -45,11 +45,11 @@ entry:
ret i32 %add3 ret i32 %add3
} }
; CHECK-LABEL: passFpArgs ; CHECK-LABEL: passFpArgs
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
declare i32 @ignoreFpArgsNoInline(float, i32, double) declare i32 @ignoreFpArgsNoInline(float, i32, double)
...@@ -60,7 +60,7 @@ entry: ...@@ -60,7 +60,7 @@ entry:
ret i32 %call ret i32 %call
} }
; CHECK-LABEL: passFpConstArg ; CHECK-LABEL: passFpConstArg
; CHECK: push 123 ; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline ; CHECK: call ignoreFpArgsNoInline
define internal i32 @passFp32ConstArg(float %a) { define internal i32 @passFp32ConstArg(float %a) {
...@@ -69,8 +69,8 @@ entry: ...@@ -69,8 +69,8 @@ entry:
ret i32 %call ret i32 %call
} }
; CHECK-LABEL: passFp32ConstArg ; CHECK-LABEL: passFp32ConstArg
; CHECK: push dword ; CHECK: mov dword ptr [esp+4], 123
; CHECK: push 123 ; CHECK: movss dword ptr [esp+8]
; CHECK: call ignoreFp32ArgsNoInline ; CHECK: call ignoreFp32ArgsNoInline
declare i32 @ignoreFp32ArgsNoInline(float, i32, float) declare i32 @ignoreFp32ArgsNoInline(float, i32, float)
...@@ -415,8 +415,8 @@ entry: ...@@ -415,8 +415,8 @@ entry:
ret double %conv ret double %conv
} }
; CHECK-LABEL: unsigned64ToDouble ; CHECK-LABEL: unsigned64ToDouble
; CHECK: push 2874 ; CHECK: mov dword ptr [esp+4], 2874
; CHECK: push 1942892530 ; CHECK: mov dword ptr [esp], 1942892530
; CHECK: call cvtui64tod ; CHECK: call cvtui64tod
; CHECK: fstp ; CHECK: fstp
......
...@@ -37,8 +37,7 @@ define float @undef_float() { ...@@ -37,8 +37,7 @@ define float @undef_float() {
entry: entry:
ret float undef ret float undef
; CHECK-LABEL: undef_float: ; CHECK-LABEL: undef_float:
; CHECK-NOT: sub esp ; CHECK: [L$float$
; CHECK: fld
} }
define <4 x i1> @undef_v4i1() { define <4 x i1> @undef_v4i1() {
......
; This file checks that Subzero generates code in accordance with the ; This file checks that Subzero generates code in accordance with the
; calling convention for vectors. ; calling convention for vectors.
; NOTE: CHECK / OPTM1 lines containing the following strings may be
; subject to change:
;
; * movups: The movups instruction may be changed to movaps when the
; load / store operation is 16 byte aligned.
;
; * stack offsets: These may need to be changed if stack alignment
; support is implemented.
;
; * stack adjustment operations
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
; RUN: %llvm2ice -O2 --verbose none %s \ ; RUN: %llvm2ice -O2 --verbose none %s \
...@@ -150,7 +139,7 @@ define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doubl ...@@ -150,7 +139,7 @@ define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doubl
entry: entry:
ret <4 x float> %arg4 ret <4 x float> %arg4
; CHECK-LABEL: test_returning_interspersed_arg4: ; CHECK-LABEL: test_returning_interspersed_arg4:
; CHECK: movups xmm0, xmmword ptr [esp+44] ; CHECK: movups xmm0, xmmword ptr [esp+52]
; CHECK: ret ; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg4: ; OPTM1-LABEL: test_returning_interspersed_arg4:
...@@ -172,33 +161,69 @@ entry: ...@@ -172,33 +161,69 @@ entry:
call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4) call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4)
ret void ret void
; CHECK-LABEL: test_passing_vectors: ; CHECK-LABEL: test_passing_vectors:
; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+4] ; CHECK: sub esp, 32
; CHECK: sub esp, 16 ; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+64]
; CHECK-NEXT: movups xmmword ptr [esp], [[ARG6]] ; CHECK: movups xmmword ptr [esp], [[ARG5]]
; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+36] ; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+48]
; CHECK: sub esp, 16 ; CHECK: movups xmmword ptr [esp+16], [[ARG6]]
; CHECK-NEXT: movups xmmword ptr [esp], [[ARG5]] ; CHECK: movups xmm0, xmmword ptr [esp+128]
; CHECK: movups xmm0, xmmword ptr [esp+116] ; CHECK: movups xmm1, xmmword ptr [esp+112]
; CHECK: movups xmm1, xmmword ptr [esp+100] ; CHECK: movups xmm2, xmmword ptr [esp+96]
; CHECK: movups xmm2, xmmword ptr [esp+84] ; CHECK: movups xmm3, xmmword ptr [esp+80]
; CHECK: movups xmm3, xmmword ptr [esp+68]
; CHECK: call VectorArgs ; CHECK: call VectorArgs
; CHECK-NEXT: add esp, 32 ; CHECK-NEXT: add esp, 32
; CHECK: ret ; CHECK: ret
; OPTM1-LABEL: test_passing_vectors: ; OPTM1-LABEL: test_passing_vectors:
; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}} ; OPTM1: sub esp, 32
; OPTM1: sub esp, 16
; OPTM1: movups xmmword ptr [esp], [[ARG6]]
; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}} ; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}}
; OPTM1: sub esp, 16 ; OPTM1: movups xmmword ptr [esp], [[ARG5]]
; OPTM1-NEXT: movups xmmword ptr [esp], [[ARG5]] ; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}}
; OPTM1: movups xmmword ptr [esp+16], [[ARG6]]
; OPTM1: movups xmm0, xmmword ptr {{.*}} ; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: movups xmm1, xmmword ptr {{.*}} ; OPTM1: movups xmm1, xmmword ptr {{.*}}
; OPTM1: movups xmm2, xmmword ptr {{.*}} ; OPTM1: movups xmm2, xmmword ptr {{.*}}
; OPTM1: movups xmm3, xmmword ptr {{.*}} ; OPTM1: movups xmm3, xmmword ptr {{.*}}
; OPTM1: call VectorArgs ; OPTM1: call VectorArgs
; OPTM1: add esp, 32 ; OPTM1-NEXT: add esp, 32
; OPTM1: ret
}
declare void @InterspersedVectorArgs(<4 x float>, i64, <4 x float>, i64, <4 x float>, float, <4 x float>, double, <4 x float>, i32, <4 x float>)
define void @test_passing_vectors_interspersed(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5, <4 x float> %arg6, <4 x float> %arg7, <4 x float> %arg8, <4 x float> %arg9) {
entry:
; Kills XMM registers so that no in-arg lowering code interferes
; with the test.
call void @killXmmRegisters()
call void @InterspersedVectorArgs(<4 x float> %arg9, i64 0, <4 x float> %arg8, i64 1, <4 x float> %arg7, float 2.000000e+00, <4 x float> %arg6, double 3.000000e+00, <4 x float> %arg5, i32 4, <4 x float> %arg4)
ret void
; CHECK-LABEL: test_passing_vectors_interspersed:
; CHECK: sub esp, 80
; CHECK: movups [[ARG9:.*]], xmmword ptr [esp+112]
; CHECK: movups xmmword ptr [esp+32], [[ARG9]]
; CHECK: movups [[ARG11:.*]], xmmword ptr [esp+96]
; CHECK: movups xmmword ptr [esp+64], [[ARG11]]
; CHECK: movups xmm0, xmmword ptr [esp+176]
; CHECK: movups xmm1, xmmword ptr [esp+160]
; CHECK: movups xmm2, xmmword ptr [esp+144]
; CHECK: movups xmm3, xmmword ptr [esp+128]
; CHECK: call InterspersedVectorArgs
; CHECK-NEXT: add esp, 80
; CHECK: ret
; OPTM1-LABEL: test_passing_vectors_interspersed:
; OPTM1: sub esp, 80
; OPTM1: movups [[ARG9:.*]], xmmword ptr {{.*}}
; OPTM1: movups xmmword ptr [esp+32], [[ARG9]]
; OPTM1: movups [[ARG11:.*]], xmmword ptr {{.*}}
; OPTM1: movups xmmword ptr [esp+64], [[ARG11]]
; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: movups xmm1, xmmword ptr {{.*}}
; OPTM1: movups xmm2, xmmword ptr {{.*}}
; OPTM1: movups xmm3, xmmword ptr {{.*}}
; OPTM1: call InterspersedVectorArgs
; OPTM1-NEXT: add esp, 80
; OPTM1: ret ; OPTM1: ret
} }
...@@ -220,8 +245,8 @@ entry: ...@@ -220,8 +245,8 @@ entry:
; OPTM1-LABEL: test_receiving_vectors: ; OPTM1-LABEL: test_receiving_vectors:
; OPTM1: call VectorReturn ; OPTM1: call VectorReturn
; OPTM1: movups [[LOC:.*]], xmm0 ; OPTM1: movups {{.*}}, xmm0
; OPTM1: movups xmm0, [[LOC]] ; OPTM1: movups xmm0, {{.*}}
; OPTM1: call VectorReturn ; OPTM1: call VectorReturn
; OPTM1: ret ; OPTM1: ret
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment