Commit 1d235425 by John Porto

Subzero. Native 64-bit int arithmetic on x86-64.

This CL modifies the x86 instruction selection template to allow native 64-bit GPR support. It also enables x86-64 crosstests. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1273153002.
parent 83ccadcf
......@@ -325,6 +325,7 @@ $(OBJDIR)/unittest/AssemblerX8664: $(OBJDIR)/unittest
RT_SRC := runtime/szrt.c runtime/szrt_ll.ll runtime/szrt_profiler.c
RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o \
build/runtime/szrt_native_x8664.o build/runtime/szrt_sb_x8664.o \
build/runtime/szrt_native_arm32.o build/runtime/szrt_sb_arm32.o
runtime: $(RT_OBJ)
......@@ -348,10 +349,13 @@ else
check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
# Do all native/sse2 tests, but only test_vector_ops for native/sse4.1.
# For (slow) sandboxed tests, limit to Om1/sse4.1.
# TODO(jpp): implement x8664 sandbox, then enable xtests.
./pydir/crosstest_generator.py -v --lit \
--toolchain-root $(TOOLCHAIN_ROOT) \
-i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \
-i x8632,sandbox,sse4.1,Om1 \
-i x8664,native,sse2 -i x8664,native,sse4.1,test_vector_ops \
-e x8664,native,sse2,test_global \
-i arm32,native,neon,simple_loop \
-i arm32,native,neon,mem_intrin \
-i arm32,native,neon,test_bitmanip \
......
......@@ -8,6 +8,7 @@
#include <cstring>
#include "mem_intrin.h"
#include "xdefs.h"
typedef int elem_t;
......@@ -15,9 +16,9 @@ typedef int elem_t;
* Reset buf to the sequence of bytes: init, init+1, init+2 ... (length bytes in total, each value truncated to uint8_t)
*/
static void __attribute__((noinline))
reset_buf(uint8_t *buf, uint8_t init, size_t length) {
size_t i;
size_t v = init;
reset_buf(uint8_t *buf, uint8_t init, SizeT length) {
SizeT i;
SizeT v = init;
for (i = 0; i < length; ++i)
buf[i] = v++;
}
......@@ -27,8 +28,8 @@ reset_buf(uint8_t *buf, uint8_t init, size_t length) {
* smaller buffers, whose total won't approach 2**16).
*/
static int __attribute__((noinline))
fletcher_checksum(uint8_t *buf, size_t length) {
size_t i;
fletcher_checksum(uint8_t *buf, SizeT length) {
SizeT i;
int sum = 0;
int sum_of_sums = 0;
const int kModulus = 255;
......@@ -63,20 +64,20 @@ int memset_test_fixed_len(uint8_t init) {
return fletcher_checksum((uint8_t *)buf, BYTE_LENGTH);
}
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
reset_buf(buf, init, length);
memcpy((void *)buf2, (void *)buf, length);
return fletcher_checksum(buf2, length);
}
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
int sum1;
int sum2;
const int overlap_bytes = 4 * sizeof(elem_t);
if (length <= overlap_bytes)
return 0;
uint8_t *overlap_buf = buf + overlap_bytes;
size_t reduced_length = length - overlap_bytes;
SizeT reduced_length = length - overlap_bytes;
reset_buf(buf, init, length);
/* Test w/ overlap. */
......@@ -88,7 +89,7 @@ int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
return sum1 + sum2;
}
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
memset((void *)buf, init, length);
memset((void *)buf2, init + 4, length);
return fletcher_checksum(buf, length) + fletcher_checksum(buf2, length);
......
......@@ -4,10 +4,11 @@
* There is no include guard since this will be included multiple times,
* under different namespaces.
*/
#include "xdefs.h"
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length);
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length);
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length);
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memcpy_test_fixed_len(uint8_t init);
int memmove_test_fixed_len(uint8_t init);
......
......@@ -5,6 +5,8 @@
#include <cstdio>
#include "mem_intrin.h"
#include "xdefs.h"
namespace Subzero_ {
#include "mem_intrin.h"
}
......@@ -12,7 +14,7 @@ namespace Subzero_ {
#define XSTR(s) STR(s)
#define STR(s) #s
void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
#define do_test_fixed(test_func) \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \
......@@ -33,11 +35,11 @@ void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#undef do_test_fixed
}
void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
void testVariableLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
uint8_t buf[256];
uint8_t buf2[256];
#define do_test_variable(test_func) \
for (size_t len = 4; len < 128; ++len) { \
for (SizeT len = 4; len < 128; ++len) { \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \
int llc_result = test_func(buf, buf2, init_val, len); \
......@@ -58,7 +60,11 @@ void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#undef do_test_variable
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
unsigned TotalTests = 0;
unsigned Passes = 0;
unsigned Failures = 0;
......
......@@ -6,7 +6,11 @@
int simple_loop(int *a, int n);
int Subzero_simple_loop(int *a, int n);
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
unsigned TotalTests = 0;
unsigned Passes = 0;
unsigned Failures = 0;
......
//===- subzero/crosstest/stack_hack.x8664.c - X8664 stack hack ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements main() for crosstests in x86-64.
//
//===----------------------------------------------------------------------===//
#include <assert.h>
#include <stdint.h>
#include <sys/mman.h>
// X8664_STACK_HACK needs to be defined before xdefs.h is included.
#define X8664_STACK_HACK
#include "xdefs.h"
/// xSetStack switches the stack pointer: it sets %rsp to NewRsp and stores the
/// previous %rsp value through OldRsp (a pointer to the save location).
///
/// The first xchgq swaps the register holding NewRsp with %rsp, which leaves
/// the old stack pointer in that register. The second xchgq swaps that
/// register with the output operand, whose value is then written to *OldRsp.
///
/// NOTE(review): the asm writes to operand %1 even though it is declared as an
/// input-only "r" operand, and no "memory" clobber is listed -- confirm this is
/// safe for every call site and optimization level in use.
#define xSetStack(NewRsp, OldRsp) \
do { \
__asm__ volatile("xchgq %1, %%rsp\n\t" \
"xchgq %1, %0" \
: "=r"(*(OldRsp)) \
: "r"(NewRsp)); \
} while (0)
extern int wrapped_main(int argc, char *argv[]);
/// Returns the lowest address of the Size-byte stack region that ends at
/// StackEnd, i.e. StackEnd - Size, as a pointer.
///
/// When assertions are enabled, this verifies that StackEnd - Size did not
/// underflow and that the resulting start address is a multiple of 4 MB.
unsigned char *xStackStart(uint32 StackEnd, uint32 Size) {
  const uint32 PageBoundary = 4 << 20; // 4 MB.
  // Widen before subtracting: subtracting two uint32 values would wrap
  // silently, and the "StackStart wraps" check below could then never fire.
  const uint64 StackStart = (uint64)StackEnd - (uint64)Size;
  assert((StackStart & 0xFFFFFFFF00000000ull) == 0 && "StackStart wraps.");
  // Explicitly parenthesized: '&' has lower precedence than both '+' and '==',
  // so an unparenthesized mask expression does not test alignment.
  assert((StackStart & (uint64)(PageBoundary - 1)) == 0 &&
         "StackStart not aligned to page boundary.");
  (void)PageBoundary; // Only read inside assert(); keeps NDEBUG builds quiet.
  return (unsigned char *)StackStart;
}
/// Maps a Size-byte, readable and writable, private anonymous region at the
/// address xStackStart(StackEnd, Size) and returns the address reported by
/// mmap.
///
/// StackEnd must fit in 32 bits. That precondition and the success of mmap are
/// checked with assert() only.
unsigned char *xAllocStack(uint64 StackEnd, uint32 Size) {
  assert((StackEnd & 0xFFFFFFFF00000000ull) == 0 && "Invalid StackEnd.");
  unsigned char *const Start = xStackStart(StackEnd, Size);
  const int Protection = PROT_READ | PROT_WRITE;
  // MAP_FIXED asks for the mapping to be placed exactly at Start.
  const int Flags = MAP_FIXED | MAP_PRIVATE | MAP_GROWSDOWN | MAP_ANONYMOUS;
  void *const Stack = mmap(Start, Size, Protection, Flags, -1, 0);
  assert(Stack != MAP_FAILED && "mmap failed. no stack.");
  return Stack;
}
/// Unmaps the Size-byte region that starts at xStackStart(StackEnd, Size).
///
/// StackEnd must fit in 32 bits (checked with assert() only). The result of
/// munmap is not inspected.
void xDeallocStack(uint64 StackEnd, uint32 Size) {
  assert((StackEnd & 0xFFFFFFFF00000000ull) == 0 && "Invalid StackEnd.");
  unsigned char *const Start = xStackStart(StackEnd, Size);
  munmap(Start, Size);
}
/// Program entry point for this file: maps a new 40 MB stack ending at
/// 0x80000000, switches %rsp to it, runs wrapped_main() there, switches back
/// to the original stack, unmaps the new one and returns wrapped_main()'s
/// result.
int main(int argc, char *argv[]) {
  // Everything that has to survive the %rsp switches is static so that it does
  // **NOT** live in the stack that is being swapped out.
  static int Argc;
  static char **Argv;
  static const uint32_t StackEnd = 0x80000000;
  static const uint32_t StackSize = 40 * 1024 * 1024;
  static unsigned char *new_rsp;
  static unsigned char *old_rsp;
  static int Failures;
  Argc = argc;
  Argv = argv;
  // xAllocStack yields the low end of the mapping; the initial %rsp is the
  // high end, hence the "+ StackSize".
  new_rsp = xAllocStack(StackEnd, StackSize) + StackSize;
  xSetStack(new_rsp, &old_rsp);
  Failures = wrapped_main(Argc, Argv);
  // Go back to the original stack before unmapping the temporary one. The
  // value saved into new_rsp here is not read again.
  xSetStack(old_rsp, &new_rsp);
  xDeallocStack(StackEnd, StackSize);
  return Failures;
}
......@@ -17,13 +17,14 @@
#include <stdint.h>
#include "test_arith.h"
#include "xdefs.h"
#define X(inst, op, isdiv, isshift) \
bool test##inst(bool a, bool b) { return a op b; } \
uint8_t test##inst(uint8_t a, uint8_t b) { return a op b; } \
uint16_t test##inst(uint16_t a, uint16_t b) { return a op b; } \
uint32_t test##inst(uint32_t a, uint32_t b) { return a op b; } \
uint64_t test##inst(uint64_t a, uint64_t b) { return a op b; } \
uint64 test##inst(uint64 a, uint64 b) { return a op b; } \
v4ui32 test##inst(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 test##inst(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 test##inst(v16ui8 a, v16ui8 b) { return a op b; }
......@@ -35,7 +36,7 @@ UINTOP_TABLE
myint8_t test##inst(myint8_t a, myint8_t b) { return a op b; } \
int16_t test##inst(int16_t a, int16_t b) { return a op b; } \
int32_t test##inst(int32_t a, int32_t b) { return a op b; } \
int64_t test##inst(int64_t a, int64_t b) { return a op b; } \
int64 test##inst(int64 a, int64 b) { return a op b; } \
v4si32 test##inst(v4si32 a, v4si32 b) { return a op b; } \
v8si16 test##inst(v8si16 a, v8si16 b) { return a op b; } \
v16si8 test##inst(v16si8 a, v16si8 b) { return a op b; }
......
......@@ -14,6 +14,7 @@
#include <stdint.h>
#include "test_arith.def"
#include "xdefs.h"
#include "vectors.h"
......@@ -22,7 +23,7 @@
uint8_t test##inst(uint8_t a, uint8_t b); \
uint16_t test##inst(uint16_t a, uint16_t b); \
uint32_t test##inst(uint32_t a, uint32_t b); \
uint64_t test##inst(uint64_t a, uint64_t b); \
uint64 test##inst(uint64 a, uint64 b); \
v4ui32 test##inst(v4ui32 a, v4ui32 b); \
v8ui16 test##inst(v8ui16 a, v8ui16 b); \
v16ui8 test##inst(v16ui8 a, v16ui8 b);
......@@ -34,7 +35,7 @@ UINTOP_TABLE
myint8_t test##inst(myint8_t a, myint8_t b); \
int16_t test##inst(int16_t a, int16_t b); \
int32_t test##inst(int32_t a, int32_t b); \
int64_t test##inst(int64_t a, int64_t b); \
int64 test##inst(int64 a, int64 b); \
v4si32 test##inst(v4si32 a, v4si32 b); \
v8si16 test##inst(v8si16 a, v8si16 b); \
v16si8 test##inst(v16si8 a, v16si8 b);
......
......@@ -28,6 +28,8 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_arith.h"
#include "xdefs.h"
namespace Subzero_ {
#include "test_arith.h"
}
......@@ -363,7 +365,11 @@ void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
}
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......@@ -372,7 +378,7 @@ int main(int argc, char **argv) {
testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures);
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
testsInt<uint64, int64>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures);
......
......@@ -14,6 +14,8 @@
#ifndef TEST_BIT_MANIP_DEF
#define TEST_BIT_MANIP_DEF
#include "xdefs.h"
#define STR(s) #s
#define BMI_OPS \
......@@ -25,13 +27,13 @@
#define BMI_TYPES \
/* type */ \
X(uint32_t) \
X(uint64_t)
X(uint32) \
X(uint64)
// #define X(type)
#define FOR_ALL_BMI_TYPES_INST(F, inst) \
F(inst, uint32_t) \
F(inst, uint64_t)
F(inst, uint32) \
F(inst, uint64)
#define FOR_ALL_BMI_OP_TYPES(X) \
FOR_ALL_BMI_TYPES_INST(X, ctlz) \
......@@ -42,7 +44,7 @@
#define BSWAP_TABLE \
/* type, builtin_name */ \
X(uint16_t, __builtin_bswap16) \
X(uint32_t, __builtin_bswap32) \
X(uint64_t, __builtin_bswap64)
X(uint32, __builtin_bswap32) \
X(uint64, __builtin_bswap64)
#endif // TEST_BIT_MANIP_DEF
......@@ -23,11 +23,13 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_bitmanip.h"
#include "xdefs.h"
namespace Subzero_ {
#include "test_bitmanip.h"
}
volatile uint64_t Values[] = {
volatile uint64 Values[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0xc0de, 0xabcd, 0xdcba, 0x007fffff /*Max subnormal + */,
0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/,
......@@ -71,9 +73,9 @@ void testBitManip(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value)
<< "): sz=" << static_cast<uint64_t>(ResultSz)
<< " llc=" << static_cast<uint64_t>(ResultLlc) << "\n";
<< "(" << static_cast<uint64>(Value)
<< "): sz=" << static_cast<uint64>(ResultSz)
<< " llc=" << static_cast<uint64>(ResultLlc) << "\n";
}
}
}
......@@ -101,24 +103,28 @@ void testByteSwap(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value)
<< "): sz=" << static_cast<uint64_t>(ResultSz)
<< " llc=" << static_cast<uint64_t>(ResultLlc) << "\n";
<< "(" << static_cast<uint64>(Value)
<< "): sz=" << static_cast<uint64>(ResultSz)
<< " llc=" << static_cast<uint64>(ResultLlc) << "\n";
}
}
}
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
testBitManip<uint32_t>(TotalTests, Passes, Failures);
testBitManip<uint64_t>(TotalTests, Passes, Failures);
testBitManip<uint64>(TotalTests, Passes, Failures);
testByteSwap<uint16_t>(TotalTests, Passes, Failures);
testByteSwap<uint32_t>(TotalTests, Passes, Failures);
testByteSwap<uint64_t>(TotalTests, Passes, Failures);
testByteSwap<uint64>(TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
......
......@@ -17,6 +17,7 @@
#include <cstring>
#include "test_calling_conv.h"
#include "xdefs.h"
#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
......@@ -37,9 +38,9 @@ void caller_vvvvv(void) {
void caller_vlvlivfvdviv(void) {
v4f32 arg1 = {0, 1, 2, 3};
int64_t arg2 = 4;
int64 arg2 = 4;
v4f32 arg3 = {6, 7, 8, 9};
int64_t arg4 = 10;
int64 arg4 = 10;
int arg5 = 11;
v4f32 arg6 = {12, 13, 14, 15};
float arg7 = 16;
......@@ -75,8 +76,8 @@ callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
}
void __attribute__((noinline))
callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4,
int arg5, v4f32 arg6, float arg7, v4f32 arg8, double arg9,
callee_vlvlivfvdviv(v4f32 arg1, int64 arg2, v4f32 arg3, int64 arg4, int arg5,
v4f32 arg6, float arg7, v4f32 arg8, double arg9,
v4f32 arg10, int arg11, v4f32 arg12) {
switch (ArgNum) {
HANDLE_ARG(1);
......
......@@ -14,6 +14,7 @@
#include "test_calling_conv.def"
#include "vectors.h"
#include "xdefs.h"
typedef void (*CalleePtrTy)();
extern CalleePtrTy Callee;
......@@ -31,6 +32,6 @@ typedef void(callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
callee_vvvvv_Ty callee_vvvvv;
void caller_vlvlivfvdviv();
typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32,
typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64, v4f32, int64, int, v4f32,
float, v4f32, double, v4f32, int, v4f32);
callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
......@@ -162,7 +162,11 @@ void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) {
}
}
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......
......@@ -16,6 +16,7 @@
#include <stdint.h>
#include "test_cast.h"
#include "xdefs.h"
template <typename FromType, typename ToType>
ToType __attribute__((noinline)) cast(FromType a) {
......@@ -38,8 +39,8 @@ template <typename ToType> class Caster {
static ToType f(uint16_t a) { return cast<uint16_t, ToType>(a); }
static ToType f(int32_t a) { return cast<int32_t, ToType>(a); }
static ToType f(uint32_t a) { return cast<uint32_t, ToType>(a); }
static ToType f(int64_t a) { return cast<int64_t, ToType>(a); }
static ToType f(uint64_t a) { return cast<uint64_t, ToType>(a); }
static ToType f(int64 a) { return cast<int64, ToType>(a); }
static ToType f(uint64 a) { return cast<uint64, ToType>(a); }
static ToType f(float a) { return cast<float, ToType>(a); }
static ToType f(double a) { return cast<double, ToType>(a); }
};
......@@ -56,8 +57,8 @@ template class Caster<int16_t>;
template class Caster<uint16_t>;
template class Caster<int32_t>;
template class Caster<uint32_t>;
template class Caster<int64_t>;
template class Caster<uint64_t>;
template class Caster<int64>;
template class Caster<uint64>;
template class Caster<float>;
template class Caster<double>;
......@@ -67,8 +68,8 @@ template class Caster<double>;
double makeBitCasters() {
double Result = 0;
Result += castBits<uint32_t, float>(0);
Result += castBits<uint64_t, double>(0);
Result += castBits<uint64, double>(0);
Result += castBits<float, uint32_t>(0);
Result += castBits<double, uint64_t>(0);
Result += castBits<double, uint64>(0);
return Result;
}
......@@ -22,6 +22,7 @@
#include "test_arith.def"
#include "vectors.h"
#include "xdefs.h"
// Include test_cast.h twice - once normally, and once within the
// Subzero_ namespace, corresponding to the llc and Subzero translated
......@@ -82,8 +83,8 @@ void testValue(FromType Val, size_t &TotalTests, size_t &Passes,
COMPARE(cast, FromType, int16_t, Val, FromTypeString);
COMPARE(cast, FromType, uint32_t, Val, FromTypeString);
COMPARE(cast, FromType, int32_t, Val, FromTypeString);
COMPARE(cast, FromType, uint64_t, Val, FromTypeString);
COMPARE(cast, FromType, int64_t, Val, FromTypeString);
COMPARE(cast, FromType, uint64, Val, FromTypeString);
COMPARE(cast, FromType, int64, Val, FromTypeString);
COMPARE(cast, FromType, float, Val, FromTypeString);
COMPARE(cast, FromType, double, Val, FromTypeString);
}
......@@ -110,7 +111,11 @@ void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures,
}
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......@@ -147,7 +152,7 @@ int main(int argc, char **argv) {
0x80000000, 0x80000001, 0xfffffffe, 0xffffffff};
static const size_t NumValsSi32 = sizeof(ValsSi32) / sizeof(*ValsSi32);
volatile uint64_t ValsUi64[] = {
volatile uint64 ValsUi64[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ull, 0x100000001ull,
......@@ -155,7 +160,7 @@ int main(int argc, char **argv) {
0x8000000000000001ull, 0xfffffffffffffffeull, 0xffffffffffffffffull};
static const size_t NumValsUi64 = sizeof(ValsUi64) / sizeof(*ValsUi64);
volatile int64_t ValsSi64[] = {
volatile int64 ValsSi64[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ll, 0x100000001ll,
......@@ -203,13 +208,13 @@ int main(int argc, char **argv) {
testValue<int32_t>(Val, TotalTests, Passes, Failures, "int32_t");
}
for (size_t i = 0; i < NumValsUi64; ++i) {
uint64_t Val = ValsUi64[i];
testValue<uint64_t>(Val, TotalTests, Passes, Failures, "uint64_t");
COMPARE(castBits, uint64_t, double, Val, "uint64_t");
uint64 Val = ValsUi64[i];
testValue<uint64>(Val, TotalTests, Passes, Failures, "uint64");
COMPARE(castBits, uint64, double, Val, "uint64");
}
for (size_t i = 0; i < NumValsSi64; ++i) {
int64_t Val = ValsSi64[i];
testValue<int64_t>(Val, TotalTests, Passes, Failures, "int64_t");
int64 Val = ValsSi64[i];
testValue<int64>(Val, TotalTests, Passes, Failures, "int64");
}
for (size_t i = 0; i < NumValsF32; ++i) {
for (unsigned j = 0; j < 2; ++j) {
......@@ -226,7 +231,7 @@ int main(int argc, char **argv) {
if (j > 0)
Val = -Val;
testValue<double>(Val, TotalTests, Passes, Failures, "double");
COMPARE(castBits, double, uint64_t, Val, "double");
COMPARE(castBits, double, uint64, Val, "double");
}
}
testVector<v4ui32, v4f32>(TotalTests, Passes, Failures, "v4ui32", "v4f32");
......
......@@ -159,7 +159,11 @@ void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
}
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......
......@@ -15,12 +15,13 @@
#include <stdint.h>
#include "test_icmp.h"
#include "xdefs.h"
#define X(cmp, op) \
bool icmp##cmp(uint8_t a, uint8_t b) { return a op b; } \
bool icmp##cmp(uint16_t a, uint16_t b) { return a op b; } \
bool icmp##cmp(uint32_t a, uint32_t b) { return a op b; } \
bool icmp##cmp(uint64_t a, uint64_t b) { return a op b; } \
bool icmp##cmp(uint64 a, uint64 b) { return a op b; } \
v4ui32 icmp##cmp(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 icmp##cmp(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 icmp##cmp(v16ui8 a, v16ui8 b) { return a op b; }
......@@ -31,7 +32,7 @@ ICMP_U_TABLE
bool icmp##cmp(myint8_t a, myint8_t b) { return a op b; } \
bool icmp##cmp(int16_t a, int16_t b) { return a op b; } \
bool icmp##cmp(int32_t a, int32_t b) { return a op b; } \
bool icmp##cmp(int64_t a, int64_t b) { return a op b; } \
bool icmp##cmp(int64 a, int64 b) { return a op b; } \
v4si32 icmp##cmp(v4si32 a, v4si32 b) { return a op b; } \
v8si16 icmp##cmp(v8si16 a, v8si16 b) { return a op b; } \
v16si8 icmp##cmp(v16si8 a, v16si8 b) { return a op b; }
......
......@@ -15,12 +15,13 @@
#include "test_icmp.def"
#include "vectors.h"
#include "xdefs.h"
#define X(cmp, op) \
bool icmp##cmp(uint8_t a, uint8_t b); \
bool icmp##cmp(uint16_t a, uint16_t b); \
bool icmp##cmp(uint32_t a, uint32_t b); \
bool icmp##cmp(uint64_t a, uint64_t b); \
bool icmp##cmp(uint64 a, uint64 b); \
v4ui32 icmp##cmp(v4ui32 a, v4ui32 b); \
v8ui16 icmp##cmp(v8ui16 a, v8ui16 b); \
v16ui8 icmp##cmp(v16ui8 a, v16ui8 b);
......@@ -31,7 +32,7 @@ ICMP_U_TABLE
bool icmp##cmp(myint8_t a, myint8_t b); \
bool icmp##cmp(int16_t a, int16_t b); \
bool icmp##cmp(int32_t a, int32_t b); \
bool icmp##cmp(int64_t a, int64_t b); \
bool icmp##cmp(int64 a, int64 b); \
v4si32 icmp##cmp(v4si32 a, v4si32 b); \
v8si16 icmp##cmp(v8si16 a, v8si16 b); \
v16si8 icmp##cmp(v16si8 a, v16si8 b);
......
......@@ -23,10 +23,13 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_icmp.h"
namespace Subzero_ {
#include "test_icmp.h"
}
#include "xdefs.h"
volatile unsigned Values[] = {
0x0, 0x1, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x7e, 0x7f, 0x80, 0x81,
......@@ -265,7 +268,11 @@ void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) {
}
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......@@ -273,7 +280,7 @@ int main(int argc, char **argv) {
testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures);
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
testsInt<uint64, int64>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures);
......
......@@ -130,7 +130,11 @@ void testSelectI1(size_t &TotalTests, size_t &Passes, size_t &Failures) {
}
}
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......
......@@ -22,7 +22,11 @@
DECLARE_TESTS()
DECLARE_TESTS(Subzero_)
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......
......@@ -25,7 +25,11 @@ namespace Subzero_ {
#include "test_strengthreduce.h"
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......
......@@ -14,6 +14,8 @@
#ifndef TEST_SYNC_ATOMIC_DEF
#define TEST_SYNC_ATOMIC_DEF
#include "xdefs.h"
#define STR(s) #s
#define RMWOP_TABLE \
......@@ -30,14 +32,14 @@
X(uint8_t) \
X(uint16_t) \
X(uint32_t) \
X(uint64_t)
X(uint64)
//#define X(type)
#define FOR_ALL_RMWTYPES_INST(F, inst) \
F(inst, uint8_t) \
F(inst, uint16_t) \
F(inst, uint32_t) \
F(inst, uint64_t)
F(inst, uint64)
#define FOR_ALL_RMWOP_TYPES(X) \
FOR_ALL_RMWTYPES_INST(X, add) \
......
......@@ -28,11 +28,12 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_sync_atomic.h"
#include "xdefs.h"
namespace Subzero_ {
#include "test_sync_atomic.h"
}
volatile uint64_t Values[] = {
volatile uint64 Values[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x007fffff /*Max subnormal + */, 0x00800000 /*Min+ */,
0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/,
......@@ -51,7 +52,7 @@ struct {
volatile uint8_t l8;
volatile uint16_t l16;
volatile uint32_t l32;
volatile uint64_t l64;
volatile uint64 l64;
} AtomicLocs;
template <typename Type>
......@@ -91,12 +92,12 @@ void testAtomicRMW(volatile Type *AtomicLoc, size_t &TotalTests, size_t &Passes,
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value1) << ", "
<< static_cast<uint64_t>(Value2)
<< "): sz1=" << static_cast<uint64_t>(ResultSz1)
<< " llc1=" << static_cast<uint64_t>(ResultLlc1)
<< " sz2=" << static_cast<uint64_t>(ResultSz2)
<< " llc2=" << static_cast<uint64_t>(ResultLlc2) << "\n";
<< "(" << static_cast<uint64>(Value1) << ", "
<< static_cast<uint64>(Value2)
<< "): sz1=" << static_cast<uint64>(ResultSz1)
<< " llc1=" << static_cast<uint64>(ResultLlc1)
<< " sz2=" << static_cast<uint64>(ResultSz2)
<< " llc2=" << static_cast<uint64>(ResultLlc2) << "\n";
}
}
}
......@@ -137,12 +138,12 @@ void testValCompareAndSwap(volatile Type *AtomicLoc, size_t &TotalTests,
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value1) << ", "
<< static_cast<uint64_t>(Value2)
<< "): sz1=" << static_cast<uint64_t>(ResultSz1)
<< " llc1=" << static_cast<uint64_t>(ResultLlc1)
<< " sz2=" << static_cast<uint64_t>(ResultSz2)
<< " llc2=" << static_cast<uint64_t>(ResultLlc2) << "\n";
<< "(" << static_cast<uint64>(Value1) << ", "
<< static_cast<uint64>(Value2)
<< "): sz1=" << static_cast<uint64>(ResultSz1)
<< " llc1=" << static_cast<uint64>(ResultLlc1)
<< " sz2=" << static_cast<uint64>(ResultSz2)
<< " llc2=" << static_cast<uint64>(ResultLlc2) << "\n";
}
}
}
......@@ -166,6 +167,22 @@ template <typename Type> void *threadWrapper(void *Data) {
return NULL;
}
#ifndef X8664_STACK_HACK
// Without the stack hack there is nothing to set up: the attributes are left
// untouched.
void AllocStackForThread(uint32, pthread_attr_t *) {}
#else // defined(X8664_STACK_HACK)
// Allocates a 4MB stack for thread number m with xAllocStack() and installs it
// in attr. The end address passed for thread m is
// ThreadStackBase - 2 * m * ThreadStackSize, so consecutive thread stacks are
// separated by an unused gap of ThreadStackSize bytes. Aborts the process if
// the stack cannot be installed.
void AllocStackForThread(uint32 m, pthread_attr_t *attr) {
  static const uint32_t ThreadStackBase = 0x60000000;
  static const uint32_t ThreadStackSize = 4 << 20; // 4MB.
  // pthread_attr_setstack() reports failure through its return value and does
  // not set errno, so the returned code is what gets translated to text.
  const int Err = pthread_attr_setstack(
      attr, xAllocStack(ThreadStackBase - 2 * m * ThreadStackSize,
                        ThreadStackSize),
      ThreadStackSize);
  if (Err != 0) {
    std::cout << "pthread_attr_setstack: " << strerror(Err) << "\n";
    abort();
  }
}
#endif // X8664_STACK_HACK
template <typename Type>
void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
size_t &Passes, size_t &Failures) {
......@@ -184,7 +201,7 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
// Just test a few values, otherwise it takes a *really* long time.
volatile uint64_t ValuesSubset[] = {1, 0x7e, 0x000fffffffffffffffll};
volatile uint64 ValuesSubset[] = {1, 0x7e, 0x000fffffffffffffffll};
const size_t NumValuesSubset = sizeof(ValuesSubset) / sizeof(*ValuesSubset);
for (size_t f = 0; f < NumFuncs; ++f) {
......@@ -200,12 +217,18 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
++TotalTests;
const size_t NumThreads = 4;
pthread_t t[NumThreads];
pthread_attr_t attr[NumThreads];
// Try N threads w/ just Llc.
*AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) {
pthread_create(&t[m], NULL, &threadWrapper<Type>,
reinterpret_cast<void *>(&TDataLlc));
pthread_attr_init(&attr[m]);
AllocStackForThread(m, &attr[m]);
if (pthread_create(&t[m], &attr[m], &threadWrapper<Type>,
reinterpret_cast<void *>(&TDataLlc)) != 0) {
std::cout << "pthread_create failed w/ " << strerror(errno) << "\n";
abort();
}
}
for (size_t m = 0; m < NumThreads; ++m) {
pthread_join(t[m], NULL);
......@@ -215,7 +238,9 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
// Try N threads w/ both Sz and Llc.
*AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) {
if (pthread_create(&t[m], NULL, &threadWrapper<Type>,
pthread_attr_init(&attr[m]);
AllocStackForThread(m, &attr[m]);
if (pthread_create(&t[m], &attr[m], &threadWrapper<Type>,
m % 2 == 0
? reinterpret_cast<void *>(&TDataLlc)
: reinterpret_cast<void *>(&TDataSz)) != 0) {
......@@ -238,18 +263,21 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
} else {
++Failures;
std::cout << "test_with_threads_" << Funcs[f].Name
<< (8 * sizeof(Type)) << "("
<< static_cast<uint64_t>(Value1) << ", "
<< static_cast<uint64_t>(Value2)
<< "): llc=" << static_cast<uint64_t>(ResultLlc)
<< " mixed=" << static_cast<uint64_t>(ResultMixed) << "\n";
<< (8 * sizeof(Type)) << "(" << static_cast<uint64>(Value1)
<< ", " << static_cast<uint64>(Value2)
<< "): llc=" << static_cast<uint64>(ResultLlc)
<< " mixed=" << static_cast<uint64>(ResultMixed) << "\n";
}
}
}
}
}
int main(int argc, char **argv) {
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......@@ -257,18 +285,17 @@ int main(int argc, char **argv) {
testAtomicRMW<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMW<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMW<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
testAtomicRMW<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testAtomicRMW<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testValCompareAndSwap<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testValCompareAndSwap<uint16_t>(&AtomicLocs.l16, TotalTests, Passes,
Failures);
testValCompareAndSwap<uint32_t>(&AtomicLocs.l32, TotalTests, Passes,
Failures);
testValCompareAndSwap<uint64_t>(&AtomicLocs.l64, TotalTests, Passes,
Failures);
testValCompareAndSwap<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
......
......@@ -130,7 +130,11 @@ void testExtractElement(size_t &TotalTests, size_t &Passes, size_t &Failures) {
free(TestVectors);
}
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
......
//===- subzero/crosstest/xdefs.h - Definitions for the crosstests. --------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines the int64 and uint64 types to avoid link-time errors when compiling
// the crosstests in LP64.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_CROSSTEST_XDEFS_H_
#define SUBZERO_CROSSTEST_XDEFS_H_

// Integer aliases shared by the crosstest drivers and the code under test.
// The signed aliases must be signed and the unsigned aliases unsigned;
// int32 is therefore plain `int` (it was mistakenly declared unsigned).
typedef int int32;
typedef unsigned int uint32;
typedef long long int64;
typedef unsigned long long uint64;
// SizeT is what the crosstests use in place of size_t.
typedef unsigned int SizeT;

#ifdef X8664_STACK_HACK

// The X8664_STACK_HACK is an intrusive way of getting the crosstests to run in
// x86_64 LP64 even with an ILP32 model. This hack allocates a new stack for
// running the tests in the low 4GB of the address space.

// XTEST_EXTERN gives the declarations below C linkage when this header is
// included from C++; it is only defined for the duration of this header.
#ifdef __cplusplus
#define XTEST_EXTERN extern "C"
#else // !defined(__cplusplus)
#define XTEST_EXTERN extern
#endif // __cplusplus

/// xAllocStack allocates the memory chunk [StackEnd - Size - 1, StackEnd). It
/// requires StackEnd to be less than 32-bits long. Conversely, xDeallocStack
/// frees that memory chunk.
/// {@
XTEST_EXTERN unsigned char *xAllocStack(uint64 StackEnd, uint32 Size);
XTEST_EXTERN void xDeallocStack(uint64 StackEnd, uint32 Size);
/// @}

// wrapped_main is invoked by the x86-64 stack hack main. We declare a prototype
// so the compiler (and not the linker) can yell if a test's wrapped_main
// prototype does not match what we want.
XTEST_EXTERN int wrapped_main(int argc, char *argv[]);

#undef XTEST_EXTERN

#endif // X8664_STACK_HACK

#endif // SUBZERO_CROSSTEST_XDEFS_H_
......@@ -124,6 +124,8 @@ def main():
MakeRuntimesForTarget(targets.X8632Target, ll_files,
srcdir, tempdir, rtdir, args.verbose)
MakeRuntimesForTarget(targets.X8664Target, ll_files,
srcdir, tempdir, rtdir, args.verbose)
MakeRuntimesForTarget(targets.ARM32Target, ll_files,
srcdir, tempdir, rtdir, args.verbose)
......
......@@ -177,6 +177,18 @@ def main():
'szrt_{sb}_' + args.target + '.o'
).format(root=nacl_root, sb='sb' if args.sandbox else 'native'))
pure_c = os.path.splitext(args.driver)[1] == '.c'
# TargetX8664 is ilp32, but clang does not currently support such a
# configuration. In order to run the crosstests we play nasty, dangerous
# tricks with the stack pointer.
needs_stack_hack = (args.target == 'x8664')
stack_hack_params = []
if needs_stack_hack:
shellcmd('{bin}/clang -g -o stack_hack.x8664.{key}.o -c '
'stack_hack.x8664.c'.format(bin=bindir, key=key))
stack_hack_params.append('-DX8664_STACK_HACK')
stack_hack_params.append('stack_hack.x8664.{key}.o'.format(key=key))
# Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++.
compiler = '{bin}/{prefix}{cc}'.format(
bin=bindir, prefix='pnacl-' if args.sandbox else '',
......@@ -189,7 +201,7 @@ def main():
'-lm', '-lpthread',
'-Wl,--defsym=__Sz_AbsoluteZero=0'] +
target_info.cross_headers)
shellcmd([compiler, args.driver] + objs +
shellcmd([compiler] + stack_hack_params + [args.driver] + objs +
['-o', os.path.join(args.dir, args.output)] + sb_native_args)
if __name__ == '__main__':
......
......@@ -55,15 +55,17 @@ def main():
root = FindBaseNaCl()
# The rest of the attribute sets.
targets = [ 'x8632', 'arm32' ]
targets = [ 'x8632', 'x8664', 'arm32' ]
sandboxing = [ 'native', 'sandbox' ]
opt_levels = [ 'Om1', 'O2' ]
arch_attrs = { 'x8632': [ 'sse2', 'sse4.1' ],
'x8664': [ 'sse2', 'sse4.1' ],
'arm32': [ 'neon', 'hwdiv-arm' ] }
flat_attrs = []
for v in arch_attrs.values():
flat_attrs += v
arch_flags = { 'x8632': [],
'x8664': [],
# ARM doesn't have an integrated assembler yet.
'arm32': ['--filetype=asm'] }
# all_keys is only used in the help text.
......
......@@ -40,6 +40,5 @@ ARM32Target = TargetInfo(target='arm32',
ld_emu='armelf_nacl',
cross_headers=['-isystem', FindARMCrossInclude()])
def ConvertTripleToNaCl(nonsfi_triple):
  """Returns nonsfi_triple with every 'linux' substring replaced by 'nacl'."""
  pieces = nonsfi_triple.split('linux')
  return 'nacl'.join(pieces)
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
......@@ -53,7 +54,7 @@ void __Sz_profile_summary() {
printf("%s", SubzeroLogo);
for (const struct BlockProfileInfo **curr = &__Sz_block_profile_info;
*curr != NULL; ++curr) {
printf("%lld\t%s\n", (*curr)->Counter, (*curr)->BlockName);
printf("%" PRIu64 "\t%s\n", (*curr)->Counter, (*curr)->BlockName);
}
fflush(stdout);
}
......@@ -243,9 +243,9 @@ public:
// Cross Xmm/GPR cast instructions.
template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, SReg_t);
typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, Type, SReg_t);
typedef void (AssemblerX86Base::*TypedEmitAddr)(
Type, DReg_t, const typename Traits::Address &);
Type, DReg_t, Type, const typename Traits::Address &);
TypedEmitRegs RegReg;
TypedEmitAddr RegAddr;
......@@ -299,7 +299,14 @@ public:
typename Traits::GPRRegister src);
void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm);
void movFromAh(const typename Traits::GPRRegister dst);
/// movabs for 64-bit targets. This overload is only viable when T::Is64Bit is
/// true (T defaults to Traits); it is declared here and defined elsewhere.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type
movabs(const typename Traits::GPRRegister Dst, uint64_t Imm64);
/// movabs for targets where T::Is64Bit is false. The arguments are ignored and
/// the call unconditionally reports a fatal error.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type
movabs(const typename Traits::GPRRegister, uint64_t) {
llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
}
void movzx(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src);
......@@ -328,11 +335,13 @@ public:
void movss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
void movd(typename Traits::XmmRegister dst, typename Traits::GPRRegister src);
void movd(typename Traits::XmmRegister dst,
void movd(Type SrcTy, typename Traits::XmmRegister dst,
typename Traits::GPRRegister src);
void movd(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void movd(typename Traits::GPRRegister dst, typename Traits::XmmRegister src);
void movd(const typename Traits::Address &dst,
void movd(Type DestTy, typename Traits::GPRRegister dst,
typename Traits::XmmRegister src);
void movd(Type DestTy, const typename Traits::Address &dst,
typename Traits::XmmRegister src);
void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
......@@ -504,9 +513,9 @@ public:
void cvttps2dq(Type, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
typename Traits::GPRRegister src);
void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
const typename Traits::Address &src);
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
......@@ -514,9 +523,9 @@ public:
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
typename Traits::XmmRegister src);
void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
const typename Traits::Address &src);
void ucomiss(Type Ty, typename Traits::XmmRegister a,
......@@ -719,6 +728,12 @@ public:
void cbw();
void cwd();
void cdq();
/// cqo for 64-bit targets. This overload is only viable when T::Is64Bit is
/// true (T defaults to Traits); it is declared here and defined elsewhere.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type cqo();
/// cqo for targets where T::Is64Bit is false: unconditionally reports a fatal
/// error.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type cqo() {
llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
}
void div(Type Ty, typename Traits::GPRRegister reg);
void div(Type Ty, const typename Traits::Address &address);
......@@ -936,7 +951,7 @@ private:
typename Traits::GPRRegister>::value;
return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
isByteSizedArithType(Ty);
isByteSizedType(Ty);
};
// assembleAndEmitRex is used for determining which (if any) rex prefix should
......
......@@ -362,8 +362,7 @@ void ELFRelocationSection::writeData(const GlobalContext &Ctx, ELFStreamer &Str,
llvm::report_fatal_error("Missing symbol mentioned in reloc");
if (IsELF64) {
llvm_unreachable(
"Not tested -- check that Fixup.offset() is correct even for pc-rel");
// TODO(jpp): check that Fixup.offset() is correct even for pc-rel.
Elf64_Rela Rela;
Rela.r_offset = Fixup.position();
Rela.setSymbolAndType(Symbol->getNumber(), Fixup.kind());
......
......@@ -206,7 +206,7 @@ MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
Disp = CR->getOffset();
Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR);
Fixup = Asm->createFixup(RelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
}
......
......@@ -179,8 +179,8 @@ MachineTraits<TargetX8664>::X86OperandMem::toAsmAddress(
Disp = static_cast<int32_t>(CI->getValue());
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
Disp = CR->getOffset();
Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR);
Disp = CR->getOffset() - 4;
Fixup = Asm->createFixup(PcRelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
}
......
......@@ -1100,6 +1100,8 @@ class InstX86Movsx
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> {
public:
/// Builds an InstX86Movsx in storage obtained from Func. In builds with
/// assertions enabled, Dest must be strictly wider (in bytes) than Src.
static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
  assert(typeWidthInBytes(Src->getType()) <
         typeWidthInBytes(Dest->getType()));
  auto *Storage = Func->allocate<InstX86Movsx>();
  return new (Storage) InstX86Movsx(Func, Dest, Src);
}
......@@ -1116,6 +1118,8 @@ class InstX86Movzx
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> {
public:
/// Builds an InstX86Movzx in storage obtained from Func. In builds with
/// assertions enabled, Dest must be strictly wider (in bytes) than Src.
static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
  assert(typeWidthInBytes(Src->getType()) <
         typeWidthInBytes(Dest->getType()));
  auto *Storage = Func->allocate<InstX86Movzx>();
  return new (Storage) InstX86Movzx(Func, Dest, Src);
}
......
......@@ -792,7 +792,7 @@ void TargetDataX8632::lowerJumpTables() {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JT : Ctx->getJumpTables())
Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
Writer->writeJumpTable(JT, TargetX8632::Traits::RelFixup);
} break;
case FT_Asm:
// Already emitted from Cfg
......@@ -821,7 +821,8 @@ void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
Writer->writeDataSection(Vars, TargetX8632::Traits::RelFixup,
SectionSuffix);
} break;
case FT_Asm:
case FT_Iasm: {
......
......@@ -68,6 +68,7 @@ template <> struct MachineTraits<TargetX8632> {
static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32;
static const FixupKind RelFixup = llvm::ELF::R_386_32;
class Operand {
public:
......@@ -272,6 +273,7 @@ template <> struct MachineTraits<TargetX8632> {
};
static const char *TargetName;
static constexpr Type WordType = IceType_i32;
static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM);
......
......@@ -123,7 +123,7 @@ getRegisterForGprArgNum(uint32_t ArgNum) {
}
// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
// OperandList in lowerCall. std::max() was supposed to work, but it doesn't.
// OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
// Kept as a single return expression; yields S0 when both values are equal.
constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
  return (S1 > S0) ? S1 : S0;
}
} // end of anonymous namespace
......@@ -239,7 +239,6 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
......@@ -250,12 +249,8 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
case IceType_i8:
case IceType_i16:
case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64:
// TODO(jpp): return i64 in a GPR.
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_f32:
case IceType_f64:
......@@ -271,27 +266,16 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
}
}
Operand *CallTarget = legalize(Instr->getCallTarget());
Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm);
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
if (NeedSandboxing)
_bundle_unlock();
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
if (NeedSandboxing) {
llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
......@@ -315,25 +299,11 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
assert(ReturnReg && "x86-64 always returns value on registers.");
// Assign the result of the call to Dest.
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
return;
}
assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
assert(isScalarFloatingType(Dest->getType()) ||
isScalarIntegerType(Dest->getType()));
_mov(Dest, ReturnReg);
}
}
......@@ -356,36 +326,36 @@ void TargetX8664::lowerArguments() {
++i) {
Variable *Arg = Args[i];
Type Ty = Arg->getType();
if ((isVectorType(Ty) || isScalarFloatingType(Ty)) &&
NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
// Replace Arg in the argument list with the home register. Then
// generate an instruction in the prolog to copy the home register
// to the assigned location of Arg.
int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs);
Variable *RegisterArg = nullptr;
int32_t RegNum = Variable::NoRegister;
if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
continue;
}
RegNum = getRegisterForXmmArgNum(NumXmmArgs);
++NumXmmArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
} else if (isScalarIntegerType(Ty) &&
NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
int32_t RegNum = getRegisterForGprArgNum(NumGprArgs);
RegisterArg = Func->makeVariable(Ty);
} else if (isScalarIntegerType(Ty)) {
if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
continue;
}
RegNum = getRegisterForGprArgNum(NumGprArgs);
++NumGprArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
RegisterArg = Func->makeVariable(Ty);
}
assert(RegNum != Variable::NoRegister);
assert(RegisterArg != nullptr);
// Replace Arg in the argument list with the home register. Then
// generate an instruction in the prolog to copy the home register
// to the assigned location of Arg.
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
}
}
......@@ -393,19 +363,11 @@ void TargetX8664::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
// TODO(jpp): this is not needed.
if (Src0->getType() == IceType_i64) {
Variable *eax =
legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
Variable *edx =
legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
Reg = eax;
Context.insert(InstFakeUse::create(Func, edx));
} else if (isScalarFloatingType(Src0->getType())) {
_fld(Src0);
} else if (isVectorType(Src0->getType())) {
if (isVectorType(Src0->getType()) ||
isScalarFloatingType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else {
assert(isScalarIntegerType(Src0->getType()));
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
......@@ -577,19 +539,17 @@ void TargetX8664::addProlog(CfgNode *Node) {
unsigned NumGPRArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
if (isScalarFloatingType(Arg->getType()) &&
NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
if (isScalarIntegerType(Arg->getType()) &&
NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
++NumGPRArgs;
continue;
if (isVectorType(Arg->getType()) || isScalarFloatingType(Arg->getType())) {
if (NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
} else {
assert(isScalarIntegerType(Arg->getType()));
if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
++NumGPRArgs;
continue;
}
}
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
......@@ -679,23 +639,9 @@ void TargetX8664::addEpilog(CfgNode *Node) {
}
}
if (!Ctx->getFlags().getUseSandboxing())
return;
// Change the original ret instruction into a sandboxed return sequence.
// t:ecx = pop
// bundle_lock
// and t, ~31
// jmp *t
// bundle_unlock
// FakeUse <original_ret_operand>
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_ecx);
lowerIndirectJump(T_ecx);
if (RI->getSrcSize()) {
Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert(InstFakeUse::create(Func, RetValue));
if (Ctx->getFlags().getUseSandboxing()) {
llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
RI->setDeleted();
}
void TargetX8664::emitJumpTable(const Cfg *Func,
......@@ -858,8 +804,7 @@ void TargetDataX8664::lowerJumpTables() {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JumpTable : Ctx->getJumpTables())
// TODO(jpp): not 386.
Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
Writer->writeJumpTable(JumpTable, TargetX8664::Traits::RelFixup);
} break;
case FT_Asm:
// Already emitted from Cfg
......@@ -888,8 +833,8 @@ void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
// TODO(jpp): not 386.
Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
Writer->writeDataSection(Vars, TargetX8664::Traits::RelFixup,
SectionSuffix);
} break;
case FT_Asm:
case FT_Iasm: {
......
......@@ -66,7 +66,8 @@ template <> struct MachineTraits<TargetX8664> {
using RegisterSet = ::Ice::RegX8664;
static const GPRRegister Encoded_Reg_Accumulator = RegX8664::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx;
static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32; // TODO(jpp): ???
static const FixupKind PcRelFixup = llvm::ELF::R_X86_64_PC32;
static const FixupKind RelFixup = llvm::ELF::R_X86_64_32S;
class Operand {
public:
......@@ -270,8 +271,8 @@ template <> struct MachineTraits<TargetX8664> {
static Address ofConstPool(Assembler *Asm, const Constant *Imm) {
// TODO(jpp): ???
AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm);
const RelocOffsetT Offset = 0;
AssemblerFixup *Fixup = Asm->createFixup(RelFixup, Imm);
const RelocOffsetT Offset = 4;
return Address(ABSOLUTE, Offset, Fixup);
}
};
......@@ -293,6 +294,7 @@ template <> struct MachineTraits<TargetX8664> {
};
static const char *TargetName;
static constexpr Type WordType = IceType_i64;
static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM);
......@@ -331,7 +333,7 @@ template <> struct MachineTraits<TargetX8664> {
#define X(val, encode, name64, name32, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \
(*IntegerRegisters)[RegisterSet::val] = isInt; \
(*IntegerRegistersI8)[RegisterSet::val] = 1; \
(*IntegerRegistersI8)[RegisterSet::val] = isInt; \
(*FloatRegisters)[RegisterSet::val] = isFP; \
(*VectorRegisters)[RegisterSet::val] = isFP; \
(*ScratchRegs)[RegisterSet::val] = scratch;
......@@ -450,7 +452,7 @@ template <> struct MachineTraits<TargetX8664> {
/// address.
static const uint32_t X86_STACK_ALIGNMENT_BYTES;
/// Size of the return address on the stack
static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
static const uint32_t X86_RET_IP_SIZE_BYTES = 8;
/// The number of different NOP instructions
static const uint32_t X86_NUM_NOP_VARIANTS = 5;
......
......@@ -21,6 +21,7 @@
#include "IceInst.h"
#include "IceSwitchLowering.h"
#include "IceTargetLowering.h"
#include "IceUtils.h"
#include <type_traits>
#include <utility>
......@@ -80,10 +81,9 @@ public:
: Traits::RegisterSet::Reg_esp;
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1,
// i8, and i16 are rounded up to 4 bytes.
// TODO(jpp): this needs to round to multiples of 8 bytes in x86-64.
return (typeWidthInBytes(Ty) + 3) & ~3;
// Round up to the next multiple of WordType bytes.
const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType);
return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
}
SizeT getMinJumpTableSize() const override { return 4; }
......@@ -98,14 +98,40 @@ public:
void emit(const ConstantDouble *C) const final;
void initNodeForLowering(CfgNode *Node) override;
/// Ensure that a 64-bit Variable has been split into 2 32-bit
/// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit
/// Variables, creating them if necessary. This is needed for all
/// I64 operations, and it is needed for pushing F64 arguments for
/// function calls using the 32-bit push instruction (though the
/// latter could be done by directly writing to the stack).
void split64(Variable *Var);
Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand);
///
/// x86-64: Complains loudly if invoked because the cpu can handle
/// 64-bit types natively.
/// split64 for targets where T::Is64Bit is false (T defaults to Traits). Only
/// declared here; the definition lives elsewhere.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var);
/// split64 for targets where T::Is64Bit is true. The argument is ignored and
/// the call unconditionally reports a fatal error.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type split64(Variable *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (split64)");
}
/// loOperand for targets where T::Is64Bit is false (T defaults to Traits).
/// Returns an Operand *; only declared here, defined elsewhere.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, Operand>::type *
loOperand(Operand *Operand);
/// loOperand for targets where T::Is64Bit is true. The argument is ignored and
/// the call unconditionally reports a fatal error, so no value is returned.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, Operand>::type *loOperand(Operand *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (loOperand)");
}
/// hiOperand for targets where T::Is64Bit is false (T defaults to Traits).
/// Returns an Operand *; only declared here, defined elsewhere.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, Operand>::type *
hiOperand(Operand *Operand);
/// hiOperand for targets where T::Is64Bit is true. The argument is ignored and
/// the call unconditionally reports a fatal error, so no value is returned.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, Operand>::type *hiOperand(Operand *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (hiOperand)");
}
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
typename Traits::Address stackVarToAsmOperand(const Variable *Var) const;
......@@ -128,6 +154,19 @@ protected:
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
void lowerIcmp(const InstIcmp *Inst) override;
/// lowerIcmp64 for targets where T::Is64Bit is true (T defaults to Traits).
/// Complains loudly if invoked because the cpu can handle 64-bit types
/// natively: the argument is ignored and a fatal error is reported.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type
lowerIcmp64(const InstIcmp *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (lowerIcmp64)");
}
/// lowerIcmp64 for targets where T::Is64Bit is false: handles 64-bit icmp
/// lowering. Only declared here; the definition lives elsewhere.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type
lowerIcmp64(const InstIcmp *Inst);
void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
void lowerInsertElement(const InstInsertElement *Inst) override;
void lowerLoad(const InstLoad *Inst) override;
......
......@@ -538,7 +538,8 @@ TEST_F(AssemblerX8632Test, MovdToXmm) {
\
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##Src, Immediate(Value)); \
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movd(XmmRegister::Encoded_Reg_##Dst, GPRRegister::Encoded_Reg_##Src); \
__ movd(IceType_i32, XmmRegister::Encoded_Reg_##Dst, \
GPRRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
\
......@@ -560,7 +561,7 @@ TEST_F(AssemblerX8632Test, MovdToXmm) {
const uint64_t V1 = 0xFFFFFFFF00000000ull; \
\
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
__ movd(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movd(IceType_i32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
\
......@@ -609,7 +610,8 @@ TEST_F(AssemblerX8632Test, MovdFromXmm) {
const uint32_t V0 = Value; \
\
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ movd(GPRRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src); \
__ movd(IceType_i32, GPRRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
\
......@@ -631,7 +633,7 @@ TEST_F(AssemblerX8632Test, MovdFromXmm) {
const uint32_t V1 = ~(Value); \
\
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ movd(dwordAddress(T1), XmmRegister::Encoded_Reg_##Src); \
__ movd(IceType_i32, dwordAddress(T1), XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
\
......
......@@ -1072,7 +1072,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##SrcValue)); \
__ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
__ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
GPRRegister::Encoded_Reg_##GPR); \
\
AssembledTest test = assemble(); \
......@@ -1092,7 +1092,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##DstValue)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, GPRRegister::Encoded_Reg_##GPR, \
__ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
......@@ -1132,7 +1132,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
const uint32_t T1 = allocateDword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
__ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
......@@ -1152,7 +1152,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
\
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##DstValue)); \
__ cvt##Inst(IceType_f##Size, GPRRegister::Encoded_Reg_##GPR, \
__ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
......
......@@ -1104,15 +1104,16 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \
} while (0)
#define TestImplSXmmReg(Dst, GPR, Inst, Size) \
#define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \
"(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")"; \
const uint32_t T0 = allocateDqword(); \
\
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
__ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR()); \
__ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
Encoded_GPR_##GPR()); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##DstValue); \
......@@ -1122,21 +1123,23 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \
} while (0)
#define TestImplSRegXmm(GPR, Src, Inst, Size) \
#define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size) \
do { \
static constexpr char TestString[] = \
"(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \
"(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")"; \
const uint32_t T0 = allocateDqword(); \
\
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
__ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src()); \
__ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
Encoded_Xmm_##Src()); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##SrcValue); \
test.run(); \
\
ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
test.GPR()) \
<< TestString; \
reset(); \
} while (0)
......@@ -1160,15 +1163,16 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \
} while (0)
#define TestImplSXmmAddr(Dst, Inst, Size) \
#define TestImplSXmmAddr(Dst, Inst, Size, IntType) \
do { \
static constexpr char TestString[] = \
"(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
"(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDword(); \
\
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
__ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##DstValue); \
......@@ -1179,20 +1183,22 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \
} while (0)
// TestImplSRegAddr: scalar cvt##Inst test with a memory source operand and a
// general-purpose destination register (GPR).
// Sequence visible in this block: allocate a dqword slot T0; emit a 32-bit mov
// that seeds GPR with Inst##Size##DstValue; emit the cvt##Inst under test with
// dwordAddress(T0) as its source. After assembling, T0 is filled with
// Inst##Size##SrcValue, the code is run, and the value read back through
// test.GPR() is compared against Inst##Size##Expected, with TestString
// attached to any failure message.
// NOTE(review): this span is a rendered diff without +/- markers. Lines that
// appear as near-duplicate pairs look like the pre-change line followed by
// its replacement (the replacement adds an IntSize parameter that selects both
// the IceType_i##IntSize destination type and the uint##IntSize##_t cast) --
// confirm against the actual commit before treating this as compilable code.
// NOTE(review): the newer TestString puts "f" #Size before #IntSize, whereas
// TestImplSRegXmm's newer TestString puts #IntSize first -- confirm whether
// the differing order in the failure label is intentional.
#define TestImplSRegAddr(GPR, Inst, Size) \
#define TestImplSRegAddr(GPR, Inst, IntSize, Size) \
do { \
static constexpr char TestString[] = \
"(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \
"(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")"; \
const uint32_t T0 = allocateDqword(); \
\
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
__ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), dwordAddress(T0)); \
__ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##SrcValue); \
test.run(); \
\
ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
test.GPR()) \
<< TestString; \
reset(); \
} while (0)
......@@ -1203,10 +1209,14 @@ TEST_F(AssemblerX8664Test, Cvt) {
TestImplPXmmAddr(Src, dq2ps, Size); \
TestImplPXmmXmm(Dst, Src, tps2dq, Size); \
TestImplPXmmAddr(Src, tps2dq, Size); \
TestImplSXmmReg(Dst, GPR, si2ss, Size); \
TestImplSXmmAddr(Dst, si2ss, Size); \
TestImplSRegXmm(GPR, Src, tss2si, Size); \
TestImplSRegAddr(GPR, tss2si, Size); \
TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32); \
TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64); \
TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32); \
TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64); \
TestImplSRegXmm(GPR, Src, tss2si, 32, Size); \
TestImplSRegXmm(GPR, Src, tss2si, 64, Size); \
TestImplSRegAddr(GPR, tss2si, 32, Size); \
TestImplSRegAddr(GPR, tss2si, 64, Size); \
TestImplPXmmXmm(Dst, Src, float2float, Size); \
TestImplPXmmAddr(Src, float2float, Size); \
} while (0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment