Commit 1d235425 by John Porto

Subzero. Native 64-bit int arithmetic on x86-64.

This CL modifies the x86 instruction selection template to allow native 64-bit GPR support. It also enables x86-64 crosstests. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1273153002.
parent 83ccadcf
...@@ -325,6 +325,7 @@ $(OBJDIR)/unittest/AssemblerX8664: $(OBJDIR)/unittest ...@@ -325,6 +325,7 @@ $(OBJDIR)/unittest/AssemblerX8664: $(OBJDIR)/unittest
RT_SRC := runtime/szrt.c runtime/szrt_ll.ll runtime/szrt_profiler.c RT_SRC := runtime/szrt.c runtime/szrt_ll.ll runtime/szrt_profiler.c
RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o \ RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o \
build/runtime/szrt_native_x8664.o build/runtime/szrt_sb_x8664.o \
build/runtime/szrt_native_arm32.o build/runtime/szrt_sb_arm32.o build/runtime/szrt_native_arm32.o build/runtime/szrt_sb_arm32.o
runtime: $(RT_OBJ) runtime: $(RT_OBJ)
...@@ -348,10 +349,13 @@ else ...@@ -348,10 +349,13 @@ else
check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
# Do all native/sse2 tests, but only test_vector_ops for native/sse4.1. # Do all native/sse2 tests, but only test_vector_ops for native/sse4.1.
# For (slow) sandboxed tests, limit to Om1/sse4.1. # For (slow) sandboxed tests, limit to Om1/sse4.1.
# TODO(jpp): implement x8664 sandbox, then enable xtests.
./pydir/crosstest_generator.py -v --lit \ ./pydir/crosstest_generator.py -v --lit \
--toolchain-root $(TOOLCHAIN_ROOT) \ --toolchain-root $(TOOLCHAIN_ROOT) \
-i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \ -i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \
-i x8632,sandbox,sse4.1,Om1 \ -i x8632,sandbox,sse4.1,Om1 \
-i x8664,native,sse2 -i x8664,native,sse4.1,test_vector_ops \
-e x8664,native,sse2,test_global \
-i arm32,native,neon,simple_loop \ -i arm32,native,neon,simple_loop \
-i arm32,native,neon,mem_intrin \ -i arm32,native,neon,mem_intrin \
-i arm32,native,neon,test_bitmanip \ -i arm32,native,neon,test_bitmanip \
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <cstring> #include <cstring>
#include "mem_intrin.h" #include "mem_intrin.h"
#include "xdefs.h"
typedef int elem_t; typedef int elem_t;
...@@ -15,9 +16,9 @@ typedef int elem_t; ...@@ -15,9 +16,9 @@ typedef int elem_t;
* Reset buf to the sequence of bytes: n, n+1, n+2 ... length - 1 * Reset buf to the sequence of bytes: n, n+1, n+2 ... length - 1
*/ */
static void __attribute__((noinline)) static void __attribute__((noinline))
reset_buf(uint8_t *buf, uint8_t init, size_t length) { reset_buf(uint8_t *buf, uint8_t init, SizeT length) {
size_t i; SizeT i;
size_t v = init; SizeT v = init;
for (i = 0; i < length; ++i) for (i = 0; i < length; ++i)
buf[i] = v++; buf[i] = v++;
} }
...@@ -27,8 +28,8 @@ reset_buf(uint8_t *buf, uint8_t init, size_t length) { ...@@ -27,8 +28,8 @@ reset_buf(uint8_t *buf, uint8_t init, size_t length) {
* smaller buffers, whose total won't approach 2**16). * smaller buffers, whose total won't approach 2**16).
*/ */
static int __attribute__((noinline)) static int __attribute__((noinline))
fletcher_checksum(uint8_t *buf, size_t length) { fletcher_checksum(uint8_t *buf, SizeT length) {
size_t i; SizeT i;
int sum = 0; int sum = 0;
int sum_of_sums = 0; int sum_of_sums = 0;
const int kModulus = 255; const int kModulus = 255;
...@@ -63,20 +64,20 @@ int memset_test_fixed_len(uint8_t init) { ...@@ -63,20 +64,20 @@ int memset_test_fixed_len(uint8_t init) {
return fletcher_checksum((uint8_t *)buf, BYTE_LENGTH); return fletcher_checksum((uint8_t *)buf, BYTE_LENGTH);
} }
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) { int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
reset_buf(buf, init, length); reset_buf(buf, init, length);
memcpy((void *)buf2, (void *)buf, length); memcpy((void *)buf2, (void *)buf, length);
return fletcher_checksum(buf2, length); return fletcher_checksum(buf2, length);
} }
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) { int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
int sum1; int sum1;
int sum2; int sum2;
const int overlap_bytes = 4 * sizeof(elem_t); const int overlap_bytes = 4 * sizeof(elem_t);
if (length <= overlap_bytes) if (length <= overlap_bytes)
return 0; return 0;
uint8_t *overlap_buf = buf + overlap_bytes; uint8_t *overlap_buf = buf + overlap_bytes;
size_t reduced_length = length - overlap_bytes; SizeT reduced_length = length - overlap_bytes;
reset_buf(buf, init, length); reset_buf(buf, init, length);
/* Test w/ overlap. */ /* Test w/ overlap. */
...@@ -88,7 +89,7 @@ int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) { ...@@ -88,7 +89,7 @@ int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
return sum1 + sum2; return sum1 + sum2;
} }
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) { int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
memset((void *)buf, init, length); memset((void *)buf, init, length);
memset((void *)buf2, init + 4, length); memset((void *)buf2, init + 4, length);
return fletcher_checksum(buf, length) + fletcher_checksum(buf2, length); return fletcher_checksum(buf, length) + fletcher_checksum(buf2, length);
......
...@@ -4,10 +4,11 @@ ...@@ -4,10 +4,11 @@
* There is no include guard since this will be included multiple times, * There is no include guard since this will be included multiple times,
* under different namespaces. * under different namespaces.
*/ */
#include "xdefs.h"
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length); int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length); int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length); int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memcpy_test_fixed_len(uint8_t init); int memcpy_test_fixed_len(uint8_t init);
int memmove_test_fixed_len(uint8_t init); int memmove_test_fixed_len(uint8_t init);
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#include <cstdio> #include <cstdio>
#include "mem_intrin.h" #include "mem_intrin.h"
#include "xdefs.h"
namespace Subzero_ { namespace Subzero_ {
#include "mem_intrin.h" #include "mem_intrin.h"
} }
...@@ -12,7 +14,7 @@ namespace Subzero_ { ...@@ -12,7 +14,7 @@ namespace Subzero_ {
#define XSTR(s) STR(s) #define XSTR(s) STR(s)
#define STR(s) #s #define STR(s) #s
void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
#define do_test_fixed(test_func) \ #define do_test_fixed(test_func) \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \ for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \ ++TotalTests; \
...@@ -33,11 +35,11 @@ void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -33,11 +35,11 @@ void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#undef do_test_fixed #undef do_test_fixed
} }
void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testVariableLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
uint8_t buf[256]; uint8_t buf[256];
uint8_t buf2[256]; uint8_t buf2[256];
#define do_test_variable(test_func) \ #define do_test_variable(test_func) \
for (size_t len = 4; len < 128; ++len) { \ for (SizeT len = 4; len < 128; ++len) { \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \ for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \ ++TotalTests; \
int llc_result = test_func(buf, buf2, init_val, len); \ int llc_result = test_func(buf, buf2, init_val, len); \
...@@ -58,7 +60,11 @@ void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -58,7 +60,11 @@ void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#undef do_test_variable #undef do_test_variable
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
unsigned TotalTests = 0; unsigned TotalTests = 0;
unsigned Passes = 0; unsigned Passes = 0;
unsigned Failures = 0; unsigned Failures = 0;
......
...@@ -6,7 +6,11 @@ ...@@ -6,7 +6,11 @@
int simple_loop(int *a, int n); int simple_loop(int *a, int n);
int Subzero_simple_loop(int *a, int n); int Subzero_simple_loop(int *a, int n);
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
unsigned TotalTests = 0; unsigned TotalTests = 0;
unsigned Passes = 0; unsigned Passes = 0;
unsigned Failures = 0; unsigned Failures = 0;
......
//===- subzero/crosstest/stack_hack.x8664.c - X8664 stack hack ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
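// Implements main() for x86-64 crosstests: it maps a new stack whose addresses
// fit in 32 bits, switches %rsp to it, and then runs the test's wrapped_main().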
//
//===----------------------------------------------------------------------===//
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
// X8664_STACK_HACK needs to be defined before xdefs.h is included.
#define X8664_STACK_HACK
#include "xdefs.h"
/// xSetStack is used to set %rsp to NewRsp. OldRsp is a pointer that will be
/// used to save the old %rsp value.
///
/// The previous %rsp is first copied into a scratch register (operand %0) and
/// only then is %rsp overwritten with NewRsp (operand %1). The input operand is
/// never written, as GCC requires for input-only operands, and the output is
/// marked early-clobber ("=&r") because it is written before the input is
/// read, so the two may not share a register. The "memory" clobber keeps the
/// compiler from moving memory accesses across the stack switch.
///
/// Both arguments should refer to storage that does not live on the stack
/// being switched away from (the caller in this file uses statics).
#define xSetStack(NewRsp, OldRsp)                                              \
  do {                                                                         \
    __asm__ volatile("movq    %%rsp, %0\n\t"                                   \
                     "movq    %1, %%rsp"                                       \
                     : "=&r"(*(OldRsp))                                        \
                     : "r"(NewRsp)                                             \
                     : "memory");                                              \
  } while (0)
// Entry point of the actual crosstest. Test drivers built with
// X8664_STACK_HACK defined name their main() wrapped_main() instead, and the
// main() in this file calls it after switching stacks.
extern int wrapped_main(int argc, char *argv[]);
/// Returns the lowest address of a stack that is Size bytes long and ends at
/// StackEnd, i.e. StackEnd - Size, as a pointer.
///
/// In builds with assertions enabled this checks that:
///   - Size does not exceed StackEnd, so the subtraction cannot wrap around;
///   - the resulting address has no bits set above bit 31;
///   - the resulting address is a multiple of PageBoundary (4 MB).
///
/// NOTE(review): uint32/uint64 come from xdefs.h, which is not visible here;
/// they are presumed to be 32- and 64-bit unsigned integers -- confirm.
unsigned char *xStackStart(uint32 StackEnd, uint32 Size) {
  const uint64 PageBoundary = 4 << 20; // 4 MB.
  // Must be checked before subtracting: once the difference has wrapped there
  // is nothing left in the result that reveals it.
  assert(Size <= StackEnd && "StackStart wraps.");
  const uint64 StackStart = StackEnd - Size;
  assert((StackStart & 0xFFFFFFFF00000000ull) == 0 && "StackStart wraps.");
  // Aligned means the low bits are all zero. (Rounding the address up and
  // testing the result for non-zero, as was done before, accepts almost any
  // address.)
  assert((StackStart & (PageBoundary - 1)) == 0 &&
         "StackStart not aligned to page boundary.");
  (void)PageBoundary; // Only referenced by assert(); unused under NDEBUG.
  return (unsigned char *)StackStart;
}
/// Maps Size bytes of private, anonymous, readable and writable memory at the
/// fixed address xStackStart(StackEnd, Size) and returns the address mmap()
/// reports for the mapping (its lowest address, not the stack top).
///
/// StackEnd must have no bits set above bit 31 (asserted). If the mapping
/// cannot be created the process is aborted: the caller points %rsp at the
/// returned region, so continuing with MAP_FAILED would be fatal anyway, and
/// an assert() alone would disappear in NDEBUG builds.
unsigned char *xAllocStack(uint64 StackEnd, uint32 Size) {
  assert((StackEnd & 0xFFFFFFFF00000000ull) == 0 && "Invalid StackEnd.");
  void *Stack =
      mmap(xStackStart(StackEnd, Size), Size, PROT_READ | PROT_WRITE,
           MAP_FIXED | MAP_PRIVATE | MAP_GROWSDOWN | MAP_ANONYMOUS, -1, 0);
  if (Stack == MAP_FAILED) {
    perror("mmap failed. no stack");
    abort();
  }
  return Stack;
}
/// Unmaps the Size-byte region that ends at StackEnd, i.e. the region starting
/// at xStackStart(StackEnd, Size). StackEnd must have no bits set above bit 31
/// (asserted). The result of munmap() is not inspected.
void xDeallocStack(uint64 StackEnd, uint32 Size) {
  assert((StackEnd & 0xFFFFFFFF00000000ull) == 0 && "Invalid StackEnd.");
  unsigned char *const StackBase = xStackStart(StackEnd, Size);
  munmap(StackBase, Size);
}
/// Process entry point for crosstests built with the x86-64 stack hack.
///
/// Maps a 40 MB stack that ends at address 0x80000000, points %rsp at its top,
/// runs wrapped_main() with the original argc/argv, switches back to the
/// original stack, unmaps the temporary one, and returns whatever
/// wrapped_main() returned.
int main(int argc, char *argv[]) {
  // These "locals" need to live **NOT** in the stack: they are read and
  // written on both sides of the %rsp switch, so they are given static
  // storage instead of a slot in the stack frame.
  static int Argc;
  static char **Argv;
  static const uint32_t StackEnd = 0x80000000;
  static const uint32_t StackSize = 40 * 1024 * 1024;
  static unsigned char *new_rsp;
  static unsigned char *old_rsp;
  static unsigned char *dummy_rsp;
  static int Failures;
  Argc = argc;
  Argv = argv;
  // xAllocStack() returns the mapping's lowest address; the initial stack
  // pointer is the other end of the region.
  new_rsp = xAllocStack(StackEnd, StackSize) + StackSize;
  xSetStack(new_rsp, &old_rsp);
  Failures = wrapped_main(Argc, Argv);
  // Restore the original stack. The %rsp value being abandoned is not needed
  // again, so it is parked in dummy_rsp rather than clobbering new_rsp.
  xSetStack(old_rsp, &dummy_rsp);
  xDeallocStack(StackEnd, StackSize);
  return Failures;
}
...@@ -17,13 +17,14 @@ ...@@ -17,13 +17,14 @@
#include <stdint.h> #include <stdint.h>
#include "test_arith.h" #include "test_arith.h"
#include "xdefs.h"
#define X(inst, op, isdiv, isshift) \ #define X(inst, op, isdiv, isshift) \
bool test##inst(bool a, bool b) { return a op b; } \ bool test##inst(bool a, bool b) { return a op b; } \
uint8_t test##inst(uint8_t a, uint8_t b) { return a op b; } \ uint8_t test##inst(uint8_t a, uint8_t b) { return a op b; } \
uint16_t test##inst(uint16_t a, uint16_t b) { return a op b; } \ uint16_t test##inst(uint16_t a, uint16_t b) { return a op b; } \
uint32_t test##inst(uint32_t a, uint32_t b) { return a op b; } \ uint32_t test##inst(uint32_t a, uint32_t b) { return a op b; } \
uint64_t test##inst(uint64_t a, uint64_t b) { return a op b; } \ uint64 test##inst(uint64 a, uint64 b) { return a op b; } \
v4ui32 test##inst(v4ui32 a, v4ui32 b) { return a op b; } \ v4ui32 test##inst(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 test##inst(v8ui16 a, v8ui16 b) { return a op b; } \ v8ui16 test##inst(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 test##inst(v16ui8 a, v16ui8 b) { return a op b; } v16ui8 test##inst(v16ui8 a, v16ui8 b) { return a op b; }
...@@ -35,7 +36,7 @@ UINTOP_TABLE ...@@ -35,7 +36,7 @@ UINTOP_TABLE
myint8_t test##inst(myint8_t a, myint8_t b) { return a op b; } \ myint8_t test##inst(myint8_t a, myint8_t b) { return a op b; } \
int16_t test##inst(int16_t a, int16_t b) { return a op b; } \ int16_t test##inst(int16_t a, int16_t b) { return a op b; } \
int32_t test##inst(int32_t a, int32_t b) { return a op b; } \ int32_t test##inst(int32_t a, int32_t b) { return a op b; } \
int64_t test##inst(int64_t a, int64_t b) { return a op b; } \ int64 test##inst(int64 a, int64 b) { return a op b; } \
v4si32 test##inst(v4si32 a, v4si32 b) { return a op b; } \ v4si32 test##inst(v4si32 a, v4si32 b) { return a op b; } \
v8si16 test##inst(v8si16 a, v8si16 b) { return a op b; } \ v8si16 test##inst(v8si16 a, v8si16 b) { return a op b; } \
v16si8 test##inst(v16si8 a, v16si8 b) { return a op b; } v16si8 test##inst(v16si8 a, v16si8 b) { return a op b; }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <stdint.h> #include <stdint.h>
#include "test_arith.def" #include "test_arith.def"
#include "xdefs.h"
#include "vectors.h" #include "vectors.h"
...@@ -22,7 +23,7 @@ ...@@ -22,7 +23,7 @@
uint8_t test##inst(uint8_t a, uint8_t b); \ uint8_t test##inst(uint8_t a, uint8_t b); \
uint16_t test##inst(uint16_t a, uint16_t b); \ uint16_t test##inst(uint16_t a, uint16_t b); \
uint32_t test##inst(uint32_t a, uint32_t b); \ uint32_t test##inst(uint32_t a, uint32_t b); \
uint64_t test##inst(uint64_t a, uint64_t b); \ uint64 test##inst(uint64 a, uint64 b); \
v4ui32 test##inst(v4ui32 a, v4ui32 b); \ v4ui32 test##inst(v4ui32 a, v4ui32 b); \
v8ui16 test##inst(v8ui16 a, v8ui16 b); \ v8ui16 test##inst(v8ui16 a, v8ui16 b); \
v16ui8 test##inst(v16ui8 a, v16ui8 b); v16ui8 test##inst(v16ui8 a, v16ui8 b);
...@@ -34,7 +35,7 @@ UINTOP_TABLE ...@@ -34,7 +35,7 @@ UINTOP_TABLE
myint8_t test##inst(myint8_t a, myint8_t b); \ myint8_t test##inst(myint8_t a, myint8_t b); \
int16_t test##inst(int16_t a, int16_t b); \ int16_t test##inst(int16_t a, int16_t b); \
int32_t test##inst(int32_t a, int32_t b); \ int32_t test##inst(int32_t a, int32_t b); \
int64_t test##inst(int64_t a, int64_t b); \ int64 test##inst(int64 a, int64 b); \
v4si32 test##inst(v4si32 a, v4si32 b); \ v4si32 test##inst(v4si32 a, v4si32 b); \
v8si16 test##inst(v8si16 a, v8si16 b); \ v8si16 test##inst(v8si16 a, v8si16 b); \
v16si8 test##inst(v16si8 a, v16si8 b); v16si8 test##inst(v16si8 a, v16si8 b);
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated // Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively. // object files, respectively.
#include "test_arith.h" #include "test_arith.h"
#include "xdefs.h"
namespace Subzero_ { namespace Subzero_ {
#include "test_arith.h" #include "test_arith.h"
} }
...@@ -363,7 +365,11 @@ void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -363,7 +365,11 @@ void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
...@@ -372,7 +378,7 @@ int main(int argc, char **argv) { ...@@ -372,7 +378,7 @@ int main(int argc, char **argv) {
testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures); testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures);
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures); testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures); testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures); testsInt<uint64, int64>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures); testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures); testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures); testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures);
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#ifndef TEST_BIT_MANIP_DEF #ifndef TEST_BIT_MANIP_DEF
#define TEST_BIT_MANIP_DEF #define TEST_BIT_MANIP_DEF
#include "xdefs.h"
#define STR(s) #s #define STR(s) #s
#define BMI_OPS \ #define BMI_OPS \
...@@ -25,13 +27,13 @@ ...@@ -25,13 +27,13 @@
#define BMI_TYPES \ #define BMI_TYPES \
/* type */ \ /* type */ \
X(uint32_t) \ X(uint32) \
X(uint64_t) X(uint64)
// #define X(type) // #define X(type)
#define FOR_ALL_BMI_TYPES_INST(F, inst) \ #define FOR_ALL_BMI_TYPES_INST(F, inst) \
F(inst, uint32_t) \ F(inst, uint32) \
F(inst, uint64_t) F(inst, uint64)
#define FOR_ALL_BMI_OP_TYPES(X) \ #define FOR_ALL_BMI_OP_TYPES(X) \
FOR_ALL_BMI_TYPES_INST(X, ctlz) \ FOR_ALL_BMI_TYPES_INST(X, ctlz) \
...@@ -42,7 +44,7 @@ ...@@ -42,7 +44,7 @@
#define BSWAP_TABLE \ #define BSWAP_TABLE \
/* type, builtin_name */ \ /* type, builtin_name */ \
X(uint16_t, __builtin_bswap16) \ X(uint16_t, __builtin_bswap16) \
X(uint32_t, __builtin_bswap32) \ X(uint32, __builtin_bswap32) \
X(uint64_t, __builtin_bswap64) X(uint64, __builtin_bswap64)
#endif // TEST_BIT_MANIP_DEF #endif // TEST_BIT_MANIP_DEF
...@@ -23,11 +23,13 @@ ...@@ -23,11 +23,13 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated // Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively. // object files, respectively.
#include "test_bitmanip.h" #include "test_bitmanip.h"
#include "xdefs.h"
namespace Subzero_ { namespace Subzero_ {
#include "test_bitmanip.h" #include "test_bitmanip.h"
} }
volatile uint64_t Values[] = { volatile uint64 Values[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001, 0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0xc0de, 0xabcd, 0xdcba, 0x007fffff /*Max subnormal + */, 0xfffe, 0xffff, 0xc0de, 0xabcd, 0xdcba, 0x007fffff /*Max subnormal + */,
0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/, 0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/,
...@@ -71,9 +73,9 @@ void testBitManip(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -71,9 +73,9 @@ void testBitManip(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} else { } else {
++Failures; ++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type)) std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value) << "(" << static_cast<uint64>(Value)
<< "): sz=" << static_cast<uint64_t>(ResultSz) << "): sz=" << static_cast<uint64>(ResultSz)
<< " llc=" << static_cast<uint64_t>(ResultLlc) << "\n"; << " llc=" << static_cast<uint64>(ResultLlc) << "\n";
} }
} }
} }
...@@ -101,24 +103,28 @@ void testByteSwap(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -101,24 +103,28 @@ void testByteSwap(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} else { } else {
++Failures; ++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type)) std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value) << "(" << static_cast<uint64>(Value)
<< "): sz=" << static_cast<uint64_t>(ResultSz) << "): sz=" << static_cast<uint64>(ResultSz)
<< " llc=" << static_cast<uint64_t>(ResultLlc) << "\n"; << " llc=" << static_cast<uint64>(ResultLlc) << "\n";
} }
} }
} }
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
testBitManip<uint32_t>(TotalTests, Passes, Failures); testBitManip<uint32_t>(TotalTests, Passes, Failures);
testBitManip<uint64_t>(TotalTests, Passes, Failures); testBitManip<uint64>(TotalTests, Passes, Failures);
testByteSwap<uint16_t>(TotalTests, Passes, Failures); testByteSwap<uint16_t>(TotalTests, Passes, Failures);
testByteSwap<uint32_t>(TotalTests, Passes, Failures); testByteSwap<uint32_t>(TotalTests, Passes, Failures);
testByteSwap<uint64_t>(TotalTests, Passes, Failures); testByteSwap<uint64>(TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n"; << " Failures=" << Failures << "\n";
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <cstring> #include <cstring>
#include "test_calling_conv.h" #include "test_calling_conv.h"
#include "xdefs.h"
#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func)) #define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
...@@ -37,9 +38,9 @@ void caller_vvvvv(void) { ...@@ -37,9 +38,9 @@ void caller_vvvvv(void) {
void caller_vlvlivfvdviv(void) { void caller_vlvlivfvdviv(void) {
v4f32 arg1 = {0, 1, 2, 3}; v4f32 arg1 = {0, 1, 2, 3};
int64_t arg2 = 4; int64 arg2 = 4;
v4f32 arg3 = {6, 7, 8, 9}; v4f32 arg3 = {6, 7, 8, 9};
int64_t arg4 = 10; int64 arg4 = 10;
int arg5 = 11; int arg5 = 11;
v4f32 arg6 = {12, 13, 14, 15}; v4f32 arg6 = {12, 13, 14, 15};
float arg7 = 16; float arg7 = 16;
...@@ -75,8 +76,8 @@ callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) { ...@@ -75,8 +76,8 @@ callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
} }
void __attribute__((noinline)) void __attribute__((noinline))
callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4, callee_vlvlivfvdviv(v4f32 arg1, int64 arg2, v4f32 arg3, int64 arg4, int arg5,
int arg5, v4f32 arg6, float arg7, v4f32 arg8, double arg9, v4f32 arg6, float arg7, v4f32 arg8, double arg9,
v4f32 arg10, int arg11, v4f32 arg12) { v4f32 arg10, int arg11, v4f32 arg12) {
switch (ArgNum) { switch (ArgNum) {
HANDLE_ARG(1); HANDLE_ARG(1);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "test_calling_conv.def" #include "test_calling_conv.def"
#include "vectors.h" #include "vectors.h"
#include "xdefs.h"
typedef void (*CalleePtrTy)(); typedef void (*CalleePtrTy)();
extern CalleePtrTy Callee; extern CalleePtrTy Callee;
...@@ -31,6 +32,6 @@ typedef void(callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32); ...@@ -31,6 +32,6 @@ typedef void(callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
callee_vvvvv_Ty callee_vvvvv; callee_vvvvv_Ty callee_vvvvv;
void caller_vlvlivfvdviv(); void caller_vlvlivfvdviv();
typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32, typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64, v4f32, int64, int, v4f32,
float, v4f32, double, v4f32, int, v4f32); float, v4f32, double, v4f32, int, v4f32);
callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv; callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
...@@ -162,7 +162,11 @@ void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -162,7 +162,11 @@ void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <stdint.h> #include <stdint.h>
#include "test_cast.h" #include "test_cast.h"
#include "xdefs.h"
template <typename FromType, typename ToType> template <typename FromType, typename ToType>
ToType __attribute__((noinline)) cast(FromType a) { ToType __attribute__((noinline)) cast(FromType a) {
...@@ -38,8 +39,8 @@ template <typename ToType> class Caster { ...@@ -38,8 +39,8 @@ template <typename ToType> class Caster {
static ToType f(uint16_t a) { return cast<uint16_t, ToType>(a); } static ToType f(uint16_t a) { return cast<uint16_t, ToType>(a); }
static ToType f(int32_t a) { return cast<int32_t, ToType>(a); } static ToType f(int32_t a) { return cast<int32_t, ToType>(a); }
static ToType f(uint32_t a) { return cast<uint32_t, ToType>(a); } static ToType f(uint32_t a) { return cast<uint32_t, ToType>(a); }
static ToType f(int64_t a) { return cast<int64_t, ToType>(a); } static ToType f(int64 a) { return cast<int64, ToType>(a); }
static ToType f(uint64_t a) { return cast<uint64_t, ToType>(a); } static ToType f(uint64 a) { return cast<uint64, ToType>(a); }
static ToType f(float a) { return cast<float, ToType>(a); } static ToType f(float a) { return cast<float, ToType>(a); }
static ToType f(double a) { return cast<double, ToType>(a); } static ToType f(double a) { return cast<double, ToType>(a); }
}; };
...@@ -56,8 +57,8 @@ template class Caster<int16_t>; ...@@ -56,8 +57,8 @@ template class Caster<int16_t>;
template class Caster<uint16_t>; template class Caster<uint16_t>;
template class Caster<int32_t>; template class Caster<int32_t>;
template class Caster<uint32_t>; template class Caster<uint32_t>;
template class Caster<int64_t>; template class Caster<int64>;
template class Caster<uint64_t>; template class Caster<uint64>;
template class Caster<float>; template class Caster<float>;
template class Caster<double>; template class Caster<double>;
...@@ -67,8 +68,8 @@ template class Caster<double>; ...@@ -67,8 +68,8 @@ template class Caster<double>;
double makeBitCasters() { double makeBitCasters() {
double Result = 0; double Result = 0;
Result += castBits<uint32_t, float>(0); Result += castBits<uint32_t, float>(0);
Result += castBits<uint64_t, double>(0); Result += castBits<uint64, double>(0);
Result += castBits<float, uint32_t>(0); Result += castBits<float, uint32_t>(0);
Result += castBits<double, uint64_t>(0); Result += castBits<double, uint64>(0);
return Result; return Result;
} }
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "test_arith.def" #include "test_arith.def"
#include "vectors.h" #include "vectors.h"
#include "xdefs.h"
// Include test_cast.h twice - once normally, and once within the // Include test_cast.h twice - once normally, and once within the
// Subzero_ namespace, corresponding to the llc and Subzero translated // Subzero_ namespace, corresponding to the llc and Subzero translated
...@@ -82,8 +83,8 @@ void testValue(FromType Val, size_t &TotalTests, size_t &Passes, ...@@ -82,8 +83,8 @@ void testValue(FromType Val, size_t &TotalTests, size_t &Passes,
COMPARE(cast, FromType, int16_t, Val, FromTypeString); COMPARE(cast, FromType, int16_t, Val, FromTypeString);
COMPARE(cast, FromType, uint32_t, Val, FromTypeString); COMPARE(cast, FromType, uint32_t, Val, FromTypeString);
COMPARE(cast, FromType, int32_t, Val, FromTypeString); COMPARE(cast, FromType, int32_t, Val, FromTypeString);
COMPARE(cast, FromType, uint64_t, Val, FromTypeString); COMPARE(cast, FromType, uint64, Val, FromTypeString);
COMPARE(cast, FromType, int64_t, Val, FromTypeString); COMPARE(cast, FromType, int64, Val, FromTypeString);
COMPARE(cast, FromType, float, Val, FromTypeString); COMPARE(cast, FromType, float, Val, FromTypeString);
COMPARE(cast, FromType, double, Val, FromTypeString); COMPARE(cast, FromType, double, Val, FromTypeString);
} }
...@@ -110,7 +111,11 @@ void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures, ...@@ -110,7 +111,11 @@ void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures,
} }
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
...@@ -147,7 +152,7 @@ int main(int argc, char **argv) { ...@@ -147,7 +152,7 @@ int main(int argc, char **argv) {
0x80000000, 0x80000001, 0xfffffffe, 0xffffffff}; 0x80000000, 0x80000001, 0xfffffffe, 0xffffffff};
static const size_t NumValsSi32 = sizeof(ValsSi32) / sizeof(*ValsSi32); static const size_t NumValsSi32 = sizeof(ValsSi32) / sizeof(*ValsSi32);
volatile uint64_t ValsUi64[] = { volatile uint64 ValsUi64[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001, 0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001, 0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ull, 0x100000001ull, 0xfffffffe, 0xffffffff, 0x100000000ull, 0x100000001ull,
...@@ -155,7 +160,7 @@ int main(int argc, char **argv) { ...@@ -155,7 +160,7 @@ int main(int argc, char **argv) {
0x8000000000000001ull, 0xfffffffffffffffeull, 0xffffffffffffffffull}; 0x8000000000000001ull, 0xfffffffffffffffeull, 0xffffffffffffffffull};
static const size_t NumValsUi64 = sizeof(ValsUi64) / sizeof(*ValsUi64); static const size_t NumValsUi64 = sizeof(ValsUi64) / sizeof(*ValsUi64);
volatile int64_t ValsSi64[] = { volatile int64 ValsSi64[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001, 0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001, 0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ll, 0x100000001ll, 0xfffffffe, 0xffffffff, 0x100000000ll, 0x100000001ll,
...@@ -203,13 +208,13 @@ int main(int argc, char **argv) { ...@@ -203,13 +208,13 @@ int main(int argc, char **argv) {
testValue<int32_t>(Val, TotalTests, Passes, Failures, "int32_t"); testValue<int32_t>(Val, TotalTests, Passes, Failures, "int32_t");
} }
for (size_t i = 0; i < NumValsUi64; ++i) { for (size_t i = 0; i < NumValsUi64; ++i) {
uint64_t Val = ValsUi64[i]; uint64 Val = ValsUi64[i];
testValue<uint64_t>(Val, TotalTests, Passes, Failures, "uint64_t"); testValue<uint64>(Val, TotalTests, Passes, Failures, "uint64");
COMPARE(castBits, uint64_t, double, Val, "uint64_t"); COMPARE(castBits, uint64, double, Val, "uint64");
} }
for (size_t i = 0; i < NumValsSi64; ++i) { for (size_t i = 0; i < NumValsSi64; ++i) {
int64_t Val = ValsSi64[i]; int64 Val = ValsSi64[i];
testValue<int64_t>(Val, TotalTests, Passes, Failures, "int64_t"); testValue<int64>(Val, TotalTests, Passes, Failures, "int64");
} }
for (size_t i = 0; i < NumValsF32; ++i) { for (size_t i = 0; i < NumValsF32; ++i) {
for (unsigned j = 0; j < 2; ++j) { for (unsigned j = 0; j < 2; ++j) {
...@@ -226,7 +231,7 @@ int main(int argc, char **argv) { ...@@ -226,7 +231,7 @@ int main(int argc, char **argv) {
if (j > 0) if (j > 0)
Val = -Val; Val = -Val;
testValue<double>(Val, TotalTests, Passes, Failures, "double"); testValue<double>(Val, TotalTests, Passes, Failures, "double");
COMPARE(castBits, double, uint64_t, Val, "double"); COMPARE(castBits, double, uint64, Val, "double");
} }
} }
testVector<v4ui32, v4f32>(TotalTests, Passes, Failures, "v4ui32", "v4f32"); testVector<v4ui32, v4f32>(TotalTests, Passes, Failures, "v4ui32", "v4f32");
......
...@@ -159,7 +159,11 @@ void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -159,7 +159,11 @@ void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
......
...@@ -15,12 +15,13 @@ ...@@ -15,12 +15,13 @@
#include <stdint.h> #include <stdint.h>
#include "test_icmp.h" #include "test_icmp.h"
#include "xdefs.h"
#define X(cmp, op) \ #define X(cmp, op) \
bool icmp##cmp(uint8_t a, uint8_t b) { return a op b; } \ bool icmp##cmp(uint8_t a, uint8_t b) { return a op b; } \
bool icmp##cmp(uint16_t a, uint16_t b) { return a op b; } \ bool icmp##cmp(uint16_t a, uint16_t b) { return a op b; } \
bool icmp##cmp(uint32_t a, uint32_t b) { return a op b; } \ bool icmp##cmp(uint32_t a, uint32_t b) { return a op b; } \
bool icmp##cmp(uint64_t a, uint64_t b) { return a op b; } \ bool icmp##cmp(uint64 a, uint64 b) { return a op b; } \
v4ui32 icmp##cmp(v4ui32 a, v4ui32 b) { return a op b; } \ v4ui32 icmp##cmp(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 icmp##cmp(v8ui16 a, v8ui16 b) { return a op b; } \ v8ui16 icmp##cmp(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 icmp##cmp(v16ui8 a, v16ui8 b) { return a op b; } v16ui8 icmp##cmp(v16ui8 a, v16ui8 b) { return a op b; }
...@@ -31,7 +32,7 @@ ICMP_U_TABLE ...@@ -31,7 +32,7 @@ ICMP_U_TABLE
bool icmp##cmp(myint8_t a, myint8_t b) { return a op b; } \ bool icmp##cmp(myint8_t a, myint8_t b) { return a op b; } \
bool icmp##cmp(int16_t a, int16_t b) { return a op b; } \ bool icmp##cmp(int16_t a, int16_t b) { return a op b; } \
bool icmp##cmp(int32_t a, int32_t b) { return a op b; } \ bool icmp##cmp(int32_t a, int32_t b) { return a op b; } \
bool icmp##cmp(int64_t a, int64_t b) { return a op b; } \ bool icmp##cmp(int64 a, int64 b) { return a op b; } \
v4si32 icmp##cmp(v4si32 a, v4si32 b) { return a op b; } \ v4si32 icmp##cmp(v4si32 a, v4si32 b) { return a op b; } \
v8si16 icmp##cmp(v8si16 a, v8si16 b) { return a op b; } \ v8si16 icmp##cmp(v8si16 a, v8si16 b) { return a op b; } \
v16si8 icmp##cmp(v16si8 a, v16si8 b) { return a op b; } v16si8 icmp##cmp(v16si8 a, v16si8 b) { return a op b; }
......
...@@ -15,12 +15,13 @@ ...@@ -15,12 +15,13 @@
#include "test_icmp.def" #include "test_icmp.def"
#include "vectors.h" #include "vectors.h"
#include "xdefs.h"
#define X(cmp, op) \ #define X(cmp, op) \
bool icmp##cmp(uint8_t a, uint8_t b); \ bool icmp##cmp(uint8_t a, uint8_t b); \
bool icmp##cmp(uint16_t a, uint16_t b); \ bool icmp##cmp(uint16_t a, uint16_t b); \
bool icmp##cmp(uint32_t a, uint32_t b); \ bool icmp##cmp(uint32_t a, uint32_t b); \
bool icmp##cmp(uint64_t a, uint64_t b); \ bool icmp##cmp(uint64 a, uint64 b); \
v4ui32 icmp##cmp(v4ui32 a, v4ui32 b); \ v4ui32 icmp##cmp(v4ui32 a, v4ui32 b); \
v8ui16 icmp##cmp(v8ui16 a, v8ui16 b); \ v8ui16 icmp##cmp(v8ui16 a, v8ui16 b); \
v16ui8 icmp##cmp(v16ui8 a, v16ui8 b); v16ui8 icmp##cmp(v16ui8 a, v16ui8 b);
...@@ -31,7 +32,7 @@ ICMP_U_TABLE ...@@ -31,7 +32,7 @@ ICMP_U_TABLE
bool icmp##cmp(myint8_t a, myint8_t b); \ bool icmp##cmp(myint8_t a, myint8_t b); \
bool icmp##cmp(int16_t a, int16_t b); \ bool icmp##cmp(int16_t a, int16_t b); \
bool icmp##cmp(int32_t a, int32_t b); \ bool icmp##cmp(int32_t a, int32_t b); \
bool icmp##cmp(int64_t a, int64_t b); \ bool icmp##cmp(int64 a, int64 b); \
v4si32 icmp##cmp(v4si32 a, v4si32 b); \ v4si32 icmp##cmp(v4si32 a, v4si32 b); \
v8si16 icmp##cmp(v8si16 a, v8si16 b); \ v8si16 icmp##cmp(v8si16 a, v8si16 b); \
v16si8 icmp##cmp(v16si8 a, v16si8 b); v16si8 icmp##cmp(v16si8 a, v16si8 b);
......
...@@ -23,10 +23,13 @@ ...@@ -23,10 +23,13 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated // Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively. // object files, respectively.
#include "test_icmp.h" #include "test_icmp.h"
namespace Subzero_ { namespace Subzero_ {
#include "test_icmp.h" #include "test_icmp.h"
} }
#include "xdefs.h"
volatile unsigned Values[] = { volatile unsigned Values[] = {
0x0, 0x1, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001, 0x0, 0x1, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x7e, 0x7f, 0x80, 0x81, 0xfffffffe, 0xffffffff, 0x7e, 0x7f, 0x80, 0x81,
...@@ -265,7 +268,11 @@ void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -265,7 +268,11 @@ void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
...@@ -273,7 +280,7 @@ int main(int argc, char **argv) { ...@@ -273,7 +280,7 @@ int main(int argc, char **argv) {
testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures); testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures);
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures); testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures); testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures); testsInt<uint64, int64>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures); testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures); testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures); testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures);
......
...@@ -130,7 +130,11 @@ void testSelectI1(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -130,7 +130,11 @@ void testSelectI1(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
......
...@@ -22,7 +22,11 @@ ...@@ -22,7 +22,11 @@
DECLARE_TESTS() DECLARE_TESTS()
DECLARE_TESTS(Subzero_) DECLARE_TESTS(Subzero_)
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
......
...@@ -25,7 +25,11 @@ namespace Subzero_ { ...@@ -25,7 +25,11 @@ namespace Subzero_ {
#include "test_strengthreduce.h" #include "test_strengthreduce.h"
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#ifndef TEST_SYNC_ATOMIC_DEF #ifndef TEST_SYNC_ATOMIC_DEF
#define TEST_SYNC_ATOMIC_DEF #define TEST_SYNC_ATOMIC_DEF
#include "xdefs.h"
#define STR(s) #s #define STR(s) #s
#define RMWOP_TABLE \ #define RMWOP_TABLE \
...@@ -30,14 +32,14 @@ ...@@ -30,14 +32,14 @@
X(uint8_t) \ X(uint8_t) \
X(uint16_t) \ X(uint16_t) \
X(uint32_t) \ X(uint32_t) \
X(uint64_t) X(uint64)
//#define X(type) //#define X(type)
#define FOR_ALL_RMWTYPES_INST(F, inst) \ #define FOR_ALL_RMWTYPES_INST(F, inst) \
F(inst, uint8_t) \ F(inst, uint8_t) \
F(inst, uint16_t) \ F(inst, uint16_t) \
F(inst, uint32_t) \ F(inst, uint32_t) \
F(inst, uint64_t) F(inst, uint64)
#define FOR_ALL_RMWOP_TYPES(X) \ #define FOR_ALL_RMWOP_TYPES(X) \
FOR_ALL_RMWTYPES_INST(X, add) \ FOR_ALL_RMWTYPES_INST(X, add) \
......
...@@ -28,11 +28,12 @@ ...@@ -28,11 +28,12 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated // Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively. // object files, respectively.
#include "test_sync_atomic.h" #include "test_sync_atomic.h"
#include "xdefs.h"
namespace Subzero_ { namespace Subzero_ {
#include "test_sync_atomic.h" #include "test_sync_atomic.h"
} }
volatile uint64_t Values[] = { volatile uint64 Values[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001, 0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x007fffff /*Max subnormal + */, 0x00800000 /*Min+ */, 0xfffe, 0xffff, 0x007fffff /*Max subnormal + */, 0x00800000 /*Min+ */,
0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/, 0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/,
...@@ -51,7 +52,7 @@ struct { ...@@ -51,7 +52,7 @@ struct {
volatile uint8_t l8; volatile uint8_t l8;
volatile uint16_t l16; volatile uint16_t l16;
volatile uint32_t l32; volatile uint32_t l32;
volatile uint64_t l64; volatile uint64 l64;
} AtomicLocs; } AtomicLocs;
template <typename Type> template <typename Type>
...@@ -91,12 +92,12 @@ void testAtomicRMW(volatile Type *AtomicLoc, size_t &TotalTests, size_t &Passes, ...@@ -91,12 +92,12 @@ void testAtomicRMW(volatile Type *AtomicLoc, size_t &TotalTests, size_t &Passes,
} else { } else {
++Failures; ++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type)) std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value1) << ", " << "(" << static_cast<uint64>(Value1) << ", "
<< static_cast<uint64_t>(Value2) << static_cast<uint64>(Value2)
<< "): sz1=" << static_cast<uint64_t>(ResultSz1) << "): sz1=" << static_cast<uint64>(ResultSz1)
<< " llc1=" << static_cast<uint64_t>(ResultLlc1) << " llc1=" << static_cast<uint64>(ResultLlc1)
<< " sz2=" << static_cast<uint64_t>(ResultSz2) << " sz2=" << static_cast<uint64>(ResultSz2)
<< " llc2=" << static_cast<uint64_t>(ResultLlc2) << "\n"; << " llc2=" << static_cast<uint64>(ResultLlc2) << "\n";
} }
} }
} }
...@@ -137,12 +138,12 @@ void testValCompareAndSwap(volatile Type *AtomicLoc, size_t &TotalTests, ...@@ -137,12 +138,12 @@ void testValCompareAndSwap(volatile Type *AtomicLoc, size_t &TotalTests,
} else { } else {
++Failures; ++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type)) std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
<< "(" << static_cast<uint64_t>(Value1) << ", " << "(" << static_cast<uint64>(Value1) << ", "
<< static_cast<uint64_t>(Value2) << static_cast<uint64>(Value2)
<< "): sz1=" << static_cast<uint64_t>(ResultSz1) << "): sz1=" << static_cast<uint64>(ResultSz1)
<< " llc1=" << static_cast<uint64_t>(ResultLlc1) << " llc1=" << static_cast<uint64>(ResultLlc1)
<< " sz2=" << static_cast<uint64_t>(ResultSz2) << " sz2=" << static_cast<uint64>(ResultSz2)
<< " llc2=" << static_cast<uint64_t>(ResultLlc2) << "\n"; << " llc2=" << static_cast<uint64>(ResultLlc2) << "\n";
} }
} }
} }
...@@ -166,6 +167,22 @@ template <typename Type> void *threadWrapper(void *Data) { ...@@ -166,6 +167,22 @@ template <typename Type> void *threadWrapper(void *Data) {
return NULL; return NULL;
} }
#ifndef X8664_STACK_HACK
// Without the x86-64 stack hack there is nothing to set up: attr is left
// exactly as the caller initialized it.
void AllocStackForThread(uint32, pthread_attr_t *) {}
#else // defined(X8664_STACK_HACK)
// Installs a dedicated stack, obtained from xAllocStack, into attr for the
// thread with index m. Aborts the test if the stack cannot be installed.
void AllocStackForThread(uint32 m, pthread_attr_t *attr) {
  static const uint32 ThreadStackBase = 0x60000000;
  static const uint32 ThreadStackSize = 4 << 20; // 4MB.
  // Each thread's stack end is 2 * ThreadStackSize below the previous one,
  // counting down from ThreadStackBase, so consecutive stacks are separated by
  // a ThreadStackSize-sized gap.
  const uint32 StackEnd = ThreadStackBase - 2 * m * ThreadStackSize;
  // pthread_attr_setstack reports failure through its return value; it does
  // not set errno, so the returned code is what must be decoded.
  const int Err = pthread_attr_setstack(
      attr, xAllocStack(StackEnd, ThreadStackSize), ThreadStackSize);
  if (Err != 0) {
    std::cout << "pthread_attr_setstack: " << strerror(Err) << "\n";
    abort();
  }
}
#endif // X8664_STACK_HACK
template <typename Type> template <typename Type>
void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests, void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
size_t &Passes, size_t &Failures) { size_t &Passes, size_t &Failures) {
...@@ -184,7 +201,7 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests, ...@@ -184,7 +201,7 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs); const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
// Just test a few values, otherwise it takes a *really* long time. // Just test a few values, otherwise it takes a *really* long time.
volatile uint64_t ValuesSubset[] = {1, 0x7e, 0x000fffffffffffffffll}; volatile uint64 ValuesSubset[] = {1, 0x7e, 0x000fffffffffffffffll};
const size_t NumValuesSubset = sizeof(ValuesSubset) / sizeof(*ValuesSubset); const size_t NumValuesSubset = sizeof(ValuesSubset) / sizeof(*ValuesSubset);
for (size_t f = 0; f < NumFuncs; ++f) { for (size_t f = 0; f < NumFuncs; ++f) {
...@@ -200,12 +217,18 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests, ...@@ -200,12 +217,18 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
++TotalTests; ++TotalTests;
const size_t NumThreads = 4; const size_t NumThreads = 4;
pthread_t t[NumThreads]; pthread_t t[NumThreads];
pthread_attr_t attr[NumThreads];
// Try N threads w/ just Llc. // Try N threads w/ just Llc.
*AtomicLoc = Value1; *AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) { for (size_t m = 0; m < NumThreads; ++m) {
pthread_create(&t[m], NULL, &threadWrapper<Type>, pthread_attr_init(&attr[m]);
reinterpret_cast<void *>(&TDataLlc)); AllocStackForThread(m, &attr[m]);
if (pthread_create(&t[m], &attr[m], &threadWrapper<Type>,
reinterpret_cast<void *>(&TDataLlc)) != 0) {
std::cout << "pthread_create failed w/ " << strerror(errno) << "\n";
abort();
}
} }
for (size_t m = 0; m < NumThreads; ++m) { for (size_t m = 0; m < NumThreads; ++m) {
pthread_join(t[m], NULL); pthread_join(t[m], NULL);
...@@ -215,7 +238,9 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests, ...@@ -215,7 +238,9 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
// Try N threads w/ both Sz and Llc. // Try N threads w/ both Sz and Llc.
*AtomicLoc = Value1; *AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) { for (size_t m = 0; m < NumThreads; ++m) {
if (pthread_create(&t[m], NULL, &threadWrapper<Type>, pthread_attr_init(&attr[m]);
AllocStackForThread(m, &attr[m]);
if (pthread_create(&t[m], &attr[m], &threadWrapper<Type>,
m % 2 == 0 m % 2 == 0
? reinterpret_cast<void *>(&TDataLlc) ? reinterpret_cast<void *>(&TDataLlc)
: reinterpret_cast<void *>(&TDataSz)) != 0) { : reinterpret_cast<void *>(&TDataSz)) != 0) {
...@@ -238,18 +263,21 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests, ...@@ -238,18 +263,21 @@ void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
} else { } else {
++Failures; ++Failures;
std::cout << "test_with_threads_" << Funcs[f].Name std::cout << "test_with_threads_" << Funcs[f].Name
<< (8 * sizeof(Type)) << "(" << (8 * sizeof(Type)) << "(" << static_cast<uint64>(Value1)
<< static_cast<uint64_t>(Value1) << ", " << ", " << static_cast<uint64>(Value2)
<< static_cast<uint64_t>(Value2) << "): llc=" << static_cast<uint64>(ResultLlc)
<< "): llc=" << static_cast<uint64_t>(ResultLlc) << " mixed=" << static_cast<uint64>(ResultMixed) << "\n";
<< " mixed=" << static_cast<uint64_t>(ResultMixed) << "\n";
} }
} }
} }
} }
} }
int main(int argc, char **argv) { #ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
...@@ -257,18 +285,17 @@ int main(int argc, char **argv) { ...@@ -257,18 +285,17 @@ int main(int argc, char **argv) {
testAtomicRMW<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures); testAtomicRMW<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMW<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures); testAtomicRMW<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMW<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures); testAtomicRMW<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
testAtomicRMW<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures); testAtomicRMW<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testValCompareAndSwap<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures); testValCompareAndSwap<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testValCompareAndSwap<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, testValCompareAndSwap<uint16_t>(&AtomicLocs.l16, TotalTests, Passes,
Failures); Failures);
testValCompareAndSwap<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, testValCompareAndSwap<uint32_t>(&AtomicLocs.l32, TotalTests, Passes,
Failures); Failures);
testValCompareAndSwap<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, testValCompareAndSwap<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
Failures);
testAtomicRMWThreads<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures); testAtomicRMWThreads<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures); testAtomicRMWThreads<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures); testAtomicRMWThreads<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures); testAtomicRMWThreads<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n"; << " Failures=" << Failures << "\n";
......
...@@ -130,7 +130,11 @@ void testExtractElement(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -130,7 +130,11 @@ void testExtractElement(size_t &TotalTests, size_t &Passes, size_t &Failures) {
free(TestVectors); free(TestVectors);
} }
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
#endif // X8664_STACK_HACK
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
size_t Failures = 0; size_t Failures = 0;
......
//===- subzero/crosstest/xdefs.h - Definitions for the crosstests. --------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines the int64 and uint64 types to avoid link-time errors when compiling
// the crosstests in LP64.
//
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_CROSSTEST_XDEFS_H_
#define SUBZERO_CROSSTEST_XDEFS_H_

// Integer aliases shared by the crosstests. The 64-bit ones are spelled with
// `long long` rather than taken from <stdint.h>.
// NOTE(review): presumably this is because int64_t/uint64_t are `long` based
// in LP64, which would change the mangled names of overloaded test functions;
// confirm against the link errors this header was introduced to avoid.
typedef int int32; // Signed, as the name says (was mistakenly unsigned).
typedef unsigned int uint32;
typedef long long int64;
typedef unsigned long long uint64;
typedef unsigned int SizeT;

#ifdef X8664_STACK_HACK
// The X8664_STACK_HACK is an intrusive way of getting the crosstests to run in
// x86_64 LP64 even with an ILP32 model. This hack allocates a new stack for
// running the tests in the low 4GB of the address space.
#ifdef __cplusplus
#define XTEST_EXTERN extern "C"
#else // !defined(__cplusplus)
#define XTEST_EXTERN extern
#endif // __cplusplus

/// xAllocStack allocates the memory chunk [StackEnd - Size - 1, StackEnd).
/// StackEnd must fit in 32 bits. Conversely, xDeallocStack frees that memory
/// chunk.
/// NOTE(review): as written the range covers Size + 1 bytes; confirm the exact
/// lower bound against the implementation.
/// {@
XTEST_EXTERN unsigned char *xAllocStack(uint64 StackEnd, uint32 Size);
XTEST_EXTERN void xDeallocStack(uint64 StackEnd, uint32 Size);
/// @}

// wrapped_main is invoked by the x86-64 stack hack main. We declare a prototype
// so the compiler (and not the linker) can yell if a test's wrapped_main
// prototype does not match what we want.
XTEST_EXTERN int wrapped_main(int argc, char *argv[]);

#undef XTEST_EXTERN
#endif // X8664_STACK_HACK

#endif // SUBZERO_CROSSTEST_XDEFS_H_
...@@ -124,6 +124,8 @@ def main(): ...@@ -124,6 +124,8 @@ def main():
MakeRuntimesForTarget(targets.X8632Target, ll_files, MakeRuntimesForTarget(targets.X8632Target, ll_files,
srcdir, tempdir, rtdir, args.verbose) srcdir, tempdir, rtdir, args.verbose)
MakeRuntimesForTarget(targets.X8664Target, ll_files,
srcdir, tempdir, rtdir, args.verbose)
MakeRuntimesForTarget(targets.ARM32Target, ll_files, MakeRuntimesForTarget(targets.ARM32Target, ll_files,
srcdir, tempdir, rtdir, args.verbose) srcdir, tempdir, rtdir, args.verbose)
......
...@@ -177,6 +177,18 @@ def main(): ...@@ -177,6 +177,18 @@ def main():
'szrt_{sb}_' + args.target + '.o' 'szrt_{sb}_' + args.target + '.o'
).format(root=nacl_root, sb='sb' if args.sandbox else 'native')) ).format(root=nacl_root, sb='sb' if args.sandbox else 'native'))
pure_c = os.path.splitext(args.driver)[1] == '.c' pure_c = os.path.splitext(args.driver)[1] == '.c'
# TargetX8664 is ilp32, but clang does not currently support such a
# configuration. In order to run the crosstests we play nasty, dangerous
# tricks with the stack pointer.
needs_stack_hack = (args.target == 'x8664')
stack_hack_params = []
if needs_stack_hack:
shellcmd('{bin}/clang -g -o stack_hack.x8664.{key}.o -c '
'stack_hack.x8664.c'.format(bin=bindir, key=key))
stack_hack_params.append('-DX8664_STACK_HACK')
stack_hack_params.append('stack_hack.x8664.{key}.o'.format(key=key))
# Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++. # Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++.
compiler = '{bin}/{prefix}{cc}'.format( compiler = '{bin}/{prefix}{cc}'.format(
bin=bindir, prefix='pnacl-' if args.sandbox else '', bin=bindir, prefix='pnacl-' if args.sandbox else '',
...@@ -189,7 +201,7 @@ def main(): ...@@ -189,7 +201,7 @@ def main():
'-lm', '-lpthread', '-lm', '-lpthread',
'-Wl,--defsym=__Sz_AbsoluteZero=0'] + '-Wl,--defsym=__Sz_AbsoluteZero=0'] +
target_info.cross_headers) target_info.cross_headers)
shellcmd([compiler, args.driver] + objs + shellcmd([compiler] + stack_hack_params + [args.driver] + objs +
['-o', os.path.join(args.dir, args.output)] + sb_native_args) ['-o', os.path.join(args.dir, args.output)] + sb_native_args)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -55,15 +55,17 @@ def main(): ...@@ -55,15 +55,17 @@ def main():
root = FindBaseNaCl() root = FindBaseNaCl()
# The rest of the attribute sets. # The rest of the attribute sets.
targets = [ 'x8632', 'arm32' ] targets = [ 'x8632', 'x8664', 'arm32' ]
sandboxing = [ 'native', 'sandbox' ] sandboxing = [ 'native', 'sandbox' ]
opt_levels = [ 'Om1', 'O2' ] opt_levels = [ 'Om1', 'O2' ]
arch_attrs = { 'x8632': [ 'sse2', 'sse4.1' ], arch_attrs = { 'x8632': [ 'sse2', 'sse4.1' ],
'x8664': [ 'sse2', 'sse4.1' ],
'arm32': [ 'neon', 'hwdiv-arm' ] } 'arm32': [ 'neon', 'hwdiv-arm' ] }
flat_attrs = [] flat_attrs = []
for v in arch_attrs.values(): for v in arch_attrs.values():
flat_attrs += v flat_attrs += v
arch_flags = { 'x8632': [], arch_flags = { 'x8632': [],
'x8664': [],
# ARM doesn't have an integrated assembler yet. # ARM doesn't have an integrated assembler yet.
'arm32': ['--filetype=asm'] } 'arm32': ['--filetype=asm'] }
# all_keys is only used in the help text. # all_keys is only used in the help text.
......
...@@ -40,6 +40,5 @@ ARM32Target = TargetInfo(target='arm32', ...@@ -40,6 +40,5 @@ ARM32Target = TargetInfo(target='arm32',
ld_emu='armelf_nacl', ld_emu='armelf_nacl',
cross_headers=['-isystem', FindARMCrossInclude()]) cross_headers=['-isystem', FindARMCrossInclude()])
def ConvertTripleToNaCl(nonsfi_triple): def ConvertTripleToNaCl(nonsfi_triple):
return nonsfi_triple.replace('linux', 'nacl') return nonsfi_triple.replace('linux', 'nacl')
#include <inttypes.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
...@@ -53,7 +54,7 @@ void __Sz_profile_summary() { ...@@ -53,7 +54,7 @@ void __Sz_profile_summary() {
printf("%s", SubzeroLogo); printf("%s", SubzeroLogo);
for (const struct BlockProfileInfo **curr = &__Sz_block_profile_info; for (const struct BlockProfileInfo **curr = &__Sz_block_profile_info;
*curr != NULL; ++curr) { *curr != NULL; ++curr) {
printf("%lld\t%s\n", (*curr)->Counter, (*curr)->BlockName); printf("%" PRIu64 "\t%s\n", (*curr)->Counter, (*curr)->BlockName);
} }
fflush(stdout); fflush(stdout);
} }
...@@ -243,9 +243,9 @@ public: ...@@ -243,9 +243,9 @@ public:
// Cross Xmm/GPR cast instructions. // Cross Xmm/GPR cast instructions.
template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp { template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, SReg_t); typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, Type, SReg_t);
typedef void (AssemblerX86Base::*TypedEmitAddr)( typedef void (AssemblerX86Base::*TypedEmitAddr)(
Type, DReg_t, const typename Traits::Address &); Type, DReg_t, Type, const typename Traits::Address &);
TypedEmitRegs RegReg; TypedEmitRegs RegReg;
TypedEmitAddr RegAddr; TypedEmitAddr RegAddr;
...@@ -299,7 +299,14 @@ public: ...@@ -299,7 +299,14 @@ public:
typename Traits::GPRRegister src); typename Traits::GPRRegister src);
void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm); void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm);
void movFromAh(const typename Traits::GPRRegister dst); template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type
movabs(const typename Traits::GPRRegister Dst, uint64_t Imm64);
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type
movabs(const typename Traits::GPRRegister, uint64_t) {
llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
}
void movzx(Type Ty, typename Traits::GPRRegister dst, void movzx(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src); typename Traits::GPRRegister src);
...@@ -328,11 +335,13 @@ public: ...@@ -328,11 +335,13 @@ public:
void movss(Type Ty, typename Traits::XmmRegister dst, void movss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src); typename Traits::XmmRegister src);
void movd(typename Traits::XmmRegister dst, typename Traits::GPRRegister src); void movd(Type SrcTy, typename Traits::XmmRegister dst,
void movd(typename Traits::XmmRegister dst, typename Traits::GPRRegister src);
void movd(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src); const typename Traits::Address &src);
void movd(typename Traits::GPRRegister dst, typename Traits::XmmRegister src); void movd(Type DestTy, typename Traits::GPRRegister dst,
void movd(const typename Traits::Address &dst, typename Traits::XmmRegister src);
void movd(Type DestTy, const typename Traits::Address &dst,
typename Traits::XmmRegister src); typename Traits::XmmRegister src);
void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src); void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
...@@ -504,9 +513,9 @@ public: ...@@ -504,9 +513,9 @@ public:
void cvttps2dq(Type, typename Traits::XmmRegister dst, void cvttps2dq(Type, typename Traits::XmmRegister dst,
const typename Traits::Address &src); const typename Traits::Address &src);
void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
typename Traits::GPRRegister src); typename Traits::GPRRegister src);
void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
const typename Traits::Address &src); const typename Traits::Address &src);
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst, void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
...@@ -514,9 +523,9 @@ public: ...@@ -514,9 +523,9 @@ public:
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst, void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src); const typename Traits::Address &src);
void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst, void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
typename Traits::XmmRegister src); typename Traits::XmmRegister src);
void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst, void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
const typename Traits::Address &src); const typename Traits::Address &src);
void ucomiss(Type Ty, typename Traits::XmmRegister a, void ucomiss(Type Ty, typename Traits::XmmRegister a,
...@@ -719,6 +728,12 @@ public: ...@@ -719,6 +728,12 @@ public:
void cbw(); void cbw();
void cwd(); void cwd();
void cdq(); void cdq();
// cqo comes as a pair of overloads selected at compile time through
// std::enable_if on T::Is64Bit (T defaults to Traits). This one is only
// declared here and participates when T::Is64Bit is true.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type cqo();
// Counterpart chosen when T::Is64Bit is false: it is defined inline and
// unconditionally reports a fatal error instead of emitting anything.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type cqo() {
llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
}
void div(Type Ty, typename Traits::GPRRegister reg); void div(Type Ty, typename Traits::GPRRegister reg);
void div(Type Ty, const typename Traits::Address &address); void div(Type Ty, const typename Traits::Address &address);
...@@ -936,7 +951,7 @@ private: ...@@ -936,7 +951,7 @@ private:
typename Traits::GPRRegister>::value; typename Traits::GPRRegister>::value;
return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 && return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
isByteSizedArithType(Ty); isByteSizedType(Ty);
}; };
// assembleAndEmitRex is used for determining which (if any) rex prefix should // assembleAndEmitRex is used for determining which (if any) rex prefix should
......
...@@ -362,8 +362,7 @@ void ELFRelocationSection::writeData(const GlobalContext &Ctx, ELFStreamer &Str, ...@@ -362,8 +362,7 @@ void ELFRelocationSection::writeData(const GlobalContext &Ctx, ELFStreamer &Str,
llvm::report_fatal_error("Missing symbol mentioned in reloc"); llvm::report_fatal_error("Missing symbol mentioned in reloc");
if (IsELF64) { if (IsELF64) {
llvm_unreachable( // TODO(jpp): check that Fixup.offset() is correct even for pc-rel.
"Not tested -- check that Fixup.offset() is correct even for pc-rel");
Elf64_Rela Rela; Elf64_Rela Rela;
Rela.r_offset = Fixup.position(); Rela.r_offset = Fixup.position();
Rela.setSymbolAndType(Symbol->getNumber(), Fixup.kind()); Rela.setSymbolAndType(Symbol->getNumber(), Fixup.kind());
......
...@@ -206,7 +206,7 @@ MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress( ...@@ -206,7 +206,7 @@ MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
} else if (const auto CR = } else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) { llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
Disp = CR->getOffset(); Disp = CR->getOffset();
Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR); Fixup = Asm->createFixup(RelFixup, CR);
} else { } else {
llvm_unreachable("Unexpected offset type"); llvm_unreachable("Unexpected offset type");
} }
......
...@@ -179,8 +179,8 @@ MachineTraits<TargetX8664>::X86OperandMem::toAsmAddress( ...@@ -179,8 +179,8 @@ MachineTraits<TargetX8664>::X86OperandMem::toAsmAddress(
Disp = static_cast<int32_t>(CI->getValue()); Disp = static_cast<int32_t>(CI->getValue());
} else if (const auto CR = } else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) { llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
Disp = CR->getOffset(); Disp = CR->getOffset() - 4;
Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR); Fixup = Asm->createFixup(PcRelFixup, CR);
} else { } else {
llvm_unreachable("Unexpected offset type"); llvm_unreachable("Unexpected offset type");
} }
......
...@@ -1100,6 +1100,8 @@ class InstX86Movsx ...@@ -1100,6 +1100,8 @@ class InstX86Movsx
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> { : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> {
public: public:
static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) { static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
assert(typeWidthInBytes(Dest->getType()) >
typeWidthInBytes(Src->getType()));
return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src); return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src);
} }
...@@ -1116,6 +1118,8 @@ class InstX86Movzx ...@@ -1116,6 +1118,8 @@ class InstX86Movzx
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> { : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> {
public: public:
static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) { static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
assert(typeWidthInBytes(Dest->getType()) >
typeWidthInBytes(Src->getType()));
return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src); return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src);
} }
......
...@@ -792,7 +792,7 @@ void TargetDataX8632::lowerJumpTables() { ...@@ -792,7 +792,7 @@ void TargetDataX8632::lowerJumpTables() {
case FT_Elf: { case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter(); ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JT : Ctx->getJumpTables()) for (const JumpTableData &JT : Ctx->getJumpTables())
Writer->writeJumpTable(JT, llvm::ELF::R_386_32); Writer->writeJumpTable(JT, TargetX8632::Traits::RelFixup);
} break; } break;
case FT_Asm: case FT_Asm:
// Already emitted from Cfg // Already emitted from Cfg
...@@ -821,7 +821,8 @@ void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars, ...@@ -821,7 +821,8 @@ void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
switch (Ctx->getFlags().getOutFileType()) { switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: { case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter(); ELFObjectWriter *Writer = Ctx->getObjectWriter();
Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); Writer->writeDataSection(Vars, TargetX8632::Traits::RelFixup,
SectionSuffix);
} break; } break;
case FT_Asm: case FT_Asm:
case FT_Iasm: { case FT_Iasm: {
......
...@@ -68,6 +68,7 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -68,6 +68,7 @@ template <> struct MachineTraits<TargetX8632> {
static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax; static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx; static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32; static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32;
static const FixupKind RelFixup = llvm::ELF::R_386_32;
class Operand { class Operand {
public: public:
...@@ -272,6 +273,7 @@ template <> struct MachineTraits<TargetX8632> { ...@@ -272,6 +273,7 @@ template <> struct MachineTraits<TargetX8632> {
}; };
static const char *TargetName; static const char *TargetName;
static constexpr Type WordType = IceType_i32;
static IceString getRegName(SizeT RegNum, Type Ty) { static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM); assert(RegNum < RegisterSet::Reg_NUM);
......
...@@ -123,7 +123,7 @@ getRegisterForGprArgNum(uint32_t ArgNum) { ...@@ -123,7 +123,7 @@ getRegisterForGprArgNum(uint32_t ArgNum) {
} }
// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
// OperandList in lowerCall. std::max() was supposed to work, but it doesn't. // OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
} // end of anonymous namespace } // end of anonymous namespace
...@@ -239,7 +239,6 @@ void TargetX8664::lowerCall(const InstCall *Instr) { ...@@ -239,7 +239,6 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
Variable *Dest = Instr->getDest(); Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary. // ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr; Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
if (Dest) { if (Dest) {
switch (Dest->getType()) { switch (Dest->getType()) {
case IceType_NUM: case IceType_NUM:
...@@ -250,12 +249,8 @@ void TargetX8664::lowerCall(const InstCall *Instr) { ...@@ -250,12 +249,8 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
case IceType_i8: case IceType_i8:
case IceType_i16: case IceType_i16:
case IceType_i32: case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64: case IceType_i64:
// TODO(jpp): return i64 in a GPR. ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
break; break;
case IceType_f32: case IceType_f32:
case IceType_f64: case IceType_f64:
...@@ -271,27 +266,16 @@ void TargetX8664::lowerCall(const InstCall *Instr) { ...@@ -271,27 +266,16 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
} }
} }
Operand *CallTarget = legalize(Instr->getCallTarget()); Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm);
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
if (NeedSandboxing) { if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) { llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
} }
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall); Context.insert(NewCall);
if (NeedSandboxing) if (NeedSandboxing) {
_bundle_unlock(); llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
if (ReturnRegHi) }
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care // Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission. // of resetting the stack offset during emission.
...@@ -315,25 +299,11 @@ void TargetX8664::lowerCall(const InstCall *Instr) { ...@@ -315,25 +299,11 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
assert(ReturnReg && "x86-64 always returns value on registers."); assert(ReturnReg && "x86-64 always returns value on registers.");
// Assign the result of the call to Dest. if (isVectorType(Dest->getType())) {
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
return;
}
assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg); _movp(Dest, ReturnReg);
} else { } else {
assert(isScalarFloatingType(Dest->getType()) ||
isScalarIntegerType(Dest->getType()));
_mov(Dest, ReturnReg); _mov(Dest, ReturnReg);
} }
} }
...@@ -356,36 +326,36 @@ void TargetX8664::lowerArguments() { ...@@ -356,36 +326,36 @@ void TargetX8664::lowerArguments() {
++i) { ++i) {
Variable *Arg = Args[i]; Variable *Arg = Args[i];
Type Ty = Arg->getType(); Type Ty = Arg->getType();
if ((isVectorType(Ty) || isScalarFloatingType(Ty)) && Variable *RegisterArg = nullptr;
NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { int32_t RegNum = Variable::NoRegister;
// Replace Arg in the argument list with the home register. Then if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
// generate an instruction in the prolog to copy the home register if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
// to the assigned location of Arg. continue;
int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs); }
RegNum = getRegisterForXmmArgNum(NumXmmArgs);
++NumXmmArgs; ++NumXmmArgs;
Variable *RegisterArg = Func->makeVariable(Ty); RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump()) } else if (isScalarIntegerType(Ty)) {
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
RegisterArg->setRegNum(RegNum); continue;
RegisterArg->setIsArg(); }
Arg->setIsArg(false); RegNum = getRegisterForGprArgNum(NumGprArgs);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
} else if (isScalarIntegerType(Ty) &&
NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
int32_t RegNum = getRegisterForGprArgNum(NumGprArgs);
++NumGprArgs; ++NumGprArgs;
Variable *RegisterArg = Func->makeVariable(Ty); RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
} }
assert(RegNum != Variable::NoRegister);
assert(RegisterArg != nullptr);
// Replace Arg in the argument list with the home register. Then
// generate an instruction in the prolog to copy the home register
// to the assigned location of Arg.
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
} }
} }
...@@ -393,19 +363,11 @@ void TargetX8664::lowerRet(const InstRet *Inst) { ...@@ -393,19 +363,11 @@ void TargetX8664::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr; Variable *Reg = nullptr;
if (Inst->hasRetValue()) { if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue()); Operand *Src0 = legalize(Inst->getRetValue());
// TODO(jpp): this is not needed. if (isVectorType(Src0->getType()) ||
if (Src0->getType() == IceType_i64) { isScalarFloatingType(Src0->getType())) {
Variable *eax =
legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
Variable *edx =
legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
Reg = eax;
Context.insert(InstFakeUse::create(Func, edx));
} else if (isScalarFloatingType(Src0->getType())) {
_fld(Src0);
} else if (isVectorType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else { } else {
assert(isScalarIntegerType(Src0->getType()));
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax); _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
} }
} }
...@@ -577,19 +539,17 @@ void TargetX8664::addProlog(CfgNode *Node) { ...@@ -577,19 +539,17 @@ void TargetX8664::addProlog(CfgNode *Node) {
unsigned NumGPRArgs = 0; unsigned NumGPRArgs = 0;
for (Variable *Arg : Args) { for (Variable *Arg : Args) {
// Skip arguments passed in registers. // Skip arguments passed in registers.
if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { if (isVectorType(Arg->getType()) || isScalarFloatingType(Arg->getType())) {
++NumXmmArgs; if (NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
continue; ++NumXmmArgs;
} continue;
if (isScalarFloatingType(Arg->getType()) && }
NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { } else {
++NumXmmArgs; assert(isScalarIntegerType(Arg->getType()));
continue; if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
} ++NumGPRArgs;
if (isScalarIntegerType(Arg->getType()) && continue;
NumGPRArgs < Traits::X86_MAX_GPR_ARGS) { }
++NumGPRArgs;
continue;
} }
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
} }
...@@ -679,23 +639,9 @@ void TargetX8664::addEpilog(CfgNode *Node) { ...@@ -679,23 +639,9 @@ void TargetX8664::addEpilog(CfgNode *Node) {
} }
} }
if (!Ctx->getFlags().getUseSandboxing()) if (Ctx->getFlags().getUseSandboxing()) {
return; llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
// Change the original ret instruction into a sandboxed return sequence.
// t:ecx = pop
// bundle_lock
// and t, ~31
// jmp *t
// bundle_unlock
// FakeUse <original_ret_operand>
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_ecx);
lowerIndirectJump(T_ecx);
if (RI->getSrcSize()) {
Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert(InstFakeUse::create(Func, RetValue));
} }
RI->setDeleted();
} }
void TargetX8664::emitJumpTable(const Cfg *Func, void TargetX8664::emitJumpTable(const Cfg *Func,
...@@ -858,8 +804,7 @@ void TargetDataX8664::lowerJumpTables() { ...@@ -858,8 +804,7 @@ void TargetDataX8664::lowerJumpTables() {
case FT_Elf: { case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter(); ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JumpTable : Ctx->getJumpTables()) for (const JumpTableData &JumpTable : Ctx->getJumpTables())
// TODO(jpp): not 386. Writer->writeJumpTable(JumpTable, TargetX8664::Traits::RelFixup);
Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
} break; } break;
case FT_Asm: case FT_Asm:
// Already emitted from Cfg // Already emitted from Cfg
...@@ -888,8 +833,8 @@ void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, ...@@ -888,8 +833,8 @@ void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
switch (Ctx->getFlags().getOutFileType()) { switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: { case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter(); ELFObjectWriter *Writer = Ctx->getObjectWriter();
// TODO(jpp): not 386. Writer->writeDataSection(Vars, TargetX8664::Traits::RelFixup,
Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); SectionSuffix);
} break; } break;
case FT_Asm: case FT_Asm:
case FT_Iasm: { case FT_Iasm: {
......
...@@ -66,7 +66,8 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -66,7 +66,8 @@ template <> struct MachineTraits<TargetX8664> {
using RegisterSet = ::Ice::RegX8664; using RegisterSet = ::Ice::RegX8664;
static const GPRRegister Encoded_Reg_Accumulator = RegX8664::Encoded_Reg_eax; static const GPRRegister Encoded_Reg_Accumulator = RegX8664::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx; static const GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx;
static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32; // TODO(jpp): ??? static const FixupKind PcRelFixup = llvm::ELF::R_X86_64_PC32;
static const FixupKind RelFixup = llvm::ELF::R_X86_64_32S;
class Operand { class Operand {
public: public:
...@@ -270,8 +271,8 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -270,8 +271,8 @@ template <> struct MachineTraits<TargetX8664> {
static Address ofConstPool(Assembler *Asm, const Constant *Imm) { static Address ofConstPool(Assembler *Asm, const Constant *Imm) {
// TODO(jpp): ??? // TODO(jpp): ???
AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm); AssemblerFixup *Fixup = Asm->createFixup(RelFixup, Imm);
const RelocOffsetT Offset = 0; const RelocOffsetT Offset = 4;
return Address(ABSOLUTE, Offset, Fixup); return Address(ABSOLUTE, Offset, Fixup);
} }
}; };
...@@ -293,6 +294,7 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -293,6 +294,7 @@ template <> struct MachineTraits<TargetX8664> {
}; };
static const char *TargetName; static const char *TargetName;
static constexpr Type WordType = IceType_i64;
static IceString getRegName(SizeT RegNum, Type Ty) { static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM); assert(RegNum < RegisterSet::Reg_NUM);
...@@ -331,7 +333,7 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -331,7 +333,7 @@ template <> struct MachineTraits<TargetX8664> {
#define X(val, encode, name64, name32, name16, name8, scratch, preserved, \ #define X(val, encode, name64, name32, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \ stackptr, frameptr, isInt, isFP) \
(*IntegerRegisters)[RegisterSet::val] = isInt; \ (*IntegerRegisters)[RegisterSet::val] = isInt; \
(*IntegerRegistersI8)[RegisterSet::val] = 1; \ (*IntegerRegistersI8)[RegisterSet::val] = isInt; \
(*FloatRegisters)[RegisterSet::val] = isFP; \ (*FloatRegisters)[RegisterSet::val] = isFP; \
(*VectorRegisters)[RegisterSet::val] = isFP; \ (*VectorRegisters)[RegisterSet::val] = isFP; \
(*ScratchRegs)[RegisterSet::val] = scratch; (*ScratchRegs)[RegisterSet::val] = scratch;
...@@ -450,7 +452,7 @@ template <> struct MachineTraits<TargetX8664> { ...@@ -450,7 +452,7 @@ template <> struct MachineTraits<TargetX8664> {
/// address. /// address.
static const uint32_t X86_STACK_ALIGNMENT_BYTES; static const uint32_t X86_STACK_ALIGNMENT_BYTES;
/// Size of the return address on the stack /// Size of the return address on the stack
static const uint32_t X86_RET_IP_SIZE_BYTES = 4; static const uint32_t X86_RET_IP_SIZE_BYTES = 8;
/// The number of different NOP instructions /// The number of different NOP instructions
static const uint32_t X86_NUM_NOP_VARIANTS = 5; static const uint32_t X86_NUM_NOP_VARIANTS = 5;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "IceInst.h" #include "IceInst.h"
#include "IceSwitchLowering.h" #include "IceSwitchLowering.h"
#include "IceTargetLowering.h" #include "IceTargetLowering.h"
#include "IceUtils.h"
#include <type_traits> #include <type_traits>
#include <utility> #include <utility>
...@@ -80,10 +81,9 @@ public: ...@@ -80,10 +81,9 @@ public:
: Traits::RegisterSet::Reg_esp; : Traits::RegisterSet::Reg_esp;
} }
size_t typeWidthInBytesOnStack(Type Ty) const override { size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1, // Round up to the next multiple of WordType bytes.
// i8, and i16 are rounded up to 4 bytes. const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType);
// TODO(jpp): this needs to round to multiples of 8 bytes in x86-64. return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
return (typeWidthInBytes(Ty) + 3) & ~3;
} }
SizeT getMinJumpTableSize() const override { return 4; } SizeT getMinJumpTableSize() const override { return 4; }
...@@ -98,14 +98,40 @@ public: ...@@ -98,14 +98,40 @@ public:
void emit(const ConstantDouble *C) const final; void emit(const ConstantDouble *C) const final;
void initNodeForLowering(CfgNode *Node) override; void initNodeForLowering(CfgNode *Node) override;
/// Ensure that a 64-bit Variable has been split into 2 32-bit /// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit
/// Variables, creating them if necessary. This is needed for all /// Variables, creating them if necessary. This is needed for all
/// I64 operations, and it is needed for pushing F64 arguments for /// I64 operations, and it is needed for pushing F64 arguments for
/// function calls using the 32-bit push instruction (though the /// function calls using the 32-bit push instruction (though the
/// latter could be done by directly writing to the stack). /// latter could be done by directly writing to the stack).
void split64(Variable *Var); ///
Operand *loOperand(Operand *Operand); /// x86-64: Complains loudly if invoked because the cpu can handle
Operand *hiOperand(Operand *Operand); /// 64-bit types natively.
///
/// The two overloads below are mutually exclusive: std::enable_if keeps
/// exactly one of them, depending on T::Is64Bit (T defaults to Traits).
///
/// Overload kept when T::Is64Bit is false. Declaration only; its body is
/// not part of this view.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var);
/// Overload kept when T::Is64Bit is true. The argument is ignored and
/// llvm::report_fatal_error is called with a message naming split64.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type split64(Variable *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (split64)");
}
/// loOperand overload kept when T::Is64Bit is false (T defaults to Traits).
/// Returns an Operand pointer. Declaration only; its body is not part of
/// this view.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, Operand>::type *
loOperand(Operand *Operand);
/// loOperand overload kept when T::Is64Bit is true. The argument is ignored
/// and llvm::report_fatal_error is called with a message naming loOperand;
/// no value is returned from this body.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, Operand>::type *loOperand(Operand *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (loOperand)");
}
/// hiOperand overload kept when T::Is64Bit is false (T defaults to Traits).
/// Returns an Operand pointer. Declaration only; its body is not part of
/// this view.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, Operand>::type *
hiOperand(Operand *Operand);
/// hiOperand overload kept when T::Is64Bit is true. The argument is ignored
/// and llvm::report_fatal_error is called with a message naming hiOperand;
/// no value is returned from this body.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, Operand>::type *hiOperand(Operand *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (hiOperand)");
}
void finishArgumentLowering(Variable *Arg, Variable *FramePtr, void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes); size_t BasicFrameOffset, size_t &InArgsSizeBytes);
typename Traits::Address stackVarToAsmOperand(const Variable *Var) const; typename Traits::Address stackVarToAsmOperand(const Variable *Var) const;
...@@ -128,6 +154,19 @@ protected: ...@@ -128,6 +154,19 @@ protected:
void lowerExtractElement(const InstExtractElement *Inst) override; void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override; void lowerFcmp(const InstFcmp *Inst) override;
void lowerIcmp(const InstIcmp *Inst) override; void lowerIcmp(const InstIcmp *Inst) override;
/// x86-64: Complains loudly if invoked because the cpu can handle 64-bit
/// types natively. This overload is kept only when T::Is64Bit is true (T
/// defaults to Traits); the argument is ignored and
/// llvm::report_fatal_error is called with a message naming lowerIcmp64.
template <typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type
lowerIcmp64(const InstIcmp *) {
llvm::report_fatal_error(
"Hey, yo! This is x86-64. Watcha doin'? (lowerIcmp64)");
}
/// x86-32: lowerIcmp64 handles 64-bit icmp lowering. This overload is kept
/// only when T::Is64Bit is false. Declaration only; its body is not part of
/// this view.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type
lowerIcmp64(const InstIcmp *Inst);
void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override; void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
void lowerInsertElement(const InstInsertElement *Inst) override; void lowerInsertElement(const InstInsertElement *Inst) override;
void lowerLoad(const InstLoad *Inst) override; void lowerLoad(const InstLoad *Inst) override;
......
...@@ -538,7 +538,8 @@ TEST_F(AssemblerX8632Test, MovdToXmm) { ...@@ -538,7 +538,8 @@ TEST_F(AssemblerX8632Test, MovdToXmm) {
\ \
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##Src, Immediate(Value)); \ __ mov(IceType_i32, GPRRegister::Encoded_Reg_##Src, Immediate(Value)); \
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ __ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movd(XmmRegister::Encoded_Reg_##Dst, GPRRegister::Encoded_Reg_##Src); \ __ movd(IceType_i32, XmmRegister::Encoded_Reg_##Dst, \
GPRRegister::Encoded_Reg_##Src); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
\ \
...@@ -560,7 +561,7 @@ TEST_F(AssemblerX8632Test, MovdToXmm) { ...@@ -560,7 +561,7 @@ TEST_F(AssemblerX8632Test, MovdToXmm) {
const uint64_t V1 = 0xFFFFFFFF00000000ull; \ const uint64_t V1 = 0xFFFFFFFF00000000ull; \
\ \
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ __ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
__ movd(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ __ movd(IceType_i32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
\ \
...@@ -609,7 +610,8 @@ TEST_F(AssemblerX8632Test, MovdFromXmm) { ...@@ -609,7 +610,8 @@ TEST_F(AssemblerX8632Test, MovdFromXmm) {
const uint32_t V0 = Value; \ const uint32_t V0 = Value; \
\ \
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \ __ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ movd(GPRRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src); \ __ movd(IceType_i32, GPRRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
\ \
...@@ -631,7 +633,7 @@ TEST_F(AssemblerX8632Test, MovdFromXmm) { ...@@ -631,7 +633,7 @@ TEST_F(AssemblerX8632Test, MovdFromXmm) {
const uint32_t V1 = ~(Value); \ const uint32_t V1 = ~(Value); \
\ \
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \ __ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ movd(dwordAddress(T1), XmmRegister::Encoded_Reg_##Src); \ __ movd(IceType_i32, dwordAddress(T1), XmmRegister::Encoded_Reg_##Src); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
\ \
......
...@@ -1072,7 +1072,7 @@ TEST_F(AssemblerX8632Test, Cvt) { ...@@ -1072,7 +1072,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \ __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##SrcValue)); \ Immediate(Inst##Size##SrcValue)); \
__ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
GPRRegister::Encoded_Reg_##GPR); \ GPRRegister::Encoded_Reg_##GPR); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
...@@ -1092,7 +1092,7 @@ TEST_F(AssemblerX8632Test, Cvt) { ...@@ -1092,7 +1092,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \ __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##DstValue)); \ Immediate(Inst##Size##DstValue)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \ __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, GPRRegister::Encoded_Reg_##GPR, \ __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
XmmRegister::Encoded_Reg_##Src); \ XmmRegister::Encoded_Reg_##Src); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
...@@ -1132,7 +1132,7 @@ TEST_F(AssemblerX8632Test, Cvt) { ...@@ -1132,7 +1132,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
const uint32_t T1 = allocateDword(); \ const uint32_t T1 = allocateDword(); \
\ \
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
dwordAddress(T1)); \ dwordAddress(T1)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
...@@ -1152,7 +1152,7 @@ TEST_F(AssemblerX8632Test, Cvt) { ...@@ -1152,7 +1152,7 @@ TEST_F(AssemblerX8632Test, Cvt) {
\ \
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \ __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##DstValue)); \ Immediate(Inst##Size##DstValue)); \
__ cvt##Inst(IceType_f##Size, GPRRegister::Encoded_Reg_##GPR, \ __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
dwordAddress(T0)); \ dwordAddress(T0)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
......
...@@ -1104,15 +1104,16 @@ TEST_F(AssemblerX8664Test, Cvt) { ...@@ -1104,15 +1104,16 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \ reset(); \
} while (0) } while (0)
#define TestImplSXmmReg(Dst, GPR, Inst, Size) \ #define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType) \
do { \ do { \
static constexpr char TestString[] = \ static constexpr char TestString[] = \
"(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \ "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")"; \
const uint32_t T0 = allocateDqword(); \ const uint32_t T0 = allocateDqword(); \
\ \
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
__ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR()); \ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
Encoded_GPR_##GPR()); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##DstValue); \ test.setDqwordTo(T0, Inst##Size##DstValue); \
...@@ -1122,21 +1123,23 @@ TEST_F(AssemblerX8664Test, Cvt) { ...@@ -1122,21 +1123,23 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \ reset(); \
} while (0) } while (0)
#define TestImplSRegXmm(GPR, Src, Inst, Size) \ #define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size) \
do { \ do { \
static constexpr char TestString[] = \ static constexpr char TestString[] = \
"(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \ "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")"; \
const uint32_t T0 = allocateDqword(); \ const uint32_t T0 = allocateDqword(); \
\ \
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
__ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src()); \ __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
Encoded_Xmm_##Src()); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##SrcValue); \ test.setDqwordTo(T0, Inst##Size##SrcValue); \
test.run(); \ test.run(); \
\ \
ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \ ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
test.GPR()) \
<< TestString; \ << TestString; \
reset(); \ reset(); \
} while (0) } while (0)
...@@ -1160,15 +1163,16 @@ TEST_F(AssemblerX8664Test, Cvt) { ...@@ -1160,15 +1163,16 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \ reset(); \
} while (0) } while (0)
#define TestImplSXmmAddr(Dst, Inst, Size) \ #define TestImplSXmmAddr(Dst, Inst, Size, IntType) \
do { \ do { \
static constexpr char TestString[] = \ static constexpr char TestString[] = \
"(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \ "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")"; \
const uint32_t T0 = allocateDqword(); \ const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDword(); \ const uint32_t T1 = allocateDword(); \
\ \
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
dwordAddress(T1)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##DstValue); \ test.setDqwordTo(T0, Inst##Size##DstValue); \
...@@ -1179,20 +1183,22 @@ TEST_F(AssemblerX8664Test, Cvt) { ...@@ -1179,20 +1183,22 @@ TEST_F(AssemblerX8664Test, Cvt) {
reset(); \ reset(); \
} while (0) } while (0)
// TestImplSRegAddr: one cvt##Inst test case whose operands are a
// general-purpose register (Encoded_GPR_##GPR()) and a memory location
// (dwordAddress(T0)).
//
// This capture is a side-by-side diff: every line below carries the
// pre-change text followed by the post-change text. The post-change version
// adds the IntSize parameter, passes IceType_i##IntSize ahead of the register
// operand and IceType_f##Size ahead of the address operand, appends #IntSize
// to TestString, and compares against uint##IntSize##_t instead of uint32_t.
//
// Parameters (post-change signature):
//   GPR     - register name; pasted into Encoded_GPR_##GPR() and test.GPR().
//   Inst    - conversion suffix; pasted into cvt##Inst and into the
//             Inst##Size##DstValue / SrcValue / Expected identifiers.
//   IntSize - integer width; pasted into IceType_i##IntSize and
//             uint##IntSize##_t.
//   Size    - float width; pasted into IceType_f##Size and the
//             Inst##Size##... identifiers.
//
// Sequence: allocate one dqword slot T0; emit a mov of Inst##Size##DstValue
// into the register; emit the conversion reading from T0; assemble; store
// Inst##Size##SrcValue into T0 (done after assemble(), before run()); run;
// assert the register equals Inst##Size##Expected; reset() for the next case.
//
// NOTE(review): the preload mov uses IceType_i32 even when IntSize is 64;
// confirm this is intended for the 64-bit cases.
#define TestImplSRegAddr(GPR, Inst, Size) \ #define TestImplSRegAddr(GPR, Inst, IntSize, Size) \
do { \ do { \
static constexpr char TestString[] = \ static constexpr char TestString[] = \
"(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \ "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")"; \
const uint32_t T0 = allocateDqword(); \ const uint32_t T0 = allocateDqword(); \
\ \
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
__ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), dwordAddress(T0)); \ __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
dwordAddress(T0)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##SrcValue); \ test.setDqwordTo(T0, Inst##Size##SrcValue); \
test.run(); \ test.run(); \
\ \
ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \ ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
test.GPR()) \
<< TestString; \ << TestString; \
reset(); \ reset(); \
} while (0) } while (0)
...@@ -1203,10 +1209,14 @@ TEST_F(AssemblerX8664Test, Cvt) { ...@@ -1203,10 +1209,14 @@ TEST_F(AssemblerX8664Test, Cvt) {
TestImplPXmmAddr(Src, dq2ps, Size); \ TestImplPXmmAddr(Src, dq2ps, Size); \
TestImplPXmmXmm(Dst, Src, tps2dq, Size); \ TestImplPXmmXmm(Dst, Src, tps2dq, Size); \
TestImplPXmmAddr(Src, tps2dq, Size); \ TestImplPXmmAddr(Src, tps2dq, Size); \
TestImplSXmmReg(Dst, GPR, si2ss, Size); \ TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32); \
TestImplSXmmAddr(Dst, si2ss, Size); \ TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64); \
TestImplSRegXmm(GPR, Src, tss2si, Size); \ TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32); \
TestImplSRegAddr(GPR, tss2si, Size); \ TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64); \
TestImplSRegXmm(GPR, Src, tss2si, 32, Size); \
TestImplSRegXmm(GPR, Src, tss2si, 64, Size); \
TestImplSRegAddr(GPR, tss2si, 32, Size); \
TestImplSRegAddr(GPR, tss2si, 64, Size); \
TestImplPXmmXmm(Dst, Src, float2float, Size); \ TestImplPXmmXmm(Dst, Src, float2float, Size); \
TestImplPXmmAddr(Src, float2float, Size); \ TestImplPXmmAddr(Src, float2float, Size); \
} while (0) } while (0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment