Commit 7fa22d8a by Matt Wala

Lower the rest of the vector arithmetic operations.

The instructions emitted by the lowering operations require memory operands to be aligned to 16 bytes. Since there is no support for aligning memory operands in Subzero, do the arithmetic in registers for now. Add vector arithmetic to the arith crosstest. Pass the -mstackrealign parameter to the crosstest clang so that llc code called back from Subzero code (helper calls) doesn't assume that the stack is aligned at the entry to the call. BUG=none R=jvoung@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/397833002
parent 83b8036b
...@@ -130,6 +130,7 @@ if __name__ == '__main__': ...@@ -130,6 +130,7 @@ if __name__ == '__main__':
objs.append(bitcode) objs.append(bitcode)
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++' linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] + # TODO: Remove -mstackrealign after Subzero supports stack alignment.
objs + shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32',
'-mstackrealign', args.driver] + objs +
['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)]) ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
...@@ -10,7 +10,10 @@ ...@@ -10,7 +10,10 @@
uint8_t test##inst(uint8_t a, uint8_t b) { return a op b; } \ uint8_t test##inst(uint8_t a, uint8_t b) { return a op b; } \
uint16_t test##inst(uint16_t a, uint16_t b) { return a op b; } \ uint16_t test##inst(uint16_t a, uint16_t b) { return a op b; } \
uint32_t test##inst(uint32_t a, uint32_t b) { return a op b; } \ uint32_t test##inst(uint32_t a, uint32_t b) { return a op b; } \
uint64_t test##inst(uint64_t a, uint64_t b) { return a op b; } uint64_t test##inst(uint64_t a, uint64_t b) { return a op b; } \
v4ui32 test##inst(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 test##inst(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 test##inst(v16ui8 a, v16ui8 b) { return a op b; }
UINTOP_TABLE UINTOP_TABLE
#undef X #undef X
...@@ -19,12 +22,16 @@ UINTOP_TABLE ...@@ -19,12 +22,16 @@ UINTOP_TABLE
int8_t test##inst(int8_t a, int8_t b) { return a op b; } \ int8_t test##inst(int8_t a, int8_t b) { return a op b; } \
int16_t test##inst(int16_t a, int16_t b) { return a op b; } \ int16_t test##inst(int16_t a, int16_t b) { return a op b; } \
int32_t test##inst(int32_t a, int32_t b) { return a op b; } \ int32_t test##inst(int32_t a, int32_t b) { return a op b; } \
int64_t test##inst(int64_t a, int64_t b) { return a op b; } int64_t test##inst(int64_t a, int64_t b) { return a op b; } \
v4si32 test##inst(v4si32 a, v4si32 b) { return a op b; } \
v8si16 test##inst(v8si16 a, v8si16 b) { return a op b; } \
v16si8 test##inst(v16si8 a, v16si8 b) { return a op b; }
SINTOP_TABLE SINTOP_TABLE
#undef X #undef X
#define X(inst, op, func) \ #define X(inst, op, func) \
float test##inst(float a, float b) { return func(a op b); } \ float test##inst(float a, float b) { return func(a op b); } \
double test##inst(double a, double b) { return func(a op b); } double test##inst(double a, double b) { return func(a op b); } \
v4f32 test##inst(v4f32 a, v4f32 b) { return func(a op b); }
FPOP_TABLE FPOP_TABLE
#undef X #undef X
...@@ -42,4 +42,27 @@ ...@@ -42,4 +42,27 @@
// instruction and "(a + b)" for the Fadd instruction. The two // instruction and "(a + b)" for the Fadd instruction. The two
// versions of myFrem() are defined in a separate bitcode file. // versions of myFrem() are defined in a separate bitcode file.
// Brace-enclosed initializer list of integer test inputs.  The entries are
// 0 and 1 plus the values immediately around the 0x7f/0x80, 0xff/0x100,
// 0x7fff/0x8000, 0xffff/0x10000, 0x7fffffff/0x80000000 and 0xffffffff
// boundaries.  Intended to be used as an array initializer, e.g.
// "volatile unsigned Values[] = INT_VALUE_ARRAY;".
#define INT_VALUE_ARRAY \
{ 0x0, 0x1, 0x7ffffffe, 0x7fffffff, \
0x80000000, 0x80000001, 0xfffffffe, 0xffffffff, \
0x7e, 0x7f, 0x80, 0x81, \
0xfe, 0xff, 0x100, 0x101, \
0x7ffe, 0x7fff, 0x8000, 0x8001, \
0xfffe, 0xffff, 0x10000, 0x10001 }
// Brace-enclosed initializer list of floating-point test inputs: small and
// boundary integer values, the caller-supplied infinities and NaNs, negative
// zero, and the FLT_/DBL_ MIN and MAX limits (requires <cfloat>).
//
// Parameters are the expressions to use for negative infinity, positive
// infinity, negative NaN and (positive) NaN, in that order.  The fourth
// parameter was previously spelled "NaN" while the body referred to "Nan",
// so the argument was ignored and the macro only compiled when a variable
// literally named "Nan" happened to be in scope at the point of use.
#define FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan)                            \
  { 0,                    1,                    0x7e,                          \
    0x7f,                 0x80,                 0x81,                          \
    0xfe,                 0xff,                 0x7ffe,                        \
    0x7fff,               0x8000,               0x8001,                        \
    0xfffe,               0xffff,               0x7ffffffe,                    \
    0x7fffffff,           0x80000000,           0x80000001,                    \
    0xfffffffe,           0xffffffff,           0x100000000ll,                 \
    0x100000001ll,        0x7ffffffffffffffell, 0x7fffffffffffffffll,          \
    0x8000000000000000ll, 0x8000000000000001ll, 0xfffffffffffffffell,          \
    0xffffffffffffffffll, NegInf,               PosInf,                        \
    Nan,                  NegNan,               -0.0,                          \
    FLT_MIN,              FLT_MAX,              DBL_MIN,                       \
    DBL_MAX }
#endif // TEST_ARITH_DEF #endif // TEST_ARITH_DEF
#include <stdint.h> #include <stdint.h>
#include "test_arith.def" #include "test_arith.def"
// Vector types
typedef int32_t v4si32 __attribute__((vector_size(16)));
typedef uint32_t v4ui32 __attribute__((vector_size(16)));
typedef int16_t v8si16 __attribute__((vector_size(16)));
typedef uint16_t v8ui16 __attribute__((vector_size(16)));
typedef int8_t v16si8 __attribute__((vector_size(16)));
typedef uint8_t v16ui8 __attribute__((vector_size(16)));
typedef float v4f32 __attribute__((vector_size(16)));
#define X(inst, op, isdiv) \ #define X(inst, op, isdiv) \
bool test##inst(bool a, bool b); \ bool test##inst(bool a, bool b); \
uint8_t test##inst(uint8_t a, uint8_t b); \ uint8_t test##inst(uint8_t a, uint8_t b); \
uint16_t test##inst(uint16_t a, uint16_t b); \ uint16_t test##inst(uint16_t a, uint16_t b); \
uint32_t test##inst(uint32_t a, uint32_t b); \ uint32_t test##inst(uint32_t a, uint32_t b); \
uint64_t test##inst(uint64_t a, uint64_t b); uint64_t test##inst(uint64_t a, uint64_t b); \
v4ui32 test##inst(v4ui32 a, v4ui32 b); \
v8ui16 test##inst(v8ui16 a, v8ui16 b); \
v16ui8 test##inst(v16ui8 a, v16ui8 b);
UINTOP_TABLE UINTOP_TABLE
#undef X #undef X
...@@ -15,18 +27,24 @@ UINTOP_TABLE ...@@ -15,18 +27,24 @@ UINTOP_TABLE
int8_t test##inst(int8_t a, int8_t b); \ int8_t test##inst(int8_t a, int8_t b); \
int16_t test##inst(int16_t a, int16_t b); \ int16_t test##inst(int16_t a, int16_t b); \
int32_t test##inst(int32_t a, int32_t b); \ int32_t test##inst(int32_t a, int32_t b); \
int64_t test##inst(int64_t a, int64_t b); int64_t test##inst(int64_t a, int64_t b); \
v4si32 test##inst(v4si32 a, v4si32 b); \
v8si16 test##inst(v8si16 a, v8si16 b); \
v16si8 test##inst(v16si8 a, v16si8 b);
SINTOP_TABLE SINTOP_TABLE
#undef X #undef X
float myFrem(float a, float b); float myFrem(float a, float b);
double myFrem(double a, double b); double myFrem(double a, double b);
v4f32 myFrem(v4f32 a, v4f32 b);
#define X(inst, op, func) \ #define X(inst, op, func) \
float test##inst(float a, float b); \ float test##inst(float a, float b); \
double test##inst(double a, double b); double test##inst(double a, double b); \
v4f32 test##inst(v4f32 a, v4f32 b);
FPOP_TABLE FPOP_TABLE
#undef X #undef X
float mySqrt(float a); float mySqrt(float a);
double mySqrt(double a); double mySqrt(double a);
// mySqrt for v4f32 is currently unsupported.
...@@ -9,3 +9,8 @@ define double @_Z6myFremdd(double %a, double %b) { ...@@ -9,3 +9,8 @@ define double @_Z6myFremdd(double %a, double %b) {
%rem = frem double %a, %b %rem = frem double %a, %b
ret double %rem ret double %rem
} }
; Vector overload of myFrem (declared in the test header as
; "v4f32 myFrem(v4f32 a, v4f32 b)"): returns the element-wise floating-point
; remainder of %a divided by %b, computed with a single vector frem.
define <4 x float> @_Z6myFremDv4_fS_(<4 x float> %a, <4 x float> %b) {
  %rem = frem <4 x float> %a, %b
  ret <4 x float> %rem
}
...@@ -4,7 +4,10 @@ ...@@ -4,7 +4,10 @@
#include <stdint.h> #include <stdint.h>
#include <climits> // CHAR_BIT
#include <limits>
#include <cfloat> #include <cfloat>
#include <cmath> // fmodf
#include <cstring> // memcmp #include <cstring> // memcmp
#include <iostream> #include <iostream>
...@@ -16,14 +19,20 @@ namespace Subzero_ { ...@@ -16,14 +19,20 @@ namespace Subzero_ {
#include "test_arith.h" #include "test_arith.h"
} }
volatile unsigned Values[] = { 0x0, 0x1, 0x7ffffffe, 0x7fffffff, volatile unsigned Values[] = INT_VALUE_ARRAY;
0x80000000, 0x80000001, 0xfffffffe, 0xffffffff,
0x7e, 0x7f, 0x80, 0x81,
0xfe, 0xff, 0x100, 0x101,
0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x10000, 0x10001, };
const static size_t NumValues = sizeof(Values) / sizeof(*Values); const static size_t NumValues = sizeof(Values) / sizeof(*Values);
// Reports whether computing Value1 / Value2 (or Value1 % Value2) in type T
// could raise a hardware exception, so that callers can skip that input pair.
// Two cases are rejected:
//   - a zero divisor (divide-by-zero exception);
//   - the most negative value of T divided by -1 (quotient overflow on
//     x86-32).  TODO: adjust for other architectures.
template <class T> bool inputsMayTriggerException(T Value1, T Value2) {
  const bool DividesByZero = (Value2 == 0);
  const bool QuotientOverflows =
      (Value1 == std::numeric_limits<T>::min()) && (Value2 == -1);
  return DividesByZero || QuotientOverflows;
}
template <typename TypeUnsigned, typename TypeSigned> template <typename TypeUnsigned, typename TypeSigned>
void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned); typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
...@@ -48,9 +57,9 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -48,9 +57,9 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
(FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv \ (FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv \
} \ } \
, ,
SINTOP_TABLE SINTOP_TABLE
#undef X #undef X
}; };
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs); const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
if (sizeof(TypeUnsigned) <= sizeof(uint32_t)) { if (sizeof(TypeUnsigned) <= sizeof(uint32_t)) {
...@@ -62,12 +71,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -62,12 +71,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
TypeUnsigned Value1 = Values[i]; TypeUnsigned Value1 = Values[i];
TypeUnsigned Value2 = Values[j]; TypeUnsigned Value2 = Values[j];
// Avoid HW divide-by-zero exception. // Avoid HW divide-by-zero exception.
if (Funcs[f].ExcludeDivExceptions && Value2 == 0) if (Funcs[f].ExcludeDivExceptions &&
continue; inputsMayTriggerException<TypeSigned>(Value1, Value2))
// Avoid HW overflow exception (on x86-32). TODO: adjust
// for other architectures.
if (Funcs[f].ExcludeDivExceptions && Value1 == 0x80000000 &&
Value2 == 0xffffffff)
continue; continue;
++TotalTests; ++TotalTests;
TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2); TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
...@@ -76,9 +81,9 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -76,9 +81,9 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
++Passes; ++Passes;
} else { } else {
++Failures; ++Failures;
std::cout << "test" << Funcs[f].Name << (8 * sizeof(TypeUnsigned)) std::cout << "test" << Funcs[f].Name
<< "(" << Value1 << ", " << Value2 << (CHAR_BIT * sizeof(TypeUnsigned)) << "(" << Value1
<< "): sz=" << (unsigned)ResultSz << ", " << Value2 << "): sz=" << (unsigned)ResultSz
<< " llc=" << (unsigned)ResultLlc << std::endl; << " llc=" << (unsigned)ResultLlc << std::endl;
} }
} }
...@@ -96,8 +101,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -96,8 +101,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
(((TypeUnsigned)Values[iHi]) << 32) + Values[iLo]; (((TypeUnsigned)Values[iHi]) << 32) + Values[iLo];
TypeUnsigned Value2 = TypeUnsigned Value2 =
(((TypeUnsigned)Values[jHi]) << 32) + Values[jLo]; (((TypeUnsigned)Values[jHi]) << 32) + Values[jLo];
// Avoid HW divide-by-zero exception. if (Funcs[f].ExcludeDivExceptions &&
if (Funcs[f].ExcludeDivExceptions && Value2 == 0) inputsMayTriggerException<TypeSigned>(Value1, Value2))
continue; continue;
++TotalTests; ++TotalTests;
TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2); TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
...@@ -107,8 +112,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -107,8 +112,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} else { } else {
++Failures; ++Failures;
std::cout << "test" << Funcs[f].Name std::cout << "test" << Funcs[f].Name
<< (8 * sizeof(TypeUnsigned)) << "(" << Value1 << ", " << (CHAR_BIT * sizeof(TypeUnsigned)) << "(" << Value1
<< Value2 << "): sz=" << (unsigned)ResultSz << ", " << Value2 << "): sz=" << (unsigned)ResultSz
<< " llc=" << (unsigned)ResultLlc << std::endl; << " llc=" << (unsigned)ResultLlc << std::endl;
} }
} }
...@@ -119,27 +124,112 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -119,27 +124,112 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
// Vectors are deterministically constructed by selecting elements from
// a pool of scalar values based on a pseudorandom sequence. Testing
// all possible combinations of scalar values from the value table is
// not tractable.
// TODO: Replace with a portable PRNG from C++11.
// Small deterministic pseudorandom number generator used to pick test
// inputs reproducibly.  Each call advances the state and returns it.
//
// Note: a seed that is a multiple of 2147483647 (including 0) makes the
// generator return 0 forever; the default seed of 1 avoids this.
class PRNG {
public:
  PRNG(uint32_t Seed = 1) : State(Seed) {}

  uint32_t operator()() {
    // Lewis, Goodman, and Miller (1969):
    //   State' = (16807 * State) mod (2^31 - 1)
    // The product can need up to 46 bits, so it must be formed in 64-bit
    // arithmetic; a 32-bit multiply silently wraps and yields a different
    // (and lower-quality) sequence.
    const uint64_t Product = static_cast<uint64_t>(16807) * State;
    State = static_cast<uint32_t>(Product % 2147483647);
    return State;
  }

private:
  uint32_t State;
};
const static size_t MaxTestsPerFunc = 100000;
// Writes the elements of the vector Vect to std::cout as a comma-separated
// list ("e0, e1, ...") with no trailing newline.
//   Type        - the vector type of Vect.
//   ElementType - the type of one element; used only to derive the element
//                 count as sizeof(Type) / sizeof(ElementType).
//   CastType    - the type each element is cast to before printing (e.g. to
//                 print 8-bit elements as numbers rather than characters).
template <typename Type, typename ElementType, typename CastType>
void outputVector(const Type Vect) {
  const static size_t NumElementsInType = sizeof(Type) / sizeof(ElementType);
  size_t Lane = 0;
  while (Lane != NumElementsInType) {
    // Every element except the first is preceded by a separator.
    if (Lane != 0)
      std::cout << ", ";
    std::cout << (CastType)Vect[Lane];
    ++Lane;
  }
}
// Cross-tests every integer vector arithmetic operation listed in
// UINTOP_TABLE and SINTOP_TABLE for one vector type.
//
// For each operation, MaxTestsPerFunc pairs of input vectors are built by
// drawing elements pseudorandomly (fixed seed, so runs are reproducible) from
// the global Values pool.  The result of the Subzero_:: implementation is
// compared bytewise against the result of the global-namespace
// implementation.
//
//   TypeUnsigned / TypeSigned               - unsigned and signed vector types.
//   ElementTypeUnsigned / ElementTypeSigned - the matching element types.
//   TotalTests, Passes, Failures            - running counters, updated in
//                                             place.
template <typename TypeUnsigned, typename TypeSigned,
          typename ElementTypeUnsigned, typename ElementTypeSigned>
void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
  typedef TypeSigned (*FuncTypeSigned)(TypeSigned, TypeSigned);
  static struct {
    const char *Name;
    FuncTypeUnsigned FuncLlc;
    FuncTypeUnsigned FuncSz;
    bool ExcludeDivExceptions; // for divide related tests
  } Funcs[] = {
#define X(inst, op, isdiv)                                                     \
  {                                                                            \
    STR(inst), (FuncTypeUnsigned)test##inst,                                   \
        (FuncTypeUnsigned)Subzero_::test##inst, isdiv                          \
  }                                                                            \
  ,
      UINTOP_TABLE
#undef X
#define X(inst, op, isdiv)                                                     \
  {                                                                            \
    STR(inst), (FuncTypeUnsigned)(FuncTypeSigned)test##inst,                   \
        (FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv          \
  }                                                                            \
  ,
          SINTOP_TABLE
#undef X
    };
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  const static size_t NumElementsInType =
      sizeof(TypeUnsigned) / sizeof(ElementTypeUnsigned);
  for (size_t f = 0; f < NumFuncs; ++f) {
    // Restart the sequence for every function so each one sees the same
    // inputs.
    PRNG Index;
    for (size_t i = 0; i < MaxTestsPerFunc; ++i) {
      // Initialize the test vectors.
      TypeUnsigned Value1, Value2;
      // j only advances when an acceptable element pair was found.
      for (size_t j = 0; j < NumElementsInType;) {
        // Draw from the whole value pool.  (Reducing the index modulo the
        // lane count, as was done before, restricted the inputs to the first
        // few entries of Values.)
        ElementTypeUnsigned Element1 = Values[Index() % NumValues];
        ElementTypeUnsigned Element2 = Values[Index() % NumValues];
        if (Funcs[f].ExcludeDivExceptions &&
            inputsMayTriggerException<ElementTypeSigned>(Element1, Element2))
          continue;
        Value1[j] = Element1;
        Value2[j] = Element2;
        ++j;
      }
      // Perform the test.
      TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
      TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
      ++TotalTests;
      if (!memcmp(&ResultSz, &ResultLlc, sizeof(ResultSz))) {
        ++Passes;
      } else {
        // Count the mismatch so that it is reflected in the summary and in
        // the process exit status, like the scalar and FP tests do.
        ++Failures;
        std::cout << "test" << Funcs[f].Name << "v" << NumElementsInType << "i"
                  << (CHAR_BIT * sizeof(ElementTypeUnsigned)) << "(";
        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(Value1);
        std::cout << ", ";
        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(Value2);
        std::cout << "): sz=";
        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(ResultSz);
        std::cout << " llc=";
        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(ResultLlc);
        std::cout << std::endl;
      }
    }
  }
}
template <typename Type> template <typename Type>
void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
static const Type NegInf = -1.0 / 0.0; static const Type NegInf = -1.0 / 0.0;
static const Type PosInf = 1.0 / 0.0; static const Type PosInf = 1.0 / 0.0;
static const Type Nan = 0.0 / 0.0; static const Type Nan = 0.0 / 0.0;
static const Type NegNan = -0.0 / 0.0; static const Type NegNan = -0.0 / 0.0;
volatile Type Values[] = { volatile Type Values[] = FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan);
0, 1, 0x7e,
0x7f, 0x80, 0x81,
0xfe, 0xff, 0x7ffe,
0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe,
0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ll,
0x100000001ll, 0x7ffffffffffffffell, 0x7fffffffffffffffll,
0x8000000000000000ll, 0x8000000000000001ll, 0xfffffffffffffffell,
0xffffffffffffffffll, NegInf, PosInf,
Nan, NegNan, -0.0,
FLT_MIN, FLT_MAX,
DBL_MIN, DBL_MAX
};
const static size_t NumValues = sizeof(Values) / sizeof(*Values); const static size_t NumValues = sizeof(Values) / sizeof(*Values);
typedef Type (*FuncType)(Type, Type); typedef Type (*FuncType)(Type, Type);
static struct { static struct {
...@@ -152,7 +242,7 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -152,7 +242,7 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
, ,
FPOP_TABLE FPOP_TABLE
#undef X #undef X
}; };
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs); const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
for (size_t f = 0; f < NumFuncs; ++f) { for (size_t f = 0; f < NumFuncs; ++f) {
...@@ -169,8 +259,8 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -169,8 +259,8 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} else { } else {
++Failures; ++Failures;
std::cout << std::fixed << "test" << Funcs[f].Name std::cout << std::fixed << "test" << Funcs[f].Name
<< (8 * sizeof(Type)) << "(" << Value1 << ", " << Value2 << (CHAR_BIT * sizeof(Type)) << "(" << Value1 << ", "
<< "): sz=" << ResultSz << " llc=" << ResultLlc << Value2 << "): sz=" << ResultSz << " llc=" << ResultLlc
<< std::endl; << std::endl;
} }
} }
...@@ -186,14 +276,66 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -186,14 +276,66 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
++Passes; ++Passes;
} else { } else {
++Failures; ++Failures;
std::cout << std::fixed << "test_sqrt" std::cout << std::fixed << "test_sqrt" << (CHAR_BIT * sizeof(Type)) << "("
<< (8 * sizeof(Type)) << "(" << Value << Value << "): sz=" << ResultSz << " llc=" << ResultLlc
<< "): sz=" << ResultSz << " llc=" << ResultLlc
<< std::endl; << std::endl;
} }
} }
} }
// Cross-tests every floating-point vector operation listed in FPOP_TABLE on
// v4f32.
//
// For each operation, MaxTestsPerFunc pairs of input vectors are built by
// drawing elements pseudorandomly (fixed seed, so runs are reproducible) from
// the FP_VALUE_ARRAY pool.  The result of the Subzero_:: implementation is
// compared bytewise against the result of the global-namespace
// implementation.  TotalTests, Passes and Failures are running counters that
// are updated in place.
void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  static const float NegInf = -1.0 / 0.0;
  static const float PosInf = 1.0 / 0.0;
  static const float Nan = 0.0 / 0.0;
  static const float NegNan = -0.0 / 0.0;
  volatile float Values[] = FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan);
  const static size_t NumValues = sizeof(Values) / sizeof(*Values);
  typedef v4f32 (*FuncType)(v4f32, v4f32);
  static struct {
    const char *Name;
    FuncType FuncLlc;
    FuncType FuncSz;
  } Funcs[] = {
#define X(inst, op, func)                                                      \
  { STR(inst), (FuncType)test##inst, (FuncType)Subzero_::test##inst }          \
  ,
      FPOP_TABLE
#undef X
    };
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  const static size_t NumElementsInType = 4;
  for (size_t f = 0; f < NumFuncs; ++f) {
    // Restart the sequence for every function so each one sees the same
    // inputs.
    PRNG Index;
    for (size_t i = 0; i < MaxTestsPerFunc; ++i) {
      // Initialize the test vectors.
      v4f32 Value1, Value2;
      for (size_t j = 0; j < NumElementsInType; ++j) {
        // Draw from the whole value pool.  (Reducing the index modulo the
        // lane count, as was done before, only ever selected the first four
        // entries, so infinities, NaNs and -0.0 were never exercised.)
        Value1[j] = Values[Index() % NumValues];
        Value2[j] = Values[Index() % NumValues];
      }
      // Perform the test.
      v4f32 ResultSz = Funcs[f].FuncSz(Value1, Value2);
      v4f32 ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
      ++TotalTests;
      if (!memcmp(&ResultSz, &ResultLlc, sizeof(ResultSz))) {
        ++Passes;
      } else {
        ++Failures;
        std::cout << std::fixed << "test" << Funcs[f].Name << "v4f32"
                  << "(";
        outputVector<v4f32, float, float>(Value1);
        std::cout << ", ";
        outputVector<v4f32, float, float>(Value2);
        std::cout << "): sz=";
        outputVector<v4f32, float, float>(ResultSz);
        std::cout << " llc=";
        outputVector<v4f32, float, float>(ResultLlc);
        std::cout << std::endl;
      }
    }
  }
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
size_t TotalTests = 0; size_t TotalTests = 0;
size_t Passes = 0; size_t Passes = 0;
...@@ -203,10 +345,49 @@ int main(int argc, char **argv) { ...@@ -203,10 +345,49 @@ int main(int argc, char **argv) {
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures); testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures); testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures); testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32, uint32_t, int32_t>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16, uint16_t, int16_t>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8, uint8_t, int8_t>(TotalTests, Passes, Failures);
testsFp<float>(TotalTests, Passes, Failures); testsFp<float>(TotalTests, Passes, Failures);
testsFp<double>(TotalTests, Passes, Failures); testsFp<double>(TotalTests, Passes, Failures);
testsVecFp(TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n"; << " Failures=" << Failures << "\n";
return Failures; return Failures;
} }
// Subzero helpers.  Each function applies one operation element-wise to its
// vector operands; they have C linkage so they can be referenced by their
// unmangled names.
extern "C" {

// Defines "Type Name(Type a, Type b)" returning "a Op b".
#define SZ_VECTOR_BINOP_HELPER(Type, Name, Op)                                 \
  Type Name(Type a, Type b) { return a Op b; }

// 4 x i32
SZ_VECTOR_BINOP_HELPER(v4si32, Sz_shl_v4i32, <<)
SZ_VECTOR_BINOP_HELPER(v4si32, Sz_ashr_v4i32, >>)
SZ_VECTOR_BINOP_HELPER(v4ui32, Sz_lshr_v4i32, >>)
SZ_VECTOR_BINOP_HELPER(v4si32, Sz_sdiv_v4i32, /)
SZ_VECTOR_BINOP_HELPER(v4ui32, Sz_udiv_v4i32, /)
SZ_VECTOR_BINOP_HELPER(v4si32, Sz_srem_v4i32, %)
SZ_VECTOR_BINOP_HELPER(v4ui32, Sz_urem_v4i32, %)

// 8 x i16
SZ_VECTOR_BINOP_HELPER(v8si16, Sz_shl_v8i16, <<)
SZ_VECTOR_BINOP_HELPER(v8si16, Sz_ashr_v8i16, >>)
SZ_VECTOR_BINOP_HELPER(v8ui16, Sz_lshr_v8i16, >>)
SZ_VECTOR_BINOP_HELPER(v8si16, Sz_sdiv_v8i16, /)
SZ_VECTOR_BINOP_HELPER(v8ui16, Sz_udiv_v8i16, /)
SZ_VECTOR_BINOP_HELPER(v8si16, Sz_srem_v8i16, %)
SZ_VECTOR_BINOP_HELPER(v8ui16, Sz_urem_v8i16, %)

// 16 x i8
SZ_VECTOR_BINOP_HELPER(v16ui8, Sz_mul_v16i8, *)
SZ_VECTOR_BINOP_HELPER(v16si8, Sz_shl_v16i8, <<)
SZ_VECTOR_BINOP_HELPER(v16si8, Sz_ashr_v16i8, >>)
SZ_VECTOR_BINOP_HELPER(v16ui8, Sz_lshr_v16i8, >>)
SZ_VECTOR_BINOP_HELPER(v16si8, Sz_sdiv_v16i8, /)
SZ_VECTOR_BINOP_HELPER(v16ui8, Sz_udiv_v16i8, /)
SZ_VECTOR_BINOP_HELPER(v16si8, Sz_srem_v16i8, %)
SZ_VECTOR_BINOP_HELPER(v16ui8, Sz_urem_v16i8, %)

#undef SZ_VECTOR_BINOP_HELPER

// 4 x f32 remainder: fmodf applied to each pair of elements.
v4f32 Sz_frem_v4f32(v4f32 a, v4f32 b) {
  v4f32 Result;
  for (int Lane = 0; Lane != 4; ++Lane)
    Result[Lane] = fmodf(a[Lane], b[Lane]);
  return Result;
}
}
...@@ -312,6 +312,21 @@ bool InstX8632Movq::isRedundantAssign() const { ...@@ -312,6 +312,21 @@ bool InstX8632Movq::isRedundantAssign() const {
return false; return false;
} }
// Constructs a pshufd instruction that writes Dest.  The operands are
// registered in order, so Source1 is source 0 and Source2 is source 1; the
// emitter relies on exactly two sources being present.
InstX8632Pshufd::InstX8632Pshufd(Cfg *Func, Variable *Dest, Operand *Source1,
                                 Operand *Source2)
    : InstX8632(Func, InstX8632::Pshufd, 2, Dest) {
  addSource(Source1);
  addSource(Source2);
}
// Constructs a shufps instruction that writes Dest.  Dest is also registered
// as source 0 (it is both read and written), so Source1 and Source2 become
// sources 1 and 2; the emitter prints the destination followed by sources 1
// and 2 only.
InstX8632Shufps::InstX8632Shufps(Cfg *Func, Variable *Dest, Operand *Source1,
                                 Operand *Source2)
    : InstX8632(Func, InstX8632::Shufps, 3, Dest) {
  addSource(Dest);
  addSource(Source1);
  addSource(Source2);
}
InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source) InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
: InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) { : InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
if (Source) if (Source)
...@@ -446,19 +461,23 @@ template <> const char *InstX8632Add::Opcode = "add"; ...@@ -446,19 +461,23 @@ template <> const char *InstX8632Add::Opcode = "add";
template <> const char *InstX8632Addps::Opcode = "addps"; template <> const char *InstX8632Addps::Opcode = "addps";
template <> const char *InstX8632Adc::Opcode = "adc"; template <> const char *InstX8632Adc::Opcode = "adc";
template <> const char *InstX8632Addss::Opcode = "addss"; template <> const char *InstX8632Addss::Opcode = "addss";
template <> const char *InstX8632Padd::Opcode = "padd";
template <> const char *InstX8632Sub::Opcode = "sub"; template <> const char *InstX8632Sub::Opcode = "sub";
template <> const char *InstX8632Subps::Opcode = "subps"; template <> const char *InstX8632Subps::Opcode = "subps";
template <> const char *InstX8632Subss::Opcode = "subss"; template <> const char *InstX8632Subss::Opcode = "subss";
template <> const char *InstX8632Psub::Opcode = "psub";
template <> const char *InstX8632Sbb::Opcode = "sbb"; template <> const char *InstX8632Sbb::Opcode = "sbb";
template <> const char *InstX8632Psub::Opcode = "psub";
template <> const char *InstX8632And::Opcode = "and"; template <> const char *InstX8632And::Opcode = "and";
template <> const char *InstX8632Pand::Opcode = "pand"; template <> const char *InstX8632Pand::Opcode = "pand";
template <> const char *InstX8632Or::Opcode = "or"; template <> const char *InstX8632Or::Opcode = "or";
template <> const char *InstX8632Por::Opcode = "por";
template <> const char *InstX8632Xor::Opcode = "xor"; template <> const char *InstX8632Xor::Opcode = "xor";
template <> const char *InstX8632Pxor::Opcode = "pxor"; template <> const char *InstX8632Pxor::Opcode = "pxor";
template <> const char *InstX8632Imul::Opcode = "imul"; template <> const char *InstX8632Imul::Opcode = "imul";
template <> const char *InstX8632Mulps::Opcode = "mulps"; template <> const char *InstX8632Mulps::Opcode = "mulps";
template <> const char *InstX8632Mulss::Opcode = "mulss"; template <> const char *InstX8632Mulss::Opcode = "mulss";
template <> const char *InstX8632Pmullw::Opcode = "pmullw";
template <> const char *InstX8632Pmuludq::Opcode = "pmuludq";
template <> const char *InstX8632Div::Opcode = "div"; template <> const char *InstX8632Div::Opcode = "div";
template <> const char *InstX8632Divps::Opcode = "divps"; template <> const char *InstX8632Divps::Opcode = "divps";
template <> const char *InstX8632Idiv::Opcode = "idiv"; template <> const char *InstX8632Idiv::Opcode = "idiv";
...@@ -490,6 +509,13 @@ template <> void InstX8632Addss::emit(const Cfg *Func) const { ...@@ -490,6 +509,13 @@ template <> void InstX8632Addss::emit(const Cfg *Func) const {
emitTwoAddress(buf, this, Func); emitTwoAddress(buf, this, Func);
} }
// Emits a packed integer add as a two-address instruction.  The mnemonic is
// "padd" followed by the PackString attribute of the destination's type.
template <> void InstX8632Padd::emit(const Cfg *Func) const {
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "padd%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
template <> void InstX8632Subss::emit(const Cfg *Func) const { template <> void InstX8632Subss::emit(const Cfg *Func) const {
char buf[30]; char buf[30];
snprintf(buf, llvm::array_lengthof(buf), "sub%s", snprintf(buf, llvm::array_lengthof(buf), "sub%s",
...@@ -497,6 +523,13 @@ template <> void InstX8632Subss::emit(const Cfg *Func) const { ...@@ -497,6 +523,13 @@ template <> void InstX8632Subss::emit(const Cfg *Func) const {
emitTwoAddress(buf, this, Func); emitTwoAddress(buf, this, Func);
} }
// Emits a packed integer subtract as a two-address instruction.  The
// mnemonic is "psub" followed by the PackString attribute of the
// destination's type.
template <> void InstX8632Psub::emit(const Cfg *Func) const {
  const char *PackSuffix =
      TypeX8632Attributes[getDest()->getType()].PackString;
  char Mnemonic[30];
  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "psub%s", PackSuffix);
  emitTwoAddress(Mnemonic, this, Func);
}
template <> void InstX8632Mulss::emit(const Cfg *Func) const { template <> void InstX8632Mulss::emit(const Cfg *Func) const {
char buf[30]; char buf[30];
snprintf(buf, llvm::array_lengthof(buf), "mul%s", snprintf(buf, llvm::array_lengthof(buf), "mul%s",
...@@ -504,6 +537,18 @@ template <> void InstX8632Mulss::emit(const Cfg *Func) const { ...@@ -504,6 +537,18 @@ template <> void InstX8632Mulss::emit(const Cfg *Func) const {
emitTwoAddress(buf, this, Func); emitTwoAddress(buf, this, Func);
} }
// Emits pmullw as a two-address instruction using the class's Opcode string.
// Both source operands must be of type v8i16; this is checked by assertion
// only.
template <> void InstX8632Pmullw::emit(const Cfg *Func) const {
  assert(getSrc(0)->getType() == IceType_v8i16 &&
         getSrc(1)->getType() == IceType_v8i16);
  emitTwoAddress(Opcode, this, Func);
}
// Emits pmuludq as a two-address instruction using the class's Opcode string.
// Both source operands must be of type v4i32; this is checked by assertion
// only.
template <> void InstX8632Pmuludq::emit(const Cfg *Func) const {
  assert(getSrc(0)->getType() == IceType_v4i32 &&
         getSrc(1)->getType() == IceType_v4i32);
  emitTwoAddress(Opcode, this, Func);
}
template <> void InstX8632Divss::emit(const Cfg *Func) const { template <> void InstX8632Divss::emit(const Cfg *Func) const {
char buf[30]; char buf[30];
snprintf(buf, llvm::array_lengthof(buf), "div%s", snprintf(buf, llvm::array_lengthof(buf), "div%s",
...@@ -1093,11 +1138,23 @@ template <> void InstX8632Psra::emit(const Cfg *Func) const { ...@@ -1093,11 +1138,23 @@ template <> void InstX8632Psra::emit(const Cfg *Func) const {
emitTwoAddress(buf, this, Func); emitTwoAddress(buf, this, Func);
} }
template <> void InstX8632Psub::emit(const Cfg *Func) const { void InstX8632Pshufd::emit(const Cfg *Func) const {
char buf[30]; Ostream &Str = Func->getContext()->getStrEmit();
snprintf(buf, llvm::array_lengthof(buf), "psub%s", assert(getSrcSize() == 2);
TypeX8632Attributes[getDest()->getType()].PackString); Str << "\tpshufd\t";
emitTwoAddress(buf, this, Func); getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << "\n";
}
void InstX8632Pshufd::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = pshufd." << getDest()->getType() << " ";
dumpSources(Func);
} }
void InstX8632Ret::emit(const Cfg *Func) const { void InstX8632Ret::emit(const Cfg *Func) const {
...@@ -1112,6 +1169,25 @@ void InstX8632Ret::dump(const Cfg *Func) const { ...@@ -1112,6 +1169,25 @@ void InstX8632Ret::dump(const Cfg *Func) const {
dumpSources(Func); dumpSources(Func);
} }
void InstX8632Shufps::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
Str << "\tshufps\t";
getDest()->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << ", ";
getSrc(2)->emit(Func);
Str << "\n";
}
void InstX8632Shufps::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = shufps." << getDest()->getType() << " ";
dumpSources(Func);
}
void InstX8632Xadd::emit(const Cfg *Func) const { void InstX8632Xadd::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
if (Locked) { if (Locked) {
......
...@@ -82,7 +82,7 @@ ...@@ -82,7 +82,7 @@
X(IceType_v16i8, "?", "" , "b", "xmmword ptr") \ X(IceType_v16i8, "?", "" , "b", "xmmword ptr") \
X(IceType_v8i16, "?", "" , "w", "xmmword ptr") \ X(IceType_v8i16, "?", "" , "w", "xmmword ptr") \
X(IceType_v4i32, "dq", "" , "d", "xmmword ptr") \ X(IceType_v4i32, "dq", "" , "d", "xmmword ptr") \
X(IceType_v4f32, "ps", "" , "", "xmmword ptr") \ X(IceType_v4f32, "ps", "" , "" , "xmmword ptr") \
//#define X(tag, cvt, sdss, width) //#define X(tag, cvt, sdss, width)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF #endif // SUBZERO_SRC_ICEINSTX8632_DEF
...@@ -168,14 +168,19 @@ public: ...@@ -168,14 +168,19 @@ public:
Mulss, Mulss,
Neg, Neg,
Or, Or,
Padd,
Pand, Pand,
Pcmpeq, Pcmpeq,
Pcmpgt, Pcmpgt,
Pmullw,
Pmuludq,
Pop, Pop,
Push, Por,
Pshufd,
Psll, Psll,
Psra, Psra,
Psub, Psub,
Push,
Pxor, Pxor,
Ret, Ret,
Sar, Sar,
...@@ -184,6 +189,7 @@ public: ...@@ -184,6 +189,7 @@ public:
Shld, Shld,
Shr, Shr,
Shrd, Shrd,
Shufps,
Sqrtss, Sqrtss,
Store, Store,
StoreQ, StoreQ,
...@@ -455,6 +461,7 @@ typedef InstX8632Binop<InstX8632::Add> InstX8632Add; ...@@ -455,6 +461,7 @@ typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps; typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc; typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
typedef InstX8632Binop<InstX8632::Addss> InstX8632Addss; typedef InstX8632Binop<InstX8632::Addss> InstX8632Addss;
typedef InstX8632Binop<InstX8632::Padd> InstX8632Padd;
typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub; typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub;
typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps; typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps;
typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss; typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss;
...@@ -463,11 +470,14 @@ typedef InstX8632Binop<InstX8632::Psub> InstX8632Psub; ...@@ -463,11 +470,14 @@ typedef InstX8632Binop<InstX8632::Psub> InstX8632Psub;
typedef InstX8632Binop<InstX8632::And> InstX8632And; typedef InstX8632Binop<InstX8632::And> InstX8632And;
typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand; typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand;
typedef InstX8632Binop<InstX8632::Or> InstX8632Or; typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
typedef InstX8632Binop<InstX8632::Por> InstX8632Por;
typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor; typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor; typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor;
typedef InstX8632Binop<InstX8632::Imul> InstX8632Imul; typedef InstX8632Binop<InstX8632::Imul> InstX8632Imul;
typedef InstX8632Binop<InstX8632::Mulps> InstX8632Mulps; typedef InstX8632Binop<InstX8632::Mulps> InstX8632Mulps;
typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss; typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss;
typedef InstX8632Binop<InstX8632::Pmullw> InstX8632Pmullw;
typedef InstX8632Binop<InstX8632::Pmuludq> InstX8632Pmuludq;
typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps; typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps;
typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss; typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl; typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
...@@ -984,6 +994,27 @@ private: ...@@ -984,6 +994,27 @@ private:
virtual ~InstX8632Push() {} virtual ~InstX8632Push() {}
}; };
// Pshufd - shuffle a vector of doublewords
class InstX8632Pshufd : public InstX8632 {
public:
static InstX8632Pshufd *create(Cfg *Func, Variable *Dest, Operand *Source1,
Operand *Source2) {
return new (Func->allocate<InstX8632Pshufd>())
InstX8632Pshufd(Func, Dest, Source1, Source2);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Pshufd); }
private:
InstX8632Pshufd(Cfg *Func, Variable *Dest, Operand *Source1,
Operand *Source2);
InstX8632Pshufd(const InstX8632Pshufd &) LLVM_DELETED_FUNCTION;
InstX8632Pshufd &operator=(const InstX8632Pshufd &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Pshufd() {}
static const char *Opcode;
};
// Ret instruction. Currently only supports the "ret" version that // Ret instruction. Currently only supports the "ret" version that
// does not pop arguments. This instruction takes a Source operand // does not pop arguments. This instruction takes a Source operand
// (for non-void returning functions) for liveness analysis, though // (for non-void returning functions) for liveness analysis, though
...@@ -1004,6 +1035,27 @@ private: ...@@ -1004,6 +1035,27 @@ private:
virtual ~InstX8632Ret() {} virtual ~InstX8632Ret() {}
}; };
// Shufps - select from two vectors of floating point values
class InstX8632Shufps : public InstX8632 {
public:
static InstX8632Shufps *create(Cfg *Func, Variable *Dest, Operand *Source1,
Operand *Source2) {
return new (Func->allocate<InstX8632Shufps>())
InstX8632Shufps(Func, Dest, Source1, Source2);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Shufps); }
private:
InstX8632Shufps(Cfg *Func, Variable *Dest, Operand *Source1,
Operand *Source2);
InstX8632Shufps(const InstX8632Shufps &) LLVM_DELETED_FUNCTION;
InstX8632Shufps &operator=(const InstX8632Shufps &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Shufps() {}
static const char *Opcode;
};
// Exchanging Add instruction. Exchanges the first operand (destination // Exchanging Add instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand), then loads the sum // operand) with the second operand (source operand), then loads the sum
// of the two values into the destination operand. The destination may be // of the two values into the destination operand. The destination may be
......
...@@ -90,6 +90,20 @@ const unsigned X86_MAX_XMM_ARGS = 4; ...@@ -90,6 +90,20 @@ const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte // The number of bits in a byte
const unsigned X86_CHAR_BIT = 8; const unsigned X86_CHAR_BIT = 8;
// Build a spelling of Ty that can be embedded in an identifier.
//
// Vector types are written as "v", followed by the element count,
// followed by the element type; any other type is streamed as-is. The
// vector arithmetic lowering appends the result to helper-call name
// prefixes such as "Sz_shl_".
IceString typeIdentString(const Type Ty) {
  IceString Buffer;
  llvm::raw_string_ostream RawStream(Buffer);
  Ostream Out(&RawStream);
  if (!isVectorType(Ty)) {
    Out << Ty;
    return RawStream.str();
  }
  Out << "v" << typeNumElements(Ty) << typeElementType(Ty);
  return RawStream.str();
}
// In some cases, there are x-macros tables for both high-level and // In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value. // low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation // The tables are kept separate to maintain a proper separation
...@@ -1139,58 +1153,206 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1139,58 +1153,206 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
break; break;
} }
} else if (isVectorType(Dest->getType())) { } else if (isVectorType(Dest->getType())) {
// TODO: Trap on integer divide and integer modulo by zero.
// See: https://code.google.com/p/nativeclient/issues/detail?id=3899
//
// TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
// registers. This is a workaround of the fact that there is no
// support for aligning stack operands. Once there is support,
// remove LEGAL_HACK.
#define LEGAL_HACK(s) legalizeToVar((s))
switch (Inst->getOp()) { switch (Inst->getOp()) {
case InstArithmetic::_num: case InstArithmetic::_num:
llvm_unreachable("Unknown arithmetic operator"); llvm_unreachable("Unknown arithmetic operator");
break; break;
case InstArithmetic::Add: case InstArithmetic::Add: {
case InstArithmetic::And: Variable *T = makeReg(Dest->getType());
case InstArithmetic::Or: _movp(T, Src0);
case InstArithmetic::Xor: _padd(T, LEGAL_HACK(Src1));
case InstArithmetic::Sub: _movp(Dest, T);
case InstArithmetic::Mul: } break;
case InstArithmetic::Shl: case InstArithmetic::And: {
case InstArithmetic::Lshr: Variable *T = makeReg(Dest->getType());
case InstArithmetic::Ashr: _movp(T, Src0);
case InstArithmetic::Udiv: _pand(T, LEGAL_HACK(Src1));
case InstArithmetic::Sdiv: _movp(Dest, T);
case InstArithmetic::Urem: } break;
case InstArithmetic::Srem: case InstArithmetic::Or: {
// TODO(wala): Handle these. Variable *T = makeReg(Dest->getType());
Func->setError("Unhandled instruction"); _movp(T, Src0);
break; _por(T, LEGAL_HACK(Src1));
_movp(Dest, T);
} break;
case InstArithmetic::Xor: {
Variable *T = makeReg(Dest->getType());
_movp(T, Src0);
_pxor(T, LEGAL_HACK(Src1));
_movp(Dest, T);
} break;
case InstArithmetic::Sub: {
Variable *T = makeReg(Dest->getType());
_movp(T, Src0);
_psub(T, LEGAL_HACK(Src1));
_movp(Dest, T);
} break;
case InstArithmetic::Mul: {
if (Dest->getType() == IceType_v4i32) {
// Lowering sequence:
// Note: The mask arguments have index 0 on the left.
//
// movups T1, Src0
// pshufd T2, Src0, {1,0,3,0}
// pshufd T3, Src1, {1,0,3,0}
// # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
// pmuludq T1, Src1
// # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
// pmuludq T2, T3
// # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
// shufps T1, T2, {0,2,0,2}
// pshufd T4, T1, {0,2,1,3}
// movups Dest, T4
//
// TODO(wala): SSE4.1 has pmulld.
// Mask that directs pshufd to create a vector with entries
// Src[1, 0, 3, 0]
const unsigned Constant1030 = 0x31;
Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
// Mask that directs shufps to create a vector with entries
// Dest[0, 2], Src[0, 2]
const unsigned Mask0202 = 0x88;
// Mask that directs pshufd to create a vector with entries
// Src[0, 2, 1, 3]
const unsigned Mask0213 = 0xd8;
Variable *T1 = makeReg(IceType_v4i32);
Variable *T2 = makeReg(IceType_v4i32);
Variable *T3 = makeReg(IceType_v4i32);
Variable *T4 = makeReg(IceType_v4i32);
_movp(T1, Src0);
// TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R
// with Src1 after stack operand alignment support is
// implemented.
Variable *Src0R = LEGAL_HACK(Src0);
Variable *Src1R = LEGAL_HACK(Src1);
_pshufd(T2, Src0R, Mask1030);
_pshufd(T3, Src1R, Mask1030);
_pmuludq(T1, Src1R);
_pmuludq(T2, T3);
_shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
_pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
_movp(Dest, T4);
} else if (Dest->getType() == IceType_v8i16) {
Variable *T = makeReg(IceType_v8i16);
_movp(T, Src0);
_pmullw(T, legalizeToVar(Src1));
_movp(Dest, T);
} else {
assert(Dest->getType() == IceType_v16i8);
// Sz_mul_v16i8
const IceString Helper = "Sz_mul_v16i8";
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
}
} break;
case InstArithmetic::Shl: {
// Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Lshr: {
// Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Ashr: {
// Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Udiv: {
// Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Sdiv: {
// Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Urem: {
// Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Srem: {
// Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
Call->addArg(Src0);
Call->addArg(Src1);
lowerCall(Call);
} break;
case InstArithmetic::Fadd: { case InstArithmetic::Fadd: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_addps(T, Src1); _addps(T, LEGAL_HACK(Src1));
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Fsub: { case InstArithmetic::Fsub: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_subps(T, Src1); _subps(T, LEGAL_HACK(Src1));
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Fmul: { case InstArithmetic::Fmul: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_mulps(T, Src1); _mulps(T, LEGAL_HACK(Src1));
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Fdiv: { case InstArithmetic::Fdiv: {
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_movp(T, Src0); _movp(T, Src0);
_divps(T, Src1); _divps(T, LEGAL_HACK(Src1));
_movp(Dest, T); _movp(Dest, T);
} break; } break;
case InstArithmetic::Frem: { case InstArithmetic::Frem: {
const SizeT MaxSrcs = 2; const SizeT MaxSrcs = 2;
InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
Call->addArg(Src0); Call->addArg(Src0);
Call->addArg(Src1); Call->addArg(Src1);
lowerCall(Call); lowerCall(Call);
} break; } break;
} }
#undef LEGAL_HACK
} else { // Dest->getType() is non-i64 scalar } else { // Dest->getType() is non-i64 scalar
Variable *T_edx = NULL; Variable *T_edx = NULL;
Variable *T = NULL; Variable *T = NULL;
......
...@@ -276,6 +276,9 @@ protected: ...@@ -276,6 +276,9 @@ protected:
void _or(Variable *Dest, Operand *Src0) { void _or(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Or::create(Func, Dest, Src0)); Context.insert(InstX8632Or::create(Func, Dest, Src0));
} }
// Creates an InstX8632Padd from (Dest, Src0) and inserts it via Context.
// Used by the vector Add lowering.
void _padd(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Padd::create(Func, Dest, Src0));
}
void _pand(Variable *Dest, Operand *Src0) { void _pand(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pand::create(Func, Dest, Src0)); Context.insert(InstX8632Pand::create(Func, Dest, Src0));
} }
...@@ -285,11 +288,20 @@ protected: ...@@ -285,11 +288,20 @@ protected:
void _pcmpgt(Variable *Dest, Operand *Src0) { void _pcmpgt(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pcmpgt::create(Func, Dest, Src0)); Context.insert(InstX8632Pcmpgt::create(Func, Dest, Src0));
} }
// Creates an InstX8632Pmullw from (Dest, Src0) and inserts it via Context.
// Used by the v8i16 Mul lowering.
void _pmullw(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pmullw::create(Func, Dest, Src0));
}
// Creates an InstX8632Pmuludq from (Dest, Src0) and inserts it via Context.
// Used twice by the v4i32 Mul lowering.
void _pmuludq(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pmuludq::create(Func, Dest, Src0));
}
void _pop(Variable *Dest) { void _pop(Variable *Dest) {
Context.insert(InstX8632Pop::create(Func, Dest)); Context.insert(InstX8632Pop::create(Func, Dest));
} }
void _push(Operand *Src0, bool SuppressStackAdjustment = false) { void _por(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment)); Context.insert(InstX8632Por::create(Func, Dest, Src0));
}
void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Pshufd::create(Func, Dest, Src0, Src1));
} }
void _psll(Variable *Dest, Operand *Src0) { void _psll(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Psll::create(Func, Dest, Src0)); Context.insert(InstX8632Psll::create(Func, Dest, Src0));
...@@ -300,6 +312,9 @@ protected: ...@@ -300,6 +312,9 @@ protected:
void _psub(Variable *Dest, Operand *Src0) { void _psub(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Psub::create(Func, Dest, Src0)); Context.insert(InstX8632Psub::create(Func, Dest, Src0));
} }
// Creates an InstX8632Push for Src0 and inserts it via Context.
// SuppressStackAdjustment (default false) is forwarded unchanged to
// InstX8632Push::create.
void _push(Operand *Src0, bool SuppressStackAdjustment = false) {
Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment));
}
void _pxor(Variable *Dest, Operand *Src0) { void _pxor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pxor::create(Func, Dest, Src0)); Context.insert(InstX8632Pxor::create(Func, Dest, Src0));
} }
...@@ -324,6 +339,9 @@ protected: ...@@ -324,6 +339,9 @@ protected:
void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) { void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstX8632Shrd::create(Func, Dest, Src0, Src1)); Context.insert(InstX8632Shrd::create(Func, Dest, Src0, Src1));
} }
// Creates an InstX8632Shufps from (Dest, Src0, Src1) and inserts it via
// Context. In the v4i32 Mul lowering Src1 is the i8 constant mask.
void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Shufps::create(Func, Dest, Src0, Src1));
}
void _sqrtss(Variable *Dest, Operand *Src0) { void _sqrtss(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Sqrtss::create(Func, Dest, Src0)); Context.insert(InstX8632Sqrtss::create(Func, Dest, Src0));
} }
......
...@@ -44,7 +44,320 @@ entry: ...@@ -44,7 +44,320 @@ entry:
%res = frem <4 x float> %arg0, %arg1 %res = frem <4 x float> %arg0, %arg1
ret <4 x float> %res ret <4 x float> %res
; CHECK-LABEL: test_frem: ; CHECK-LABEL: test_frem:
; CHECK: __frem_v4f32 ; CHECK: Sz_frem_v4f32
}
; Integer add on <16 x i8> is expected to be emitted as paddb.
define <16 x i8> @test_add_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = add <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_add_v16i8:
; CHECK: paddb
}
; Bitwise and on <16 x i8> is expected to be emitted as pand.
define <16 x i8> @test_and_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = and <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_and_v16i8:
; CHECK: pand
}
; Bitwise or on <16 x i8> is expected to be emitted as por.
define <16 x i8> @test_or_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = or <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_or_v16i8:
; CHECK: por
}
; Bitwise xor on <16 x i8> is expected to be emitted as pxor.
define <16 x i8> @test_xor_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = xor <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_xor_v16i8:
; CHECK: pxor
}
; Integer sub on <16 x i8> is expected to be emitted as psubb.
define <16 x i8> @test_sub_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = sub <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_sub_v16i8:
; CHECK: psubb
}
; Integer mul on <16 x i8> is expected to go through the Sz_mul_v16i8 helper.
define <16 x i8> @test_mul_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = mul <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_mul_v16i8:
; CHECK: Sz_mul_v16i8
}
; Shift left on <16 x i8> is expected to go through the Sz_shl_v16i8 helper.
define <16 x i8> @test_shl_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = shl <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_shl_v16i8:
; CHECK: Sz_shl_v16i8
}
; Logical shift right on <16 x i8> is expected to go through the
; Sz_lshr_v16i8 helper.
define <16 x i8> @test_lshr_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = lshr <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_lshr_v16i8:
; CHECK: Sz_lshr_v16i8
}
; Arithmetic shift right on <16 x i8> is expected to go through the
; Sz_ashr_v16i8 helper.
define <16 x i8> @test_ashr_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = ashr <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_ashr_v16i8:
; CHECK: Sz_ashr_v16i8
}
; Unsigned divide on <16 x i8> is expected to go through the Sz_udiv_v16i8
; helper.
define <16 x i8> @test_udiv_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = udiv <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_udiv_v16i8:
; CHECK: Sz_udiv_v16i8
}
; Signed divide on <16 x i8> is expected to go through the Sz_sdiv_v16i8
; helper.
define <16 x i8> @test_sdiv_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = sdiv <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_sdiv_v16i8:
; CHECK: Sz_sdiv_v16i8
}
; Unsigned remainder on <16 x i8> is expected to go through the
; Sz_urem_v16i8 helper.
define <16 x i8> @test_urem_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = urem <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_urem_v16i8:
; CHECK: Sz_urem_v16i8
}
; Signed remainder on <16 x i8> is expected to go through the
; Sz_srem_v16i8 helper.
define <16 x i8> @test_srem_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
entry:
%res = srem <16 x i8> %arg0, %arg1
ret <16 x i8> %res
; CHECK-LABEL: test_srem_v16i8:
; CHECK: Sz_srem_v16i8
}
; Integer add on <8 x i16> is expected to be emitted as paddw.
define <8 x i16> @test_add_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = add <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_add_v8i16:
; CHECK: paddw
}
; Bitwise and on <8 x i16> is expected to be emitted as pand.
define <8 x i16> @test_and_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = and <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_and_v8i16:
; CHECK: pand
}
; Bitwise or on <8 x i16> is expected to be emitted as por.
define <8 x i16> @test_or_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = or <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_or_v8i16:
; CHECK: por
}
; Bitwise xor on <8 x i16> is expected to be emitted as pxor.
define <8 x i16> @test_xor_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = xor <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_xor_v8i16:
; CHECK: pxor
}
; Integer sub on <8 x i16> is expected to be emitted as psubw.
define <8 x i16> @test_sub_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = sub <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_sub_v8i16:
; CHECK: psubw
}
; Integer mul on <8 x i16> is expected to be emitted as pmullw.
define <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = mul <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_mul_v8i16:
; CHECK: pmullw
}
; Shift left on <8 x i16> is expected to go through the Sz_shl_v8i16 helper.
define <8 x i16> @test_shl_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = shl <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_shl_v8i16:
; CHECK: Sz_shl_v8i16
}
; Logical shift right on <8 x i16> is expected to go through the
; Sz_lshr_v8i16 helper.
define <8 x i16> @test_lshr_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = lshr <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_lshr_v8i16:
; CHECK: Sz_lshr_v8i16
}
; Arithmetic shift right on <8 x i16> is expected to go through the
; Sz_ashr_v8i16 helper.
define <8 x i16> @test_ashr_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = ashr <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_ashr_v8i16:
; CHECK: Sz_ashr_v8i16
}
; Unsigned divide on <8 x i16> is expected to go through the Sz_udiv_v8i16
; helper.
define <8 x i16> @test_udiv_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = udiv <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_udiv_v8i16:
; CHECK: Sz_udiv_v8i16
}
; Signed divide on <8 x i16> is expected to go through the Sz_sdiv_v8i16
; helper.
define <8 x i16> @test_sdiv_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = sdiv <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_sdiv_v8i16:
; CHECK: Sz_sdiv_v8i16
}
; Unsigned remainder on <8 x i16> is expected to go through the
; Sz_urem_v8i16 helper.
define <8 x i16> @test_urem_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = urem <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_urem_v8i16:
; CHECK: Sz_urem_v8i16
}
; Signed remainder on <8 x i16> is expected to go through the
; Sz_srem_v8i16 helper.
define <8 x i16> @test_srem_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
%res = srem <8 x i16> %arg0, %arg1
ret <8 x i16> %res
; CHECK-LABEL: test_srem_v8i16:
; CHECK: Sz_srem_v8i16
}
; Integer add on <4 x i32> is expected to be emitted as paddd.
define <4 x i32> @test_add_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = add <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_add_v4i32:
; CHECK: paddd
}
; Bitwise and on <4 x i32> is expected to be emitted as pand.
define <4 x i32> @test_and_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = and <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_and_v4i32:
; CHECK: pand
}
; Bitwise or on <4 x i32> is expected to be emitted as por.
define <4 x i32> @test_or_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = or <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_or_v4i32:
; CHECK: por
}
; Bitwise xor on <4 x i32> is expected to be emitted as pxor.
define <4 x i32> @test_xor_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = xor <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_xor_v4i32:
; CHECK: pxor
}
; Integer sub on <4 x i32> is expected to be emitted as psubd.
define <4 x i32> @test_sub_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = sub <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_sub_v4i32:
; CHECK: psubd
}
; Integer mul on <4 x i32> is expected to emit pmuludq twice; the lowering
; multiplies the even and odd lanes separately and then recombines them.
define <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = mul <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_mul_v4i32:
; CHECK: pmuludq
; CHECK: pmuludq
}
; Shift left on <4 x i32> is expected to go through the Sz_shl_v4i32 helper.
define <4 x i32> @test_shl_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = shl <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_shl_v4i32:
; CHECK: Sz_shl_v4i32
}
; Logical shift right on <4 x i32> is expected to go through the
; Sz_lshr_v4i32 helper.
define <4 x i32> @test_lshr_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = lshr <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_lshr_v4i32:
; CHECK: Sz_lshr_v4i32
}
; Arithmetic shift right on <4 x i32> is expected to go through the
; Sz_ashr_v4i32 helper.
define <4 x i32> @test_ashr_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = ashr <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_ashr_v4i32:
; CHECK: Sz_ashr_v4i32
}
; Unsigned divide on <4 x i32> is expected to go through the Sz_udiv_v4i32
; helper.
define <4 x i32> @test_udiv_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = udiv <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_udiv_v4i32:
; CHECK: Sz_udiv_v4i32
}
; Signed divide on <4 x i32> is expected to go through the Sz_sdiv_v4i32
; helper.
define <4 x i32> @test_sdiv_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = sdiv <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_sdiv_v4i32:
; CHECK: Sz_sdiv_v4i32
}
; Unsigned remainder on <4 x i32> is expected to go through the
; Sz_urem_v4i32 helper.
define <4 x i32> @test_urem_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = urem <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_urem_v4i32:
; CHECK: Sz_urem_v4i32
}
define <4 x i32> @test_srem_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
%res = srem <4 x i32> %arg0, %arg1
ret <4 x i32> %res
; CHECK-LABEL: test_srem_v4i32:
; CHECK: Sz_srem_v4i32
} }
; ERRORS-NOT: ICE translation error ; ERRORS-NOT: ICE translation error
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment