Commit ba6a67c9 by John Porto

Subzero. Enables (most) crosstests for ARM32.

This patch enables many crosstests for ARM32. Very limited vector support is implemented (essentially, whatever it takes to compile the .ll files that contain vector operations.) Atomics as well as vector crosstests are still disabled. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1359193003 .
parent 188eae5c
...@@ -382,11 +382,10 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime ...@@ -382,11 +382,10 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
-e x8664,native,sse2 \ -e x8664,native,sse2 \
-e x8664,native,sse4.1,test_vector_ops \ -e x8664,native,sse4.1,test_vector_ops \
-e x8664,native,sse2,test_global \ -e x8664,native,sse2,test_global \
-i arm32,native,neon,simple_loop \ -i arm32,native,neon \
-i arm32,native,neon,mem_intrin \ -e arm32,native,neon,test_sync_atomic \
-i arm32,native,neon,test_bitmanip \ -e arm32,native,neon,test_vector_ops \
-i arm32,native,neon,test_stacksave \ -e arm32,native,neon,test_select
-i arm32,native,neon,test_strengthreduce
PNACL_BIN_PATH=$(PNACL_BIN_PATH) \ PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
$(LLVM_SRC_PATH)/utils/lit/lit.py -sv crosstest/Output $(LLVM_SRC_PATH)/utils/lit/lit.py -sv crosstest/Output
endif endif
......
...@@ -139,8 +139,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -139,8 +139,8 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
++Failures; ++Failures;
std::cout << "test" << Funcs[f].Name std::cout << "test" << Funcs[f].Name
<< (CHAR_BIT * sizeof(TypeUnsigned)) << "(" << Value1 << (CHAR_BIT * sizeof(TypeUnsigned)) << "(" << Value1
<< ", " << Value2 << "): sz=" << (unsigned)ResultSz << ", " << Value2 << "): sz=" << (uint64)ResultSz
<< " llc=" << (unsigned)ResultLlc << "\n"; << " llc=" << (uint64)ResultLlc << "\n";
} }
} }
} }
...@@ -154,6 +154,8 @@ const static size_t MaxTestsPerFunc = 100000; ...@@ -154,6 +154,8 @@ const static size_t MaxTestsPerFunc = 100000;
template <typename TypeUnsignedLabel, typename TypeSignedLabel> template <typename TypeUnsignedLabel, typename TypeSignedLabel>
void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
typedef typename Vectors<TypeUnsignedLabel>::Ty TypeUnsigned; typedef typename Vectors<TypeUnsignedLabel>::Ty TypeUnsigned;
typedef typename Vectors<TypeSignedLabel>::Ty TypeSigned; typedef typename Vectors<TypeSignedLabel>::Ty TypeSigned;
typedef typename Vectors<TypeUnsignedLabel>::ElementTy ElementTypeUnsigned; typedef typename Vectors<TypeUnsignedLabel>::ElementTy ElementTypeUnsigned;
...@@ -230,6 +232,7 @@ void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -230,6 +232,7 @@ void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
} }
#endif // ARM32
} }
template <typename Type> template <typename Type>
...@@ -305,6 +308,8 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -305,6 +308,8 @@ void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
static const float NegInf = -1.0 / 0.0; static const float NegInf = -1.0 / 0.0;
static const float PosInf = 1.0 / 0.0; static const float PosInf = 1.0 / 0.0;
static const float Nan = 0.0 / 0.0; static const float Nan = 0.0 / 0.0;
...@@ -363,6 +368,7 @@ void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -363,6 +368,7 @@ void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
} }
#endif // ARM32
} }
#ifdef X8664_STACK_HACK #ifdef X8664_STACK_HACK
......
...@@ -36,12 +36,12 @@ void caller_vvvvv(void) { ...@@ -36,12 +36,12 @@ void caller_vvvvv(void) {
CALL_AS_TYPE(callee_vvvvv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5); CALL_AS_TYPE(callee_vvvvv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5);
} }
void caller_vlvlivfvdviv(void) { void caller_vlvilvfvdviv(void) {
v4f32 arg1 = {0, 1, 2, 3}; v4f32 arg1 = {0, 1, 2, 3};
int64 arg2 = 4; int64 arg2 = 4;
v4f32 arg3 = {6, 7, 8, 9}; v4f32 arg3 = {6, 7, 8, 9};
int64 arg4 = 10; int arg4 = 10;
int arg5 = 11; int64 arg5 = 11;
v4f32 arg6 = {12, 13, 14, 15}; v4f32 arg6 = {12, 13, 14, 15};
float arg7 = 16; float arg7 = 16;
v4f32 arg8 = {17, 18, 19, 20}; v4f32 arg8 = {17, 18, 19, 20};
...@@ -50,7 +50,7 @@ void caller_vlvlivfvdviv(void) { ...@@ -50,7 +50,7 @@ void caller_vlvlivfvdviv(void) {
int arg11 = 26; int arg11 = 26;
v4f32 arg12 = {27, 28, 29, 30}; v4f32 arg12 = {27, 28, 29, 30};
CALL_AS_TYPE(callee_vlvlivfvdviv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5, CALL_AS_TYPE(callee_vlvilvfvdviv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9, arg10, arg6, arg7, arg8, arg9, arg10,
arg11, arg12); arg11, arg12);
} }
...@@ -66,6 +66,8 @@ void __attribute__((noinline)) callee_i(int arg1) { ...@@ -66,6 +66,8 @@ void __attribute__((noinline)) callee_i(int arg1) {
void __attribute__((noinline)) void __attribute__((noinline))
callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) { callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
switch (ArgNum) { switch (ArgNum) {
HANDLE_ARG(1); HANDLE_ARG(1);
HANDLE_ARG(2); HANDLE_ARG(2);
...@@ -73,24 +75,28 @@ callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) { ...@@ -73,24 +75,28 @@ callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
HANDLE_ARG(4); HANDLE_ARG(4);
HANDLE_ARG(5); HANDLE_ARG(5);
} }
#endif // ARM32
} }
void __attribute__((noinline)) void __attribute__((noinline))
callee_vlvlivfvdviv(v4f32 arg1, int64 arg2, v4f32 arg3, int64 arg4, int arg5, callee_vlvilvfvdviv(v4f32 arg1, int64 arg2, v4f32 arg3, int arg4, int64 arg5,
v4f32 arg6, float arg7, v4f32 arg8, double arg9, v4f32 arg6, float arg7, v4f32 arg8, double arg9,
v4f32 arg10, int arg11, v4f32 arg12) { v4f32 arg10, int arg11, v4f32 arg12) {
switch (ArgNum) { switch (ArgNum) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
HANDLE_ARG(1); HANDLE_ARG(1);
HANDLE_ARG(2);
HANDLE_ARG(3); HANDLE_ARG(3);
HANDLE_ARG(6);
HANDLE_ARG(8);
HANDLE_ARG(10);
HANDLE_ARG(12);
#endif // ARM32
HANDLE_ARG(2);
HANDLE_ARG(4); HANDLE_ARG(4);
HANDLE_ARG(5); HANDLE_ARG(5);
HANDLE_ARG(6);
HANDLE_ARG(7); HANDLE_ARG(7);
HANDLE_ARG(8);
HANDLE_ARG(9); HANDLE_ARG(9);
HANDLE_ARG(10);
HANDLE_ARG(11); HANDLE_ARG(11);
HANDLE_ARG(12);
} }
} }
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
/* caller, callee, argc */ \ /* caller, callee, argc */ \
X(caller_i, callee_i, 1) \ X(caller_i, callee_i, 1) \
X(caller_vvvvv, callee_vvvvv, 5) \ X(caller_vvvvv, callee_vvvvv, 5) \
X(caller_vlvlivfvdviv, callee_vlvlivfvdviv, 12) \ X(caller_vlvilvfvdviv, callee_vlvilvfvdviv, 12) \
// #define X(caller, callee, argc) // #define X(caller, callee, argc)
#endif // TEST_CALLING_CONV_DEF #endif // TEST_CALLING_CONV_DEF
...@@ -31,7 +31,7 @@ void caller_vvvvv(); ...@@ -31,7 +31,7 @@ void caller_vvvvv();
typedef void(callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32); typedef void(callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
callee_vvvvv_Ty callee_vvvvv; callee_vvvvv_Ty callee_vvvvv;
void caller_vlvlivfvdviv(); void caller_vlvilvfvdviv();
typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64, v4f32, int64, int, v4f32, typedef void(callee_vlvilvfvdviv_Ty)(v4f32, int64, v4f32, int, int64, v4f32,
float, v4f32, double, v4f32, int, v4f32); float, v4f32, double, v4f32, int, v4f32);
callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv; callee_vlvilvfvdviv_Ty callee_vlvilvfvdviv;
...@@ -92,6 +92,8 @@ void testValue(FromType Val, size_t &TotalTests, size_t &Passes, ...@@ -92,6 +92,8 @@ void testValue(FromType Val, size_t &TotalTests, size_t &Passes,
template <typename FromType, typename ToType> template <typename FromType, typename ToType>
void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures, void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures,
const char *FromTypeString, const char *ToTypeString) { const char *FromTypeString, const char *ToTypeString) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
const static size_t NumElementsInType = Vectors<FromType>::NumElements; const static size_t NumElementsInType = Vectors<FromType>::NumElements;
PRNG Index; PRNG Index;
static const float NegInf = -1.0 / 0.0; static const float NegInf = -1.0 / 0.0;
...@@ -109,6 +111,7 @@ void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures, ...@@ -109,6 +111,7 @@ void testVector(size_t &TotalTests, size_t &Passes, size_t &Failures,
} }
COMPARE_VEC(cast, FromType, ToType, Value, FromTypeString, ToTypeString); COMPARE_VEC(cast, FromType, ToType, Value, FromTypeString, ToTypeString);
} }
#endif // ARM32
} }
#ifdef X8664_STACK_HACK #ifdef X8664_STACK_HACK
......
...@@ -116,6 +116,8 @@ void testsScalar(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -116,6 +116,8 @@ void testsScalar(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
typedef v4si32 (*FuncTypeVector)(v4f32, v4f32); typedef v4si32 (*FuncTypeVector)(v4f32, v4f32);
static struct { static struct {
const char *Name; const char *Name;
...@@ -157,6 +159,7 @@ void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -157,6 +159,7 @@ void testsVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
} }
#endif // ARM32
} }
#ifdef X8664_STACK_HACK #ifdef X8664_STACK_HACK
......
...@@ -125,6 +125,8 @@ const static size_t MaxTestsPerFunc = 100000; ...@@ -125,6 +125,8 @@ const static size_t MaxTestsPerFunc = 100000;
template <typename TypeUnsignedLabel, typename TypeSignedLabel> template <typename TypeUnsignedLabel, typename TypeSignedLabel>
void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
typedef typename Vectors<TypeUnsignedLabel>::Ty TypeUnsigned; typedef typename Vectors<TypeUnsignedLabel>::Ty TypeUnsigned;
typedef typename Vectors<TypeSignedLabel>::Ty TypeSigned; typedef typename Vectors<TypeSignedLabel>::Ty TypeSigned;
typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned); typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
...@@ -181,6 +183,7 @@ void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -181,6 +183,7 @@ void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
} }
#endif // ARM32
} }
// Return true on wraparound // Return true on wraparound
...@@ -199,6 +202,8 @@ template <typename T> bool incrementI1Vector(typename Vectors<T>::Ty &Vect) { ...@@ -199,6 +202,8 @@ template <typename T> bool incrementI1Vector(typename Vectors<T>::Ty &Vect) {
template <typename T> template <typename T>
void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) { void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) {
#ifndef ARM32
// TODO(jpp): remove this once vector support is implemented.
typedef typename Vectors<T>::Ty Ty; typedef typename Vectors<T>::Ty Ty;
typedef Ty (*FuncType)(Ty, Ty); typedef Ty (*FuncType)(Ty, Ty);
static struct { static struct {
...@@ -266,6 +271,7 @@ void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) { ...@@ -266,6 +271,7 @@ void testsVecI1(size_t &TotalTests, size_t &Passes, size_t &Failures) {
} }
} }
} }
#endif // ARM32
} }
#ifdef X8664_STACK_HACK #ifdef X8664_STACK_HACK
......
...@@ -122,7 +122,8 @@ def main(): ...@@ -122,7 +122,8 @@ def main():
bitcode_nonfinal = os.path.join(args.dir, base + '.' + key + '.bc') bitcode_nonfinal = os.path.join(args.dir, base + '.' + key + '.bc')
bitcode = os.path.join(args.dir, base + '.' + key + '.pnacl.ll') bitcode = os.path.join(args.dir, base + '.' + key + '.pnacl.ll')
shellcmd(['{bin}/pnacl-clang'.format(bin=bindir), shellcmd(['{bin}/pnacl-clang'.format(bin=bindir),
('-O2' if args.clang_opt else '-O0'), '-c', arg, ('-O2' if args.clang_opt else '-O0'),
('-DARM32' if args.target == 'arm32' else ''), '-c', arg,
'-o', bitcode_nonfinal]) '-o', bitcode_nonfinal])
shellcmd(['{bin}/pnacl-opt'.format(bin=bindir), shellcmd(['{bin}/pnacl-opt'.format(bin=bindir),
'-pnacl-abi-simplify-preopt', '-pnacl-abi-simplify-preopt',
...@@ -185,12 +186,16 @@ def main(): ...@@ -185,12 +186,16 @@ def main():
# configuration. In order to run the crosstests we play nasty, dangerous # configuration. In order to run the crosstests we play nasty, dangerous
# tricks with the stack pointer. # tricks with the stack pointer.
needs_stack_hack = (args.target == 'x8664') needs_stack_hack = (args.target == 'x8664')
stack_hack_params = [] target_params = []
if needs_stack_hack: if needs_stack_hack:
shellcmd('{bin}/clang -g -o stack_hack.x8664.{key}.o -c ' shellcmd('{bin}/clang -g -o stack_hack.x8664.{key}.o -c '
'stack_hack.x8664.c'.format(bin=bindir, key=key)) 'stack_hack.x8664.c'.format(bin=bindir, key=key))
stack_hack_params.append('-DX8664_STACK_HACK') target_params.append('-DX8664_STACK_HACK')
stack_hack_params.append('stack_hack.x8664.{key}.o'.format(key=key)) target_params.append('stack_hack.x8664.{key}.o'.format(key=key))
if args.target == 'arm32':
target_params.append('-DARM32')
target_params.append('-static')
# Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++. # Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++.
compiler = '{bin}/{prefix}{cc}'.format( compiler = '{bin}/{prefix}{cc}'.format(
...@@ -204,7 +209,7 @@ def main(): ...@@ -204,7 +209,7 @@ def main():
'-lm', '-lpthread', '-lm', '-lpthread',
'-Wl,--defsym=__Sz_AbsoluteZero=0'] + '-Wl,--defsym=__Sz_AbsoluteZero=0'] +
target_info.cross_headers) target_info.cross_headers)
shellcmd([compiler] + stack_hack_params + [args.driver] + objs + shellcmd([compiler] + target_params + [args.driver] + objs +
['-o', os.path.join(args.dir, args.output)] + sb_native_args) ['-o', os.path.join(args.dir, args.output)] + sb_native_args)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -33,7 +33,7 @@ const struct TypeARM32Attributes_ { ...@@ -33,7 +33,7 @@ const struct TypeARM32Attributes_ {
int8_t SExtAddrOffsetBits; int8_t SExtAddrOffsetBits;
int8_t ZExtAddrOffsetBits; int8_t ZExtAddrOffsetBits;
} TypeARM32Attributes[] = { } TypeARM32Attributes[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits) \ #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) \
{ int_width, vec_width, sbits, ubits } \ { int_width, vec_width, sbits, ubits } \
, ,
ICETYPEARM32_TABLE ICETYPEARM32_TABLE
...@@ -211,8 +211,6 @@ bool OperandARM32Mem::canHoldOffset(Type Ty, bool SignExt, int32_t Offset) { ...@@ -211,8 +211,6 @@ bool OperandARM32Mem::canHoldOffset(Type Ty, bool SignExt, int32_t Offset) {
return Offset == 0; return Offset == 0;
// Note that encodings for offsets are sign-magnitude for ARM, so we check // Note that encodings for offsets are sign-magnitude for ARM, so we check
// with IsAbsoluteUint(). // with IsAbsoluteUint().
if (isScalarFloatingType(Ty))
return Utils::IsAligned(Offset, 4) && Utils::IsAbsoluteUint(Bits, Offset);
return Utils::IsAbsoluteUint(Bits, Offset); return Utils::IsAbsoluteUint(Bits, Offset);
} }
...@@ -392,6 +390,11 @@ InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1, ...@@ -392,6 +390,11 @@ InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1,
InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate) InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {} : InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {}
// Constructs a predicated vabs instruction that writes Dest and registers Src
// as its only source operand.
InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src,
                             CondARM32::Cond Predicate)
    // NOTE(review): the literal 1 is presumably the source-operand capacity
    // handed to the base class -- confirm against InstARM32Pred.
    : InstARM32Pred(Func, InstARM32::Vabs, 1, Dest, Predicate) {
  addSource(Src);
}
// ======================== Dump routines ======================== // // ======================== Dump routines ======================== //
// Two-addr ops // Two-addr ops
...@@ -408,9 +411,6 @@ template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h ...@@ -408,9 +411,6 @@ template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h
template <> const char *InstARM32Vsqrt::Opcode = "vsqrt"; template <> const char *InstARM32Vsqrt::Opcode = "vsqrt";
// Mov-like ops // Mov-like ops
template <> const char *InstARM32Ldr::Opcode = "ldr"; template <> const char *InstARM32Ldr::Opcode = "ldr";
template <> const char *InstARM32Mov::Opcode = "mov";
// FP
template <> const char *InstARM32Vldr::Opcode = "vldr";
// Three-addr ops // Three-addr ops
template <> const char *InstARM32Adc::Opcode = "adc"; template <> const char *InstARM32Adc::Opcode = "adc";
template <> const char *InstARM32Add::Opcode = "add"; template <> const char *InstARM32Add::Opcode = "add";
...@@ -447,113 +447,56 @@ void InstARM32::dump(const Cfg *Func) const { ...@@ -447,113 +447,56 @@ void InstARM32::dump(const Cfg *Func) const {
Inst::dump(Func); Inst::dump(Func);
} }
template <> void InstARM32Mov::emit(const Cfg *Func) const { void InstARM32Mov::emitMultiDestSingleSource(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Variable *Dest = getDest();
if (Dest->hasReg()) {
IceString ActualOpcode = Opcode;
Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
if (!Src0V->hasReg()) {
// Always use the whole stack slot. A 32-bit load has a larger range of
// offsets than 16-bit, etc.
ActualOpcode = IceString("ldr");
}
} else {
if (llvm::isa<OperandARM32Mem>(Src0))
ActualOpcode = IceString("ldr") + getWidthString(Dest->getType());
}
Str << "\t" << ActualOpcode << getPredicate() << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
} else {
Variable *Src0 = llvm::cast<Variable>(getSrc(0));
assert(Src0->hasReg());
Str << "\t"
<< "str" << getPredicate() << "\t";
Src0->emit(Func);
Str << ", ";
Dest->emit(Func);
}
}
template <> void InstARM32Mov::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1);
(void)Func;
llvm_unreachable("Not yet implemented");
}
template <> void InstARM32Vldr::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
assert(getDest()->hasReg());
Str << "\t" << Opcode << getPredicate() << "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
}
template <> void InstARM32Vldr::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1);
(void)Func;
llvm_unreachable("Not yet implemented");
}
void InstARM32Vmov::emitMultiDestSingleSource(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Variable *Dest0 = getDest(); auto *Dest = llvm::cast<Variable64On32>(getDest());
Operand *Src0 = getSrc(0); Operand *Src = getSrc(0);
assert(Dest0->hasReg()); assert(Dest->getType() == IceType_i64);
assert(Dest1->hasReg()); assert(Dest->getHi()->hasReg());
assert(!llvm::isa<OperandARM32Mem>(Src0)); assert(Dest->getLo()->hasReg());
assert(!llvm::isa<OperandARM32Mem>(Src));
Str << "\t" Str << "\t"
<< "vmov" << getPredicate() << "\t"; << "vmov" << getPredicate() << "\t";
Dest0->emit(Func); Dest->getLo()->emit(Func);
Str << ", "; Str << ", ";
Dest1->emit(Func); Dest->getHi()->emit(Func);
Str << ", "; Str << ", ";
Src0->emit(Func); Src->emit(Func);
} }
void InstARM32Vmov::emitSingleDestMultiSource(const Cfg *Func) const { void InstARM32Mov::emitSingleDestMultiSource(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Variable *Dest0 = getDest(); Variable *Dest = getDest();
Operand *Src0 = getSrc(0); auto *Src = llvm::cast<Variable64On32>(getSrc(0));
Operand *Src1 = getSrc(1);
assert(Dest0->hasReg()); assert(Src->getType() == IceType_i64);
assert(!llvm::isa<OperandARM32Mem>(Src0)); assert(Src->getHi()->hasReg());
assert(!llvm::isa<OperandARM32Mem>(Src1)); assert(Src->getLo()->hasReg());
assert(Dest->hasReg());
Str << "\t" Str << "\t"
<< "vmov" << getPredicate() << "\t"; << "vmov" << getPredicate() << "\t";
Dest0->emit(Func); Dest->emit(Func);
Str << ", "; Str << ", ";
Src0->emit(Func); Src->getLo()->emit(Func);
Str << ", "; Str << ", ";
Src1->emit(Func); Src->getHi()->emit(Func);
} }
namespace { namespace {
bool isVariableWithoutRegister(const Operand *Op) { bool isVariableWithoutRegister(const Operand *Op) {
if (const auto *OpV = llvm::dyn_cast<const Variable>(Op)) { if (const auto *OpV = llvm::dyn_cast<const Variable>(Op)) {
return !OpV->hasReg(); return !OpV->hasReg();
} }
return false; return false;
} }
bool isMemoryAccess(Operand *Op) { bool isMemoryAccess(Operand *Op) {
return isVariableWithoutRegister(Op) || llvm::isa<OperandARM32Mem>(Op); return isVariableWithoutRegister(Op) || llvm::isa<OperandARM32Mem>(Op);
} }
...@@ -561,27 +504,38 @@ bool isMemoryAccess(Operand *Op) { ...@@ -561,27 +504,38 @@ bool isMemoryAccess(Operand *Op) {
bool isMoveBetweenCoreAndVFPRegisters(Variable *Dest, Operand *Src) { bool isMoveBetweenCoreAndVFPRegisters(Variable *Dest, Operand *Src) {
const Type DestTy = Dest->getType(); const Type DestTy = Dest->getType();
const Type SrcTy = Src->getType(); const Type SrcTy = Src->getType();
assert(!(isScalarIntegerType(DestTy) && isScalarIntegerType(SrcTy)) && return !isVectorType(DestTy) && !isVectorType(SrcTy) &&
"At most one of vmov's operands can be a core register."); (isScalarIntegerType(DestTy) == isScalarFloatingType(SrcTy));
return isScalarIntegerType(DestTy) || isScalarIntegerType(SrcTy);
} }
} // end of anonymous namespace } // end of anonymous namespace
void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const { void InstARM32Mov::emitSingleDestSingleSource(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Variable *Dest = getDest(); Variable *Dest = getDest();
if (Dest->hasReg()) { if (Dest->hasReg()) {
Type DestTy = Dest->getType();
Operand *Src0 = getSrc(0); Operand *Src0 = getSrc(0);
const char *ActualOpcode = isMemoryAccess(Src0) ? "vldr" : "vmov"; const bool DestIsVector = isVectorType(DestTy);
const bool DestIsScalarFP = isScalarFloatingType(Dest->getType());
const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
const char *LoadOpcode =
DestIsVector ? "vld1" : (DestIsScalarFP ? "vldr" : "ldr");
const char *RegMovOpcode =
(DestIsVector || DestIsScalarFP || CoreVFPMove) ? "vmov" : "mov";
const char *ActualOpcode = isMemoryAccess(Src0) ? LoadOpcode : RegMovOpcode;
// when vmov{c}'ing, we need to emit a width string. Otherwise, the // when vmov{c}'ing, we need to emit a width string. Otherwise, the
// assembler might be tempted to assume we want a vector vmov{c}, and that // assembler might be tempted to assume we want a vector vmov{c}, and that
// is disallowed because ARM. // is disallowed because ARM.
const char *NoWidthString = "";
const char *WidthString = const char *WidthString =
(isMemoryAccess(Src0) || isMoveBetweenCoreAndVFPRegisters(Dest, Src0)) isMemoryAccess(Src0)
? "" ? (DestIsVector ? ".64" : NoWidthString)
: getVecWidthString(Src0->getType()); : (!CoreVFPMove ? getVecWidthString(DestTy) : NoWidthString);
Str << "\t" << ActualOpcode << getPredicate() << WidthString << "\t"; Str << "\t" << ActualOpcode << getPredicate() << WidthString << "\t";
Dest->emit(Func); Dest->emit(Func);
Str << ", "; Str << ", ";
...@@ -589,18 +543,24 @@ void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const { ...@@ -589,18 +543,24 @@ void InstARM32Vmov::emitSingleDestSingleSource(const Cfg *Func) const {
} else { } else {
Variable *Src0 = llvm::cast<Variable>(getSrc(0)); Variable *Src0 = llvm::cast<Variable>(getSrc(0));
assert(Src0->hasReg()); assert(Src0->hasReg());
Str << "\t" const char *ActualOpcode =
"vstr" << getPredicate() << "\t"; isVectorType(Src0->getType())
? "vst1"
: (isScalarFloatingType(Src0->getType()) ? "vstr" : "str");
const char *NoWidthString = "";
const char *WidthString =
isVectorType(Src0->getType()) ? ".64" : NoWidthString;
Str << "\t" << ActualOpcode << getPredicate() << WidthString << "\t";
Src0->emit(Func); Src0->emit(Func);
Str << ", "; Str << ", ";
Dest->emit(Func); Dest->emit(Func);
} }
} }
void InstARM32Vmov::emit(const Cfg *Func) const { void InstARM32Mov::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
assert(isMultiDest() + isMultiSource() <= 1 && "Invalid vmov type."); assert(!(isMultiDest() && isMultiSource()) && "Invalid vmov type.");
if (isMultiDest()) { if (isMultiDest()) {
emitMultiDestSingleSource(Func); emitMultiDestSingleSource(Func);
return; return;
...@@ -614,21 +574,37 @@ void InstARM32Vmov::emit(const Cfg *Func) const { ...@@ -614,21 +574,37 @@ void InstARM32Vmov::emit(const Cfg *Func) const {
emitSingleDestSingleSource(Func); emitSingleDestSingleSource(Func);
} }
void InstARM32Vmov::emitIAS(const Cfg *Func) const { void InstARM32Mov::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
(void)Func; (void)Func;
llvm_unreachable("Not yet implemented"); llvm_unreachable("Not yet implemented");
} }
void InstARM32Vmov::dump(const Cfg *Func) const { void InstARM32Mov::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
assert(getSrcSize() == 1);
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
dumpOpcodePred(Str, "vmov", getDest()->getType()); Variable *Dest = getDest();
if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
Dest64->getLo()->dump(Func);
Str << ", ";
Dest64->getHi()->dump(Func);
} else {
Dest->dump(Func);
}
dumpOpcodePred(Str, " = mov", getDest()->getType());
Str << " "; Str << " ";
dumpDest(Func);
Operand *Src = getSrc(0);
if (auto *Src64 = llvm::dyn_cast<Variable64On32>(Src)) {
Src64->getLo()->dump(Func);
Str << ", "; Str << ", ";
dumpSources(Func); Src64->getHi()->dump(Func);
} else {
Src->dump(Func);
}
} }
void InstARM32Br::emit(const Cfg *Func) const { void InstARM32Br::emit(const Cfg *Func) const {
...@@ -748,8 +724,16 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const { ...@@ -748,8 +724,16 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
assert(getDest()->hasReg()); assert(getDest()->hasReg());
Type Ty = getSrc(0)->getType(); Variable *Dest = getDest();
Str << "\t" << Opcode << getWidthString(Ty) << getPredicate() << "\t"; Type DestTy = Dest->getType();
const bool DestIsVector = isVectorType(DestTy);
const bool DestIsScalarFloat = isScalarFloatingType(DestTy);
const char *ActualOpcode =
DestIsVector ? "vld1" : (DestIsScalarFloat ? "vldr" : "ldr");
const char *VectorMarker = DestIsVector ? ".64" : "";
const char *WidthString = DestIsVector ? "" : getWidthString(DestTy);
Str << "\t" << ActualOpcode << WidthString << getPredicate() << VectorMarker
<< "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
...@@ -799,15 +783,28 @@ template <> void InstARM32Movt::emit(const Cfg *Func) const { ...@@ -799,15 +783,28 @@ template <> void InstARM32Movt::emit(const Cfg *Func) const {
void InstARM32Pop::emit(const Cfg *Func) const { void InstARM32Pop::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
assert(Dests.size() > 0); SizeT IntegerCount = 0;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
++IntegerCount;
}
}
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
if (IntegerCount == 0) {
Str << "\t@ empty pop";
return;
}
Str << "\t" Str << "\t"
<< "pop" << "pop"
<< "\t{"; << "\t{";
for (SizeT I = 0; I < Dests.size(); ++I) { bool PrintComma = false;
if (I > 0) for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", "; Str << ", ";
Dests[I]->emit(Func); Op->emit(Func);
PrintComma = true;
}
} }
Str << "}"; Str << "}";
} }
...@@ -866,12 +863,31 @@ void InstARM32AdjustStack::dump(const Cfg *Func) const { ...@@ -866,12 +863,31 @@ void InstARM32AdjustStack::dump(const Cfg *Func) const {
void InstARM32Push::emit(const Cfg *Func) const { void InstARM32Push::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
assert(getSrcSize() > 0); SizeT IntegerCount = 0;
for (SizeT i = 0; i < getSrcSize(); ++i) {
if (isScalarIntegerType(getSrc(i)->getType())) {
++IntegerCount;
}
}
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
if (IntegerCount == 0) {
Str << "\t"
<< "@empty push";
return;
}
Str << "\t" Str << "\t"
<< "push" << "push"
<< "\t{"; << "\t{";
emitSources(Func); bool PrintComma = false;
for (SizeT i = 0; i < getSrcSize(); ++i) {
Operand *Op = getSrc(i);
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
}
Str << "}"; Str << "}";
} }
...@@ -923,8 +939,12 @@ void InstARM32Str::emit(const Cfg *Func) const { ...@@ -923,8 +939,12 @@ void InstARM32Str::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2); assert(getSrcSize() == 2);
Type Ty = getSrc(0)->getType(); Type Ty = getSrc(0)->getType();
const char *Opcode = isScalarFloatingType(Ty) ? "vstr" : "str"; const bool IsVectorStore = isVectorType(Ty);
Str << "\t" << Opcode << getWidthString(Ty) << getPredicate() << "\t"; const char *Opcode =
IsVectorStore ? "vst1" : (isScalarFloatingType(Ty) ? "vstr" : "str");
const char *VecEltWidthString = IsVectorStore ? ".64" : "";
Str << "\t" << Opcode << getWidthString(Ty) << getPredicate()
<< VecEltWidthString << "\t";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
Str << ", "; Str << ", ";
getSrc(1)->emit(Func); getSrc(1)->emit(Func);
...@@ -1119,6 +1139,33 @@ void InstARM32Vmrs::dump(const Cfg *Func) const { ...@@ -1119,6 +1139,33 @@ void InstARM32Vmrs::dump(const Cfg *Func) const {
"FPSCR{n,z,c,v}"; "FPSCR{n,z,c,v}";
} }
void InstARM32Vabs::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Str << "\t"
"vabs" << getPredicate() << getVecWidthString(getSrc(0)->getType())
<< "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
}
// Integrated-assembler emission for vabs is not implemented: after checking
// the operand count, this always reaches llvm_unreachable.
void InstARM32Vabs::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until an implementation exists.
  assert(getSrcSize() == 1);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Vabs::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = vabs" << getPredicate() << getVecWidthString(getSrc(0)->getType());
}
void OperandARM32Mem::emit(const Cfg *Func) const { void OperandARM32Mem::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -1128,13 +1175,13 @@ void OperandARM32Mem::emit(const Cfg *Func) const { ...@@ -1128,13 +1175,13 @@ void OperandARM32Mem::emit(const Cfg *Func) const {
switch (getAddrMode()) { switch (getAddrMode()) {
case PostIndex: case PostIndex:
case NegPostIndex: case NegPostIndex:
Str << "], "; Str << "]";
break; break;
default: default:
Str << ", ";
break; break;
} }
if (isRegReg()) { if (isRegReg()) {
Str << ", ";
if (isNegAddrMode()) { if (isNegAddrMode()) {
Str << "-"; Str << "-";
} }
...@@ -1144,7 +1191,11 @@ void OperandARM32Mem::emit(const Cfg *Func) const { ...@@ -1144,7 +1191,11 @@ void OperandARM32Mem::emit(const Cfg *Func) const {
<< getShiftAmt(); << getShiftAmt();
} }
} else { } else {
getOffset()->emit(Func); ConstantInteger32 *Offset = getOffset();
if (Offset && Offset->getValue() != 0) {
Str << ", ";
Offset->emit(Func);
}
} }
switch (getAddrMode()) { switch (getAddrMode()) {
case Offset: case Offset:
......
...@@ -350,23 +350,24 @@ ...@@ -350,23 +350,24 @@
// the # of offset bits allowed as part of an addressing mode (for sign or zero // the # of offset bits allowed as part of an addressing mode (for sign or zero
// extending load/stores). // extending load/stores).
#define ICETYPEARM32_TABLE \ #define ICETYPEARM32_TABLE \
/* tag, element type, int_width, vec_width, addr bits sext, zext */ \ /* tag, element type, int_width, vec_width, addr bits sext, zext, \
X(IceType_void, IceType_void, "" , "" , 0 , 0) \ reg-reg addr allowed */ \
X(IceType_i1, IceType_void, "b", "" , 8 , 12) \ X(IceType_void, IceType_void, "" , "" , 0 , 0 , 0) \
X(IceType_i8, IceType_void, "b", "" , 8 , 12) \ X(IceType_i1, IceType_void, "b", "" , 8 , 12, 1) \
X(IceType_i16, IceType_void, "h", "" , 8 , 8) \ X(IceType_i8, IceType_void, "b", "" , 8 , 12, 1) \
X(IceType_i32, IceType_void, "" , "" , 12, 12) \ X(IceType_i16, IceType_void, "h", "" , 8 , 8 , 1) \
X(IceType_i64, IceType_void, "d", "" , 8 , 8) \ X(IceType_i32, IceType_void, "" , "" , 12, 12, 1) \
X(IceType_f32, IceType_void, "" , ".f32", 10, 10) \ X(IceType_i64, IceType_void, "d", "" , 8 , 8 , 1) \
X(IceType_f64, IceType_void, "" , ".f64", 10, 10) \ X(IceType_f32, IceType_void, "" , ".f32", 8, 8 , 0) \
X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0) \ X(IceType_f64, IceType_void, "" , ".f64", 8, 8 , 0) \
X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0) \ X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0 , 1) \
X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0) \ X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0 , 1) \
X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0) \ X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0 , 1) \
X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0) \ X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0 , 1) \
X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0) \ X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0 , 1) \
X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0) X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0 , 1) \
//#define X(tag, elementty, int_width, vec_width, sbits, ubits) X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0 , 1)
//#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr)
// Shifter types for Data-processing operands as defined in section A5.1.2. // Shifter types for Data-processing operands as defined in section A5.1.2.
#define ICEINSTARM32SHIFT_TABLE \ #define ICEINSTARM32SHIFT_TABLE \
......
...@@ -320,12 +320,11 @@ public: ...@@ -320,12 +320,11 @@ public:
Udiv, Udiv,
Umull, Umull,
Uxt, Uxt,
Vabs,
Vadd, Vadd,
Vcmp, Vcmp,
Vcvt, Vcvt,
Vdiv, Vdiv,
Vldr,
Vmov,
Vmrs, Vmrs,
Vmul, Vmul,
Vsqrt, Vsqrt,
...@@ -780,13 +779,6 @@ using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; ...@@ -780,13 +779,6 @@ using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32Movlike<InstARM32::Ldr>; using InstARM32Ldr = InstARM32Movlike<InstARM32::Ldr>;
/// Move instruction (variable <- flex). This is more of a pseudo-inst. If var
/// is a register, then we use "mov". If var is stack, then we use "str" to
/// store to the stack.
using InstARM32Mov = InstARM32Movlike<InstARM32::Mov>;
/// Represents various vector mov instruction forms (simple single source,
/// single dest forms only, not the 2 GPR <-> 1 D reg forms, etc.).
using InstARM32Vldr = InstARM32Movlike<InstARM32::Vldr>;
/// MovT leaves the bottom bits alone so dest is also a source. This helps /// MovT leaves the bottom bits alone so dest is also a source. This helps
/// indicate that a previous MovW setting dest is not dead code. /// indicate that a previous MovW setting dest is not dead code.
using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>; using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>;
...@@ -1120,90 +1112,47 @@ private: ...@@ -1120,90 +1112,47 @@ private:
}; };
/// Handles (some of) vmov's various formats. /// Handles (some of) vmov's various formats.
class InstARM32Vmov final : public InstARM32Pred { class InstARM32Mov final : public InstARM32Pred {
InstARM32Vmov() = delete; InstARM32Mov() = delete;
InstARM32Vmov(const InstARM32Vmov &) = delete; InstARM32Mov(const InstARM32Mov &) = delete;
InstARM32Vmov &operator=(const InstARM32Vmov &) = delete; InstARM32Mov &operator=(const InstARM32Mov &) = delete;
public: public:
/// RegisterPair is used to group registers in static InstARM32Mov *create(Cfg *Func, Variable *Dest, Operand *Src,
///
/// vmov D, (R, R)
///
/// and
///
/// vmov (R, R), D
struct RegisterPair {
explicit RegisterPair(Variable *V0, Variable *V1) : _0(V0), _1(V1) {
assert(V0->getType() == IceType_i32);
assert(V1->getType() == IceType_i32);
}
Variable *_0;
Variable *_1;
};
static InstARM32Vmov *create(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate) { CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vmov>()) return new (Func->allocate<InstARM32Mov>())
InstARM32Vmov(Func, Dest, Src, Predicate); InstARM32Mov(Func, Dest, Src, Predicate);
}
static InstARM32Vmov *create(Cfg *Func, const RegisterPair &Dests,
Variable *Src, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vmov>())
InstARM32Vmov(Func, Dests, Src, Predicate);
}
static InstARM32Vmov *create(Cfg *Func, Variable *Dest,
const RegisterPair &Srcs,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vmov>())
InstARM32Vmov(Func, Dest, Srcs, Predicate);
} }
bool isRedundantAssign() const override { bool isRedundantAssign() const override {
return Dest1 == nullptr && getSrcSize() == 1 && return !isMultiDest() && !isMultiSource() &&
checkForRedundantAssign(getDest(), getSrc(0)); checkForRedundantAssign(getDest(), getSrc(0));
} }
bool isSimpleAssign() const override { return true; } bool isSimpleAssign() const override { return true; }
void emit(const Cfg *Func) const override; void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Vmov); } static bool classof(const Inst *Inst) { return isClassof(Inst, Mov); }
private:
InstARM32Vmov(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmov, 1, Dest, Predicate) {
addSource(Src);
}
InstARM32Vmov(Cfg *Func, const RegisterPair &Dests, Variable *Src,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmov, 1, Dests._0, Predicate),
Dest1(Dests._1) {
addSource(Src);
}
InstARM32Vmov(Cfg *Func, Variable *Dest, const RegisterPair &Srcs,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmov, 2, Dest, Predicate) {
addSource(Srcs._0);
addSource(Srcs._1);
}
bool isMultiDest() const { bool isMultiDest() const {
assert(getDest() != nullptr); assert(getDest() != nullptr);
return Dest1 != nullptr; return llvm::isa<Variable64On32>(getDest());
} }
bool isMultiSource() const { bool isMultiSource() const {
assert(getSrcSize() >= 1); assert(getSrcSize() == 1);
return getSrcSize() > 1; return llvm::isa<Variable64On32>(getSrc(0));
}
private:
InstARM32Mov(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Mov, 1, Dest, Predicate) {
addSource(Src);
} }
void emitMultiDestSingleSource(const Cfg *Func) const; void emitMultiDestSingleSource(const Cfg *Func) const;
void emitSingleDestMultiSource(const Cfg *Func) const; void emitSingleDestMultiSource(const Cfg *Func) const;
void emitSingleDestSingleSource(const Cfg *Func) const; void emitSingleDestSingleSource(const Cfg *Func) const;
Variable *Dest1 = nullptr;
}; };
class InstARM32Vcmp final : public InstARM32Pred { class InstARM32Vcmp final : public InstARM32Pred {
...@@ -1246,15 +1195,33 @@ private: ...@@ -1246,15 +1195,33 @@ private:
InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate); InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate);
}; };
class InstARM32Vabs final : public InstARM32Pred {
InstARM32Vabs() = delete;
InstARM32Vabs(const InstARM32Vabs &) = delete;
InstARM32Vabs &operator=(const InstARM32Vabs &) = delete;
public:
static InstARM32Vabs *create(Cfg *Func, Variable *Dest, Variable *Src,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vabs>())
InstARM32Vabs(Func, Dest, Src, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Vabs); }
private:
InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src,
CondARM32::Cond Predicate);
};
// Declare partial template specializations of emit() methods that already have // Declare partial template specializations of emit() methods that already have
// default implementations. Without this, there is the possibility of ODR // default implementations. Without this, there is the possibility of ODR
// violations and link errors. // violations and link errors.
template <> void InstARM32Ldr::emit(const Cfg *Func) const; template <> void InstARM32Ldr::emit(const Cfg *Func) const;
template <> void InstARM32Mov::emit(const Cfg *Func) const;
template <> void InstARM32Movw::emit(const Cfg *Func) const; template <> void InstARM32Movw::emit(const Cfg *Func) const;
template <> void InstARM32Movt::emit(const Cfg *Func) const; template <> void InstARM32Movt::emit(const Cfg *Func) const;
template <> void InstARM32Vldr::emit(const Cfg *Func) const;
} // end of namespace Ice } // end of namespace Ice
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "IceDefs.h" #include "IceDefs.h"
#include "IceELFObjectWriter.h" #include "IceELFObjectWriter.h"
#include "IceGlobalInits.h" #include "IceGlobalInits.h"
#include "IceInstARM32.def"
#include "IceInstARM32.h" #include "IceInstARM32.h"
#include "IceLiveness.h" #include "IceLiveness.h"
#include "IceOperand.h" #include "IceOperand.h"
...@@ -30,6 +31,7 @@ ...@@ -30,6 +31,7 @@
#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"
#include <algorithm> #include <algorithm>
#include <utility>
namespace Ice { namespace Ice {
...@@ -380,8 +382,21 @@ IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const { ...@@ -380,8 +382,21 @@ IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
} }
Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) { Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
if (Ty == IceType_void) static const Type DefaultType[] = {
Ty = IceType_i32; #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128, alias_init) \
(isFP32) \
? IceType_f32 \
: ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
REGARM32_TABLE
#undef X
};
assert(RegNum < RegARM32::Reg_NUM);
if (Ty == IceType_void) {
assert(RegNum < llvm::array_lengthof(DefaultType));
Ty = DefaultType[RegNum];
}
if (PhysicalRegisters[Ty].empty()) if (PhysicalRegisters[Ty].empty())
PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
assert(RegNum < PhysicalRegisters[Ty].size()); assert(RegNum < PhysicalRegisters[Ty].size());
...@@ -425,11 +440,17 @@ void TargetARM32::emitVariable(const Variable *Var) const { ...@@ -425,11 +440,17 @@ void TargetARM32::emitVariable(const Variable *Var) const {
if (!hasFramePointer()) if (!hasFramePointer())
Offset += getStackAdjustment(); Offset += getStackAdjustment();
} }
if (!isLegalVariableStackOffset(Offset)) { const Type VarTy = Var->getType();
// In general, no Variable64On32 should be emitted in textual asm output. It
// turns out that some lowering sequences Fake-Def/Fake-Use such variables.
// If they end up being assigned an illegal offset we get a runtime error. We
// liberally allow Variable64On32 to have illegal offsets because offsets
// don't matter in FakeDefs/FakeUses.
if (!llvm::isa<Variable64On32>(Var) &&
!isLegalVariableStackOffset(VarTy, Offset)) {
llvm::report_fatal_error("Illegal stack offset"); llvm::report_fatal_error("Illegal stack offset");
} }
const Type FrameSPTy = stackSlotType(); Str << "[" << getRegName(BaseRegNum, VarTy);
Str << "[" << getRegName(BaseRegNum, FrameSPTy);
if (Offset != 0) { if (Offset != 0) {
Str << ", " << getConstantPrefix() << Offset; Str << ", " << getConstantPrefix() << Offset;
} }
...@@ -592,17 +613,14 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, ...@@ -592,17 +613,14 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
// value from the stack slot. // value from the stack slot.
if (Arg->hasReg()) { if (Arg->hasReg()) {
assert(Ty != IceType_i64); assert(Ty != IceType_i64);
OperandARM32Mem *Mem = OperandARM32Mem::create( // This should be simple, just load the parameter off the stack using a nice
// sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for
// fp types, cannot have an index register), so we legalize the memory
// operand instead.
auto *Mem = OperandARM32Mem::create(
Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
Ctx->getConstantInt32(Arg->getStackOffset()))); Ctx->getConstantInt32(Arg->getStackOffset())));
if (isVectorType(Arg->getType())) { legalizeToReg(Mem, Arg->getRegNum());
// Use vld1.$elem or something?
UnimplementedError(Func->getContext()->getFlags());
} else if (isFloatingType(Arg->getType())) {
_vldr(Arg, Mem);
} else {
_ldr(Arg, Mem);
}
// This argument-copying instruction uses an explicit OperandARM32Mem // This argument-copying instruction uses an explicit OperandARM32Mem
// operand instead of a Variable, so its fill-from-stack operation has to // operand instead of a Variable, so its fill-from-stack operation has to
// be tracked separately for statistics. // be tracked separately for statistics.
...@@ -894,16 +912,15 @@ void TargetARM32::addEpilog(CfgNode *Node) { ...@@ -894,16 +912,15 @@ void TargetARM32::addEpilog(CfgNode *Node) {
RI->setDeleted(); RI->setDeleted();
} }
bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { bool TargetARM32::isLegalVariableStackOffset(Type Ty, int32_t Offset) const {
constexpr bool SignExt = false; constexpr bool SignExt = false;
// TODO(jvoung): vldr of FP stack slots has a different limit from the plain return OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset);
// stackSlotType().
return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
} }
StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
int32_t StackAdjust,
Variable *OrigBaseReg) { Variable *OrigBaseReg) {
int32_t Offset = Var->getStackOffset(); int32_t Offset = Var->getStackOffset() + StackAdjust;
// Legalize will likely need a movw/movt combination, but if the top bits are // Legalize will likely need a movw/movt combination, but if the top bits are
// all 0 from negating the offset and subtracting, we could use that instead. // all 0 from negating the offset and subtracting, we could use that instead.
bool ShouldSub = (-Offset & 0xFFFF0000) == 0; bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
...@@ -937,7 +954,9 @@ void TargetARM32::legalizeStackSlots() { ...@@ -937,7 +954,9 @@ void TargetARM32::legalizeStackSlots() {
Func->dump("Before legalizeStackSlots"); Func->dump("Before legalizeStackSlots");
assert(hasComputedFrame()); assert(hasComputedFrame());
// Early exit, if SpillAreaSizeBytes is really small. // Early exit, if SpillAreaSizeBytes is really small.
if (isLegalVariableStackOffset(SpillAreaSizeBytes)) // TODO(jpp): this is not safe -- loads and stores of q registers can't have
// offsets.
if (isLegalVariableStackOffset(IceType_v4i32, SpillAreaSizeBytes))
return; return;
Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
int32_t StackAdjust = 0; int32_t StackAdjust = 0;
...@@ -978,64 +997,77 @@ void TargetARM32::legalizeStackSlots() { ...@@ -978,64 +997,77 @@ void TargetARM32::legalizeStackSlots() {
continue; continue;
} }
} }
// For now, only Mov instructions can have stack variables. We need to // For now, only Mov instructions can have stack variables. We need to
// know the type of instruction because we currently create a fresh one // know the type of instruction because we currently create a fresh one
// to replace Dest/Source, rather than mutate in place. // to replace Dest/Source, rather than mutate in place.
auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); bool MayNeedOffsetRewrite = false;
if (!MovInst) { if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
MayNeedOffsetRewrite =
!MovInstr->isMultiDest() && !MovInstr->isMultiSource();
}
if (!MayNeedOffsetRewrite) {
continue; continue;
} }
assert(Dest != nullptr);
Type DestTy = Dest->getType();
assert(DestTy != IceType_i64);
if (!Dest->hasReg()) { if (!Dest->hasReg()) {
int32_t Offset = Dest->getStackOffset(); int32_t Offset = Dest->getStackOffset();
Offset += StackAdjust; Offset += StackAdjust;
if (!isLegalVariableStackOffset(Offset)) { if (!isLegalVariableStackOffset(DestTy, Offset)) {
if (NewBaseReg) { if (NewBaseReg) {
int32_t OffsetDiff = Offset - NewBaseOffset; int32_t OffsetDiff = Offset - NewBaseOffset;
if (isLegalVariableStackOffset(OffsetDiff)) { if (isLegalVariableStackOffset(DestTy, OffsetDiff)) {
StackVariable *NewDest = StackVariable *NewDest =
Func->makeVariable<StackVariable>(stackSlotType()); Func->makeVariable<StackVariable>(stackSlotType());
NewDest->setMustNotHaveReg(); NewDest->setMustNotHaveReg();
NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum());
NewDest->setStackOffset(OffsetDiff); NewDest->setStackOffset(OffsetDiff);
Variable *NewDestVar = NewDest; Variable *NewDestVar = NewDest;
_mov(NewDestVar, MovInst->getSrc(0)); _mov(NewDestVar, CurInstr->getSrc(0));
MovInst->setDeleted(); CurInstr->setDeleted();
continue; continue;
} }
} }
StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg); StackVariable *LegalDest =
legalizeVariableSlot(Dest, StackAdjust, OrigBaseReg);
assert(LegalDest != Dest); assert(LegalDest != Dest);
Variable *LegalDestVar = LegalDest; Variable *LegalDestVar = LegalDest;
_mov(LegalDestVar, MovInst->getSrc(0)); _mov(LegalDestVar, CurInstr->getSrc(0));
MovInst->setDeleted(); CurInstr->setDeleted();
NewBaseReg = LegalDest; NewBaseReg = LegalDest;
NewBaseOffset = Offset; NewBaseOffset = Offset;
continue; continue;
} }
} }
assert(MovInst->getSrcSize() == 1); assert(CurInstr->getSrcSize() == 1);
Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0)); Variable *Var = llvm::dyn_cast<Variable>(CurInstr->getSrc(0));
if (Var && !Var->hasReg()) { if (Var && !Var->hasReg()) {
Type VarTy = Var->getType();
int32_t Offset = Var->getStackOffset(); int32_t Offset = Var->getStackOffset();
Offset += StackAdjust; Offset += StackAdjust;
if (!isLegalVariableStackOffset(Offset)) { if (!isLegalVariableStackOffset(VarTy, Offset)) {
if (NewBaseReg) { if (NewBaseReg) {
int32_t OffsetDiff = Offset - NewBaseOffset; int32_t OffsetDiff = Offset - NewBaseOffset;
if (isLegalVariableStackOffset(OffsetDiff)) { if (isLegalVariableStackOffset(VarTy, OffsetDiff)) {
StackVariable *NewVar = StackVariable *NewVar =
Func->makeVariable<StackVariable>(stackSlotType()); Func->makeVariable<StackVariable>(stackSlotType());
NewVar->setMustNotHaveReg(); NewVar->setMustNotHaveReg();
NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum());
NewVar->setStackOffset(OffsetDiff); NewVar->setStackOffset(OffsetDiff);
_mov(Dest, NewVar); _mov(Dest, NewVar);
MovInst->setDeleted(); CurInstr->setDeleted();
continue; continue;
} }
} }
StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg); StackVariable *LegalVar =
legalizeVariableSlot(Var, StackAdjust, OrigBaseReg);
assert(LegalVar != Var); assert(LegalVar != Var);
_mov(Dest, LegalVar); _mov(Dest, LegalVar);
MovInst->setDeleted(); CurInstr->setDeleted();
NewBaseReg = LegalVar; NewBaseReg = LegalVar;
NewBaseOffset = Offset; NewBaseOffset = Offset;
continue; continue;
...@@ -1427,6 +1459,20 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1427,6 +1459,20 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
case InstArithmetic::Shl: { case InstArithmetic::Shl: {
// a=b<<c ==> // a=b<<c ==>
// pnacl-llc does:
// mov t_b.lo, b.lo
// mov t_b.hi, b.hi
// mov t_c.lo, c.lo
// rsb T0, t_c.lo, #32
// lsr T1, t_b.lo, T0
// orr t_a.hi, T1, t_b.hi, lsl t_c.lo
// sub T2, t_c.lo, #32
// cmp T2, #0
// lslge t_a.hi, t_b.lo, T2
// lsl t_a.lo, t_b.lo, t_c.lo
// mov a.lo, t_a.lo
// mov a.hi, t_a.hi
//
// GCC 4.8 does: // GCC 4.8 does:
// sub t_c1, c.lo, #32 // sub t_c1, c.lo, #32
// lsl t_hi, b.hi, c.lo // lsl t_hi, b.hi, c.lo
...@@ -1436,78 +1482,88 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1436,78 +1482,88 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// lsl t_lo, b.lo, c.lo // lsl t_lo, b.lo, c.lo
// a.lo = t_lo // a.lo = t_lo
// a.hi = t_hi // a.hi = t_hi
//
// These are incompatible, therefore we mimic pnacl-llc.
// Can be strength-reduced for constant-shifts, but we don't do that for // Can be strength-reduced for constant-shifts, but we don't do that for
// now. // now.
// Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
// ARM, shifts only take the lower 8 bits of the shift register, and // ARM, shifts only take the lower 8 bits of the shift register, and
// saturate to the range 0-32, so the negative value will saturate to 32. // saturate to the range 0-32, so the negative value will saturate to 32.
Variable *T_Hi = makeReg(IceType_i32); Constant *_32 = Ctx->getConstantInt32(32);
Constant *_0 = Ctx->getConstantZero(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo); Variable *Src1RLo = legalizeToReg(Src1Lo);
Constant *ThirtyTwo = Ctx->getConstantInt32(32); Variable *T0 = makeReg(IceType_i32);
Variable *T_C1 = makeReg(IceType_i32); Variable *T1 = makeReg(IceType_i32);
Variable *T_C2 = makeReg(IceType_i32); Variable *T2 = makeReg(IceType_i32);
_sub(T_C1, Src1RLo, ThirtyTwo); Variable *TA_Hi = makeReg(IceType_i32);
_lsl(T_Hi, Src0RHi, Src1RLo); Variable *TA_Lo = makeReg(IceType_i32);
_orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, _rsb(T0, Src1RLo, _32);
OperandARM32::LSL, T_C1)); _lsr(T1, Src0RLo, T0);
_rsb(T_C2, Src1RLo, ThirtyTwo); _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
_orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
OperandARM32::LSR, T_C2));
_mov(DestHi, T_Hi);
Variable *T_Lo = makeReg(IceType_i32);
// _mov seems to sometimes have better register preferencing than lsl.
// Otherwise mov w/ lsl shifted register is a pseudo-instruction that
// maps to lsl.
_mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
OperandARM32::LSL, Src1RLo)); OperandARM32::LSL, Src1RLo));
_mov(DestLo, T_Lo); _sub(T2, Src1RLo, _32);
_cmp(T2, _0);
_lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
_set_dest_nonkillable();
_lsl(TA_Lo, Src0RLo, Src1RLo);
_mov(DestLo, TA_Lo);
_mov(DestHi, TA_Hi);
return; return;
} }
case InstArithmetic::Lshr: case InstArithmetic::Lshr:
// a=b>>c (unsigned) ==> case InstArithmetic::Ashr: {
// GCC 4.8 does: // a=b>>c
// pnacl-llc does:
// mov t_b.lo, b.lo
// mov t_b.hi, b.hi
// mov t_c.lo, c.lo
// lsr T0, t_b.lo, t_c.lo
// rsb T1, t_c.lo, #32
// orr t_a.lo, T0, t_b.hi, lsl T1
// sub T2, t_c.lo, #32
// cmp T2, #0
// [al]srge t_a.lo, t_b.hi, T2
// [al]sr t_a.hi, t_b.hi, t_c.lo
// mov a.lo, t_a.lo
// mov a.hi, t_a.hi
//
// GCC 4.8 does (lsr):
// rsb t_c1, c.lo, #32 // rsb t_c1, c.lo, #32
// lsr t_lo, b.lo, c.lo // lsr t_lo, b.lo, c.lo
// orr t_lo, t_lo, b.hi, lsl t_c1 // orr t_lo, t_lo, b.hi, lsl t_c1
// sub t_c2, c.lo, #32 // sub t_c2, c.lo, #32
// orr t_lo, t_lo, b.hi, lsr t_c2 // orr t_lo, t_lo, b.hi, lsr t_c2
// lsr t_hi, b.hi, c.lo // lsr t_hi, b.hi, c.lo
// a.lo = t_lo // mov a.lo, t_lo
// a.hi = t_hi // mov a.hi, t_hi
case InstArithmetic::Ashr: { //
// a=b>>c (signed) ==> ... // These are incompatible, therefore we mimic pnacl-llc.
// Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
// next orr should be conditioned on PLUS. The last two right shifts Constant *_32 = Ctx->getConstantInt32(32);
// should also be arithmetic. Constant *_0 = Ctx->getConstantZero(IceType_i32);
bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
Variable *T_Lo = makeReg(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo); Variable *Src1RLo = legalizeToReg(Src1Lo);
Constant *ThirtyTwo = Ctx->getConstantInt32(32); Variable *T0 = makeReg(IceType_i32);
Variable *T_C1 = makeReg(IceType_i32); Variable *T1 = makeReg(IceType_i32);
Variable *T_C2 = makeReg(IceType_i32); Variable *T2 = makeReg(IceType_i32);
_rsb(T_C1, Src1RLo, ThirtyTwo); Variable *TA_Lo = makeReg(IceType_i32);
_lsr(T_Lo, Src0RLo, Src1RLo); Variable *TA_Hi = makeReg(IceType_i32);
_orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, _lsr(T0, Src0RLo, Src1RLo);
OperandARM32::LSL, T_C1)); _rsb(T1, Src1RLo, _32);
OperandARM32::ShiftKind RShiftKind; _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
CondARM32::Cond Pred; OperandARM32::LSL, T1));
_sub(T2, Src1RLo, _32);
_cmp(T2, _0);
if (IsAshr) { if (IsAshr) {
_subs(T_C2, Src1RLo, ThirtyTwo); _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
RShiftKind = OperandARM32::ASR; _set_dest_nonkillable();
Pred = CondARM32::PL; _asr(TA_Hi, Src0RHi, Src1RLo);
} else { } else {
_sub(T_C2, Src1RLo, ThirtyTwo); _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
RShiftKind = OperandARM32::LSR; _set_dest_nonkillable();
Pred = CondARM32::AL; _lsr(TA_Hi, Src0RHi, Src1RLo);
} }
_orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, _mov(DestLo, TA_Lo);
RShiftKind, T_C2), _mov(DestHi, TA_Hi);
Pred);
_mov(DestLo, T_Lo);
Variable *T_Hi = makeReg(IceType_i32);
_mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
RShiftKind, Src1RLo));
_mov(DestHi, T_Hi);
return; return;
} }
case InstArithmetic::Fadd: case InstArithmetic::Fadd:
...@@ -1527,9 +1583,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1527,9 +1583,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
return; return;
} else if (isVectorType(Dest->getType())) { } else if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
// Add a fake def to keep liveness consistent in the meantime. // Add a fake def to keep liveness consistent in the meantime.
Context.insert(InstFakeDef::create(Func, Dest)); Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
// Dest->getType() is a non-i64 scalar. // Dest->getType() is a non-i64 scalar.
...@@ -1585,25 +1643,25 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1585,25 +1643,25 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Fadd: { case InstArithmetic::Fadd: {
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R); _vadd(T, Src0R, Src1R);
_vmov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Fsub: { case InstArithmetic::Fsub: {
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vsub(T, Src0R, Src1R); _vsub(T, Src0R, Src1R);
_vmov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Fmul: { case InstArithmetic::Fmul: {
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vmul(T, Src0R, Src1R); _vmul(T, Src0R, Src1R);
_vmov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Fdiv: { case InstArithmetic::Fdiv: {
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vdiv(T, Src0R, Src1R); _vdiv(T, Src0R, Src1R);
_vmov(Dest, T); _mov(Dest, T);
return; return;
} }
} }
...@@ -1677,7 +1735,8 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { ...@@ -1677,7 +1735,8 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = nullptr, *T_Hi = nullptr; Variable *T_Lo = makeReg(IceType_i32);
Variable *T_Hi = makeReg(IceType_i32);
_mov(T_Lo, Src0Lo); _mov(T_Lo, Src0Lo);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
_mov(T_Hi, Src0Hi); _mov(T_Hi, Src0Hi);
...@@ -1696,10 +1755,11 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { ...@@ -1696,10 +1755,11 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
NewSrc = legalize(Src0, Legal_Reg); NewSrc = legalize(Src0, Legal_Reg);
} }
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags()); Variable *SrcR = legalizeToReg(NewSrc);
_mov(Dest, SrcR);
} else if (isFloatingType(Dest->getType())) { } else if (isFloatingType(Dest->getType())) {
Variable *SrcR = legalizeToReg(NewSrc); Variable *SrcR = legalizeToReg(NewSrc);
_vmov(Dest, SrcR); _mov(Dest, SrcR);
} else { } else {
_mov(Dest, NewSrc); _mov(Dest, NewSrc);
} }
...@@ -1769,7 +1829,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1769,7 +1829,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
ParameterAreaSizeBytes = ParameterAreaSizeBytes =
applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
} }
} }
...@@ -1809,19 +1869,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1809,19 +1869,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
lowerStore(InstStore::create(Func, StackArg.first, Addr)); lowerStore(InstStore::create(Func, StackArg.first, Addr));
} }
// Copy arguments to be passed in registers to the appropriate registers.
for (auto &GPRArg : GPRArgs) {
Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
// Generate a FakeUse of register arguments so that they do not get dead
// code eliminated as a result of the FakeKill of scratch registers after
// the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
for (auto &FPArg : FPArgs) {
Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary with high // Generate the call instruction. Assign its result to a temporary with high
// register allocation weight. // register allocation weight.
Variable *Dest = Instr->getDest(); Variable *Dest = Instr->getDest();
...@@ -1872,6 +1919,19 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1872,6 +1919,19 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
if (!llvm::isa<ConstantRelocatable>(CallTarget)) { if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
CallTarget = legalize(CallTarget, Legal_Reg); CallTarget = legalize(CallTarget, Legal_Reg);
} }
// Copy arguments to be passed in registers to the appropriate registers.
for (auto &FPArg : FPArgs) {
Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
Context.insert(InstFakeUse::create(Func, Reg));
}
for (auto &GPRArg : GPRArgs) {
Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
// Generate a FakeUse of register arguments so that they do not get dead
// code eliminated as a result of the FakeKill of scratch registers after
// the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall); Context.insert(NewCall);
if (ReturnRegHi) if (ReturnRegHi)
...@@ -1908,7 +1968,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1908,7 +1968,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
_mov(DestHi, ReturnRegHi); _mov(DestHi, ReturnRegHi);
} else { } else {
if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
_vmov(Dest, ReturnReg); _mov(Dest, ReturnReg);
} else { } else {
assert(isIntegerType(Dest->getType()) && assert(isIntegerType(Dest->getType()) &&
typeWidthInBytes(Dest->getType()) <= 4); typeWidthInBytes(Dest->getType()) <= 4);
...@@ -1918,6 +1978,13 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1918,6 +1978,13 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
} }
} }
namespace {
/// Calls setMustHaveReg() on the variable returned by Var->getHi() and then
/// on the one returned by Var->getLo().
void forceHiLoInReg(Variable64On32 *Var) {
  auto *HiHalf = Var->getHi();
  HiHalf->setMustHaveReg();
  auto *LoHalf = Var->getLo();
  LoHalf->setMustHaveReg();
}
} // end of anonymous namespace
void TargetARM32::lowerCast(const InstCast *Inst) { void TargetARM32::lowerCast(const InstCast *Inst) {
InstCast::OpKind CastKind = Inst->getCastKind(); InstCast::OpKind CastKind = Inst->getCastKind();
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
...@@ -1928,6 +1995,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -1928,6 +1995,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
return; return;
case InstCast::Sext: { case InstCast::Sext: {
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
} else if (Dest->getType() == IceType_i64) { } else if (Dest->getType() == IceType_i64) {
// t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
...@@ -1978,6 +2048,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -1978,6 +2048,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
} }
case InstCast::Zext: { case InstCast::Zext: {
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
} else if (Dest->getType() == IceType_i64) { } else if (Dest->getType() == IceType_i64) {
// t1=uxtb src; dst.lo=t1; dst.hi=0 // t1=uxtb src; dst.lo=t1; dst.hi=0
...@@ -2024,6 +2097,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2024,6 +2097,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
} }
case InstCast::Trunc: { case InstCast::Trunc: {
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
} else { } else {
if (Src0->getType() == IceType_i64) if (Src0->getType() == IceType_i64)
...@@ -2044,6 +2120,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2044,6 +2120,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
// fpext: dest.f64 = fptrunc src0.fp32 // fpext: dest.f64 = fptrunc src0.fp32
const bool IsTrunc = CastKind == InstCast::Fptrunc; const bool IsTrunc = CastKind == InstCast::Fptrunc;
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
break; break;
} }
...@@ -2057,6 +2136,26 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2057,6 +2136,26 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
} }
case InstCast::Fptosi: case InstCast::Fptosi:
case InstCast::Fptoui: { case InstCast::Fptoui: {
if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags());
break;
}
const bool DestIsSigned = CastKind == InstCast::Fptosi;
const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
if (llvm::isa<Variable64On32>(Dest)) {
const char *HelperName =
Src0IsF32 ? (DestIsSigned ? H_fptosi_f32_i64 : H_fptoui_f32_i64)
: (DestIsSigned ? H_fptosi_f64_i64 : H_fptoui_f64_i64);
static constexpr SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
Call->addArg(Src0);
lowerCall(Call);
break;
}
// fptosi: // fptosi:
// t1.fp = vcvt src0.fp // t1.fp = vcvt src0.fp
// t2.i32 = vmov t1.fp // t2.i32 = vmov t1.fp
...@@ -2065,28 +2164,14 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2065,28 +2164,14 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
// t1.fp = vcvt src0.fp // t1.fp = vcvt src0.fp
// t2.u32 = vmov t1.fp // t2.u32 = vmov t1.fp
// dest.uint = conv t2.u32 @ Truncates the result if needed. // dest.uint = conv t2.u32 @ Truncates the result if needed.
if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
break;
}
if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) {
Context.insert(InstFakeDef::create(Func, Dest64On32->getLo()));
Context.insert(InstFakeDef::create(Func, Dest64On32->getHi()));
UnimplementedError(Func->getContext()->getFlags());
break;
}
const bool DestIsSigned = CastKind == InstCast::Fptosi;
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
Variable *T_fp = makeReg(IceType_f32); Variable *T_fp = makeReg(IceType_f32);
if (isFloat32Asserting32Or64(Src0->getType())) { const InstARM32Vcvt::VcvtVariant Conversion =
_vcvt(T_fp, Src0R, Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui); : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
} else { _vcvt(T_fp, Src0R, Conversion);
_vcvt(T_fp, Src0R,
DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
}
Variable *T = makeReg(IceType_i32); Variable *T = makeReg(IceType_i32);
_vmov(T, T_fp); _mov(T, T_fp);
if (Dest->getType() != IceType_i32) { if (Dest->getType() != IceType_i32) {
Variable *T_1 = makeReg(Dest->getType()); Variable *T_1 = makeReg(Dest->getType());
lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T)); lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
...@@ -2097,6 +2182,25 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2097,6 +2182,25 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
} }
case InstCast::Sitofp: case InstCast::Sitofp:
case InstCast::Uitofp: { case InstCast::Uitofp: {
if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags());
break;
}
const bool SourceIsSigned = CastKind == InstCast::Sitofp;
const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
if (Src0->getType() == IceType_i64) {
const char *HelperName =
DestIsF32 ? (SourceIsSigned ? H_sitofp_i64_f32 : H_uitofp_i64_f32)
: (SourceIsSigned ? H_sitofp_i64_f64 : H_uitofp_i64_f64);
static constexpr SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
Call->addArg(Src0);
lowerCall(Call);
break;
}
// sitofp: // sitofp:
// t1.i32 = sext src.int @ sign-extends src0 if needed. // t1.i32 = sext src.int @ sign-extends src0 if needed.
// t2.fp32 = vmov t1.i32 // t2.fp32 = vmov t1.i32
...@@ -2105,17 +2209,6 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2105,17 +2209,6 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
// t1.i32 = zext src.int @ zero-extends src0 if needed. // t1.i32 = zext src.int @ zero-extends src0 if needed.
// t2.fp32 = vmov t1.i32 // t2.fp32 = vmov t1.i32
// t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
break;
}
if (Src0->getType() == IceType_i64) {
// avoid cryptic liveness errors
Context.insert(InstFakeDef::create(Func, Dest));
UnimplementedError(Func->getContext()->getFlags());
break;
}
const bool SourceIsSigned = CastKind == InstCast::Sitofp;
if (Src0->getType() != IceType_i32) { if (Src0->getType() != IceType_i32) {
Variable *Src0R_32 = makeReg(IceType_i32); Variable *Src0R_32 = makeReg(IceType_i32);
lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext
...@@ -2125,16 +2218,14 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2125,16 +2218,14 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
} }
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
Variable *Src0R_f32 = makeReg(IceType_f32); Variable *Src0R_f32 = makeReg(IceType_f32);
_vmov(Src0R_f32, Src0R); _mov(Src0R_f32, Src0R);
Src0R = Src0R_f32; Src0R = Src0R_f32;
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
if (isFloat32Asserting32Or64(Dest->getType())) { const InstARM32Vcvt::VcvtVariant Conversion =
_vcvt(T, Src0R, DestIsF32
SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s); ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
} else { : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
_vcvt(T, Src0R, _vcvt(T, Src0R, Conversion);
SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
}
_mov(Dest, T); _mov(Dest, T);
break; break;
} }
...@@ -2153,9 +2244,6 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2153,9 +2244,6 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
case IceType_i1: case IceType_i1:
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
break; break;
case IceType_v4i1:
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_i8: case IceType_i8:
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
break; break;
...@@ -2166,7 +2254,7 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2166,7 +2254,7 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
case IceType_f32: { case IceType_f32: {
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
Variable *T = makeReg(DestType); Variable *T = makeReg(DestType);
_vmov(T, Src0R); _mov(T, Src0R);
lowerAssign(InstAssign::create(Func, Dest, T)); lowerAssign(InstAssign::create(Func, Dest, T));
break; break;
} }
...@@ -2175,13 +2263,17 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2175,13 +2263,17 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
// dest[31..0] = t0 // dest[31..0] = t0
// dest[63..32] = t1 // dest[63..32] = t1
assert(Src0->getType() == IceType_f64); assert(Src0->getType() == IceType_f64);
Variable *T0 = makeReg(IceType_i32); auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
Variable *T1 = makeReg(IceType_i32); T->initHiLo(Func);
forceHiLoInReg(T);
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
_vmov(InstARM32Vmov::RegisterPair(T0, T1), Src0R); _mov(T, Src0R);
Context.insert(InstFakeDef::create(Func, T->getLo()));
Context.insert(InstFakeDef::create(Func, T->getHi()));
auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T0)); lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo()));
lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T1)); lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi()));
Context.insert(InstFakeUse::create(Func, T));
break; break;
} }
case IceType_f64: { case IceType_f64: {
...@@ -2190,41 +2282,47 @@ void TargetARM32::lowerCast(const InstCast *Inst) { ...@@ -2190,41 +2282,47 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
// vmov T2, T0, T1 // vmov T2, T0, T1
// Dest <- T2 // Dest <- T2
assert(Src0->getType() == IceType_i64); assert(Src0->getType() == IceType_i64);
Variable *SrcLo = legalizeToReg(loOperand(Src0)); auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
Variable *SrcHi = legalizeToReg(hiOperand(Src0)); Src64->initHiLo(Func);
Variable *T = makeReg(IceType_f64); forceHiLoInReg(Src64);
_vmov(T, InstARM32Vmov::RegisterPair(SrcLo, SrcHi)); Variable *T = Src64->getLo();
_mov(T, legalizeToReg(loOperand(Src0)));
T = Src64->getHi();
_mov(T, legalizeToReg(hiOperand(Src0)));
T = makeReg(IceType_f64);
Context.insert(InstFakeDef::create(Func, Src64));
_mov(T, Src64);
Context.insert(InstFakeUse::create(Func, Src64->getLo()));
Context.insert(InstFakeUse::create(Func, Src64->getHi()));
lowerAssign(InstAssign::create(Func, Dest, T)); lowerAssign(InstAssign::create(Func, Dest, T));
break; break;
} }
case IceType_v4i1:
case IceType_v8i1: case IceType_v8i1:
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v16i1: case IceType_v16i1:
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v8i16: case IceType_v8i16:
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v16i8: case IceType_v16i8:
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v4i32:
// avoid cryptic liveness errors
Context.insert(InstFakeDef::create(Func, Dest));
UnimplementedError(Func->getContext()->getFlags());
break;
case IceType_v4f32: case IceType_v4f32:
case IceType_v4i32: {
// avoid cryptic liveness errors
Variable *T = makeReg(DestType);
Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
break; break;
} }
}
break; break;
} }
} }
} }
/// Lowering stub for extractelement: the operation itself is not implemented.
/// The old body (left column) merely discarded Inst. The new body defines Dest
/// by moving in a fresh register temporary of Dest's type that only has an
/// InstFakeDef, then reports UnimplementedError.
void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) { void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
(void)Inst; Variable *Dest = Inst->getDest();
Type DestType = Dest->getType();
Variable *T = makeReg(DestType);
// T has no real producer; the fake def supplies a definition for it.
Context.insert(InstFakeDef::create(Func, T));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
} }
...@@ -2269,6 +2367,9 @@ struct { ...@@ -2269,6 +2367,9 @@ struct {
void TargetARM32::lowerFcmp(const InstFcmp *Inst) { void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
...@@ -2306,6 +2407,9 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { ...@@ -2306,6 +2407,9 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
Operand *Src1 = legalizeUndef(Inst->getSrc(1)); Operand *Src1 = legalizeUndef(Inst->getSrc(1));
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
...@@ -2514,7 +2618,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2514,7 +2618,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
if (Val->getType() == IceType_i64) { if (Val->getType() == IceType_i64) {
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Constant *Zero = Ctx->getConstantZero(IceType_i32); Constant *Zero = Ctx->getConstantZero(IceType_i32);
Variable *T = nullptr; Variable *T = makeReg(Zero->getType());
_mov(T, Zero); _mov(T, Zero);
_mov(DestHi, T); _mov(DestHi, T);
} }
...@@ -2561,11 +2665,20 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2561,11 +2665,20 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return; return;
} }
case Intrinsics::Fabs: { case Intrinsics::Fabs: {
Variable *Dest = Instr->getDest();
Type DestTy = Dest->getType();
Variable *T = makeReg(DestTy);
if (isVectorType(DestTy)) {
// Add a fake def to keep liveness consistent in the meantime. // Add a fake def to keep liveness consistent in the meantime.
Context.insert(InstFakeDef::create(Func, Instr->getDest())); Context.insert(InstFakeDef::create(Func, T));
_mov(Instr->getDest(), T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
_vabs(T, legalizeToReg(Instr->getArg(0)));
_mov(Dest, T);
return;
}
case Intrinsics::Longjmp: { case Intrinsics::Longjmp: {
InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
Call->addArg(Instr->getArg(0)); Call->addArg(Instr->getArg(0));
...@@ -2628,7 +2741,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2628,7 +2741,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Variable *Dest = Instr->getDest(); Variable *Dest = Instr->getDest();
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
_vsqrt(T, Src); _vsqrt(T, Src);
_vmov(Dest, T); _mov(Dest, T);
return; return;
} }
case Intrinsics::Stacksave: { case Intrinsics::Stacksave: {
...@@ -2674,7 +2787,7 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { ...@@ -2674,7 +2787,7 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
// of T2 as if it was used as a source. // of T2 as if it was used as a source.
_set_dest_nonkillable(); _set_dest_nonkillable();
_mov(DestLo, T2); _mov(DestLo, T2);
Variable *T3 = nullptr; Variable *T3 = makeReg(Zero->getType());
_mov(T3, Zero); _mov(T3, Zero);
_mov(DestHi, T3); _mov(DestHi, T3);
return; return;
...@@ -2734,7 +2847,8 @@ void TargetARM32::lowerRet(const InstRet *Inst) { ...@@ -2734,7 +2847,8 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
Reg = Q0; Reg = Q0;
} else { } else {
Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
_mov(Reg, Src0F, CondARM32::AL);
} }
} }
// Add a ret instruction even if sandboxing is enabled, because addEpilog // Add a ret instruction even if sandboxing is enabled, because addEpilog
...@@ -2758,6 +2872,9 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) { ...@@ -2758,6 +2872,9 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
Operand *Condition = Inst->getCondition(); Operand *Condition = Inst->getCondition();
if (isVectorType(DestTy)) { if (isVectorType(DestTy)) {
Variable *T = makeReg(DestTy);
Context.insert(InstFakeDef::create(Func, T));
_mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
...@@ -2772,16 +2889,16 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) { ...@@ -2772,16 +2889,16 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
SrcF = legalizeUndef(SrcF); SrcF = legalizeUndef(SrcF);
// Set the low portion. // Set the low portion.
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *TLo = nullptr;
Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
Variable *TLo = makeReg(SrcFLo->getType());
_mov(TLo, SrcFLo); _mov(TLo, SrcFLo);
Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
_mov_nonkillable(TLo, SrcTLo, Cond); _mov_nonkillable(TLo, SrcTLo, Cond);
_mov(DestLo, TLo); _mov(DestLo, TLo);
// Set the high portion. // Set the high portion.
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *THi = nullptr;
Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
Variable *THi = makeReg(SrcFHi->getType());
_mov(THi, SrcFHi); _mov(THi, SrcFHi);
Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
_mov_nonkillable(THi, SrcTHi, Cond); _mov_nonkillable(THi, SrcTHi, Cond);
...@@ -2793,17 +2910,17 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) { ...@@ -2793,17 +2910,17 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
Variable *T = makeReg(DestTy); Variable *T = makeReg(DestTy);
SrcF = legalizeToReg(SrcF); SrcF = legalizeToReg(SrcF);
assert(DestTy == SrcF->getType()); assert(DestTy == SrcF->getType());
_vmov(T, SrcF); _mov(T, SrcF);
SrcT = legalizeToReg(SrcT); SrcT = legalizeToReg(SrcT);
assert(DestTy == SrcT->getType()); assert(DestTy == SrcT->getType());
_vmov(T, SrcT, Cond); _mov(T, SrcT, Cond);
_set_dest_nonkillable(); _set_dest_nonkillable();
_vmov(Dest, T); _mov(Dest, T);
return; return;
} }
Variable *T = nullptr;
SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
Variable *T = makeReg(SrcF->getType());
_mov(T, SrcF); _mov(T, SrcF);
SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
_mov_nonkillable(T, SrcT, Cond); _mov_nonkillable(T, SrcT, Cond);
...@@ -2823,9 +2940,6 @@ void TargetARM32::lowerStore(const InstStore *Inst) { ...@@ -2823,9 +2940,6 @@ void TargetARM32::lowerStore(const InstStore *Inst) {
_str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr))); _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
_str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr))); _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
} else { } else {
if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
}
Variable *ValueR = legalizeToReg(Value); Variable *ValueR = legalizeToReg(Value);
_str(ValueR, NewAddr); _str(ValueR, NewAddr);
} }
...@@ -2878,6 +2992,7 @@ void TargetARM32::prelowerPhis() { ...@@ -2878,6 +2992,7 @@ void TargetARM32::prelowerPhis() {
/// Stub: creates a register variable via makeReg(Ty, RegNum) and returns it.
/// No instruction that writes zeros is emitted here; the added row gives the
/// register an InstFakeDef, and UnimplementedError is reported before
/// returning.
Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
Context.insert(InstFakeDef::create(Func, Reg));
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return Reg; return Reg;
} }
...@@ -2887,16 +3002,7 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { ...@@ -2887,16 +3002,7 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
/// Creates a register variable of Src's type (RegNum is forwarded to makeReg),
/// emits a move of Src into it, and returns the new variable. The
/// single-column rows below are the removed type dispatch (vector and
/// floating-point types went through _vmov); the row that appears in both
/// columns is the one remaining _mov.
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType(); Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
if (isVectorType(Ty)) {
// TODO(jpp): Src must be a register, or an address with base register.
_vmov(Reg, Src);
} else if (isFloatingType(Ty)) {
_vmov(Reg, Src);
} else {
// Mov's Src operand can really only be the flexible second operand type or
// a register. Users should guarantee that.
_mov(Reg, Src); _mov(Reg, Src);
}
return Reg; return Reg;
} }
...@@ -2912,10 +3018,22 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -2912,10 +3018,22 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
// type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
// can always copy to a register. // can always copy to a register.
if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
static const struct {
bool CanHaveOffset;
bool CanHaveIndex;
} MemTraits[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) \
{ (ubits) > 0, rraddr } \
,
ICETYPEARM32_TABLE
#undef X
};
// Before doing anything with a Mem operand, we need to ensure that the // Before doing anything with a Mem operand, we need to ensure that the
// Base and Index components are in physical registers. // Base and Index components are in physical registers.
Variable *Base = Mem->getBase(); Variable *Base = Mem->getBase();
Variable *Index = Mem->getIndex(); Variable *Index = Mem->getIndex();
ConstantInteger32 *Offset = Mem->getOffset();
assert(Index == nullptr || Offset == nullptr);
Variable *RegBase = nullptr; Variable *RegBase = nullptr;
Variable *RegIndex = nullptr; Variable *RegIndex = nullptr;
if (Base) { if (Base) {
...@@ -2923,32 +3041,43 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -2923,32 +3041,43 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
} }
if (Index) { if (Index) {
RegIndex = legalizeToReg(Index); RegIndex = legalizeToReg(Index);
if (!MemTraits[Ty].CanHaveIndex) {
Variable *T = makeReg(IceType_i32, getReservedTmpReg());
_add(T, RegBase, RegIndex);
RegBase = T;
RegIndex = nullptr;
} }
}
if (Offset && Offset->getValue() != 0) {
static constexpr bool SignExt = false;
if (!MemTraits[Ty].CanHaveOffset ||
!OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset->getValue())) {
Variable *T = legalizeToReg(Offset, getReservedTmpReg());
_add(T, T, RegBase);
RegBase = T;
Offset = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0));
}
}
// Create a new operand if there was a change. // Create a new operand if there was a change.
if (Base != RegBase || Index != RegIndex) { if (Base != RegBase || Index != RegIndex) {
// There is only a reg +/- reg or reg + imm form. // There is only a reg +/- reg or reg + imm form.
// Figure out which to re-create. // Figure out which to re-create.
if (Mem->isRegReg()) { if (RegBase && RegIndex) {
Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex, Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
Mem->getShiftOp(), Mem->getShiftAmt(), Mem->getShiftOp(), Mem->getShiftAmt(),
Mem->getAddrMode()); Mem->getAddrMode());
} else { } else {
Mem = OperandARM32Mem::create(Func, Ty, RegBase, Mem->getOffset(), Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
Mem->getAddrMode()); Mem->getAddrMode());
} }
} }
if (!(Allowed & Legal_Mem)) { if (Allowed & Legal_Mem) {
Variable *Reg = makeReg(Ty, RegNum); From = Mem;
if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
} else if (isFloatingType(Ty)) {
_vldr(Reg, Mem);
} else { } else {
Variable *Reg = makeReg(Ty, RegNum);
_ldr(Reg, Mem); _ldr(Reg, Mem);
}
From = Reg; From = Reg;
} else {
From = Mem;
} }
return From; return From;
} }
......
...@@ -189,7 +189,6 @@ protected: ...@@ -189,7 +189,6 @@ protected:
// The following are helpers that insert lowered ARM32 instructions with // The following are helpers that insert lowered ARM32 instructions with
// minimal syntactic overhead, so that the lowering code can look as close to // minimal syntactic overhead, so that the lowering code can look as close to
// assembly as practical. // assembly as practical.
void _add(Variable *Dest, Variable *Src0, Operand *Src1, void _add(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Add::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Add::create(Func, Dest, Src0, Src1, Pred));
...@@ -246,6 +245,10 @@ protected: ...@@ -246,6 +245,10 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Eor::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Eor::create(Func, Dest, Src0, Src1, Pred));
} }
/// _ldr, for all your memory to Variable data moves. It handles all types
/// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
/// type (e.g., no immediates for vector loads, and no index registers for fp
/// loads.)
void _ldr(Variable *Dest, OperandARM32Mem *Addr, void _ldr(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Ldr::create(Func, Dest, Addr, Pred)); Context.insert(InstARM32Ldr::create(Func, Dest, Addr, Pred));
...@@ -266,14 +269,17 @@ protected: ...@@ -266,14 +269,17 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred)); Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
} }
/// If Dest=nullptr is passed in, then a new variable is created, marked as /// _mov, for all your Variable to Variable data movement needs. It handles
/// infinite register allocation weight, and returned through the in/out Dest /// all types (integer, floating point, and vectors), as well as moves between
/// argument. /// Core and VFP registers. This is not a panacea: you must obey the (weird,
void _mov(Variable *&Dest, Operand *Src0, /// confusing, non-uniform) rules for data moves in ARM.
CondARM32::Cond Pred = CondARM32::AL, void _mov(Variable *Dest, Operand *Src0,
int32_t RegNum = Variable::NoRegister) { CondARM32::Cond Pred = CondARM32::AL) {
if (Dest == nullptr) // _mov used to be unique in the sense that it would create a temporary
Dest = makeReg(Src0->getType(), RegNum); // automagically if Dest was nullptr. It won't do that anymore, so we keep
// an assert around just in case there is some untested code path where Dest
// is nullptr.
assert(Dest != nullptr);
// Pred is forwarded unchanged to InstARM32Mov::create; it defaults to
// CondARM32::AL. The created instruction is handed to Context.insert.
Context.insert(InstARM32Mov::create(Func, Dest, Src0, Pred)); Context.insert(InstARM32Mov::create(Func, Dest, Src0, Pred));
} }
void _mov_nonkillable(Variable *Dest, Operand *Src0, void _mov_nonkillable(Variable *Dest, Operand *Src0,
...@@ -348,6 +354,8 @@ protected: ...@@ -348,6 +354,8 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred));
} }
/// _str, for all your Variable to memory transfers. Addr has the same
/// restrictions that it does in _ldr.
void _str(Variable *Value, OperandARM32Mem *Addr, void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Str::create(Func, Value, Addr, Pred)); Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
...@@ -387,6 +395,10 @@ protected: ...@@ -387,6 +395,10 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred)); Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
} }
void _vabs(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vabs::create(Func, Dest, Src, Pred));
}
void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) { void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vadd::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vadd::create(Func, Dest, Src0, Src1));
} }
...@@ -397,10 +409,6 @@ protected: ...@@ -397,10 +409,6 @@ protected:
void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) { void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vdiv::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vdiv::create(Func, Dest, Src0, Src1));
} }
void _vldr(Variable *Dest, OperandARM32Mem *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vldr::create(Func, Dest, Src, Pred));
}
void _vcmp(Variable *Src0, Variable *Src1, void _vcmp(Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred)); Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred));
...@@ -408,33 +416,6 @@ protected: ...@@ -408,33 +416,6 @@ protected:
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred)); Context.insert(InstARM32Vmrs::create(Func, Pred));
} }
// There are a whole bunch of vmov variants, to transfer within S/D/Q
// registers, between core integer registers and S/D, and from small
// immediates into S/D. For integer -> S/D/Q there is a variant which takes
// two integer register to fill a D, or to fill two consecutive S registers.
// Vmov can also be used to insert-element. E.g.,
// "vmov.8 d0[1], r0"
// but insert-element is a "two-address" operation where only part of the
// register is modified. This cannot model that.
//
// This represents the simple single source, single dest variants only.
void _vmov(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmov::create(Func, Dest, Src0, Pred));
}
// This represents the single source, multi dest variant.
void _vmov(InstARM32Vmov::RegisterPair Dests, Variable *Src0) {
constexpr CondARM32::Cond Pred = CondARM32::AL;
Context.insert(InstARM32Vmov::create(Func, Dests, Src0, Pred));
// The Vmov instruction created above does not define Dests._1. Therefore
// we add a Dest._1 = FakeDef pseudo instruction.
Context.insert(InstFakeDef::create(Func, Dests._1));
}
// This represents the multi source, single dest variant.
void _vmov(Variable *Dest, InstARM32Vmov::RegisterPair Srcs) {
constexpr CondARM32::Cond Pred = CondARM32::AL;
Context.insert(InstARM32Vmov::create(Func, Dest, Srcs, Pred));
}
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
} }
...@@ -451,10 +432,11 @@ protected: ...@@ -451,10 +432,11 @@ protected:
/// offset, such that the addressing mode offset bits are now legal. /// offset, such that the addressing mode offset bits are now legal.
void legalizeStackSlots(); void legalizeStackSlots();
/// Returns true if the given Offset can be represented in a stack ldr/str. /// Returns true if the given Offset can be represented in a stack ldr/str.
bool isLegalVariableStackOffset(int32_t Offset) const; bool isLegalVariableStackOffset(Type Ty, int32_t Offset) const;
/// Assuming Var needs its offset legalized, define a new base register /// Assuming Var needs its offset legalized, define a new base register
/// centered on the given Var's offset and use it. /// centered on the given Var's offset plus StackAdjust, and use it.
StackVariable *legalizeVariableSlot(Variable *Var, Variable *OrigBaseReg); StackVariable *legalizeVariableSlot(Variable *Var, int32_t StackAdjust,
Variable *OrigBaseReg);
TargetARM32Features CPUFeatures; TargetARM32Features CPUFeatures;
bool UsesFramePointer = false; bool UsesFramePointer = false;
......
...@@ -91,13 +91,11 @@ entry: ...@@ -91,13 +91,11 @@ entry:
; ARM32-LABEL: pass64BitArg ; ARM32-LABEL: pass64BitArg
; ARM32: sub sp, {{.*}} #16 ; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp, #4]
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123 ; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16 ; ARM32: add sp, {{.*}} #16
; ARM32: sub sp, {{.*}} #16 ; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp, #4]
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
...@@ -105,7 +103,6 @@ entry: ...@@ -105,7 +103,6 @@ entry:
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16 ; ARM32: add sp, {{.*}} #16
; ARM32: sub sp, {{.*}} #16 ; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp, #4]
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
...@@ -147,9 +144,9 @@ entry: ...@@ -147,9 +144,9 @@ entry:
; ARM32-LABEL: pass64BitConstArg ; ARM32-LABEL: pass64BitConstArg
; ARM32: sub sp, {{.*}} #16 ; ARM32: sub sp, {{.*}} #16
; ARM32: movw [[REG1:r.*]], {{.*}} ; 0xbeef ; ARM32: movw [[REG1:r.*]], {{.*}} ; 0xbeef
; ARM32: movt [[REG1:r.*]], {{.*}} ; 0xdead ; ARM32: movt [[REG1]], {{.*}} ; 0xdead
; ARM32: movw [[REG2:r.*]], {{.*}} ; 0x5678 ; ARM32: movw [[REG2:r.*]], {{.*}} ; 0x5678
; ARM32: movt [[REG2:r.*]], {{.*}} ; 0x1234 ; ARM32: movt [[REG2]], {{.*}} ; 0x1234
; ARM32: str [[REG1]], [sp, #4] ; ARM32: str [[REG1]], [sp, #4]
; ARM32: str [[REG2]], [sp] ; ARM32: str [[REG2]], [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
...@@ -438,12 +435,13 @@ entry: ...@@ -438,12 +435,13 @@ entry:
; OPTM1: je ; OPTM1: je
; ARM32-LABEL: shl64BitSigned ; ARM32-LABEL: shl64BitSigned
; ARM32: sub [[REG3:r.*]], [[REG2:r.*]], #32 ; ARM32: rsb [[T0:r[0-9]+]], r2, #32
; ARM32: lsl [[REG1:r.*]], {{r.*}}, [[REG2]] ; ARM32: lsr [[T1:r[0-9]+]], r0, [[T0]]
; ARM32: orr [[REG1]], [[REG1]], [[REG0:r.*]], lsl [[REG3]] ; ARM32: orr [[T2:r[0-9]+]], [[T1]], r1, lsl r2
; ARM32: rsb [[REG4:r.*]], [[REG2]], #32 ; ARM32: sub [[T3:r[0-9]+]], r2, #32
; ARM32: orr [[REG1]], [[REG1]], [[REG0]], lsr [[REG4]] ; ARM32: cmp [[T3]], #0
; ARM32: lsl {{.*}}, [[REG0]], [[REG2]] ; ARM32: lslge [[T2]], r0, [[T3]]
; ARM32: lsl r{{[0-9]+}}, r0, r2
define internal i32 @shl64BitSignedTrunc(i64 %a, i64 %b) { define internal i32 @shl64BitSignedTrunc(i64 %a, i64 %b) {
entry: entry:
...@@ -484,11 +482,12 @@ entry: ...@@ -484,11 +482,12 @@ entry:
; OPTM1: je ; OPTM1: je
; ARM32-LABEL: shl64BitUnsigned ; ARM32-LABEL: shl64BitUnsigned
; ARM32: sub
; ARM32: lsl
; ARM32: orr
; ARM32: rsb ; ARM32: rsb
; ARM32: lsr
; ARM32: orr ; ARM32: orr
; ARM32: sub
; ARM32: cmp
; ARM32: lslge
; ARM32: lsl ; ARM32: lsl
define internal i64 @shr64BitSigned(i64 %a, i64 %b) { define internal i64 @shr64BitSigned(i64 %a, i64 %b) {
...@@ -511,12 +510,13 @@ entry: ...@@ -511,12 +510,13 @@ entry:
; OPTM1: sar {{.*}},0x1f ; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: shr64BitSigned ; ARM32-LABEL: shr64BitSigned
; ARM32: rsb ; ARM32: lsr [[T0:r[0-9]+]], r0, r2
; ARM32: lsr ; ARM32: rsb [[T1:r[0-9]+]], r2, #32
; ARM32: orr ; ARM32: orr r0, [[T0]], r1, lsl [[T1]]
; ARM32: subs ; ARM32: sub [[T2:r[0-9]+]], r2, #32
; ARM32: orrpl ; ARM32: cmp [[T2]], #0
; ARM32: asr ; ARM32: asrge r0, r1, [[T2]]
; ARM32: asr r{{[0-9]+}}, r1, r2
define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) { define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) {
entry: entry:
...@@ -538,11 +538,12 @@ entry: ...@@ -538,11 +538,12 @@ entry:
; OPTM1: sar {{.*}},0x1f ; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: shr64BitSignedTrunc ; ARM32-LABEL: shr64BitSignedTrunc
; ARM32: rsb
; ARM32: lsr ; ARM32: lsr
; ARM32: rsb
; ARM32: orr ; ARM32: orr
; ARM32: subs ; ARM32: sub
; ARM32: orrpl ; ARM32: cmp
; ARM32: asrge
define internal i64 @shr64BitUnsigned(i64 %a, i64 %b) { define internal i64 @shr64BitUnsigned(i64 %a, i64 %b) {
entry: entry:
...@@ -562,11 +563,12 @@ entry: ...@@ -562,11 +563,12 @@ entry:
; OPTM1: je ; OPTM1: je
; ARM32-LABEL: shr64BitUnsigned ; ARM32-LABEL: shr64BitUnsigned
; ARM32: rsb
; ARM32: lsr ; ARM32: lsr
; ARM32: rsb
; ARM32: orr ; ARM32: orr
; ARM32: sub ; ARM32: sub
; ARM32: orr ; ARM32: cmp
; ARM32: lsrge
; ARM32: lsr ; ARM32: lsr
define internal i32 @shr64BitUnsignedTrunc(i64 %a, i64 %b) { define internal i32 @shr64BitUnsignedTrunc(i64 %a, i64 %b) {
...@@ -588,11 +590,12 @@ entry: ...@@ -588,11 +590,12 @@ entry:
; OPTM1: je ; OPTM1: je
; ARM32-LABEL: shr64BitUnsignedTrunc ; ARM32-LABEL: shr64BitUnsignedTrunc
; ARM32: rsb
; ARM32: lsr ; ARM32: lsr
; ARM32: rsb
; ARM32: orr ; ARM32: orr
; ARM32: sub ; ARM32: sub
; ARM32: orr ; ARM32: cmp
; ARM32: lsrge
define internal i64 @and64BitSigned(i64 %a, i64 %b) { define internal i64 @and64BitSigned(i64 %a, i64 %b) {
entry: entry:
......
...@@ -54,7 +54,7 @@ entry: ...@@ -54,7 +54,7 @@ entry:
; ARM32-LABEL: cast_d2ll_const ; ARM32-LABEL: cast_d2ll_const
; ARM32-DAG: movw [[ADDR:r[0-9]+]], #:lower16:.L$ ; ARM32-DAG: movw [[ADDR:r[0-9]+]], #:lower16:.L$
; ARM32-DAG: movt [[ADDR]], #:upper16:.L$ ; ARM32-DAG: movt [[ADDR]], #:upper16:.L$
; ARM32-DAG: vldr [[DREG:d[0-9]+]], {{\[}}[[ADDR]], #0{{\]}} ; ARM32-DAG: vldr [[DREG:d[0-9]+]], {{\[}}[[ADDR]]{{\]}}
; ARM32: vmov r{{[0-9]+}}, r{{[0-9]+}}, [[DREG]] ; ARM32: vmov r{{[0-9]+}}, r{{[0-9]+}}, [[DREG]]
define internal double @cast_ll2d(i64 %ll) { define internal double @cast_ll2d(i64 %ll) {
......
...@@ -99,7 +99,7 @@ entry: ...@@ -99,7 +99,7 @@ entry:
; ARM32-LABEL: doubleToSigned32Const ; ARM32-LABEL: doubleToSigned32Const
; ARM32-DAG: movw [[ADDR:r[0-9]+]], #:lower16:.L$ ; ARM32-DAG: movw [[ADDR:r[0-9]+]], #:lower16:.L$
; ARM32-DAG: movt [[ADDR]], #:upper16:.L$ ; ARM32-DAG: movt [[ADDR]], #:upper16:.L$
; ARM32-DAG: vldr [[DREG:d[0-9]+]], {{\[}}[[ADDR]], #0{{\]}} ; ARM32-DAG: vldr [[DREG:d[0-9]+]], {{\[}}[[ADDR]]{{\]}}
; ARM32-DAG: vcvt.s32.f64 [[REG:s[0-9]+]], [[DREG]] ; ARM32-DAG: vcvt.s32.f64 [[REG:s[0-9]+]], [[DREG]]
; ARM32-DAF: vmov {{r[0-9]+}}, [[REG]] ; ARM32-DAF: vmov {{r[0-9]+}}, [[REG]]
......
...@@ -183,8 +183,8 @@ entry: ...@@ -183,8 +183,8 @@ entry:
; CHECK-NEXT: mov {{.*}} [esp+0x14] ; CHECK-NEXT: mov {{.*}} [esp+0x14]
; CHECK: ret ; CHECK: ret
; ARM32-LABEL: test_returning64_even_arg2 ; ARM32-LABEL: test_returning64_even_arg2
; ARM32-NEXT: ldr r0, [sp] ; ARM32-DAG: ldr r0, [sp]
; ARM32-NEXT: ldr r1, [sp, #4] ; ARM32-DAG: ldr r1, [sp, #4]
; ARM32-NEXT: bx lr ; ARM32-NEXT: bx lr
define i64 @test_returning64_even_arg2b(i64 %arg0, i32 %arg1, i32 %arg1b, i64 %arg2) { define i64 @test_returning64_even_arg2b(i64 %arg0, i32 %arg1, i32 %arg1b, i64 %arg2) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment