Subzero. ARM32. Strength reduce multiplications.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1469113003 .

Subzero. ARM32. Strength reduce multiplications.
98cc08ca · John Porto · 614140e2 · 98cc08ca · 98cc08ca · 98cc08ca
Commit 98cc08ca authored Nov 24, 2015 by John Porto
7 changed files
--- a/crosstest/test_arith.cpp
+++ b/crosstest/test_arith.cpp
@@ -49,3 +49,37 @@ SINTOP_TABLE
  v4f32 test##inst(v4f32 a, v4f32 b) { return func(a op b); }
 FPOP_TABLE
 #undef X
+#define X(mult_by)                                                             \
+  bool testMultiplyBy##mult_by(bool a, bool /*unused*/) {                      \
+    return a * (mult_by);                                                      \
+  }                                                                            \
+  bool testMultiplyByNeg##mult_by(bool a, bool /*unused*/) {                   \
+    return a * (-(mult_by));                                                   \
+  }                                                                            \
+  uint8_t testMultiplyBy##mult_by(uint8_t a, uint8_t /*unused*/) {             \
+    return a * (mult_by);                                                      \
+  }                                                                            \
+  uint8_t testMultiplyByNeg##mult_by(uint8_t a, uint8_t /*unused*/) {          \
+    return a * (-(mult_by));                                                   \
+  }                                                                            \
+  uint16_t testMultiplyBy##mult_by(uint16_t a, uint16_t /*unused*/) {          \
+    return a * (mult_by);                                                      \
+  }                                                                            \
+  uint16_t testMultiplyByNeg##mult_by(uint16_t a, uint16_t /*unused*/) {       \
+    return a * (-(mult_by));                                                   \
+  }                                                                            \
+  uint32_t testMultiplyBy##mult_by(uint32_t a, uint32_t /*unused*/) {          \
+    return a * (mult_by);                                                      \
+  }                                                                            \
+  uint32_t testMultiplyByNeg##mult_by(uint32_t a, uint32_t /*unused*/) {       \
+    return a * (-(mult_by));                                                   \
+  }                                                                            \
+  uint64_t testMultiplyBy##mult_by(uint64_t a, uint64_t /*unused*/) {          \
+    return a * (mult_by);                                                      \
+  }                                                                            \
+  uint64_t testMultiplyByNeg##mult_by(uint64_t a, uint64_t /*unused*/) {       \
+    return a * (-(mult_by));                                                   \
+  }
+MULIMM_TABLE
+#undef X
--- a/crosstest/test_arith.def
+++ b/crosstest/test_arith.def
@@ -17,35 +17,35 @@
 #define XSTR(s) STR(s)
 #define STR(s) #s
-#define UINTOP_TABLE                 \
+#define UINTOP_TABLE                                                           \
-  /* inst, operator, div, shift */   \
+  /* inst, operator, div, shift */                                             \
-  X(Add,   +,        0,   0)         \
+  X(Add,   +,        0,   0)                                                   \
-  X(Sub,   -,        0,   0)         \
+  X(Sub,   -,        0,   0)                                                   \
-  X(Mul,   *,        0,   0)         \
+  X(Mul,   *,        0,   0)                                                   \
-  X(Udiv,  /,        1,   0)         \
+  X(Udiv,  /,        1,   0)                                                   \
-  X(Urem,  %,        1,   0)         \
+  X(Urem,  %,        1,   0)                                                   \
-  X(Shl,   <<,       0,   1)         \
+  X(Shl,   <<,       0,   1)                                                   \
-  X(Lshr,  >>,       0,   1)         \
+  X(Lshr,  >>,       0,   1)                                                   \
-  X(And,   &,        0,   0)         \
+  X(And,   &,        0,   0)                                                   \
-  X(Or,    |,        0,   0)         \
+  X(Or,    |,        0,   0)                                                   \
-  X(Xor,   ^,        0,   0)         \
+  X(Xor,   ^,        0,   0)                                                   \
 //#define X(inst, op, isdiv, isshift)
-#define SINTOP_TABLE                 \
+#define SINTOP_TABLE                                                           \
-  /* inst, operator, div, shift */   \
+  /* inst, operator, div, shift */                                             \
-  X(Sdiv,  /,        1,   0)         \
+  X(Sdiv,  /,        1,   0)                                                   \
-  X(Srem,  %,        1,   0)         \
+  X(Srem,  %,        1,   0)                                                   \
-  X(Ashr,  >>,       0,   1)         \
+  X(Ashr,  >>,       0,   1)                                                   \
 //#define X(inst, op, isdiv, isshift)
 #define COMMA ,
-#define FPOP_TABLE           \
+#define FPOP_TABLE                                                             \
-  /* inst, infix_op, func */ \
+  /* inst, infix_op, func */                                                   \
-  X(Fadd,  +,              ) \
+  X(Fadd,  +,              )                                                   \
-  X(Fsub,  -,              ) \
+  X(Fsub,  -,              )                                                   \
-  X(Fmul,  *,              ) \
+  X(Fmul,  *,              )                                                   \
-  X(Fdiv,  /,              ) \
+  X(Fdiv,  /,              )                                                   \
-  X(Frem,  COMMA,    myFrem) \
+  X(Frem,  COMMA,    myFrem)                                                   \
 //#define X(inst, op, func)
 // Note: The above definition of COMMA, plus the "func" argument to
@@ -55,30 +55,51 @@
 // instruction and "(a + b)" for the Fadd instruction.  The two
 // versions of myFrem() are defined in a separate bitcode file.
-#define INT_VALUE_ARRAY                           \
+#define INT_VALUE_ARRAY                                                        \
-{ 0x0,        0x1,        0x7ffffffe, 0x7fffffff, \
+{ 0x0,        0x1,        0x7ffffffe, 0x7fffffff,                              \
-  0x80000000, 0x80000001, 0xfffffffe, 0xffffffff, \
+  0x80000000, 0x80000001, 0xfffffffe, 0xffffffff,                              \
-  0x1e, 0x1f, 0x20, 0x21, 0x3e, 0x3f, 0x40, 0x41, \
+  0x1e, 0x1f, 0x20, 0x21, 0x3e, 0x3f, 0x40, 0x41,                              \
-  0x7e,       0x7f,       0x80,       0x81,       \
+  0x7e,       0x7f,       0x80,       0x81,                                    \
-  0xfe,       0xff,       0x100,      0x101,      \
+  0xfe,       0xff,       0x100,      0x101,                                   \
-  0x7ffe,     0x7fff,     0x8000,     0x8001,     \
+  0x7ffe,     0x7fff,     0x8000,     0x8001,                                  \
  0xfffe,     0xffff,     0x10000,    0x10001 }
-#define FP_VALUE_ARRAY(NegInf, PosInf, NegNan, NaN)                 \
+#define FP_VALUE_ARRAY(NegInf, PosInf, NegNan, NaN)                            \
-{ 0,                    1,                    1.4,                  \
+{ 0,                    1,                    1.4,                             \
-  1.5,                  1.6,                  -1.4,                 \
+  1.5,                  1.6,                  -1.4,                            \
-  -1.5,                 -1.6,                 0x7e,                 \
+  -1.5,                 -1.6,                 0x7e,                            \
-  0x7f,                 0x80,                 0x81,                 \
+  0x7f,                 0x80,                 0x81,                            \
-  0xfe,                 0xff,                 0x7ffe,               \
+  0xfe,                 0xff,                 0x7ffe,                          \
-  0x7fff,               0x8000,               0x8001,               \
+  0x7fff,               0x8000,               0x8001,                          \
-  0xfffe,               0xffff,               0x7ffffffe,           \
+  0xfffe,               0xffff,               0x7ffffffe,                      \
-  0x7fffffff,           0x80000000,           0x80000001,           \
+  0x7fffffff,           0x80000000,           0x80000001,                      \
-  0xfffffffe,           0xffffffff,           0x100000000ll,        \
+  0xfffffffe,           0xffffffff,           0x100000000ll,                   \
-  0x100000001ll,        0x7ffffffffffffffell, 0x7fffffffffffffffll, \
+  0x100000001ll,        0x7ffffffffffffffell, 0x7fffffffffffffffll,            \
-  0x8000000000000000ll, 0x8000000000000001ll, 0xfffffffffffffffell, \
+  0x8000000000000000ll, 0x8000000000000001ll, 0xfffffffffffffffell,            \
-  0xffffffffffffffffll, NegInf,               PosInf,               \
+  0xffffffffffffffffll, NegInf,               PosInf,                          \
-  Nan,                  NegNan,               -0.0,                 \
+  Nan,                  NegNan,               -0.0,                            \
-  10.0,                 FLT_MIN,              FLT_MAX,              \
+  10.0,                 FLT_MIN,              FLT_MAX,                         \
  DBL_MIN,              DBL_MAX }
+#define MULIMM_TABLE                                                           \
+   /* mult_by */                                                               \
+  X(         0)                                                                \
+  X(         1)                                                                \
+  X(         2)                                                                \
+  X(         3)                                                                \
+  X(         4)                                                                \
+  X(         5)                                                                \
+  X(         7)                                                                \
+  X(         8)                                                                \
+  X(         9)                                                                \
+  X(        10)                                                                \
+  X(        25)                                                                \
+  X(       100)                                                                \
+  X(       232)                                                                \
+  X(0x00FFF001)                                                                \
+  X(0x01000000)                                                                \
+  X(0x7FFFF07F)                                                                \
+  X(0x80000000)                                                                \
+//#define X(mult_by)
 #endif // TEST_ARITH_DEF
--- a/crosstest/test_arith.h
+++ b/crosstest/test_arith.h
@@ -60,3 +60,17 @@ double mySqrt(double a);
 float myFabs(float a);
 double myFabs(double a);
 v4f32 myFabs(v4f32 a);
+#define X(mult_by)                                                             \
+  bool testMultiplyBy##mult_by(bool a, bool);                                  \
+  bool testMultiplyByNeg##mult_by(bool a, bool);                               \
+  uint8_t testMultiplyBy##mult_by(uint8_t a, uint8_t);                         \
+  uint8_t testMultiplyByNeg##mult_by(uint8_t a, uint8_t);                      \
+  uint16_t testMultiplyBy##mult_by(uint16_t a, uint16_t);                      \
+  uint16_t testMultiplyByNeg##mult_by(uint16_t a, uint16_t);                   \
+  uint32_t testMultiplyBy##mult_by(uint32_t a, uint32_t);                      \
+  uint32_t testMultiplyByNeg##mult_by(uint32_t a, uint32_t);                   \
+  uint64_t testMultiplyBy##mult_by(uint64_t a, uint64_t);                      \
+  uint64_t testMultiplyByNeg##mult_by(uint64_t a, uint64_t);
+MULIMM_TABLE
+#undef X
--- a/crosstest/test_arith_main.cpp
+++ b/crosstest/test_arith_main.cpp
@@ -73,7 +73,15 @@ void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  ,
          SINTOP_TABLE
 #undef X
-  };
+#define X(mult_by)                                                             \
+  {                                                                            \
+    "Mult-By-" STR(mult_by), testMultiplyBy##mult_by,                          \
+        Subzero_::testMultiplyBy##mult_by, NULL, NULL, false                   \
+  }                                                                            \
+  , {"Mult-By-Neg-" STR(mult_by), testMultiplyByNeg##mult_by,                  \
+     Subzero_::testMultiplyByNeg##mult_by, NULL, NULL, false},
+              MULIMM_TABLE};
+#undef X
  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
  if (sizeof(TypeUnsigned) <= sizeof(uint32_t)) {

--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -977,15 +977,16 @@ private:
  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
  // without specifying a physical register. This is needed for creating unbound
  // temporaries during Ice -> ARM lowering, but before register allocation.
-  // This a safe-guard that, during the legalization post-passes no unbound
+  // This is a safeguard that no unbound temporaries are created during the
-  // temporaries are created.
+  // legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
-    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg&) = delete;
+    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
-    ForbidTemporaryWithoutReg &operator=(const ForbidTemporaryWithoutReg&) = delete;
+    ForbidTemporaryWithoutReg &
+    operator=(const ForbidTemporaryWithoutReg &) = delete;
  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {

--- a/tests_lit/llvm2ice_tests/arith.ll
+++ b/tests_lit/llvm2ice_tests/arith.ll
@@ -11,7 +11,7 @@
 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
 ; RUN:   -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command FileCheck --check-prefix ARM32 %s
+; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OPT2 %s
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
 ; RUN:   -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
@@ -21,7 +21,7 @@
 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
 ; RUN:   -i %s --args -Om1 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command FileCheck --check-prefix ARM32 %s
+; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OPTM1 %s
 ;
 ; RUN: %if --need=target_MIPS32 --need=allow_dump \
 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
@@ -117,8 +117,11 @@ entry:
 ; CHECK-LABEL: MulImm
 ; CHECK: imul e{{.*}},e{{.*}},0x63
 ; ARM32-LABEL: MulImm
-; ARM32: movw {{.*}}, #99
+; ARM32-OPTM1: movw {{.*}}, #99
-; ARM32: mul r{{.*}}, r{{.*}}, r{{.*}}
+; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}}
+; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2
+; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7
+; ARM32-OPT2-DAG: sub [[T]], [[T]], [[S]], lsl #5
 ; MIPS32-LABEL: MulImm
 ; MIPS32: mul