Lower byte swap intrinsic.

Clump the negate instruction with the bswap instruction as an "in-place" operation. One difference is that bswap has stricter requirements on the operand type. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882 R=stichnot@chromium.org, wala@chromium.org Review URL: https://codereview.chromium.org/401533002

Lower byte swap intrinsic.
7fa813b3 · Jan Voung · 49889239 · 7fa813b3 · 7fa813b3 · 7fa813b3
Commit 7fa813b3 authored Jul 18, 2014 by Jan Voung
9 changed files
--- a/crosstest/test_bitmanip.cpp
+++ b/crosstest/test_bitmanip.cpp
@@ -38,3 +38,8 @@

 FOR_ALL_BMI_OP_TYPES(X)
 #undef X
+
+#define X(type, builtin_name)                                                  \
+  type test_bswap(type a) { return builtin_name(a); }
+BSWAP_TABLE
+#undef X
--- a/crosstest/test_bitmanip.def
+++ b/crosstest/test_bitmanip.def
@@ -39,4 +39,10 @@
  FOR_ALL_BMI_TYPES_INST(X, ctpop)
 //#define X(inst, type)

+#define BSWAP_TABLE              \
+  /* type, builtin_name */       \
+  X(uint16_t, __builtin_bswap16) \
+  X(uint32_t, __builtin_bswap32) \
+  X(uint64_t, __builtin_bswap64)
+
 #endif // TEST_BIT_MANIP_DEF
--- a/crosstest/test_bitmanip.h
+++ b/crosstest/test_bitmanip.h
@@ -22,3 +22,7 @@

 FOR_ALL_BMI_OP_TYPES(X)
 #undef X
+
+#define X(type, builtin_name) type test_bswap(type);
+BSWAP_TABLE
+#undef X
--- a/crosstest/test_bitmanip_main.cpp
+++ b/crosstest/test_bitmanip_main.cpp
@@ -28,27 +28,31 @@ namespace Subzero_ {
 }

 volatile uint64_t Values[] = {
-    0,                    1,                    0x7e,
-    0x7f,                 0x80,                 0x81,
-    0xfe,                 0xff,                 0x7ffe,
-    0x7fff,               0x8000,               0x8001,
-    0xfffe,               0xffff,
-    0x007fffff /*Max subnormal + */,
-    0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */,
-    0x7f800000 /*+Inf*/,  0xff800000 /*-Inf*/,
-    0x7fa00000 /*SNaN*/,  0x7fc00000 /*QNaN*/,
-    0x7ffffffe,           0x7fffffff,           0x80000000,
-    0x80000001,           0xfffffffe,           0xffffffff,
-    0x100000000ll,        0x100000001ll,
-    0x000fffffffffffffll /*Max subnormal + */,
-    0x0010000000000000ll /*Min+ */,
-    0x7fefffffffffffffll /*Max+ */,
-    0x7ff0000000000000ll /*+Inf*/,
-    0xfff0000000000000ll /*-Inf*/,
-    0x7ff0000000000001ll /*SNaN*/,
-    0x7ff8000000000000ll /*QNaN*/,
-    0x7ffffffffffffffell, 0x7fffffffffffffffll, 0x8000000000000000ll,
-    0x8000000000000001ll, 0xfffffffffffffffell, 0xffffffffffffffffll };
+    0,                              1,
+    0x7e,                           0x7f,
+    0x80,                           0x81,
+    0xfe,                           0xff,
+    0x7ffe,                         0x7fff,
+    0x8000,                         0x8001,
+    0xfffe,                         0xffff,
+    0xc0de,                         0xabcd,
+    0xdcba,                         0x007fffff /*Max subnormal + */,
+    0x00800000 /*Min+ */,           0x7f7fffff /*Max+ */,
+    0x7f800000 /*+Inf*/,            0xff800000 /*-Inf*/,
+    0x7fa00000 /*SNaN*/,            0x7fc00000 /*QNaN*/,
+    0x7ffffffe,                     0x7fffffff,
+    0x80000000,                     0x80000001,
+    0xfffffffe,                     0xffffffff,
+    0x12345678,                     0xabcd1234,
+    0x1234dcba,                     0x100000000ll,
+    0x100000001ll,                  0x123456789abcdef1ll,
+    0x987654321ab1fedcll,           0x000fffffffffffffll /*Max subnormal + */,
+    0x0010000000000000ll /*Min+ */, 0x7fefffffffffffffll /*Max+ */,
+    0x7ff0000000000000ll /*+Inf*/,  0xfff0000000000000ll /*-Inf*/,
+    0x7ff0000000000001ll /*SNaN*/,  0x7ff8000000000000ll /*QNaN*/,
+    0x7ffffffffffffffell,           0x7fffffffffffffffll,
+    0x8000000000000000ll,           0x8000000000000001ll,
+    0xfffffffffffffffell,           0xffffffffffffffffll};

 const static size_t NumValues = sizeof(Values) / sizeof(*Values);

@@ -96,6 +100,25 @@ void testBitManip(size_t &TotalTests, size_t &Passes, size_t &Failures) {
  }
 }

+// Cross-checks the byte-swap builtin for one integer width. Every entry of
+// Values is narrowed to Type, run through both builds of test_bswap, and the
+// two results are compared. TotalTests is bumped once per value, and either
+// Passes or Failures is bumped depending on whether the results agree; a
+// mismatch also prints one diagnostic line to std::cout.
+template <typename Type>
+void testByteSwap(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+  for (size_t i = 0; i < NumValues; ++i) {
+    Type Value = static_cast<Type>(Values[i]);
+    ++TotalTests;
+    // Going by the namespace name, Subzero_::test_bswap is the
+    // Subzero-translated build and the unqualified test_bswap is the llc
+    // reference build. Keep each result under the matching name so the
+    // "sz=" / "llc=" tags in the failure message are not swapped.
+    Type ResultSz = Subzero_::test_bswap(Value);
+    Type ResultLlc = test_bswap(Value);
+    if (ResultSz == ResultLlc) {
+      ++Passes;
+    } else {
+      ++Failures;
+      // Widen to uint64_t so narrow types print as numbers.
+      std::cout << "test_bswap" << (CHAR_BIT * sizeof(Type)) << "("
+                << static_cast<uint64_t>(Value)
+                << "): sz=" << static_cast<uint64_t>(ResultSz)
+                << " llc=" << static_cast<uint64_t>(ResultLlc) << "\n";
+    }
+  }
+}
+
 int main(int argc, char **argv) {
  size_t TotalTests = 0;
  size_t Passes = 0;
@@ -103,6 +126,9 @@ int main(int argc, char **argv) {

  testBitManip<uint32_t>(TotalTests, Passes, Failures);
  testBitManip<uint64_t>(TotalTests, Passes, Failures);
+  testByteSwap<uint16_t>(TotalTests, Passes, Failures);
+  testByteSwap<uint32_t>(TotalTests, Passes, Failures);
+  testByteSwap<uint64_t>(TotalTests, Passes, Failures);

  std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
            << " Failures=" << Failures << "\n";

--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -95,11 +95,6 @@ InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
  addSource(Source2);
 }

-InstX8632Neg::InstX8632Neg(Cfg *Func, Operand *SrcDest)
-    : InstX8632(Func, InstX8632::Neg, 1, llvm::dyn_cast<Variable>(SrcDest)) {
-  addSource(SrcDest);
-}
-
 InstX8632Shld::InstX8632Shld(Cfg *Func, Variable *Dest, Variable *Source1,
                             Variable *Source2)
    : InstX8632(Func, InstX8632::Shld, 3, Dest) {
@@ -440,6 +435,9 @@ void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
 }


+// In-place ops
+template <> const char *InstX8632Bswap::Opcode = "bswap";
+template <> const char *InstX8632Neg::Opcode = "neg";
 // Unary ops
 template <> const char *InstX8632Bsf::Opcode = "bsf";
 template <> const char *InstX8632Bsr::Opcode = "bsr";
@@ -473,6 +471,7 @@ template <> const char *InstX8632Div::Opcode = "div";
 template <> const char *InstX8632Divps::Opcode = "divps";
 template <> const char *InstX8632Idiv::Opcode = "idiv";
 template <> const char *InstX8632Divss::Opcode = "divss";
+template <> const char *InstX8632Rol::Opcode = "rol";
 template <> const char *InstX8632Shl::Opcode = "shl";
 template <> const char *InstX8632Psll::Opcode = "psll";
 template <> const char *InstX8632Shr::Opcode = "shr";
@@ -611,21 +610,6 @@ void InstX8632Mul::dump(const Cfg *Func) const {
  dumpSources(Func);
 }

-void InstX8632Neg::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Str << "\tneg\t";
-  getSrc(0)->emit(Func);
-  Str << "\n";
-}
-
-void InstX8632Neg::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = neg." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
 void InstX8632Shld::emit(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 3);

--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -141,6 +141,7 @@ public:
    Br,
    Bsf,
    Bsr,
+    Bswap,
    Call,
    Cdq,
    Cmov,
@@ -188,6 +189,7 @@ public:
    Push,
    Pxor,
    Ret,
+    Rol,
    Sar,
    Sbb,
    Shl,
@@ -352,6 +354,42 @@ private:
  virtual ~InstX8632Call() {}
 };

+// Instructions of the form x := op(x). The one operand is registered as the single source and, when it is a Variable, is also passed to the base class as the destination. K selects the instruction kind.
+template <InstX8632::InstKindX8632 K>
+class InstX8632Inplaceop : public InstX8632 {
+public:
+  static InstX8632Inplaceop *create(Cfg *Func, Operand *SrcDest) { // factory: constructs the instruction in storage obtained from Func->allocate
+    return new (Func->allocate<InstX8632Inplaceop>())
+        InstX8632Inplaceop(Func, SrcDest);
+  }
+  virtual void emit(const Cfg *Func) const { // writes "\t<Opcode>\t<operand>\n" to the emit stream
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(getSrcSize() == 1); // exactly one source: the in-place operand
+    Str << "\t" << Opcode << "\t";
+    getSrc(0)->emit(Func);
+    Str << "\n";
+  }
+  virtual void dump(const Cfg *Func) const { // writes "<dest> = <Opcode>.<dest type> <sources>" to the dump stream
+    Ostream &Str = Func->getContext()->getStrDump();
+    dumpDest(Func);
+    Str << " = " << Opcode << "." << getDest()->getType() << " "; // NOTE(review): dereferences getDest(); assumes SrcDest was a Variable so the dest is non-null - confirm
+    dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) { return isClassof(Inst, K); } // kind test against this instantiation's K
+
+private:
+  InstX8632Inplaceop(Cfg *Func, Operand *SrcDest)
+      : InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) { // one source slot; the dest is SrcDest viewed as a Variable (dyn_cast result)
+    addSource(SrcDest);
+  }
+  InstX8632Inplaceop(const InstX8632Inplaceop &) LLVM_DELETED_FUNCTION; // copy construction disabled
+  InstX8632Inplaceop &
+  operator=(const InstX8632Inplaceop &) LLVM_DELETED_FUNCTION; // copy assignment disabled
+  virtual ~InstX8632Inplaceop() {}
+  static const char *Opcode; // mnemonic text used by emit() and dump(); declared here, defined outside the class
+};
+
+// Instructions of the form x := op(y)
 template <InstX8632::InstKindX8632 K>
 class InstX8632Unaryop : public InstX8632 {
 public:
@@ -506,6 +544,8 @@ private:
  static const char *Opcode;
 };

+typedef InstX8632Inplaceop<InstX8632::Bswap> InstX8632Bswap;
+typedef InstX8632Inplaceop<InstX8632::Neg> InstX8632Neg;
 typedef InstX8632Unaryop<InstX8632::Bsf> InstX8632Bsf;
 typedef InstX8632Unaryop<InstX8632::Bsr> InstX8632Bsr;
 typedef InstX8632Unaryop<InstX8632::Lea> InstX8632Lea;
@@ -535,6 +575,7 @@ typedef InstX8632Binop<InstX8632::Pmullw> InstX8632Pmullw;
 typedef InstX8632Binop<InstX8632::Pmuludq> InstX8632Pmuludq;
 typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps;
 typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
+typedef InstX8632Binop<InstX8632::Rol, true> InstX8632Rol;
 typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
 typedef InstX8632Binop<InstX8632::Psll> InstX8632Psll;
 typedef InstX8632Binop<InstX8632::Shr, true> InstX8632Shr;
@@ -590,23 +631,6 @@ private:
  virtual ~InstX8632Mul() {}
 };

-// Neg instruction - Two's complement negation.
-class InstX8632Neg : public InstX8632 {
-public:
-  static InstX8632Neg *create(Cfg *Func, Operand *SrcDest) {
-    return new (Func->allocate<InstX8632Neg>()) InstX8632Neg(Func, SrcDest);
-  }
-  virtual void emit(const Cfg *Func) const;
-  virtual void dump(const Cfg *Func) const;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Neg); }
-
-private:
-  InstX8632Neg(Cfg *Func, Operand *SrcDest);
-  InstX8632Neg(const InstX8632Neg &) LLVM_DELETED_FUNCTION;
-  InstX8632Neg &operator=(const InstX8632Neg &) LLVM_DELETED_FUNCTION;
-  virtual ~InstX8632Neg() {}
-};
-
 // Shld instruction - shift across a pair of operands.  TODO: Verify
 // that the validator accepts the shld instruction.
 class InstX8632Shld : public InstX8632 {

--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2596,9 +2596,35 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
    _mfence();
    return;
  }
-  case Intrinsics::Bswap:
-    Func->setError("Unhandled intrinsic");
+  case Intrinsics::Bswap: {
+    Variable *Dest = Instr->getDest();
+    Operand *Val = Instr->getArg(0);
+    // In 32-bit mode, bswap only works on 32-bit arguments, and the
+    // argument must be a register. Use rotate left for 16-bit bswap.
+    if (Val->getType() == IceType_i64) {
+      Variable *T_Lo = legalizeToVar(loOperand(Val));
+      Variable *T_Hi = legalizeToVar(hiOperand(Val));
+      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+      _bswap(T_Lo);
+      _bswap(T_Hi);
+      _mov(DestLo, T_Hi);
+      _mov(DestHi, T_Lo);
+    } else if (Val->getType() == IceType_i32) {
+      Variable *T = legalizeToVar(Val);
+      _bswap(T);
+      _mov(Dest, T);
+    } else {
+      assert(Val->getType() == IceType_i16);
+      Val = legalize(Val);
+      Constant *Eight = Ctx->getConstantInt(IceType_i16, 8);
+      Variable *T = NULL;
+      _mov(T, Val);
+      _rol(T, Eight);
+      _mov(Dest, T);
+    }
    return;
+  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);

--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -196,6 +196,9 @@ protected:
  void _bsr(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Bsr::create(Func, Dest, Src0));
  }
+  void _bswap(Variable *SrcDest) {
+    Context.insert(InstX8632Bswap::create(Func, SrcDest));
+  }
  void _cdq(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Cdq::create(Func, Dest, Src0));
  }
@@ -342,6 +345,9 @@ protected:
  void _ret(Variable *Src0 = NULL) {
    Context.insert(InstX8632Ret::create(Func, Src0));
  }
+  void _rol(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Rol::create(Func, Dest, Src0));
+  }
  void _sar(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Sar::create(Func, Dest, Src0));
  }

--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -22,6 +22,9 @@ declare i32 @llvm.nacl.setjmp(i8*)
 declare float @llvm.sqrt.f32(float)
 declare double @llvm.sqrt.f64(double)
 declare void @llvm.trap()
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
 declare i32 @llvm.ctlz.i32(i32, i1)
 declare i64 @llvm.ctlz.i64(i64, i1)
 declare i32 @llvm.cttz.i32(i32, i1)
@@ -242,6 +245,33 @@ NonZero:
 ; CHECK-LABEL: test_trap
 ; CHECK: ud2

+define i32 @test_bswap_16(i32 %x) {
+entry:
+  %x_trunc = trunc i32 %x to i16
+  %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
+  %r_zext = zext i16 %r to i32
+  ret i32 %r_zext
+}
+; CHECK-LABEL: test_bswap_16
+; CHECK: rol {{.*}}, 8
+
+define i32 @test_bswap_32(i32 %x) {
+entry:
+  %r = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %r
+}
+; CHECK-LABEL: test_bswap_32
+; CHECK: bswap e{{.*}}
+
+define i64 @test_bswap_64(i64 %x) {
+entry:
+  %r = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %r
+}
+; CHECK-LABEL: test_bswap_64
+; CHECK: bswap e{{.*}}
+; CHECK: bswap e{{.*}}
+
 define i32 @test_ctlz_32(i32 %x) {
 entry:
  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 0)