Support saturating vector add and subtract on ARM32.

Bug b/37495545 Change-Id: I1d025f894bb7cf08dcaafd950605781633596ab3 Reviewed-on: https://chromium-review.googlesource.com/689098Reviewed-by: Jim Stichnoth <stichnot@chromium.org> Reviewed-on: https://swiftshader-review.googlesource.com/12748Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com>

Support saturating vector add and subtract on ARM32.
b40560b1 · Casey Dahlin · Nicolas Capens · 8d90a34e · b40560b1 · b40560b1
Commit b40560b1 authored Jun 28, 2017 by Casey Dahlin Committed by Nicolas Capens Sep 29, 2017
6 changed files
--- a/third_party/subzero/src/IceAssemblerARM32.cpp
+++ b/third_party/subzero/src/IceAssemblerARM32.cpp
@@ -3381,6 +3381,62 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
  emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd);
 }
+void AssemblerARM32::vqaddqi(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQADD (integer) - ARM section A8.6.369, encoding A1:
+  //   vqadd<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100100Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqaddqi expects vector with integer element type");
+  constexpr const char *Vqaddqi = "vqaddqi";
+  constexpr IValueT VqaddqiOpcode = B4;
+  emitSIMDqqq(VqaddqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqi);
+}
+void AssemblerARM32::vqaddqu(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQADD (integer) - ARM section A8.6.369, encoding A1:
+  //   vqadd<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100110Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqaddqu expects vector with integer element type");
+  constexpr const char *Vqaddqu = "vqaddqu";
+  constexpr IValueT VqaddquOpcode = B24 | B4;
+  emitSIMDqqq(VqaddquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqu);
+}
+void AssemblerARM32::vqsubqi(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQSUB (integer) - ARM section A8.6.369, encoding A1:
+  //   vqsub<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100100Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqsubqi expects vector with integer element type");
+  constexpr const char *Vqsubqi = "vqsubqi";
+  constexpr IValueT VqsubqiOpcode = B9 | B4;
+  emitSIMDqqq(VqsubqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqi);
+}
+void AssemblerARM32::vqsubqu(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQSUB (integer) - ARM section A8.6.369, encoding A1:
+  //   vqsub<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100110Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqsubqu expects vector with integer element type");
+  constexpr const char *Vqsubqu = "vqsubqu";
+  constexpr IValueT VqsubquOpcode = B24 | B9 | B4;
+  emitSIMDqqq(VqsubquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqu);
+}
 void AssemblerARM32::vsubqi(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm, const Operand *OpQn) {
  // VSUB (integer) - ARM section A8.8.414, encoding A1:

--- a/third_party/subzero/src/IceAssemblerARM32.h
+++ b/third_party/subzero/src/IceAssemblerARM32.h
@@ -591,6 +591,18 @@ public:
  void vsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
              const Operand *OpQn);
+  // Integer vector saturating subtract.
+  void vqsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+  void vqsubqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+  // Integer vector saturating add.
+  void vqaddqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+  void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
  // Float vector subtract
  void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);

--- a/third_party/subzero/src/IceInstARM32.cpp
+++ b/third_party/subzero/src/IceInstARM32.cpp
@@ -1037,6 +1037,56 @@ template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
  assert(!Asm->needsTextFixup());
 }
+template <> void InstARM32Vqadd::emitIAS(const Cfg *Func) const {
+  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  const Variable *Dest = getDest();
+  Type DestTy = Dest->getType();
+  switch (DestTy) {
+  default:
+    llvm::report_fatal_error("Vqadd not defined on type " +
+                             typeStdString(DestTy));
+  case IceType_v16i8:
+  case IceType_v8i16:
+  case IceType_v4i32:
+    switch (Sign) {
+    case InstARM32::FS_None: // defaults to unsigned.
+    case InstARM32::FS_Unsigned:
+      Asm->vqaddqu(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    case InstARM32::FS_Signed:
+      Asm->vqaddqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    }
+    break;
+  }
+  assert(!Asm->needsTextFixup());
+}
+template <> void InstARM32Vqsub::emitIAS(const Cfg *Func) const {
+  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  const Variable *Dest = getDest();
+  Type DestTy = Dest->getType();
+  switch (DestTy) {
+  default:
+    llvm::report_fatal_error("Vqsub not defined on type " +
+                             typeStdString(DestTy));
+  case IceType_v16i8:
+  case IceType_v8i16:
+  case IceType_v4i32:
+    switch (Sign) {
+    case InstARM32::FS_None: // defaults to unsigned.
+    case InstARM32::FS_Unsigned:
+      Asm->vqsubqu(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    case InstARM32::FS_Signed:
+      Asm->vqsubqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    }
+    break;
+  }
+  assert(!Asm->needsTextFixup());
+}
 template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
@@ -1639,6 +1689,10 @@ template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
 template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
 template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr";
 template <> const char *InstARM32Vsub::Opcode = "vsub";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub";
 // Four-addr ops
 template <> const char *InstARM32Mla::Opcode = "mla";
 template <> const char *InstARM32Mls::Opcode = "mls";
@@ -3110,6 +3164,8 @@ template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
 template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
 template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
 template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
+template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
+template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
 template class InstARM32LoadBase<InstARM32::Ldr>;
 template class InstARM32LoadBase<InstARM32::Ldrex>;

--- a/third_party/subzero/src/IceInstARM32.h
+++ b/third_party/subzero/src/IceInstARM32.h
@@ -442,6 +442,8 @@ public:
    Vmvn,
    Vneg,
    Vorr,
+    Vqadd,
+    Vqsub,
    Vshl,
    Vshr,
    Vsqrt,
@@ -1016,6 +1018,8 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
 using InstARM32Vmvn = InstARM32UnaryopFP<InstARM32::Vmvn>;
 using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
 using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
+using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
+using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
 using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
 using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
 using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;

--- a/third_party/subzero/src/IceTargetLoweringARM32.cpp
+++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp
@@ -5320,12 +5320,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  case Intrinsics::Trap:
    _trap();
    return;
-  case Intrinsics::AddSaturateSigned: {
+  case Intrinsics::AddSaturateSigned:
-    UnimplementedLoweringError(this, Instr);
-    return;
-  }
  case Intrinsics::AddSaturateUnsigned: {
-    UnimplementedLoweringError(this, Instr);
+    bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vqadd(T, Src0, Src1, Unsigned);
+    _mov(Dest, T);
    return;
  }
  case Intrinsics::LoadSubVector: {
@@ -5360,12 +5362,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
    UnimplementedLoweringError(this, Instr);
    return;
  }
-  case Intrinsics::SubtractSaturateSigned: {
+  case Intrinsics::SubtractSaturateSigned:
-    UnimplementedLoweringError(this, Instr);
-    return;
-  }
  case Intrinsics::SubtractSaturateUnsigned: {
-    UnimplementedLoweringError(this, Instr);
+    bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vqsub(T, Src0, Src1, Unsigned);
+    _mov(Dest, T);
    return;
  }
  case Intrinsics::VectorPackSigned: {

--- a/third_party/subzero/src/IceTargetLoweringARM32.h
+++ b/third_party/subzero/src/IceTargetLoweringARM32.h
@@ -910,6 +910,14 @@ protected:
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
+  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
+    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
+        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
+  }
+  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
+    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
+        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
+  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }