Implement floating-point rounding intrinsic.

BUG=swiftshader:15 Change-Id: I8e53f2fdb8208f8be0f4cdff3241b4a5efe9bc8a Reviewed-on: https://chromium-review.googlesource.com/404352 Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Jim Stichnoth <stichnot@chromium.org>

Implement floating-point rounding intrinsic.
f0d12c30 · Nicolas Capens · Nicolas Capens · f8c9977b · f0d12c30 · f0d12c30
Commit f0d12c30 authored Oct 27, 2016 by Nicolas Capens Committed by Nicolas Capens Nov 03, 2016
9 changed files
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -511,7 +511,9 @@ public:
    kRoundUp = 0x2,
    kRoundToZero = 0x3
  };
-  void roundsd(XmmRegister dst, XmmRegister src, RoundingMode mode);
+  void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
+  void round(Type Ty, XmmRegister dst, const Address &src,
+             const Immediate &mode);

  //----------------------------------------------------------------------------
  //

--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -2392,17 +2392,58 @@ void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
 }

+/// Emits a floating-point rounding instruction with a register source.
+///
+/// The byte sequence is 0x66, the REX prefix produced by emitRexRB(), 0x0F,
+/// 0x3A, an opcode byte selected by \p Ty, the XMM register operand for
+/// \p dst / \p src, and one immediate byte. Per the Intel encoding these
+/// opcode bytes correspond to roundps (0x08), roundss (0x0A) and roundsd
+/// (0x0B).
+///
+/// \param Ty   operand type; only v4f32, f32 and f64 are handled, any other
+///             type hits the assert in the default case.
+/// \param dst  destination XMM register.
+/// \param src  source XMM register.
+/// \param mode rounding-mode immediate; its value is OR-ed with 0x8 before
+///             being emitted.
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::roundsd(XmmRegister dst, XmmRegister src,
-                                           RoundingMode mode) {
+void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
+                                         XmmRegister src,
+                                         const Immediate &mode) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
   emitUint8(0x3A);
-  emitUint8(0x0B);
+  // The third opcode byte depends on the operand type.
+  switch (Ty) {
+  case IceType_v4f32:
+    emitUint8(0x08);
+    break;
+  case IceType_f32:
+    emitUint8(0x0A);
+    break;
+  case IceType_f64:
+    emitUint8(0x0B);
+    break;
+  default:
+    // NOTE(review): when assert() is compiled out, execution continues and
+    // no opcode byte is emitted for an unsupported type.
+    assert(false && "Unsupported round operand type");
+  }
   emitXmmRegisterOperand(dst, src);
   // Mask precision exeption.
-  emitUint8(static_cast<uint8_t>(mode) | 0x8);
+  emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
+}
+
+/// Emits a floating-point rounding instruction with a memory source.
+///
+/// The byte sequence is 0x66, the address-size override emitted by
+/// emitAddrSizeOverridePrefix(), the REX prefix produced by emitRex(), 0x0F,
+/// 0x3A, an opcode byte selected by \p Ty, the memory operand with \p dst in
+/// the register field, and one immediate byte. Per the Intel encoding these
+/// opcode bytes correspond to roundps (0x08), roundss (0x0A) and roundsd
+/// (0x0B).
+///
+/// \param Ty   operand type; only v4f32, f32 and f64 are handled, any other
+///             type hits the assert in the default case.
+/// \param dst  destination XMM register.
+/// \param src  address of the source operand.
+/// \param mode rounding-mode immediate; its value is OR-ed with 0x8 before
+///             being emitted.
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
+                                         const Address &src,
+                                         const Immediate &mode) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst);
+  emitUint8(0x0F);
+  emitUint8(0x3A);
+  // The third opcode byte depends on the operand type.
+  switch (Ty) {
+  case IceType_v4f32:
+    emitUint8(0x08);
+    break;
+  case IceType_f32:
+    emitUint8(0x0A);
+    break;
+  case IceType_f64:
+    emitUint8(0x0B);
+    break;
+  default:
+    // NOTE(review): when assert() is compiled out, execution continues and
+    // no opcode byte is emitted for an unsupported type.
+    assert(false && "Unsupported round operand type");
+  }
+  // NOTE(review): an immediate byte follows the memory operand. If
+  // emitOperand() takes an adjustment for bytes emitted after the operand
+  // (as needed for instruction-relative addresses), confirm that the default
+  // is correct here.
+  emitOperand(gprEncoding(dst), src);
+  // Mask precision exception.
+  emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
 }

 template <typename TraitsType>

--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -164,6 +164,7 @@ template <typename TraitsType> struct InstImpl {
      Pxor,
      Ret,
      Rol,
+      Round,
      Sar,
      Sbb,
      SbbRMW,
@@ -2564,6 +2565,25 @@ template <typename TraitsType> struct InstImpl {
    InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
  };

+  /// Round instruction.
+  ///
+  /// A three-address instruction (InstX86BaseThreeAddressop) built from a
+  /// destination variable, a source operand and an immediate operand. Both
+  /// the textual emitter (emit) and the integrated-assembler emitter
+  /// (emitIAS) are overridden.
+  class InstX86Round final
+      : public InstX86BaseThreeAddressop<InstX86Base::Round> {
+  public:
+    /// Constructs an InstX86Round in storage obtained from
+    /// Func->allocate<InstX86Round>() and returns it.
+    static InstX86Round *create(Cfg *Func, Variable *Dest, Operand *Source,
+                                Operand *Imm) {
+      return new (Func->allocate<InstX86Round>())
+          InstX86Round(Func, Dest, Source, Imm);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    /// Private: instances are created through create(). All arguments are
+    /// forwarded unchanged to the three-address base class.
+    InstX86Round(Cfg *Func, Variable *Dest, Operand *Source, Operand *Imm)
+        : InstX86BaseThreeAddressop<InstX86Base::Round>(Func, Dest, Source,
+                                                        Imm) {}
+  };
+
  /// cmp - Integer compare instruction.
  class InstX86Icmp final : public InstX86Base {
    InstX86Icmp() = delete;
@@ -3229,6 +3249,7 @@ template <typename TraitsType> struct Insts {
  using Cmpxchg = typename InstImpl<TraitsType>::InstX86Cmpxchg;
  using Cmpxchg8b = typename InstImpl<TraitsType>::InstX86Cmpxchg8b;
  using Cvt = typename InstImpl<TraitsType>::InstX86Cvt;
+  using Round = typename InstImpl<TraitsType>::InstX86Round;
  using Icmp = typename InstImpl<TraitsType>::InstX86Icmp;
  using Ucomiss = typename InstImpl<TraitsType>::InstX86Ucomiss;
  using UD2 = typename InstImpl<TraitsType>::InstX86UD2;
@@ -3494,6 +3515,9 @@ template <typename TraitsType> struct Insts {
      "insertps";                                                              \
  template <>                                                                  \
  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Round::Base::Opcode = "round";      \
+  template <>                                                                  \
+  template <>                                                                  \
  const char *InstImpl<TraitsType>::InstX86Shufps::Base::Opcode = "shufps";    \
  template <>                                                                  \
  template <>                                                                  \

--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -1767,6 +1767,35 @@ void InstImpl<TraitsType>::InstX86Cvt::dump(const Cfg *Func) const {
 }

 template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Round::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 3);
+  Str << "\t" << this->Opcode
+      << Traits::TypeAttributes[this->getDest()->getType()].SpSdString
+      << "\t";
+  this->getSrc(1)->emit(Func);
+  Str << ", ";
+  this->getSrc(0)->emit(Func);
+  Str << ", ";
+  this->getDest()->emit(Func);
+}
+
+/// Emits the round instruction through the integrated assembler.
+///
+/// Asserts that the instruction has two sources and that the target's
+/// instruction set is at least SSE4_1, then hands the destination, getSrc(0)
+/// and getSrc(1) to emitIASThreeOpImmOps together with the destination's
+/// type.
+///
+/// \param Func the function being emitted.
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Round::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 2);
+  assert(InstX86Base::getTarget(Func)->getInstructionSet() >= Traits::SSE4_1);
+  const Variable *Dest = this->getDest();
+  Type Ty = Dest->getType();
+  // Both entries name Assembler::round; presumably they resolve to the
+  // register-source and address-source overloads -- confirm against
+  // ThreeOpImmEmitter.
+  static const ThreeOpImmEmitter<XmmRegister, XmmRegister> Emitter = {
+      &Assembler::round, &Assembler::round};
+  emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
+                       Traits::getEncodedXmm>(Func, Ty, Dest, this->getSrc(0),
+                                              this->getSrc(1), Emitter);
+}
+
+template <typename TraitsType>
 void InstImpl<TraitsType>::InstX86Icmp::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;

--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -68,6 +68,7 @@ public:
    MultiplyAddPairs,
    MultiplyHighSigned,
    MultiplyHighUnsigned,
+    Round,
    SignMask,
    StoreSubVector,
    SubtractSaturateSigned,

--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -619,6 +619,10 @@ protected:
    AutoMemorySandboxer<> _(this, &Dest, &Src0);
    Context.insert<typename Traits::Insts::Cvt>(Dest, Src0, Variant);
  }
+  /// Inserts a Round instruction into Context with destination \p Dest,
+  /// source \p Src0 and immediate operand \p Imm.
+  void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
+    // The sandboxer receives the addresses of Dest and Src0, so it can
+    // replace them before the instruction is inserted -- presumably to
+    // rewrite memory operands when sandboxing; confirm against
+    // AutoMemorySandboxer.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0);
+    Context.insert<typename Traits::Insts::Round>(Dest, Src0, Imm);
+  }
  void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
    AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
    Context.insert<typename Traits::Insts::Div>(Dest, Src0, Src1);
@@ -894,6 +898,10 @@ protected:
    AutoMemorySandboxer<> _(this, &Dest, &Src0);
    Context.insert<typename Traits::Insts::Rol>(Dest, Src0);
  }
+  /// Inserts a Round instruction into Context with destination \p Dest,
+  /// source \p Src and constant immediate \p Imm.
+  ///
+  /// NOTE(review): this class also declares _round(Variable *, Operand *,
+  /// Operand *) with the same body; consider whether both overloads are
+  /// needed.
+  void _round(Variable *Dest, Operand *Src, Constant *Imm) {
+    // The sandboxer receives the addresses of Dest and Src, so it can replace
+    // them before the instruction is inserted -- presumably to rewrite memory
+    // operands when sandboxing; confirm against AutoMemorySandboxer.
+    AutoMemorySandboxer<> _(this, &Dest, &Src);
+    Context.insert<typename Traits::Insts::Round>(Dest, Src, Imm);
+  }
  X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem) {
    return dispatchToConcrete(&Traits::ConcreteTarget::_sandbox_mem_reference,
                              std::move(Mem));

--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4557,6 +4557,21 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall(
    _movp(Dest, T);
    return;
  }
+  case Intrinsics::Round: {
+    Variable *Dest = Instr->getDest();
+    Operand *Src = Instr->getArg(0);
+    Operand *Mode = Instr->getArg(1);
+    assert(llvm::isa<ConstantInteger32>(Mode) &&
+           "Round last argument must be a constant");
+    auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
+    int32_t Imm = llvm::cast<ConstantInteger32>(Mode)->getValue();
+    (void)Imm;
+    assert(Imm >= 0 && Imm < 4 && "Invalid rounding mode");
+    auto *T = makeReg(Dest->getType());
+    _round(T, SrcRM, Mode);
+    _movp(Dest, T);
+    return;
+  }
  default: // UnknownIntrinsic
    Func->setError("Unexpected intrinsic");
    return;

--- a/unittest/AssemblerX8632/XmmArith.cpp
+++ b/unittest/AssemblerX8632/XmmArith.cpp
@@ -2069,8 +2069,9 @@ TEST_F(AssemblerX8632Test, Roundsd) {
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
-    __ roundsd(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src, \
-               AssemblerX8632::k##Mode);                                       \
+    __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst,                      \
+             XmmRegister::Encoded_Reg_##Src,                                   \
+             Immediate(AssemblerX8632::k##Mode));                              \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(T0, V0);                                                  \

--- a/unittest/AssemblerX8664/XmmArith.cpp
+++ b/unittest/AssemblerX8664/XmmArith.cpp
@@ -2194,8 +2194,8 @@ TEST_F(AssemblerX8664Test, Roundsd) {
                                                                               \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
-    __ roundsd(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),                       \
-               AssemblerX8664::k##Mode);                                       \
+    __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),            \
+             Immediate(AssemblerX8664::k##Mode));                              \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(T0, V0);                                                  \