Lower vector floating point arithmetic operations.

This adds lowering code for fadd, fsub, fmul, fdiv, and frem. frem, having no native x86 counterpart, is implemented by making a helper call.

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/389653002

Lower vector floating point arithmetic operations.
8d1072e7 · Matt Wala · 78b4c0b8 · 8d1072e7 · 8d1072e7 · 8d1072e7
Commit 8d1072e7 authored Jul 11, 2014 by Matt Wala
5 changed files
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -399,9 +399,11 @@ void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
 }
 template <> const char *InstX8632Add::Opcode = "add";
+template <> const char *InstX8632Addps::Opcode = "addps"; // packed single-precision FP add
 template <> const char *InstX8632Adc::Opcode = "adc";
 template <> const char *InstX8632Addss::Opcode = "addss";
 template <> const char *InstX8632Sub::Opcode = "sub";
+template <> const char *InstX8632Subps::Opcode = "subps"; // packed single-precision FP subtract
 template <> const char *InstX8632Subss::Opcode = "subss";
 template <> const char *InstX8632Sbb::Opcode = "sbb";
 template <> const char *InstX8632And::Opcode = "and";
@@ -409,8 +411,10 @@ template <> const char *InstX8632Or::Opcode = "or";
 template <> const char *InstX8632Xor::Opcode = "xor";
 template <> const char *InstX8632Pxor::Opcode = "pxor";
 template <> const char *InstX8632Imul::Opcode = "imul";
+template <> const char *InstX8632Mulps::Opcode = "mulps"; // packed single-precision FP multiply
 template <> const char *InstX8632Mulss::Opcode = "mulss";
 template <> const char *InstX8632Div::Opcode = "div";
+template <> const char *InstX8632Divps::Opcode = "divps"; // packed single-precision FP divide
 template <> const char *InstX8632Idiv::Opcode = "idiv";
 template <> const char *InstX8632Divss::Opcode = "divss";
 template <> const char *InstX8632Shl::Opcode = "shl";

--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -136,6 +136,7 @@ public:
    k__Start = Inst::Target,
    Adc,
    Add,
+    Addps,
    Addss,
    And,
    Br,
@@ -143,6 +144,7 @@ public:
    Cdq,
    Cvt,
    Div,
+    Divps,
    Divss,
    Fld,
    Fstp,
@@ -158,6 +160,7 @@ public:
    Movsx,
    Movzx,
    Mul,
+    Mulps,
    Mulss,
    Or,
    Pop,
@@ -174,6 +177,7 @@ public:
    Store,
    StoreQ,
    Sub,
+    Subps,
    Subss,
    Test,
    Ucomiss,
@@ -397,9 +401,11 @@ private:
 };
 typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
+typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps; // binop used to lower vector fadd
 typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
 typedef InstX8632Binop<InstX8632::Addss> InstX8632Addss;
 typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub;
+typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps; // binop used to lower vector fsub
 typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss;
 typedef InstX8632Binop<InstX8632::Sbb> InstX8632Sbb;
 typedef InstX8632Binop<InstX8632::And> InstX8632And;
@@ -407,7 +413,9 @@ typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
 typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
 typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor;
 typedef InstX8632Binop<InstX8632::Imul> InstX8632Imul;
+typedef InstX8632Binop<InstX8632::Mulps> InstX8632Mulps; // binop used to lower vector fmul
 typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss;
+typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps; // binop used to lower vector fdiv
 typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
 typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
 typedef InstX8632Binop<InstX8632::Shr, true> InstX8632Shr;

--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -1136,7 +1136,59 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
-  } else { // Dest->getType() != IceType_i64
+  } else if (isVectorType(Dest->getType())) {
+    switch (Inst->getOp()) {
+    case InstArithmetic::_num:
+      llvm_unreachable("Unknown arithmetic operator");
+      break;
+    case InstArithmetic::Add:
+    case InstArithmetic::And:
+    case InstArithmetic::Or:
+    case InstArithmetic::Xor:
+    case InstArithmetic::Sub:
+    case InstArithmetic::Mul:
+    case InstArithmetic::Shl:
+    case InstArithmetic::Lshr:
+    case InstArithmetic::Ashr:
+    case InstArithmetic::Udiv:
+    case InstArithmetic::Sdiv:
+    case InstArithmetic::Urem:
+    case InstArithmetic::Srem:
+      // TODO(wala): Handle these.
+      Func->setError("Unhandled instruction");
+      break;
+    case InstArithmetic::Fadd: { // vector fadd: Dest = Src0 + Src1 via addps
+      Variable *T = makeReg(Dest->getType()); // temporary of Dest's (vector) type
+      _movp(T, Src0); // presumably a packed move, T = Src0 -- confirm against _movp
+      _addps(T, Src1); // two-operand form: T is both first source and destination
+      _movp(Dest, T); // move the result from the temporary into Dest
+    } break;
+    case InstArithmetic::Fsub: { // vector fsub: Dest = Src0 - Src1 via subps
+      Variable *T = makeReg(Dest->getType()); // temporary of Dest's (vector) type
+      _movp(T, Src0); // presumably a packed move, T = Src0 -- confirm against _movp
+      _subps(T, Src1); // two-operand form: T is both first source and destination
+      _movp(Dest, T); // move the result from the temporary into Dest
+    } break;
+    case InstArithmetic::Fmul: { // vector fmul: Dest = Src0 * Src1 via mulps
+      Variable *T = makeReg(Dest->getType()); // temporary of Dest's (vector) type
+      _movp(T, Src0); // presumably a packed move, T = Src0 -- confirm against _movp
+      _mulps(T, Src1); // two-operand form: T is both first source and destination
+      _movp(Dest, T); // move the result from the temporary into Dest
+    } break;
+    case InstArithmetic::Fdiv: { // vector fdiv: Dest = Src0 / Src1 via divps
+      Variable *T = makeReg(Dest->getType()); // temporary of Dest's (vector) type
+      _movp(T, Src0); // presumably a packed move, T = Src0 -- confirm against _movp
+      _divps(T, Src1); // two-operand form: T is both first source and destination
+      _movp(Dest, T); // move the result from the temporary into Dest
+    } break;
+    case InstArithmetic::Frem: {
+      // There is no native x86 instruction for a vector floating point
+      // remainder, so the operation is lowered to a call to a helper.
+      // frem is a binary operation: both the dividend (Src0) and the divisor
+      // (Src1) must be passed to the helper.
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    }
+  } else { // Dest->getType() is non-i64 scalar
    Variable *T_edx = NULL;
    Variable *T = NULL;
    switch (Inst->getOp()) {

--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -148,6 +148,9 @@ protected:
  void _add(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Add::create(Func, Dest, Src0));
  }
+  void _addps(Variable *Dest, Operand *Src0) { // Creates an InstX8632Addps on (Dest, Src0) and inserts it into Context.
+    Context.insert(InstX8632Addps::create(Func, Dest, Src0));
+  }
  void _addss(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Addss::create(Func, Dest, Src0));
  }
@@ -180,6 +183,9 @@ protected:
  void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
    Context.insert(InstX8632Div::create(Func, Dest, Src0, Src1));
  }
+  void _divps(Variable *Dest, Operand *Src0) { // Creates an InstX8632Divps on (Dest, Src0) and inserts it into Context.
+    Context.insert(InstX8632Divps::create(Func, Dest, Src0));
+  }
  void _divss(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Divss::create(Func, Dest, Src0));
  }
@@ -220,6 +226,9 @@ protected:
  void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
    Context.insert(InstX8632Mul::create(Func, Dest, Src0, Src1));
  }
+  void _mulps(Variable *Dest, Operand *Src0) { // Creates an InstX8632Mulps on (Dest, Src0) and inserts it into Context.
+    Context.insert(InstX8632Mulps::create(Func, Dest, Src0));
+  }
  void _mulss(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Mulss::create(Func, Dest, Src0));
  }
@@ -232,6 +241,9 @@ protected:
  void _push(Operand *Src0, bool SuppressStackAdjustment = false) {
    Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment));
  }
+  void _pxor(Variable *Dest, Operand *Src0) { // Creates an InstX8632Pxor on (Dest, Src0) and inserts it into Context.
+    Context.insert(InstX8632Pxor::create(Func, Dest, Src0));
+  }
  void _ret(Variable *Src0 = NULL) {
    Context.insert(InstX8632Ret::create(Func, Src0));
  }
@@ -265,6 +277,9 @@ protected:
  void _sub(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Sub::create(Func, Dest, Src0));
  }
+  void _subps(Variable *Dest, Operand *Src0) { // Creates an InstX8632Subps on (Dest, Src0) and inserts it into Context.
+    Context.insert(InstX8632Subps::create(Func, Dest, Src0));
+  }
  void _subss(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Subss::create(Func, Dest, Src0));
  }
@@ -284,9 +299,6 @@ protected:
  void _xor(Variable *Dest, Operand *Src0) {
    Context.insert(InstX8632Xor::create(Func, Dest, Src0));
  }
-  void _pxor(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pxor::create(Func, Dest, Src0));
-  }
  bool IsEbpBasedFrame;
  size_t FrameSizeLocals;

--- a/tests_lit/llvm2ice_tests/vector-arith.ll
+++ b/tests_lit/llvm2ice_tests/vector-arith.ll
+; This test checks support for vector arithmetic.
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
+; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
+; RUN:                           | FileCheck --check-prefix=DUMP %s
+define <4 x float> @test_fadd(<4 x float> %arg0, <4 x float> %arg1) {
+entry:
+  %res = fadd <4 x float> %arg0, %arg1
+  ret <4 x float> %res
+; CHECK-LABEL: test_fadd:
+; CHECK: addps
+}
+define <4 x float> @test_fsub(<4 x float> %arg0, <4 x float> %arg1) {
+entry:
+  %res = fsub <4 x float> %arg0, %arg1
+  ret <4 x float> %res
+; CHECK-LABEL: test_fsub:
+; CHECK: subps
+}
+define <4 x float> @test_fmul(<4 x float> %arg0, <4 x float> %arg1) {
+entry:
+  %res = fmul <4 x float> %arg0, %arg1
+  ret <4 x float> %res
+; CHECK-LABEL: test_fmul:
+; CHECK: mulps
+}
+define <4 x float> @test_fdiv(<4 x float> %arg0, <4 x float> %arg1) {
+entry:
+  %res = fdiv <4 x float> %arg0, %arg1
+  ret <4 x float> %res
+; CHECK-LABEL: test_fdiv:
+; CHECK: divps
+}
+define <4 x float> @test_frem(<4 x float> %arg0, <4 x float> %arg1) {
+entry:
+  %res = frem <4 x float> %arg0, %arg1
+  ret <4 x float> %res
+; CHECK-LABEL: test_frem:
+; CHECK: __frem_v4f32
+}
+; ERRORS-NOT: ICE translation error
+; DUMP-NOT: SZ