ARM32 vorr lowering

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1639403004 .

ARM32 vorr lowering
cad0b759 · Eric Holk · e37076a1 · cad0b759 · cad0b759 · cad0b759
Commit cad0b759 authored Jan 27, 2016 by Eric Holk
6 changed files
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -673,6 +673,11 @@ template <> void InstARM32Vmla::emitIAS(const Cfg *Func) const {
  }
 }

+template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const {
+  // TODO(kschimpf): add support for these instructions
+  emitUsingTextFixup(Func);
+}
+
 template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
@@ -1040,6 +1045,7 @@ template <> const char *InstARM32Veor::Opcode = "veor";
 template <> const char *InstARM32Vmla::Opcode = "vmla";
 template <> const char *InstARM32Vmls::Opcode = "vmls";
 template <> const char *InstARM32Vmul::Opcode = "vmul";
+template <> const char *InstARM32Vorr::Opcode = "vorr";
 template <> const char *InstARM32Vsub::Opcode = "vsub";
 // Four-addr ops
 template <> const char *InstARM32Mla::Opcode = "mla";

--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -431,6 +431,7 @@ public:
    Vmls,
    Vmrs,
    Vmul,
+    Vorr,
    Vsqrt,
    Vsub
  };
@@ -925,6 +926,7 @@ using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
 using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
 using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
 using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
+using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
 using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
 using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
 using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;

--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -2808,6 +2808,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
    case InstArithmetic::Fsub:
    case InstArithmetic::Sub:
    case InstArithmetic::And:
+    case InstArithmetic::Or:
      break;
    }
  }
@@ -2968,8 +2969,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
  }
  case InstArithmetic::Or: {
    Variable *Src0R = Srcs.src0R(this);
-    Operand *Src1RF = Srcs.src1RF(this);
-    _orr(T, Src0R, Src1RF);
+    if (isVectorType(DestTy)) {
+      Variable *Src1R = legalizeToReg(Src1);
+      _vorr(T, Src0R, Src1R);
+    } else {
+      Operand *Src1RF = Srcs.src1RF(this);
+      _orr(T, Src0R, Src1RF);
+    }
    _mov(Dest, T);
    return;
  }

--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -802,6 +802,9 @@ protected:
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
+  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
+    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
+  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);

--- a/tests_lit/assembler/arm32/and-vec.ll
+++ b/tests_lit/assembler/arm32/and-vec.ll
-; Show that we know how to translate vsub vector instructions.
+; Show that we know how to translate vand vector instructions.

 ; REQUIRES: allow_dump


--- a/tests_lit/assembler/arm32/or-vec.ll
+++ b/tests_lit/assembler/arm32/or-vec.ll
+; Show that we know how to translate vorr vector instructions.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+define internal <4 x i32> @testVor4i32(<4 x i32> %v1, <4 x i32> %v2) {
+; ASM-LABEL: testVor4i32:
+; DIS-LABEL: 00000000 <testVor4i32>:
+; IASM-LABEL: testVor4i32:
+
+entry:
+  %res = or <4 x i32> %v1, %v2
+
+; ASM:     vorr.i32        q0, q0, q1
+; DIS:   0:       f2200152
+; IASM:     vorr.i32
+
+  ret <4 x i32> %res
+}
+
+define internal <8 x i16> @testVor8i16(<8 x i16> %v1, <8 x i16> %v2) {
+; ASM-LABEL: testVor8i16:
+; DIS-LABEL: 00000010 <testVor8i16>:
+; IASM-LABEL: testVor8i16:
+
+entry:
+  %res = or <8 x i16> %v1, %v2
+
+; ASM:     vorr.i16        q0, q0, q1
+; DIS:   10:       f2200152
+; IASM:     vorr.i16
+
+  ret <8 x i16> %res
+}
+
+define internal <16 x i8> @testVor16i8(<16 x i8> %v1, <16 x i8> %v2) {
+; ASM-LABEL: testVor16i8:
+; DIS-LABEL: 00000020 <testVor16i8>:
+; IASM-LABEL: testVor16i8:
+
+entry:
+  %res = or <16 x i8> %v1, %v2
+
+; ASM:     vorr.i8        q0, q0, q1
+; DIS:   20:       f2200152
+; IASM:     vorr.i8
+
+  ret <16 x i8> %res
+}
+
+;;
+;; The following tests make sure logical or works on predicate vectors.
+;;
+
+define internal <4 x i1> @testVor4i1(<4 x i1> %v1, <4 x i1> %v2) {
+; ASM-LABEL: testVor4i1:
+; DIS-LABEL: 00000030 <testVor4i1>:
+; IASM-LABEL: testVor4i1:
+
+entry:
+  %res = or <4 x i1> %v1, %v2
+
+; ASM:     vorr.i32        q0, q0, q1
+; DIS:   30:       f2200152
+; IASM:     vorr.i32
+
+  ret <4 x i1> %res
+}
+
+define internal <8 x i1> @testVor8i1(<8 x i1> %v1, <8 x i1> %v2) {
+; ASM-LABEL: testVor8i1:
+; DIS-LABEL: 00000040 <testVor8i1>:
+; IASM-LABEL: testVor8i1:
+
+entry:
+  %res = or <8 x i1> %v1, %v2
+
+; ASM:     vorr.i16        q0, q0, q1
+; DIS:   40:       f2200152
+; IASM:     vorr.i16
+
+  ret <8 x i1> %res
+}
+
+define internal <16 x i1> @testVor16i1(<16 x i1> %v1, <16 x i1> %v2) {
+; ASM-LABEL: testVor16i1:
+; DIS-LABEL: 00000050 <testVor16i1>:
+; IASM-LABEL: testVor16i1:
+
+entry:
+  %res = or <16 x i1> %v1, %v2
+
+; ASM:     vorr.i8        q0, q0, q1
+; DIS:   50:       f2200152
+; IASM:     vorr.i8
+
+  ret <16 x i1> %res
+}