Initial support for vector addition on ARM32.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=jpp@chromium.org

Review URL: https://codereview.chromium.org/1635713002 .

Initial support for vector addition on ARM32.
40c69b43 · Eric Holk · 18273c0f · 40c69b43 · 40c69b43 · 40c69b43
Commit 40c69b43 authored Jan 26, 2016 by Eric Holk
4 changed files
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -603,16 +603,17 @@ template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const {
  switch (Dest->getType()) {
  default:
    // TODO(kschimpf) Figure if more cases are needed.
-    Asm->setNeedsTextFixup();
+    emitUsingTextFixup(Func);
    break;
  case IceType_f32:
    Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
+    assert(!Asm->needsTextFixup());
    break;
  case IceType_f64:
    Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
+    assert(!Asm->needsTextFixup());
    break;
  }
-  assert(!Asm->needsTextFixup());
 }

 template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const {

--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -2798,11 +2798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
  }

  if (isVectorType(DestTy)) {
-    UnimplementedLoweringError(this, Instr);
-    return;
+    switch (Instr->getOp()) {
+    default:
+      UnimplementedLoweringError(this, Instr);
+      return;
+    // Explicitly whitelist vector instructions we have implemented/enabled.
+    case InstArithmetic::Fadd:
+    case InstArithmetic::Add:
+      break;
+    }
  }

-  // DestTy is a non-i64 scalar.
  Variable *T = makeReg(DestTy);

  // * Handle div/rem separately. They require a non-legalized Src1 to inspect
@@ -2900,6 +2906,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
    return;
  case InstArithmetic::Add: {
    if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
+      assert(!isVectorType(DestTy));
      Variable *Src0R = legalizeToReg(Src0);
      Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
      Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
@@ -2911,6 +2918,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
    if (Srcs.hasConstOperand()) {
      if (!Srcs.immediateIsFlexEncodable() &&
          Srcs.negatedImmediateIsFlexEncodable()) {
+        assert(!isVectorType(DestTy));
        Variable *Src0R = Srcs.src0R(this);
        Operand *Src1F = Srcs.negatedSrc1F(this);
        if (!Srcs.swappedOperands()) {
@@ -2923,8 +2931,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
      }
    }
    Variable *Src0R = Srcs.src0R(this);
-    Operand *Src1RF = Srcs.src1RF(this);
-    _add(T, Src0R, Src1RF);
+    if (isVectorType(DestTy)) {
+      Variable *Src1R = legalizeToReg(Src1);
+      _vadd(T, Src0R, Src1R);
+    } else {
+      Operand *Src1RF = Srcs.src1RF(this);
+      _add(T, Src0R, Src1RF);
+    }
    _mov(Dest, T);
    return;
  }

--- a/tests_lit/assembler/arm32/add-vec.ll
+++ b/tests_lit/assembler/arm32/add-vec.ll
+; Show that we know how to translate vadd vector instructions.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+define internal <4 x float> @testVaddFloat4(<4 x float> %v1, <4 x float> %v2) {
+; ASM-LABEL: testVaddFloat4:
+; DIS-LABEL: 00000000 <testVaddFloat4>:
+; IASM-LABEL: testVaddFloat4:
+
+entry:
+  %res = fadd <4 x float> %v1, %v2
+
+; ASM:     vadd.f32        q10, q10, q11
+; DIS:   8:       f2444de6
+; IASM:     vadd.f32
+
+  ret <4 x float> %res
+}
+
+define internal <4 x i32> @testVadd4i32(<4 x i32> %v1, <4 x i32> %v2) {
+; ASM-LABEL: testVadd4i32:
+; DIS-LABEL: 00000020 <testVadd4i32>:
+; IASM-LABEL: testVadd4i32:
+
+entry:
+  %res = add <4 x i32> %v1, %v2
+
+; ASM:     vadd.i32        q10, q10, q11
+; DIS:   28:       f26448e6
+; IASM:     vadd.i32
+
+  ret <4 x i32> %res
+}
+
+define internal <8 x i16> @testVadd8i16(<8 x i16> %v1, <8 x i16> %v2) {
+; ASM-LABEL: testVadd8i16:
+; DIS-LABEL: 00000040 <testVadd8i16>:
+; IASM-LABEL: testVadd8i16:
+
+entry:
+  %res = add <8 x i16> %v1, %v2
+
+; ASM:     vadd.i16        q10, q10, q11
+; DIS:   48:       f25448e6
+; IASM:     vadd.i16
+
+  ret <8 x i16> %res
+}
+
+define internal <16 x i8> @testVadd16i8(<16 x i8> %v1, <16 x i8> %v2) {
+; ASM-LABEL: testVadd16i8:
+; DIS-LABEL: 00000060 <testVadd16i8>:
+; IASM-LABEL: testVadd16i8:
+
+entry:
+  %res = add <16 x i8> %v1, %v2
+
+; ASM:     vadd.i8        q10, q10, q11
+; DIS:   68:       f24448e6
+; IASM:     vadd.i8
+
+  ret <16 x i8> %res
+}
--- a/tests_lit/assembler/arm32/vldr-vector.ll
+++ b/tests_lit/assembler/arm32/vldr-vector.ll
+; Show that we know how to translate vector load instructions.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+define internal <4 x float> @testDerefFloat4(<4 x float> *%p) {
+; ASM-LABEL: testDerefFloat4:
+; DIS-LABEL: 00000000 <testDerefFloat4>:
+; IASM-LABEL: testDerefFloat4:
+
+entry:
+  %ret = load <4 x float>, <4 x float>* %p, align 4
+; ASM:     vld1.64	q0, [r0]
+; DIS:   0:       f4200acf
+
+  ret <4 x float> %ret
+}
+
+define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) {
+; ASM-LABEL: testDeref4i32:
+; DIS-LABEL: 00000010 <testDeref4i32>:
+; IASM-LABEL: testDeref4i32:
+
+entry:
+  %ret = load <4 x i32>, <4 x i32>* %p, align 4
+; ASM:     vld1.64	q0, [r0]
+; DIS:   10:       f4200acf
+
+  ret <4 x i32> %ret
+}