[SubZero] Implement Fcmp, ICmp, Cast and Select for vector type

The patch scalarizes Fcmp, ICmp, Cast and Select for operands of vector type.

R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/2412053002

Patch from Jaydeep Patil <jaydeep.patil@imgtec.com>.

[SubZero] Implement Fcmp, ICmp, Cast and Select for vector type
3a01f337 · Jaydeep Patil · Jim Stichnoth · 45e4d5ed · 3a01f337 · 3a01f337
Commit 3a01f337 authored Oct 17, 2016 by Jaydeep Patil Committed by Jim Stichnoth Oct 17, 2016
9 changed files
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -979,7 +979,7 @@ public:
  void setName(const Cfg *Func, const std::string &NewName) override {
    Variable::setName(Func, NewName);
    if (!Containers.empty()) {
-      for (SizeT i = 0; i < ElementsPerContainer; ++i) {
+      for (SizeT i = 0; i < ContainersPerVector; ++i) {
        Containers[i]->setName(Func, getName() + "__cont" + std::to_string(i));
      }
    }
@@ -995,7 +995,7 @@ public:
  const VarList &getContainers() const { return Containers; }

  void initVecElement(Cfg *Func) {
-    for (SizeT i = 0; i < ElementsPerContainer; ++i) {
+    for (SizeT i = 0; i < ContainersPerVector; ++i) {
      Variable *Var = Func->makeVariable(IceType_i32);
      Var->setIsArg(getIsArg());
      if (BuildDefs::dump()) {
@@ -1011,13 +1011,13 @@ public:
  }

  // A 128-bit vector value is mapped onto 4 32-bit register values.
-  static constexpr SizeT ElementsPerContainer = 4;
+  static constexpr SizeT ContainersPerVector = 4;

 protected:
  VariableVecOn32(const Cfg *Func, OperandKind K, Type Ty, SizeT Index)
      : Variable(Func, K, Ty, Index) {
    assert(typeWidthInBytes(Ty) ==
-           ElementsPerContainer * typeWidthInBytes(IceType_i32));
+           ContainersPerVector * typeWidthInBytes(IceType_i32));
  }

  VarList Containers;

--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -511,8 +511,11 @@ protected:
    Variable *T = Func->makeVariable(DestTy);
    if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
      VarVecOn32->initVecElement(Func);
+      auto *Undef = ConstantUndef::create(Ctx, DestTy);
+      Context.insert<InstAssign>(T, Undef);
+    } else {
+      Context.insert<InstFakeDef>(T);
    }
-    Context.insert<InstFakeDef>(T);

    for (SizeT I = 0; I < NumElements; ++I) {
      auto *Index = Ctx->getConstantInt32(I);

--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
--- a/tests_lit/llvm2ice_tests/fp.cmp.ll
+++ b/tests_lit/llvm2ice_tests/fp.cmp.ll
@@ -379,8 +379,8 @@ entry:
 ; ARM32-LABEL: fcmpFalseFloat
 ; ARM32: mov [[R:r[0-9]+]], #0
 ; MIPS32-LABEL: fcmpFalseFloat
-; MIPS32: addiu
-; MIPS32: sb
+; MIPS32: addiu [[R:.*]], $zero, 0
+; MIPS32: andi [[R]], [[R]], 1

 define internal i32 @fcmpFalseDouble(double %a, double %b) {
 entry:
@@ -393,8 +393,8 @@ entry:
 ; ARM32-LABEL: fcmpFalseDouble
 ; ARM32: mov [[R:r[0-9]+]], #0
 ; MIPS32-LABEL: fcmpFalseDouble
-; MIPS32: addiu
-; MIPS32: sb
+; MIPS32: addiu [[R:.*]], $zero, 0
+; MIPS32: andi [[R]], [[R]], 1

 define internal i32 @fcmpOeqFloat(float %a, float %b) {
 entry:
@@ -975,8 +975,8 @@ entry:
 ; ARM32-LABEL: fcmpTrueFloat
 ; ARM32: mov {{r[0-9]+}}, #1
 ; MIPS32-LABEL: fcmpTrueFloat
-; MIPS32: addiu
-; MIPS32: sb
+; MIPS32: addiu [[R:.*]], $zero, 1
+; MIPS32: andi [[R]], [[R]], 1

 define internal i32 @fcmpTrueDouble(double %a, double %b) {
 entry:
@@ -989,8 +989,8 @@ entry:
 ; ARM32-LABEL: fcmpTrueDouble
 ; ARM32: mov {{r[0-9]+}}, #1
 ; MIPS32-LABEL: fcmpTrueDouble
-; MIPS32: addiu
-; MIPS32: sb
+; MIPS32: addiu [[R:.*]], $zero, 1
+; MIPS32: andi [[R]], [[R]], 1

 define internal float @selectFloatVarVar(float %a, float %b) {
 entry:

--- a/tests_lit/llvm2ice_tests/vector-align.ll
+++ b/tests_lit/llvm2ice_tests/vector-align.ll
@@ -7,6 +7,12 @@
 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2  | FileCheck %s
 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s

+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
+; RUN:   -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32 %s
+
 define internal <4 x i32> @test_add(i32 %addr_i, <4 x i32> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
@@ -18,6 +24,12 @@ entry:
 ; CHECK-NOT: paddd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: paddd xmm{{.}},

+; MIPS32-LABEL: test_add
+; MIPS32: addu
+; MIPS32: addu
+; MIPS32: addu
+; MIPS32: addu
+
 define internal <4 x i32> @test_and(i32 %addr_i, <4 x i32> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
@@ -29,6 +41,12 @@ entry:
 ; CHECK-NOT: pand xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: pand xmm{{.}},

+; MIPS32-LABEL: test_and
+; MIPS32: and
+; MIPS32: and
+; MIPS32: and
+; MIPS32: and
+
 define internal <4 x i32> @test_or(i32 %addr_i, <4 x i32> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
@@ -40,6 +58,12 @@ entry:
 ; CHECK-NOT: por xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: por xmm{{.}},

+; MIPS32-LABEL: test_or
+; MIPS32: or
+; MIPS32: or
+; MIPS32: or
+; MIPS32: or
+
 define internal <4 x i32> @test_xor(i32 %addr_i, <4 x i32> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
@@ -51,6 +75,12 @@ entry:
 ; CHECK-NOT: pxor xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: pxor xmm{{.}},

+; MIPS32-LABEL: test_xor
+; MIPS32: xor
+; MIPS32: xor
+; MIPS32: xor
+; MIPS32: xor
+
 define internal <4 x i32> @test_sub(i32 %addr_i, <4 x i32> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
@@ -62,6 +92,12 @@ entry:
 ; CHECK-NOT: psubd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: psubd xmm{{.}},

+; MIPS32-LABEL: test_sub
+; MIPS32: subu
+; MIPS32: subu
+; MIPS32: subu
+; MIPS32: subu
+
 define internal <4 x float> @test_fadd(i32 %addr_i, <4 x float> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x float>*
@@ -73,6 +109,12 @@ entry:
 ; CHECK-NOT: addps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: addps xmm{{.}},

+; MIPS32-LABEL: test_fadd
+; MIPS32: add.s
+; MIPS32: add.s
+; MIPS32: add.s
+; MIPS32: add.s
+
 define internal <4 x float> @test_fsub(i32 %addr_i, <4 x float> %addend) {
 entry:
  %addr = inttoptr i32 %addr_i to <4 x float>*
@@ -83,3 +125,9 @@ entry:
 ; CHECK-LABEL: test_fsub
 ; CHECK-NOT: subps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
 ; CHECK: subps xmm{{.}},
+
+; MIPS32-LABEL: test_fsub
+; MIPS32: sub.s
+; MIPS32: sub.s
+; MIPS32: sub.s
+; MIPS32: sub.s
--- a/tests_lit/llvm2ice_tests/vector-fcmp.ll
+++ b/tests_lit/llvm2ice_tests/vector-fcmp.ll
@@ -4,6 +4,12 @@
 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s
 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s

+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
+; RUN:   -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32 %s
+
 ; Check that sext elimination occurs when the result of the comparison
 ; instruction is already sign extended.  Sign extension to 4 x i32 uses
 ; the pslld instruction.
@@ -16,6 +22,19 @@ entry:
 ; CHECK: cmpeqps
 ; CHECK-NOT: pslld
 }
+; MIPS32-LABEL: sextElimination
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpFalseVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -25,6 +44,11 @@ entry:
 ; CHECK-LABEL: fcmpFalseVector
 ; CHECK: pxor
 }
+; MIPS32-LABEL: fcmpFalseVector
+; MIPS32: li v0,0
+; MIPS32: li v1,0
+; MIPS32: li a0,0
+; MIPS32: li a1,0

 define internal <4 x i32> @fcmpOeqVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -34,6 +58,19 @@ entry:
 ; CHECK-LABEL: fcmpOeqVector
 ; CHECK: cmpeqps
 }
+; MIPS32-LABEL: fcmpOeqVector
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpOgeVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -43,6 +80,19 @@ entry:
 ; CHECK-LABEL: fcmpOgeVector
 ; CHECK: cmpleps
 }
+; MIPS32-LABEL: fcmpOgeVector
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpOgtVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -52,6 +102,19 @@ entry:
 ; CHECK-LABEL: fcmpOgtVector
 ; CHECK: cmpltps
 }
+; MIPS32-LABEL: fcmpOgtVector
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpOleVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -61,6 +124,19 @@ entry:
 ; CHECK-LABEL: fcmpOleVector
 ; CHECK: cmpleps
 }
+; MIPS32-LABEL: fcmpOleVector
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpOltVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -70,6 +146,19 @@ entry:
 ; CHECK-LABEL: fcmpOltVector
 ; CHECK: cmpltps
 }
+; MIPS32-LABEL: fcmpOltVector
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpOneVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -81,6 +170,19 @@ entry:
 ; CHECK: cmpordps
 ; CHECK: pand
 }
+; MIPS32-LABEL: fcmpOneVector
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpOrdVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -90,6 +192,19 @@ entry:
 ; CHECK-LABEL: fcmpOrdVector
 ; CHECK: cmpordps
 }
+; MIPS32-LABEL: fcmpOrdVector
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpTrueVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -99,6 +214,11 @@ entry:
 ; CHECK-LABEL: fcmpTrueVector
 ; CHECK: pcmpeqd
 }
+; MIPS32-LABEL: fcmpTrueVector
+; MIPS32: li v0,1
+; MIPS32: li v1,1
+; MIPS32: li a0,1
+; MIPS32: li a1,1

 define internal <4 x i32> @fcmpUeqVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -110,6 +230,19 @@ entry:
 ; CHECK: cmpunordps
 ; CHECK: por
 }
+; MIPS32-LABEL: fcmpUeqVector
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ueq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpUgeVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -119,6 +252,19 @@ entry:
 ; CHECK-LABEL: fcmpUgeVector
 ; CHECK: cmpnltps
 }
+; MIPS32-LABEL: fcmpUgeVector
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.olt.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpUgtVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -128,6 +274,19 @@ entry:
 ; CHECK-LABEL: fcmpUgtVector
 ; CHECK: cmpnleps
 }
+; MIPS32-LABEL: fcmpUgtVector
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.ole.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpUleVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -137,6 +296,19 @@ entry:
 ; CHECK-LABEL: fcmpUleVector
 ; CHECK: cmpnltps
 }
+; MIPS32-LABEL: fcmpUleVector
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ule.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpUltVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -146,6 +318,19 @@ entry:
 ; CHECK-LABEL: fcmpUltVector
 ; CHECK: cmpnleps
 }
+; MIPS32-LABEL: fcmpUltVector
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.ult.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpUneVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -155,6 +340,19 @@ entry:
 ; CHECK-LABEL: fcmpUneVector
 ; CHECK: cmpneqps
 }
+; MIPS32-LABEL: fcmpUneVector
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0
+; MIPS32: c.eq.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movt [[R]],zero,$fcc0

 define internal <4 x i32> @fcmpUnoVector(<4 x float> %a, <4 x float> %b) {
 entry:
@@ -164,3 +362,16 @@ entry:
 ; CHECK-LABEL: fcmpUnoVector
 ; CHECK: cmpunordps
 }
+; MIPS32-LABEL: fcmpUnoVector
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
+; MIPS32: c.un.s
+; MIPS32: li [[R:.*]],1
+; MIPS32: movf [[R]],zero,$fcc0
--- a/tests_lit/llvm2ice_tests/vector-icmp.ll
+++ b/tests_lit/llvm2ice_tests/vector-icmp.ll
--- a/tests_lit/llvm2ice_tests/vector-ops.ll
+++ b/tests_lit/llvm2ice_tests/vector-ops.ll
@@ -9,6 +9,12 @@
 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -mattr=sse4.1 \
 ; RUN:   | FileCheck --check-prefix=SSE41 %s

+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
+; RUN:   -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32 %s
+
 ; insertelement operations

 define internal <4 x float> @insertelement_v4f32_0(<4 x float> %vec,
@@ -21,6 +27,24 @@ entry:

 ; SSE41-LABEL: insertelement_v4f32_0
 ; SSE41: insertps {{.*}},{{.*}},0x0
+
+; *** a0 - implicit return <4 x float>
+; *** a1 - unused due to alignment of %vec
+; *** a2:a3:sp[16]:sp[20] - %vec
+; *** sp[24] - %elt
+; MIPS32-LABEL: insertelement_v4f32_0
+; *** Load elements 2 and 3 of %vec
+; MIPS32: lw [[BV_E2:.*]],
+; MIPS32: lw [[BV_E3:.*]],
+; *** Load %elt
+; MIPS32: lwc1 [[ELT:.*]],
+; *** Insert %elt at %vec[0]
+; MIPS32: mfc1 [[RV_E0:.*]],[[ELT]]
+; MIPS32: move [[RET_PTR:.*]],a0
+; MIPS32: sw [[RV_E0]],0([[RET_PTR]])
+; MIPS32: sw a3,4([[RET_PTR]])
+; MIPS32: sw [[BV_E2]],8([[RET_PTR]])
+; MIPS32: sw [[BV_E3]],12([[RET_PTR]])
 }

 define internal <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) {
@@ -33,6 +57,15 @@ entry:

 ; SSE41-LABEL: insertelement_v4i32_0
 ; SSE41: pinsrd {{.*}},{{.*}},0x0
+
+; *** a0:a1:a2:a3 - %vec
+; *** sp[16] - %elt
+; MIPS32-LABEL: insertelement_v4i32_0
+; *** Load %elt
+; MIPS32: lw v0,16(sp)
+; MIPS32: move v1,a1
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }


@@ -47,6 +80,17 @@ entry:

 ; SSE41-LABEL: insertelement_v4f32_1
 ; SSE41: insertps {{.*}},{{.*}},0x10
+
+; MIPS32-LABEL: insertelement_v4f32_1
+; MIPS32: lw [[VEC_E2:.*]],16(sp)
+; MIPS32: lw [[VEC_E3:.*]],20(sp)
+; MIPS32: lwc1 [[ELT:.*]],24(sp)
+; MIPS32: mfc1 [[R_E1:.*]],[[ELT]]
+; MIPS32: move [[PTR:.*]],a0
+; MIPS32: sw a2,0([[PTR]])
+; MIPS32: sw [[R_E1]],4([[PTR]])
+; MIPS32: sw [[VEC_E2]],8([[PTR]])
+; MIPS32: sw [[VEC_E3]],12([[PTR]])
 }

 define internal <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) {
@@ -59,6 +103,13 @@ entry:

 ; SSE41-LABEL: insertelement_v4i32_1
 ; SSE41: pinsrd {{.*}},{{.*}},0x1
+
+; MIPS32-LABEL: insertelement_v4i32_1
+; MIPS32: lw [[ELT:.*]],16(sp)
+; MIPS32: move v1,[[ELT]]
+; MIPS32: move v0,a0
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 define internal <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) {
@@ -71,6 +122,16 @@ entry:

 ; SSE41-LABEL: insertelement_v8i16
 ; SSE41: pinsrw
+
+; MIPS32-LABEL: insertelement_v8i16
+; MIPS32: lw [[ELT:.*]],16(sp)
+; MIPS32: sll [[ELT]],[[ELT]],0x10
+; MIPS32: sll a0,a0,0x10
+; MIPS32: srl a0,a0,0x10
+; MIPS32: or v0,[[ELT]],a0
+; MIPS32: move v1,a1
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 define internal <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) {
@@ -85,6 +146,18 @@ entry:

 ; SSE41-LABEL: insertelement_v16i8
 ; SSE41: pinsrb
+
+; MIPS32-LABEL: insertelement_v16i8
+; MIPS32: lw [[ELT:.*]],16(sp)
+; MIPS32: andi [[ELT]],[[ELT]],0xff
+; MIPS32: sll [[ELT]],[[ELT]],0x8
+; MIPS32: lui [[T:.*]],0xffff
+; MIPS32: ori [[T]],[[T]],0xff
+; MIPS32: and a0,a0,[[T]]
+; MIPS32: or v0,v0,a0
+; MIPS32: move v1,a1
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 define internal <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) {
@@ -97,6 +170,12 @@ entry:

 ; SSE41-LABEL: insertelement_v4i1_0
 ; SSE41: pinsrd {{.*}},{{.*}},0x0
+
+; MIPS32-LABEL: insertelement_v4i1_0
+; MIPS32: lw v0,16(sp)
+; MIPS32: move v1,a1
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 define internal <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) {
@@ -110,6 +189,13 @@ entry:

 ; SSE41-LABEL: insertelement_v4i1_1
 ; SSE41: pinsrd {{.*}},{{.*}},0x1
+
+; MIPS32-LABEL: insertelement_v4i1_1
+; MIPS32: lw [[ELT:.*]],16(sp)
+; MIPS32: move v1,[[ELT]]
+; MIPS32: move v0,a0
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 define internal <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) {
@@ -122,6 +208,16 @@ entry:

 ; SSE41-LABEL: insertelement_v8i1
 ; SSE41: pinsrw
+
+; MIPS32-LABEL: insertelement_v8i1
+; MIPS32: lw [[ELT:.*]],16(sp)
+; MIPS32: sll [[ELT]],[[ELT]],0x10
+; MIPS32: sll a0,a0,0x10
+; MIPS32: srl a0,a0,0x10
+; MIPS32: or v0,[[ELT]],a0
+; MIPS32: move v1,a1
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 define internal <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) {
@@ -136,6 +232,18 @@ entry:

 ; SSE41-LABEL: insertelement_v16i1
 ; SSE41: pinsrb
+
+; MIPS32-LABEL: insertelement_v16i1
+; MIPS32: lw [[ELT:.*]],16(sp)
+; MIPS32: andi [[ELT]],[[ELT]],0xff
+; MIPS32: sll [[ELT]],[[ELT]],0x8
+; MIPS32: lui [[T:.*]],0xffff
+; MIPS32: ori [[T]],[[T]],0xff
+; MIPS32: and a0,a0,[[T]]
+; MIPS32: or v0,[[ELT]],a0
+; MIPS32: move v1,a1
+; MIPS32: move a0,a2
+; MIPS32: move a1,a3
 }

 ; extractelement operations
@@ -149,6 +257,9 @@ entry:

 ; SSE41-LABEL: extractelement_v4f32
 ; SSE41: pshufd
+
+; MIPS32-LABEL: extractelement_v4f32
+; MIPS32: mtc1 a1,$f0
 }

 define internal i32 @extractelement_v4i32(<4 x i32> %vec) {
@@ -161,6 +272,9 @@ entry:

 ; SSE41-LABEL: extractelement_v4i32
 ; SSE41: pextrd
+
+; MIPS32-LABEL: extractelement_v4i32
+; MIPS32: move v0,a1
 }

 define internal i32 @extractelement_v8i16(<8 x i16> %vec) {
@@ -173,6 +287,11 @@ entry:

 ; SSE41-LABEL: extractelement_v8i16
 ; SSE41: pextrw
+
+; MIPS32-LABEL: extractelement_v8i16
+; MIPS32: srl a0,a0,0x10
+; MIPS32: andi a0,a0,0xffff
+; MIPS32: move v0,a0
 }

 define internal i32 @extractelement_v16i8(<16 x i8> %vec) {
@@ -187,6 +306,12 @@ entry:

 ; SSE41-LABEL: extractelement_v16i8
 ; SSE41: pextrb
+
+; MIPS32-LABEL: extractelement_v16i8
+; MIPS32: srl a0,a0,0x8
+; MIPS32: andi a0,a0,0xff
+; MIPS32: andi a0,a0,0xff
+; MIPS32: move v0,a0
 }

 define internal i32 @extractelement_v4i1(<4 x i1> %vec) {
@@ -199,6 +324,11 @@ entry:

 ; SSE41-LABEL: extractelement_v4i1
 ; SSE41: pextrd
+
+; MIPS32-LABEL: extractelement_v4i1
+; MIPS32: andi a1,a1,0x1
+; MIPS32: andi a1,a1,0x1
+; MIPS32: move v0,a1
 }

 define internal i32 @extractelement_v8i1(<8 x i1> %vec) {
@@ -211,6 +341,12 @@ entry:

 ; SSE41-LABEL: extractelement_v8i1
 ; SSE41: pextrw
+
+; MIPS32-LABEL: extractelement_v8i1
+; MIPS32: srl a0,a0,0x10
+; MIPS32: andi a0,a0,0x1
+; MIPS32: andi a0,a0,0x1
+; MIPS32: move v0,a0
 }

 define internal i32 @extractelement_v16i1(<16 x i1> %vec) {
@@ -225,4 +361,11 @@ entry:

 ; SSE41-LABEL: extractelement_v16i1
 ; SSE41: pextrb
+
+; MIPS32-LABEL: extractelement_v16i1
+; MIPS32: srl a0,a0,0x8
+; MIPS32: andi a0,a0,0xff
+; MIPS32: andi a0,a0,0x1
+; MIPS32: andi a0,a0,0x1
+; MIPS32: move v0,a0
 }
--- a/tests_lit/llvm2ice_tests/vector-select.ll
+++ b/tests_lit/llvm2ice_tests/vector-select.ll