[SubZero] Vector types support for MIPS

This patch implements vector operations on MIPS32 using the VariableVecOn32 approach (along the lines of Variable64On32). Vector operations are scalarized prior to lowering. Each vector variable is split into 4 containers that together hold its value. For MIPS32, four GP/FP registers are used to hold a vector variable. Arguments are passed in GP registers irrespective of the type of the vector variable. The lit test vector-mips.ll has been added to test this implementation. R=stichnot@chromium.org Review URL: https://codereview.chromium.org/2380023002 . Patch from Jaydeep Patil <jaydeep.patil@imgtec.com>.

[SubZero] Vector types support for MIPS
958ddb75 · Jaydeep Patil · Jim Stichnoth · 9309756d · 958ddb75 · 958ddb75
Commit 958ddb75 authored Oct 03, 2016 by Jaydeep Patil Committed by Jim Stichnoth Oct 03, 2016
8 changed files
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -119,9 +119,14 @@ void Cfg::swapNodes(NodeList &NewNodes) {

 template <> Variable *Cfg::makeVariable<Variable>(Type Ty) {
  SizeT Index = Variables.size(); // the new variable's index is the current variable count
-  Variable *Var = Target->shouldSplitToVariable64On32(Ty)
-                      ? Variable64On32::create(this, Ty, Index)
-                      : Variable::create(this, Ty, Index);
+  Variable *Var;
+  if (Target->shouldSplitToVariableVecOn32(Ty)) { // vector split is checked before the 64-on-32 split
+    Var = VariableVecOn32::create(this, Ty, Index);
+  } else if (Target->shouldSplitToVariable64On32(Ty)) {
+    Var = Variable64On32::create(this, Ty, Index);
+  } else {
+    Var = Variable::create(this, Ty, Index); // no split requested by the target: plain Variable
+  }
  Variables.push_back(Var); // record the new variable in this Cfg's variable list
  return Var;
 }
@@ -244,9 +249,13 @@ void Cfg::translate() {
  }

  // Create the Hi and Lo variables where a split was needed
-  for (Variable *Var : Variables)
-    if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
+  for (Variable *Var : Variables) {
+    if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) {
      Var64On32->initHiLo(this);
+    } else if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Var)) {
+      VarVecOn32->initVecElement(this);
+    }
+  }

  // Instrument the Cfg, e.g. with AddressSanitizer
  if (!BuildDefs::minimal() && getFlags().getSanitizeAddresses()) {

--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -955,11 +955,10 @@ public:
  void dump(const Cfg *Func) const override {
    if (!BuildDefs::dump()) // nothing is printed in builds without dump support
      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    Str << "\t" << Opcode << "\t";
-    getSrc(0)->emit(Func);
-    Str << ", ";
-    getSrc(1)->emit(Func);
+    Ostream &Str = Func->getContext()->getStrDump(); // dump() now writes to the dump stream, not the emit stream
+    dumpOpcode(Str, Opcode, getSrc(0)->getType()); // opcode is printed using the first source's type
+    Str << " ";
+    dumpSources(Func);
    Str << ", " << TrapCode; // the trap code follows the source operands
  }


--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -52,6 +52,7 @@ public:
    kConst_Max = kConst_Target + MaxTargetKinds,
    kVariable,
    kVariable64On32,
+    kVariableVecOn32,
    kVariableBoolean,
    kVariable_Target, // leave space for target-specific variable kinds
    kVariable_Max = kVariable_Target + MaxTargetKinds,
@@ -962,6 +963,66 @@ protected:
  Variable *HiVar = nullptr;
 };

+// VariableVecOn32 represents a 128-bit vector variable on a 32-bit
+// architecture. In this case the variable must be split into 4 containers (i32 Variables created by initVecElement()).
+class VariableVecOn32 : public Variable {
+  VariableVecOn32() = delete;
+  VariableVecOn32(const VariableVecOn32 &) = delete;
+  VariableVecOn32 &operator=(const VariableVecOn32 &) = delete;
+
+public:
+  static VariableVecOn32 *create(Cfg *Func, Type Ty, SizeT Index) { // factory: placement-new into storage from Func->allocate
+    return new (Func->allocate<VariableVecOn32>())
+        VariableVecOn32(Func, kVariableVecOn32, Ty, Index);
+  }
+
+  void setName(const Cfg *Func, const std::string &NewName) override { // names this variable, then renames existing containers
+    Variable::setName(Func, NewName);
+    if (!Containers.empty()) { // containers exist only after initVecElement() has run
+      for (SizeT i = 0; i < ElementsPerContainer; ++i) {
+        Containers[i]->setName(Func, getName() + "__cont" + std::to_string(i)); // container i becomes "<name>__cont<i>"
+      }
+    }
+  }
+
+  void setIsArg(bool Val = true) override { // sets the flag here and mirrors it onto every container
+    Variable::setIsArg(Val);
+    for (Variable *Var : Containers) {
+      Var->setIsArg(getIsArg());
+    }
+  }
+
+  const VarList &getContainers() const { return Containers; } // empty until initVecElement() has been called
+
+  void initVecElement(Cfg *Func) { // creates the container variables; NOTE(review): no guard against a second call, which would append more
+    for (SizeT i = 0; i < ElementsPerContainer; ++i) {
+      Variable *Var = Func->makeVariable(IceType_i32); // each container is a new i32 variable owned by Func
+      Var->setIsArg(getIsArg()); // container inherits this variable's current is-argument flag
+      if (BuildDefs::dump()) { // containers are named here only in dump-enabled builds
+        Var->setName(Func, getName() + "__cont" + std::to_string(i));
+      }
+      Containers.push_back(Var);
+    }
+  }
+
+  static bool classof(const Operand *Operand) { // type test used by llvm::dyn_cast: true only for kind kVariableVecOn32
+    OperandKind Kind = Operand->getKind();
+    return Kind == kVariableVecOn32;
+  }
+
+  // A 128-bit vector value is mapped onto 4 32-bit register values. Despite its name, the constant below is used as the number of containers per vector.
+  static constexpr SizeT ElementsPerContainer = 4;
+
+protected:
+  VariableVecOn32(const Cfg *Func, OperandKind K, Type Ty, SizeT Index)
+      : Variable(Func, K, Ty, Index) {
+    assert(typeWidthInBytes(Ty) == // Ty must be exactly as wide as ElementsPerContainer i32 values
+           ElementsPerContainer * typeWidthInBytes(IceType_i32));
+  }
+
+  VarList Containers; // the i32 variables backing this vector, filled by initVecElement()
+};
+
 enum MetadataKind {
  VMK_Uses,       /// Track only uses, not defs
  VMK_SingleDefs, /// Track uses+defs, but only record single def

--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -715,6 +715,10 @@ void TargetLowering::addFakeDefUses(const Inst *Instr) {
    if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
      Context.insert<InstFakeUse>(Var64->getLo());
      Context.insert<InstFakeUse>(Var64->getHi());
+    } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
+      for (Variable *Var : VarVec->getContainers()) {
+        Context.insert<InstFakeUse>(Var);
+      }
    } else {
      Context.insert<InstFakeUse>(Var);
    }
@@ -725,6 +729,10 @@ void TargetLowering::addFakeDefUses(const Inst *Instr) {
  if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
    Context.insert<InstFakeDef>(Var64->getLo());
    Context.insert<InstFakeDef>(Var64->getHi());
+  } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
+    for (Variable *Var : VarVec->getContainers()) {
+      Context.insert<InstFakeDef>(Var);
+    }
  } else {
    Context.insert<InstFakeDef>(Dest);
  }

--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -256,6 +256,12 @@ public:
  /// Return whether a 64-bit Variable should be split into a Variable64On32.
  virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;

+  /// Return whether a Vector Variable should be split into a VariableVecOn32.
+  virtual bool shouldSplitToVariableVecOn32(Type Ty) const {
+    (void)Ty; // Ty is intentionally unused in the default implementation
+    return false; // default: no vector splitting unless a target overrides this
+  }
+
  bool hasComputedFrame() const { return HasComputedFrame; }
  /// Returns true if this function calls a function that has the "returns
  /// twice" attribute.
@@ -503,6 +509,9 @@ protected:
    const SizeT NumElements = typeNumElements(DestTy);

    Variable *T = Func->makeVariable(DestTy);
+    if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
+      VarVecOn32->initVecElement(Func);
+    }
    Context.insert<InstFakeDef>(T);

    for (SizeT I = 0; I < NumElements; ++I) {

--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -60,7 +60,8 @@ public:
  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *Instr, const CfgNode *NextNode) override;
-
+  void setImplicitRet(Variable *Ret) { ImplicitRet = Ret; } // stores Ret in ImplicitRet; NOTE(review): its use is not visible in this hunk
+  Variable *getImplicitRet() const { return ImplicitRet; } // returns the stored ImplicitRet pointer as-is (no null check here)
  SizeT getNumRegisters() const override { return RegMIPS32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
@@ -111,6 +112,10 @@ public:
    return Ty == IceType_i64;
  }

+  bool shouldSplitToVariableVecOn32(Type Ty) const override {
+    return isVectorType(Ty); // on MIPS32 every vector type is split into a VariableVecOn32
+  }
+
  // TODO(ascull): what is the best size of MIPS?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
@@ -621,9 +626,11 @@ public:
  void split64(Variable *Var);
  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
+  Operand *getOperandAtIndex(Operand *Operand, Type BaseType, uint32_t Index);

-  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
-                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);
+  void finishArgumentLowering(Variable *Arg, bool PartialOnStack,
+                              Variable *FramePtr, size_t BasicFrameOffset,
+                              size_t *InArgsSizeBytes);

  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());

@@ -642,6 +649,7 @@ public:
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
    /// be an I64 register pair.
    bool argInReg(Type Ty, uint32_t ArgNo, RegNumT *Reg);
+    void discardReg(RegNumT Reg) { GPRegsUsed |= RegisterAliases[Reg]; } // ORs Reg's alias set into GPRegsUsed, i.e. marks those registers as used

  private:
    // argInGPR is used to find if any GPR register is available for argument of
@@ -755,6 +763,7 @@ protected:
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  size_t PreservedRegsSizeBytes = 0;
+  Variable *ImplicitRet = nullptr; /// Implicit return

 private:
  ENABLE_MAKE_UNIQUE;

--- a/tests_lit/llvm2ice_tests/vector-mips.ll
+++ b/tests_lit/llvm2ice_tests/vector-mips.ll
+; This test checks support for vector types on MIPS32.
+
+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
+; RUN:   -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32 %s
+
+define internal i32 @test_0(<4 x i32> %a) #0 { ; returns element 0 of %a; the checks expect it in a0
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  ret i32 %vecext
+}
+; MIPS32-LABEL: test_0
+; MIPS32: move v0,a0
+
+define internal i32 @test_1(<4 x i32> %a) #0 { ; returns element 1 of %a; the checks expect it in a1
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 1
+  ret i32 %vecext
+}
+; MIPS32-LABEL: test_1
+; MIPS32: move v0,a1
+
+define internal i32 @test_2(<4 x i32> %a) #0 { ; returns element 2 of %a; the checks expect it in a2
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 2
+  ret i32 %vecext
+}
+; MIPS32-LABEL: test_2
+; MIPS32: move v0,a2
+
+define internal i32 @test_3(<4 x i32> %a) #0 { ; returns element 3 of %a; the checks expect it in a3
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 3
+  ret i32 %vecext
+}
+; MIPS32-LABEL: test_3
+; MIPS32: move v0,a3
+
+define internal float @test_4(<4 x float> %a) #0 { ; returns float element 1; the checks expect mtc1 from a1 into $f0
+entry:
+  %vecext = extractelement <4 x float> %a, i32 1
+  ret float %vecext
+}
+; MIPS32-LABEL: test_4
+; MIPS32: mtc1 a1,$f0
+
+define internal float @test_5(<4 x float> %a) #0 { ; returns float element 2; the checks expect mtc1 from a2 into $f0
+entry:
+  %vecext = extractelement <4 x float> %a, i32 2
+  ret float %vecext
+}
+; MIPS32-LABEL: test_5
+; MIPS32: mtc1 a2,$f0
+
+define internal i32 @test_6(<16 x i8> %a) #0 { ; sign-extends i8 element 0; the checks expect the low byte of a0 (andi 0xff)
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_6
+; MIPS32: andi a0,a0,0xff
+; MIPS32: sll a0,a0,0x18
+; MIPS32: sra a0,a0,0x18
+; MIPS32: move v0,a0
+
+define internal i32 @test_7(<16 x i8> %a) #0 { ; sign-extends i8 element 15; the checks expect the top byte of a3 (srl 0x18)
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 15
+  %conv = sext i8 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_7
+; MIPS32: srl a3,a3,0x18
+; MIPS32: sll a3,a3,0x18
+; MIPS32: sra a3,a3,0x18
+; MIPS32: move v0,a3
+
+define internal i32 @test_8(<8 x i16> %a) #0 { ; sign-extends i16 element 0; the checks expect the low half of a0 (andi 0xffff)
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_8
+; MIPS32: andi a0,a0,0xffff
+; MIPS32: sll a0,a0,0x10
+; MIPS32: sra a0,a0,0x10
+; MIPS32: move v0,a0
+
+define internal i32 @test_9(<8 x i16> %a) #0 { ; sign-extends i16 element 7; the checks expect the high half of a3 (srl 0x10)
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 7
+  %conv = sext i16 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_9
+; MIPS32: srl a3,a3,0x10
+; MIPS32: sll a3,a3,0x10
+; MIPS32: sra a3,a3,0x10
+; MIPS32: move v0,a3
+
+define internal i32 @test_10(<4 x i1> %a) #0 { ; sign-extends i1 element 0; the checks expect bit 0 of a0 (andi 0x1)
+entry:
+  %vecext = extractelement <4 x i1> %a, i32 0
+  %conv = sext i1 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_10
+; MIPS32: andi a0,a0,0x1
+; MIPS32: sll a0,a0,0x1f
+; MIPS32: sra a0,a0,0x1f
+; MIPS32: move v0,a0
+
+define internal i32 @test_11(<4 x i1> %a) #0 { ; sign-extends i1 element 2; the checks expect bit 0 of a2 (andi 0x1)
+entry:
+  %vecext = extractelement <4 x i1> %a, i32 2
+  %conv = sext i1 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_11
+; MIPS32: andi a2,a2,0x1
+; MIPS32: sll a2,a2,0x1f
+; MIPS32: sra a2,a2,0x1f
+; MIPS32: move v0,a2
+
+define internal i32 @test_12(<8 x i1> %a) #0 { ; sign-extends i1 element 0; the checks expect the low half of a0, then bit 0
+entry:
+  %vecext = extractelement <8 x i1> %a, i32 0
+  %conv = sext i1 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_12
+; MIPS32: andi a0,a0,0xffff
+; MIPS32: andi a0,a0,0x1
+; MIPS32: sll a0,a0,0x1f
+; MIPS32: sra a0,a0,0x1f
+; MIPS32: move v0,a0
+
+define internal i32 @test_13(<8 x i1> %a) #0 { ; sign-extends i1 element 7; the checks expect the high half of a3 (srl 0x10), then bit 0
+entry:
+  %vecext = extractelement <8 x i1> %a, i32 7
+  %conv = sext i1 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_13
+; MIPS32: srl a3,a3,0x10
+; MIPS32: andi a3,a3,0x1
+; MIPS32: sll a3,a3,0x1f
+; MIPS32: sra a3,a3,0x1f
+; MIPS32: move v0,a3
+
+define internal i32 @test_14(<16 x i1> %a) #0 { ; sign-extends i1 element 0; the checks expect the low byte of a0, then bit 0
+entry:
+  %vecext = extractelement <16 x i1> %a, i32 0
+  %conv = sext i1 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_14
+; MIPS32: andi a0,a0,0xff
+; MIPS32: andi a0,a0,0x1
+; MIPS32: sll a0,a0,0x1f
+; MIPS32: sra a0,a0,0x1f
+; MIPS32: move v0,a0
+
+define internal i32 @test_15(<16 x i1> %a) #0 { ; sign-extends i1 element 15; the checks expect the top byte of a3 (srl 0x18), then bit 0
+entry:
+  %vecext = extractelement <16 x i1> %a, i32 15
+  %conv = sext i1 %vecext to i32
+  ret i32 %conv
+}
+; MIPS32-LABEL: test_15
+; MIPS32: srl a3,a3,0x18
+; MIPS32: andi a3,a3,0x1
+; MIPS32: sll a3,a3,0x1f
+; MIPS32: sra a3,a3,0x1f
+; MIPS32: move v0,a3
+
+define internal i32 @test_16(i32 %i, <4 x i32> %a) #0 { ; adds %i to element 0 of %a; the checks expect %i in a0 and element 0 in a2
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %add = add nsw i32 %vecext, %i
+  ret i32 %add
+}
+; MIPS32-LABEL: test_16
+; MIPS32: addu a2,a2,a0
+; MIPS32: move v0,a2
+
+define internal i32 @test_17(i32 %i, <4 x i32> %a) #0 { ; adds %i to element 3 of %a; the checks expect element 3 to be loaded from the stack
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 3
+  %add = add nsw i32 %vecext, %i
+  ret i32 %add
+}
+; MIPS32-LABEL: test_17
+; MIPS32: lw v0,{{.*}}(sp)
+; MIPS32: addu v0,v0,a0
+
+define internal float @test_18(float %f, <4 x float> %a) #0 { ; adds %f to element 0 of %a; the checks expect element 0 in a2 and %f in $f12
+entry:
+  %vecext = extractelement <4 x float> %a, i32 0
+  %add = fadd float %vecext, %f
+  ret float %add
+}
+; MIPS32-LABEL: test_18
+; MIPS32: mtc1 a2,$f0
+; MIPS32: add.s $f0,$f0,$f12
+
+define internal float @test_19(float %f, <4 x float> %a) #0 { ; adds %f to element 3 of %a; the checks expect element 3 loaded from the stack, then mtc1
+entry:
+  %vecext = extractelement <4 x float> %a, i32 3
+  %add = fadd float %vecext, %f
+  ret float %add
+}
+; MIPS32-LABEL: test_19
+; MIPS32: lw v0,{{.*}}(sp)
+; MIPS32: mtc1 v0,$f0
+; MIPS32: add.s $f0,$f0,$f12
+
+define internal <4 x float> @test_20(i32 %addr_i, <4 x float> %addend) { ; loads a <4 x float> and adds %addend; the checks expect four scalar add.s
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x float>*
+  %loaded = load <4 x float>, <4 x float>* %addr, align 4
+  %result = fadd <4 x float> %addend, %loaded
+  ret <4 x float> %result
+}
+; MIPS32-LABEL: test_20
+; MIPS32: add.s
+; MIPS32: add.s
+; MIPS32: add.s
+; MIPS32: add.s
+
+define internal <4 x i32> @test_21(i32 %addr_i, <4 x i32> %addend) { ; loads a <4 x i32> and adds %addend; the checks expect four scalar adds
+entry:
+  %addr = inttoptr i32 %addr_i to <4 x i32>*
+  %loaded = load <4 x i32>, <4 x i32>* %addr, align 4
+  %result = add <4 x i32> %addend, %loaded
+  ret <4 x i32> %result
+}
+; MIPS32-LABEL: test_21
+; MIPS32: add
+; MIPS32: add
+; MIPS32: add
+; MIPS32: add