Commit 45a06236 by Matt Wala

Add support for passing and returning vectors in accordance with the x86 calling convention.

- Add TargetLowering::lowerArguments() as a new stage in TargetLowering.
- Add support for passing arguments/return values in XMM registers in the x86 target.

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/372113005
parent f37fbbe9
......@@ -110,6 +110,10 @@ void Cfg::deletePhis() {
}
}
// Runs the target-specific argument-lowering stage over this function.
// Delegates entirely to the TargetLowering object (e.g. the x86 target
// copies register-passed vector arguments out of XMM registers).
void Cfg::doArgLowering() {
getTarget()->lowerArguments();
}
void Cfg::doAddressOpt() {
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doAddressOpt();
......
......@@ -69,6 +69,7 @@ public:
// Manage arguments to the function.
void addArg(Variable *Arg);
const VarList &getArgs() const { return Args; }
// Non-const overload; presumably added so that argument lowering can
// rewrite the argument list in place — confirm against lowerArguments().
VarList &getArgs() { return Args; }
// Miscellaneous accessors.
// Returns a borrowed pointer to the target lowering object; the Cfg
// retains ownership (note the .get() on the owning smart pointer).
TargetLowering *getTarget() const { return Target.get(); }
......@@ -86,6 +87,7 @@ public:
void placePhiStores();
void deletePhis();
void doAddressOpt();
void doArgLowering();
void genCode();
void genFrame();
void livenessLightweight();
......
......@@ -62,6 +62,7 @@ typedef std::string IceString;
typedef std::list<Inst *> InstList;
typedef std::list<InstPhi *> PhiList;
typedef std::vector<Variable *> VarList;
typedef std::vector<Operand *> OperandList;
typedef std::vector<CfgNode *> NodeList;
typedef std::vector<Constant *> ConstantList;
......
......@@ -734,6 +734,14 @@ void InstX8632Movp::emit(const Cfg *Func) const {
Str << "\n";
}
void InstX8632Movp::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "movups." << getDest()->getType() << " ";
dumpDest(Func);
Str << ", ";
dumpSources(Func);
}
void InstX8632Movq::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
......@@ -746,14 +754,6 @@ void InstX8632Movq::emit(const Cfg *Func) const {
Str << "\n";
}
// Dump form of the packed move: "movups.<type> <dest>, <sources>".
void InstX8632Movp::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "movups." << getDest()->getType() << " ";
dumpDest(Func);
Str << ", ";
dumpSources(Func);
}
void InstX8632Movq::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "movq." << getDest()->getType() << " ";
......@@ -882,14 +882,19 @@ void InstX8632Push::emit(const Cfg *Func) const {
assert(getSrcSize() == 1);
Type Ty = getSrc(0)->getType();
Variable *Var = llvm::dyn_cast<Variable>(getSrc(0));
if ((Ty == IceType_f32 || Ty == IceType_f64) && Var && Var->hasReg()) {
if ((isVectorType(Ty) || Ty == IceType_f32 || Ty == IceType_f64) && Var &&
Var->hasReg()) {
// The xmm registers can't be directly pushed, so we fake it by
// decrementing esp and then storing to [esp].
Str << "\tsub\tesp, " << typeWidthInBytes(Ty) << "\n";
if (!SuppressStackAdjustment)
Func->getTarget()->updateStackAdjustment(typeWidthInBytes(Ty));
Str << "\tmov" << TypeX8632Attributes[Ty].SdSsString << "\t"
<< TypeX8632Attributes[Ty].WidthString << " [esp], ";
if (isVectorType(Ty)) {
Str << "\tmovups\txmmword ptr [esp], ";
} else {
Str << "\tmov" << TypeX8632Attributes[Ty].SdSsString << "\t"
<< TypeX8632Attributes[Ty].WidthString << " [esp], ";
}
getSrc(0)->emit(Func);
Str << "\n";
} else if (Ty == IceType_f64 && (!Var || !Var->hasReg())) {
......
......@@ -159,14 +159,18 @@ void Variable::replaceDefinition(Inst *Inst, const CfgNode *Node) {
setDefinition(Inst, Node);
}
void Variable::setIsArg(Cfg *Func) {
IsArgument = true;
if (DefNode == NULL)
return;
CfgNode *Entry = Func->getEntryNode();
if (DefNode == Entry)
return;
DefNode = NULL;
// Marks (or, with IsArg == false, unmarks) this variable as a formal
// argument of Func's function.  When marking, a variable whose single
// defining node is anything other than the entry node has that
// definition-node tracking cleared (DefNode = NULL); unmarking only
// flips the flag and leaves DefNode untouched.
void Variable::setIsArg(Cfg *Func, bool IsArg) {
  IsArgument = IsArg;
  if (!IsArg)
    return;
  if (DefNode == NULL)
    return;
  if (DefNode == Func->getEntryNode())
    return;
  DefNode = NULL;
}
IceString Variable::getName() const {
......
......@@ -339,7 +339,7 @@ public:
void setUse(const Inst *Inst, const CfgNode *Node);
bool getIsArg() const { return IsArgument; }
void setIsArg(Cfg *Func);
void setIsArg(Cfg *Func, bool IsArg = true);
int32_t getStackOffset() const { return StackOffset; }
void setStackOffset(int32_t Offset) { StackOffset = Offset; }
......
......@@ -148,6 +148,9 @@ public:
virtual void emitVariable(const Variable *Var, const Cfg *Func) const = 0;
// Performs target-specific argument lowering.
virtual void lowerArguments() = 0;
virtual void addProlog(CfgNode *Node) = 0;
virtual void addEpilog(CfgNode *Node) = 0;
......
......@@ -46,6 +46,7 @@ public:
return (typeWidthInBytes(Ty) + 3) & ~3;
}
virtual void emitVariable(const Variable *Var, const Cfg *Func) const;
virtual void lowerArguments();
virtual void addProlog(CfgNode *Node);
virtual void addEpilog(CfgNode *Node);
virtual void emitConstants() const;
......@@ -56,8 +57,8 @@ public:
// function calls using the 32-bit push instruction (though the
// latter could be done by directly writing to the stack).
void split64(Variable *Var);
void setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand);
......
; This file checks that Subzero generates code in accordance with the
; calling convention for vectors.
; NOTE: CHECK / OPTM1 lines containing the following strings may be
; subject to change:
;
; * movups: The movups instruction may be changed to movaps when the
; load / store operation is 16 byte aligned.
;
; * stack offsets: These may need to be changed if stack alignment
; support is implemented.
;
; * stack adjustment operations
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s
; The first five functions test that vectors are moved from their
; correct argument location to xmm0.
; The first vector argument arrives in xmm0, so returning it should need no move at all.
define <4 x float> @test_returning_arg0(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {
entry:
ret <4 x float> %arg0
; CHECK-LABEL: test_returning_arg0:
; CHECK-NOT: mov
; CHECK: ret
; OPTM1-LABEL: test_returning_arg0:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm0
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; The second vector argument arrives in xmm1 and must be moved into xmm0 to be returned.
define <4 x float> @test_returning_arg1(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {
entry:
ret <4 x float> %arg1
; CHECK-LABEL: test_returning_arg1:
; CHECK: movups xmm0, xmm1
; CHECK: ret
; OPTM1-LABEL: test_returning_arg1:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm1
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; The third vector argument arrives in xmm2 and must be moved into xmm0 to be returned.
define <4 x float> @test_returning_arg2(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {
entry:
ret <4 x float> %arg2
; CHECK-LABEL: test_returning_arg2:
; CHECK: movups xmm0, xmm2
; CHECK: ret
; OPTM1-LABEL: test_returning_arg2:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm2
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; The fourth vector argument arrives in xmm3 and must be moved into xmm0 to be returned.
define <4 x float> @test_returning_arg3(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {
entry:
ret <4 x float> %arg3
; CHECK-LABEL: test_returning_arg3:
; CHECK: movups xmm0, xmm3
; CHECK: ret
; OPTM1-LABEL: test_returning_arg3:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm3
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; The fifth vector argument overflows the four xmm slots and is passed
; on the stack, so it is loaded from memory into xmm0.
define <4 x float> @test_returning_arg4(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {
entry:
ret <4 x float> %arg4
; CHECK-LABEL: test_returning_arg4:
; CHECK: movups xmm0, xmmword ptr [esp+4]
; CHECK: ret
; OPTM1-LABEL: test_returning_arg4:
; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: ret
}
; The next five functions check that xmm arguments are handled
; correctly when interspersed with stack arguments in the argument
; list.
; Scalar arguments do not consume xmm slots: %arg0 still arrives in xmm0.
define <4 x float> @test_returning_interspersed_arg0(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {
entry:
ret <4 x float> %arg0
; CHECK-LABEL: test_returning_interspersed_arg0:
; CHECK-NOT: mov
; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg0:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm0
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; Scalar arguments do not consume xmm slots: %arg1 still arrives in xmm1.
define <4 x float> @test_returning_interspersed_arg1(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {
entry:
ret <4 x float> %arg1
; CHECK-LABEL: test_returning_interspersed_arg1:
; CHECK: movups xmm0, xmm1
; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg1:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm1
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; Scalar arguments do not consume xmm slots: %arg2 still arrives in xmm2.
define <4 x float> @test_returning_interspersed_arg2(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {
entry:
ret <4 x float> %arg2
; CHECK-LABEL: test_returning_interspersed_arg2:
; CHECK: movups xmm0, xmm2
; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg2:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm2
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; Scalar arguments do not consume xmm slots: %arg3 still arrives in xmm3.
define <4 x float> @test_returning_interspersed_arg3(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {
entry:
ret <4 x float> %arg3
; CHECK-LABEL: test_returning_interspersed_arg3:
; CHECK: movups xmm0, xmm3
; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg3:
; OPTM1: movups xmmword ptr [[LOC:.*]], xmm3
; OPTM1: movups xmm0, xmmword ptr [[LOC]]
; OPTM1: ret
}
; The fifth vector argument is passed on the stack even though scalar
; args are interspersed; it is loaded from memory into xmm0.
define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {
entry:
ret <4 x float> %arg4
; CHECK-LABEL: test_returning_interspersed_arg4:
; CHECK: movups xmm0, xmmword ptr [esp+44]
; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg4:
; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: ret
}
; Test that vectors are passed correctly as arguments to a function.
declare void @VectorArgs(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
declare void @killXmmRegisters()
; For an outgoing call, the first four vector args go in xmm0-xmm3 and
; the remaining vector args are stored to the stack (sub esp + movups).
define void @test_passing_vectors(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5, <4 x float> %arg6, <4 x float> %arg7, <4 x float> %arg8, <4 x float> %arg9) {
entry:
; Kills XMM registers so that no in-arg lowering code interferes
; with the test.
call void @killXmmRegisters()
call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4)
ret void
; CHECK-LABEL: test_passing_vectors:
; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+4]
; CHECK: sub esp, 16
; CHECK-NEXT: movups xmmword ptr [esp], [[ARG6]]
; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+36]
; CHECK: sub esp, 16
; CHECK-NEXT: movups xmmword ptr [esp], [[ARG5]]
; CHECK: movups xmm0, xmmword ptr [esp+116]
; CHECK: movups xmm1, xmmword ptr [esp+100]
; CHECK: movups xmm2, xmmword ptr [esp+84]
; CHECK: movups xmm3, xmmword ptr [esp+68]
; CHECK: call VectorArgs
; CHECK-NEXT: add esp, 32
; CHECK: ret
; OPTM1-LABEL: test_passing_vectors:
; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}}
; OPTM1: sub esp, 16
; OPTM1: movups xmmword ptr [esp], [[ARG6]]
; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}}
; OPTM1: sub esp, 16
; OPTM1-NEXT: movups xmmword ptr [esp], [[ARG5]]
; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: movups xmm1, xmmword ptr {{.*}}
; OPTM1: movups xmm2, xmmword ptr {{.*}}
; OPTM1: movups xmm3, xmmword ptr {{.*}}
; OPTM1: call VectorArgs
; OPTM1: add esp, 32
; OPTM1: ret
}
; Test that a vector returned from a function is recognized to be in
; xmm0.
declare <4 x float> @VectorReturn(<4 x float> %arg0)
; A vector return value is produced in xmm0; at -O2 it can feed the
; next call directly with no extra move.
define void @test_receiving_vectors(<4 x float> %arg0) {
entry:
%result = call <4 x float> @VectorReturn(<4 x float> %arg0)
%result2 = call <4 x float> @VectorReturn(<4 x float> %result)
ret void
; CHECK-LABEL: test_receiving_vectors:
; CHECK: call VectorReturn
; CHECK-NOT: movups xmm0
; CHECK: call VectorReturn
; CHECK: ret
; OPTM1-LABEL: test_receiving_vectors:
; OPTM1: call VectorReturn
; OPTM1: movups [[LOC:.*]], xmm0
; OPTM1: movups xmm0, [[LOC]]
; OPTM1: call VectorReturn
; OPTM1: ret
}
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment