ARM: Assign "actuals" at call site to the appropriate GPR/stack slot.

Actually assign arguments to r0-r3 at the call site. Previously this was left unhandled. There was only logic for pulling formal parameters out of r0-r3.

Refactor the GPR counter and move it into a class so that the rounding up for i64 arguments is in one place, shared by call sites and by the code that pulls formal parameters out of registers. We might be able to use a similar pattern to count the FP/SIMD registers later.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1187513006.

ARM: Assign "actuals" at call site to the appropriate GPR/stack slot.
b0a8c24e · Jan Voung · cac003e8 · b0a8c24e · b0a8c24e · b0a8c24e
Commit b0a8c24e authored Jun 18, 2015 by Jan Voung
8 changed files
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -198,6 +198,13 @@ OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
    Vars[1] = ShiftVar;
 }
+// Builds a stack-adjustment instruction. SP is handed to the InstARM32 base
+// constructor (presumably as the destination, since emit()/dump() read it
+// back through getDest()) and is also registered as source 0; SrcAmount is
+// registered as source 1. The base is told to expect 2 sources, matching the
+// two addSource() calls below. The numeric Amount is stored separately so it
+// can be passed to updateStackAdjustment() when the instruction is emitted.
+InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP,
+                                           SizeT Amount, Operand *SrcAmount)
+    : InstARM32(Func, InstARM32::Adjuststack, 2, SP), Amount(Amount) {
+  addSource(SP);        // source 0: the stack pointer being adjusted
+  addSource(SrcAmount); // source 1: operand form of the adjustment amount
+}
 InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
                         const CfgNode *TargetFalse, CondARM32::Cond Pred)
    : InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred),
@@ -631,6 +638,39 @@ void InstARM32Pop::dump(const Cfg *Func) const {
  }
 }
+// Writes the textual form "sub <dest>, <src0>, <src1>" to the emit stream and
+// then reports Amount to the target via updateStackAdjustment(). Does nothing
+// when ALLOW_DUMP is false.
+void InstARM32AdjustStack::emit(const Cfg *Func) const {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 2);
+  Out << "\t"
+      << "sub"
+      << "\t";
+  getDest()->emit(Func);
+  // Both sources are printed identically: a separator, then the operand.
+  for (SizeT SrcIdx = 0; SrcIdx < 2; ++SrcIdx) {
+    Out << ", ";
+    getSrc(SrcIdx)->emit(Func);
+  }
+  Func->getTarget()->updateStackAdjustment(Amount);
+}
+// Integrated-assembler emission of the stack adjustment; not implemented yet.
+void InstARM32AdjustStack::emitIAS(const Cfg *Func) const {
+  // This path is marked unreachable until the IAS encoding exists.
+  llvm_unreachable("Not yet implemented");
+  // Not executed while the llvm_unreachable above is in place.
+  // NOTE(review): presumably kept as a reminder that a real implementation
+  // must perform the same bookkeeping as emit() -- confirm.
+  Func->getTarget()->updateStackAdjustment(Amount);
+}
+// Prints a debug rendering of the instruction to the dump stream, in the form
+// "<dest> = sub.i32 <src0>, <Amount> ; <src1>". Does nothing when ALLOW_DUMP
+// is false.
+void InstARM32AdjustStack::dump(const Cfg *Func) const {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  getDest()->dump(Func);
+  Out << " = sub.i32 ";
+  getSrc(0)->dump(Func);
+  // The numeric amount comes first, then the operand that carries it.
+  Out << ", ";
+  Out << Amount;
+  Out << " ; ";
+  getSrc(1)->dump(Func);
+}
 void InstARM32Push::emit(const Cfg *Func) const {
  if (!ALLOW_DUMP)
    return;

--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -258,6 +258,7 @@ public:
    k__Start = Inst::Target,
    Adc,
    Add,
+    Adjuststack,
    And,
    Asr,
    Bic,
@@ -606,6 +607,34 @@ private:
  const CfgNode *TargetFalse;
 };
+// AdjustStack instruction - subtracts the given amount from SP and
+// updates the stack offset during code emission.
+class InstARM32AdjustStack : public InstARM32 {
+  // Instances are only created through create(); default construction and
+  // copying are disabled.
+  InstARM32AdjustStack() = delete;
+  InstARM32AdjustStack(const InstARM32AdjustStack &) = delete;
+  InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete;
+public:
+  // Note: We need both Amount and SrcAmount. If Amount is too large then
+  // it needs to be copied to a register (so SrcAmount could be a register).
+  // However, we also need the numeric Amount for bookkeeping, and it's
+  // hard to pull that from the generic SrcAmount operand.
+  //
+  // Storage for the instruction is obtained from Func->allocate<>() and the
+  // object is constructed in place.
+  static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
+                                      Operand *SrcAmount) {
+    return new (Func->allocate<InstARM32AdjustStack>())
+        InstARM32AdjustStack(Func, SP, Amount, SrcAmount);
+  }
+  // Textual assembly emission.
+  void emit(const Cfg *Func) const override;
+  // Integrated-assembler emission.
+  void emitIAS(const Cfg *Func) const override;
+  // Debug dump of the instruction.
+  void dump(const Cfg *Func) const override;
+  // Kind test against the Adjuststack instruction kind.
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
+private:
+  InstARM32AdjustStack(Cfg *Func, Variable *SP, SizeT Amount,
+                       Operand *SrcAmount);
+  // Numeric size of the adjustment, kept alongside the SrcAmount operand for
+  // stack-offset bookkeeping.
+  const SizeT Amount;
+};
 // Call instruction (bl/blx).  Arguments should have already been pushed.
 // Technically bl and the register form of blx can be predicated, but we'll
 // leave that out until needed.

--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -159,6 +159,10 @@ protected:
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred));
  }
+  // Creates an InstARM32AdjustStack that operates on the sp physical register
+  // and inserts it via Context. Amount is the numeric adjustment; SrcAmount is
+  // the operand that carries it.
+  // NOTE(review): Amount is int32_t here but create() takes SizeT, so the
+  // value is implicitly converted -- confirm callers never pass a negative.
+  void _adjust_stack(int32_t Amount, Operand *SrcAmount) {
+    Variable *StackPtr = getPhysicalRegister(RegARM32::Reg_sp);
+    InstARM32AdjustStack *Adjust =
+        InstARM32AdjustStack::create(Func, StackPtr, Amount, SrcAmount);
+    Context.insert(Adjust);
+  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred));
@@ -309,6 +313,33 @@ protected:
  VarList PhysicalRegisters[IceType_NUM];
  static IceString RegNames[];
+  // Helper class that understands the Calling Convention and register
+  // assignments. The first few integer type parameters can use r0-r3,
+  // regardless of their position relative to the floating-point/vector
+  // arguments in the argument list. Floating-point and vector arguments
+  // can use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can
+  // start with registers but extend beyond the available registers can be
+  // split between the registers and the stack. However, this is typically
+  // for passing GPR structs by value, and PNaCl transforms expand this out.
+  //
+  // Also, at the point before the call, the stack must be aligned.
+  //
+  // The object is stateful: it tracks how many GPR argument registers have
+  // been used so far, so one instance is presumably meant to be used per
+  // argument list.
+  class CallingConv {
+    // Non-copyable: the GPR usage counter should not be duplicated.
+    CallingConv(const CallingConv &) = delete;
+    CallingConv &operator=(const CallingConv &) = delete;
+  public:
+    // Starts with no GPR argument registers used.
+    CallingConv() : NumGPRRegsUsed(0) {}
+    ~CallingConv() = default;
+    // NOTE(review): defined out of line. Presumably returns true and fills
+    // *Regs with a register pair when an i64 argument can still be passed
+    // in GPRs -- confirm against the definition.
+    bool I64InRegs(std::pair<int32_t, int32_t> *Regs);
+    // NOTE(review): defined out of line. Presumably returns true and sets
+    // *Reg when an i32 argument can still be passed in a GPR -- confirm
+    // against the definition.
+    bool I32InReg(int32_t *Reg);
+    // Number of GPRs available for argument passing (r0-r3).
+    static constexpr uint32_t ARM32_MAX_GPR_ARG = 4;
+  private:
+    // Count of GPR argument registers used so far.
+    uint32_t NumGPRRegsUsed;
+  };
 private:
  ~TargetARM32() override {}
 };

--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -82,11 +82,36 @@ entry:
 ; OPTM1:      mov     DWORD PTR [esp+0xc]
 ; OPTM1:      call {{.*}} R_{{.*}}    ignore64BitArgNoInline
+; ARM32-LABEL: pass64BitArg
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      str     {{.*}}, [sp, #4]
+; ARM32:      str     {{.*}}, [sp]
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      str     {{.*}}, [sp, #4]
+; ARM32:      str     {{.*}}, [sp]
+; ARM32:      mov     r0
+; ARM32:      mov     r1
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      str     {{.*}}, [sp, #4]
+; ARM32:      str     {{.*}}, [sp]
+; ARM32:      mov     r0
+; ARM32:      mov     r1
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
 declare i32 @ignore64BitArgNoInline(i64, i32, i64)
 define internal i32 @pass64BitConstArg(i64 %a, i64 %b) {
 entry:
-  %call = call i32 @ignore64BitArgNoInline(i64 %a, i32 123, i64 -2401053092306725256)
+  %call = call i32 @ignore64BitArgNoInline(i64 %b, i32 123, i64 -2401053092306725256)
  ret i32 %call
 }
 ; CHECK-LABEL: pass64BitConstArg
@@ -112,6 +137,20 @@ entry:
 ; OPTM1-NOT:  mov
 ; OPTM1:      call {{.*}} R_{{.*}}    ignore64BitArgNoInline
+; ARM32-LABEL: pass64BitConstArg
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      movw    [[REG1:r.*]], {{.*}} ; 0xbeef
+; ARM32:      movt    [[REG1]], {{.*}} ; 0xdead
+; ARM32:      movw    [[REG2:r.*]], {{.*}} ; 0x5678
+; ARM32:      movt    [[REG2]], {{.*}} ; 0x1234
+; ARM32:      str     [[REG1]], [sp, #4]
+; ARM32:      str     [[REG2]], [sp]
+; ARM32:      mov     r0, r2
+; ARM32:      mov     r1, r3
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
 define internal i64 @return64BitArg(i64 %a) {
 entry:
  ret i64 %a

--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -2,13 +2,22 @@
 ; should be to the same operand, whether it's in a register or on the
 ; stack.
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   | FileCheck %s
+; RUN:   --target x8632 -i %s --args -O2 \
-; RUN: %if --need=allow_dump --command %p2i --filetype=asm --assemble \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
-; RUN:     --disassemble -i %s --args -O2 \
+; RUN: %if --need=allow_dump --need=target_X8632 --command %p2i --filetype=asm \
-; RUN:   | %if --need=allow_dump --command FileCheck %s
+; RUN:     --assemble --disassemble -i %s --args -O2 \
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
+; RUN:   | %if --need=allow_dump --need=target_X8632 --command FileCheck %s
-; RUN:   | FileCheck --check-prefix=OPTM1 %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -Om1 \
+; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=OPTM1 %s
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
 @__init_array_start = internal constant [0 x i8] zeroinitializer, align 4
 @__fini_array_start = internal constant [0 x i8] zeroinitializer, align 4
@@ -38,6 +47,14 @@ entry:
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
+;
+; ARM32-LABEL: CallIndirect
+; ARM32: blx [[REGISTER:r.*]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
 @fp_v = internal global [4 x i8] zeroinitializer, align 4
@@ -63,6 +80,12 @@ entry:
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
+;
+; ARM32-LABEL: CallIndirectGlobal
+; ARM32: blx [[REGISTER:r.*]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
 ; Calling an absolute address is used for non-IRT PNaCl pexes to directly
 ; access syscall trampolines. This is not really an indirect call, but
@@ -85,3 +108,12 @@ entry:
 ; OPTM1: e8 bc 03 01 00 call {{[0-9a-f]+}} {{.*}} R_386_PC32 *ABS*
 ; OPTM1: e8 bc 03 01 00 call {{[0-9a-f]+}} {{.*}} R_386_PC32 *ABS*
 ; OPTM1: e8 bc 03 01 00 call {{[0-9a-f]+}} {{.*}} R_386_PC32 *ABS*
+;
+; ARM32-LABEL: CallConst
+; ARM32: movw [[REGISTER:r.*]], #960
+; ARM32: movt [[REGISTER]], #1
+; ARM32: blx [[REGISTER]]
+; The legalization of the constant could be shared, but it isn't.
+; ARM32: movw [[REGISTER:r.*]], #960
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
--- a/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
@@ -25,8 +25,14 @@
 ; RUN:   --dis-flags=-t --target x8632 -i %s --args --verbose none \
 ; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=SYMTAB %s
-; Only checking symtab for ARM for now. TODO(jvoung): Need to lower
+; This is not really IAS, but we can switch when that is implemented.
-; arguments at callsite.
+; For now we can at least see the instructions / relocations.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s \
+; RUN:   --args --verbose none --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck \
+; RUN:   --check-prefix=IASARM32 %s
 ; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
 ; RUN:   --disassemble --dis-flags=-t --target arm32 -i %s \
 ; RUN:   --args --verbose none --skip-unimplemented \
@@ -69,31 +75,51 @@ entry:
 ; SYMTAB-DAG: 00000000 {{.*}} .data {{.*}} PrimitiveInit
 ; IAS: mov {{.*}},0x0 {{.*}} .data
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveInit
+; IASARM32: movt {{.*}} PrimitiveInit
+; IASARM32: bl
 ; SYMTAB-DAG: 00000000 {{.*}} .rodata {{.*}} PrimitiveInitConst
 ; IAS: mov {{.*}},0x0 {{.*}} .rodata
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveInitConst
+; IASARM32: movt {{.*}} PrimitiveInitConst
+; IASARM32: bl
 ; SYMTAB-DAG: 00000000 {{.*}} .bss {{.*}} PrimitiveInitStatic
 ; IAS: mov {{.*}},0x0 {{.*}} .bss
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveInitStatic
+; IASARM32: movt {{.*}} PrimitiveInitStatic
+; IASARM32: bl
 ; SYMTAB-DAG: 00000004 {{.*}} .bss {{.*}} PrimitiveUninit
 ; IAS: mov {{.*}},0x4 {{.*}} .bss
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveUninit
+; IASARM32: movt {{.*}} PrimitiveUninit
+; IASARM32: bl
 ; SYMTAB-DAG: 00000004{{.*}}.data{{.*}}ArrayInit
 ; IAS: mov {{.*}},0x4 {{.*}} .data
 ; IAS: call
+; IASARM32: movw {{.*}} ArrayInit
+; IASARM32: movt {{.*}} ArrayInit
+; IASARM32: bl
 ; SYMTAB-DAG: 00000018 {{.*}} .data {{.*}} ArrayInitPartial
 ; IAS: mov {{.*}},0x18 {{.*}} .data
 ; IAS: call
+; IASARM32: movw {{.*}} ArrayInitPartial
+; IASARM32: movt {{.*}} ArrayInitPartial
+; IASARM32: bl
 ; SYMTAB-DAG: 00000008 {{.*}} .bss {{.*}} ArrayUninit
 ; IAS: mov {{.*}},0x8 {{.*}} .bss
 ; IAS: call
+; IASARM32: movw {{.*}} ArrayUninit
+; IASARM32: movt {{.*}} ArrayUninit
+; IASARM32: bl
 declare void @use(i32)

--- a/tests_lit/llvm2ice_tests/int-arg.ll
+++ b/tests_lit/llvm2ice_tests/int-arg.ll
@@ -235,3 +235,40 @@ entry:
 ; Test interleaving float/double and integer (different register streams on ARM).
 ; TODO(jvoung): Test once the S/D/Q regs are modeled.
+; Test that integers are passed correctly as arguments to a function.
+declare void @IntArgs(i32, i32, i32, i32, i32, i32)
+declare void @killRegisters()
+define void @test_passing_integers(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6) {
+  call void @killRegisters()
+  call void @IntArgs(i32 %arg6, i32 %arg5, i32 %arg4, i32 %arg3, i32 %arg2, i32 %arg1)
+  ret void
+}
+; CHECK-LABEL: test_passing_integers
+; CHECK-DAG: mov [[REG1:e.*]],DWORD PTR [esp+0x24]
+; CHECK-DAG: mov [[REG2:e.*]],DWORD PTR [esp+0x28]
+; CHECK-DAG: mov [[REG3:e.*]],DWORD PTR [esp+0x2c]
+; CHECK-DAG: mov [[REG4:e.*]],DWORD PTR [esp+0x30]
+; CHECK: mov DWORD PTR [esp]
+; CHECK: mov DWORD PTR [esp+0x4]
+; CHECK-DAG: mov DWORD PTR [esp+0x8],[[REG4]]
+; CHECK-DAG: mov DWORD PTR [esp+0xc],[[REG3]]
+; CHECK-DAG: mov DWORD PTR [esp+0x10],[[REG2]]
+; CHECK-DAG: mov DWORD PTR [esp+0x14],[[REG1]]
+; CHECK: call
+; ARM32-LABEL: test_passing_integers
+; ARM32-DAG: mov [[REG1:.*]], r1
+; ARM32-DAG: mov [[REG2:.*]], r2
+; ARM32-DAG: mov [[REG3:.*]], r3
+; ARM32: str [[REG2]], [sp]
+; ARM32: str [[REG1]], [sp, #4]
+; ARM32-DAG: mov r0
+; ARM32-DAG: mov r1
+; ARM32-DAG: mov r2
+; ARM32-DAG: mov r3, [[REG3]]
+; ARM32: bl