ARM: Assign "actuals" at call site to the appropriate GPR/stack slot.

Actually assign arguments to r0-r3 at the call site. Previously this was left unhandled; there was only logic for pulling formal parameters out of r0-r3. Refactor the GPR counter and move it into a class so that the rounding up for i64 arguments lives in one place, shared by call sites and by the code that pulls out formal parameters. We might be able to use a similar pattern to count the FP/SIMD registers later.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1187513006.

ARM: Assign "actuals" at call site to the appropriate GPR/stack slot.
b0a8c24e · Jan Voung · cac003e8 · b0a8c24e · b0a8c24e · b0a8c24e
Commit b0a8c24e authored Jun 18, 2015 by Jan Voung
8 changed files
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -198,6 +198,13 @@ OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
    Vars[1] = ShiftVar;
 }
+InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP, // SP: the stack pointer variable to adjust
+                                           SizeT Amount, Operand *SrcAmount) // Amount: numeric value; SrcAmount: its operand form
+    : InstARM32(Func, InstARM32::Adjuststack, 2, SP), Amount(Amount) { // NOTE(review): 2 and SP are presumably the source count and the dest - confirm against InstARM32
+  addSource(SP); // source 0, printed as the first "sub" operand by emit()
+  addSource(SrcAmount); // source 1, printed as the second "sub" operand by emit()
+}
 InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
                         const CfgNode *TargetFalse, CondARM32::Cond Pred)
    : InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred),
@@ -631,6 +638,39 @@ void InstARM32Pop::dump(const Cfg *Func) const {
  }
 }
+void InstARM32AdjustStack::emit(const Cfg *Func) const { // Prints "sub <dest>, <src0>, <src1>" to the emit stream.
+  if (!ALLOW_DUMP)
+    return; // No textual output when ALLOW_DUMP is off.
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 2); // The constructor adds exactly two sources.
+  Str << "\t"
+      << "sub"
+      << "\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  getSrc(0)->emit(Func);
+  Str << ", ";
+  getSrc(1)->emit(Func);
+  Func->getTarget()->updateStackAdjustment(Amount); // Record the numeric Amount with the target; also skipped when ALLOW_DUMP is off.
+}
+void InstARM32AdjustStack::emitIAS(const Cfg *Func) const { // Placeholder: this emission path is not implemented yet.
+  (void)Func; // NOTE(review): Func is also used below, so this cast looks redundant.
+  llvm_unreachable("Not yet implemented");
+  Func->getTarget()->updateStackAdjustment(Amount); // NOTE(review): sits after llvm_unreachable, so presumably never runs - confirm.
+}
+void InstARM32AdjustStack::dump(const Cfg *Func) const { // Prints "<dest> = sub.i32 <src0>, <Amount> ; <src1>" to the dump stream.
+  if (!ALLOW_DUMP)
+    return; // No dump output when ALLOW_DUMP is off.
+  Ostream &Str = Func->getContext()->getStrDump();
+  getDest()->dump(Func);
+  Str << " = sub.i32 ";
+  getSrc(0)->dump(Func);
+  Str << ", " << Amount << " ; "; // Numeric amount first; its operand form follows the ';'.
+  getSrc(1)->dump(Func);
+}
 void InstARM32Push::emit(const Cfg *Func) const {
  if (!ALLOW_DUMP)
    return;

--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -258,6 +258,7 @@ public:
    k__Start = Inst::Target,
    Adc,
    Add,
+    Adjuststack,
    And,
    Asr,
    Bic,
@@ -606,6 +607,34 @@ private:
  const CfgNode *TargetFalse;
 };
+// AdjustStack instruction - subtracts the given amount from SP and
+// updates the target's stack adjustment during code emission.
+class InstARM32AdjustStack : public InstARM32 {
+  InstARM32AdjustStack() = delete; // Not default-constructible.
+  InstARM32AdjustStack(const InstARM32AdjustStack &) = delete; // Not copy-constructible.
+  InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete; // Not copy-assignable.
+public:
+  // Note: We need both Amount and SrcAmount. If Amount is too large then
+  // it needs to be copied to a register (so SrcAmount could be a register).
+  // However, we also need the numeric Amount for bookkeeping, and it's
+  // hard to pull that from the generic SrcAmount operand.
+  static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
+                                      Operand *SrcAmount) {
+    return new (Func->allocate<InstARM32AdjustStack>()) // Placement-new into storage obtained from Func->allocate.
+        InstARM32AdjustStack(Func, SP, Amount, SrcAmount);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); } // Presumably true when Inst's kind is Adjuststack.
+private:
+  InstARM32AdjustStack(Cfg *Func, Variable *SP, SizeT Amount,
+                       Operand *SrcAmount); // Private: instances are built through create().
+  const SizeT Amount; // Numeric amount kept for bookkeeping, fixed at construction.
+};
 // Call instruction (bl/blx).  Arguments should have already been pushed.
 // Technically bl and the register form of blx can be predicated, but we'll
 // leave that out until needed.

--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -120,9 +120,6 @@ ICEINSTICMP_TABLE
 #undef X
 } // end of namespace dummy1
-// The maximum number of arguments to pass in GPR registers.
-const uint32_t ARM32_MAX_GPR_ARG = 4;
 // Stack alignment
 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
@@ -132,6 +129,18 @@ uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
 }
+// Value is in bytes. Return Value adjusted to the next highest multiple
+// of the stack alignment required for the given type.
+uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
+  // Use natural alignment, except that normally (non-NaCl) ARM only
+  // aligns vectors to 8 bytes.
+  // TODO(jvoung): Check this ...
+  size_t typeAlignInBytes = typeWidthInBytes(Ty); // Default: alignment equals the type's byte width.
+  if (isVectorType(Ty))
+    typeAlignInBytes = 8; // Every vector type gets 8, whatever its width.
+  return Utils::applyAlignment(Value, typeAlignInBytes); // NOTE(review): assumes typeAlignInBytes is a value applyAlignment accepts (nonzero) - confirm.
+}
 } // end of anonymous namespace
 TargetARM32::TargetARM32(Cfg *Func)
@@ -377,7 +386,7 @@ void TargetARM32::emitVariable(const Variable *Var) const {
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
-  const bool SignExt = false;
+  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
@@ -389,13 +398,39 @@ void TargetARM32::emitVariable(const Variable *Var) const {
  Str << "]";
 }
+bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { // Tries to claim a (lo, hi) GPR pair for an i64; true on success.
+  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
+    return false; // No argument GPRs left; *Regs is not written.
+  int32_t RegLo, RegHi;
+  // Always start i64 registers at an even register, so this may end
+  // up padding away a register.
+  if (NumGPRRegsUsed % 2 != 0) {
+    ++NumGPRRegsUsed;
+  }
+  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; // NOTE(review): presumably relies on r0, r1, ... being consecutive register numbers - confirm.
+  ++NumGPRRegsUsed;
+  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; // High half takes the register right after the low half.
+  ++NumGPRRegsUsed;
+  // If this bumps us past the boundary, don't allocate to a register
+  // and leave any previously speculatively consumed registers as consumed.
+  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
+    return false; // *Regs is not written; the counter stays past the limit, so later requests fail too.
+  Regs->first = RegLo;
+  Regs->second = RegHi;
+  return true;
+}
+bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { // Tries to claim the next argument GPR for an i32; true on success.
+  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
+    return false; // No argument GPRs left; *Reg is not written.
+  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; // NOTE(review): presumably relies on r0, r1, ... being consecutive register numbers - confirm.
+  ++NumGPRRegsUsed;
+  return true;
+}
 void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
-  // The first few integer type parameters can use r0-r3, regardless of their
+  TargetARM32::CallingConv CC;
-  // position relative to the floating-point/vector arguments in the argument
-  // list. Floating-point and vector arguments can use q0-q3 (aka d0-d7,
-  // s0-s15).
-  unsigned NumGPRRegsUsed = 0;
  // For each register argument, replace Arg in the argument list with the
  // home register.  Then generate an instruction in the prolog to copy the
@@ -414,22 +449,8 @@ void TargetARM32::lowerArguments() {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
-      if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
+      std::pair<int32_t, int32_t> RegPair;
-        continue;
+      if (!CC.I64InRegs(&RegPair))
-      int32_t RegLo;
-      int32_t RegHi;
-      // Always start i64 registers at an even register, so this may end
-      // up padding away a register.
-      if (NumGPRRegsUsed % 2 != 0) {
-        ++NumGPRRegsUsed;
-      }
-      RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
-      ++NumGPRRegsUsed;
-      RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
-      ++NumGPRRegsUsed;
-      // If this bumps us past the boundary, don't allocate to a register
-      // and leave any previously speculatively consumed registers as consumed.
-      if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
@@ -439,9 +460,9 @@ void TargetARM32::lowerArguments() {
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
-      RegisterLo->setRegNum(RegLo);
+      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
-      RegisterHi->setRegNum(RegHi);
+      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
@@ -452,10 +473,9 @@ void TargetARM32::lowerArguments() {
      continue;
    } else {
      assert(Ty == IceType_i32);
-      if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
+      int32_t RegNum;
+      if (!CC.I32InReg(&RegNum))
        continue;
-      int32_t RegNum = RegARM32::Reg_r0 + NumGPRRegsUsed;
-      ++NumGPRRegsUsed;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
@@ -492,9 +512,7 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
-  if (isVectorType(Ty)) {
+  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
-    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
-  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
@@ -672,9 +690,10 @@ void TargetARM32::addProlog(CfgNode *Node) {
  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
-  unsigned NumGPRArgs = 0;
+  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
+    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
@@ -682,18 +701,15 @@ void TargetARM32::addProlog(CfgNode *Node) {
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
-    } else if (Ty == IceType_i64 && NumGPRArgs < ARM32_MAX_GPR_ARG) {
+    } else if (Ty == IceType_i64) {
-      // Start at an even register.
+      std::pair<int32_t, int32_t> DummyRegs;
-      if (NumGPRArgs % 2 == 1) {
+      InRegs = CC.I64InRegs(&DummyRegs);
-        ++NumGPRArgs;
+    } else {
-      }
+      assert(Ty == IceType_i32);
-      NumGPRArgs += 2;
+      int32_t DummyReg;
-      if (NumGPRArgs <= ARM32_MAX_GPR_ARG)
+      InRegs = CC.I32InReg(&DummyReg);
-        continue;
-    } else if (NumGPRArgs < ARM32_MAX_GPR_ARG) {
-      ++NumGPRArgs;
-      continue;
    }
+    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }
@@ -1314,10 +1330,97 @@ void TargetARM32::lowerBr(const InstBr *Inst) {
 void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;
+  NeedsStackAlignment = true;
-  // TODO(jvoung): assign arguments to registers and stack. Also reserve stack.
+  // Assign arguments to registers and stack. Also reserve stack.
-  if (Instr->getNumArgs()) {
+  TargetARM32::CallingConv CC;
+  // Pair of Arg Operand -> GPR number assignments.
+  llvm::SmallVector<std::pair<Operand *, int32_t>,
+                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
+  // Pair of Arg Operand -> stack offset.
+  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
+  int32_t ParameterAreaSizeBytes = 0;
+  // Classify each argument operand according to the location where the
+  // argument is passed.
+  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = Instr->getArg(i);
+    Type Ty = Arg->getType();
+    bool InRegs = false;
+    if (isVectorType(Ty)) {
+      UnimplementedError(Func->getContext()->getFlags());
+    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
+    } else if (Ty == IceType_i64) {
+      std::pair<int32_t, int32_t> Regs;
+      if (CC.I64InRegs(&Regs)) {
+        InRegs = true;
+        Operand *Lo = loOperand(Arg);
+        Operand *Hi = hiOperand(Arg);
+        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
+        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
+      }
+    } else {
+      assert(Ty == IceType_i32);
+      int32_t Reg;
+      if (CC.I32InReg(&Reg)) {
+        InRegs = true;
+        GPRArgs.push_back(std::make_pair(Arg, Reg));
+      }
+    }
+    if (!InRegs) {
+      ParameterAreaSizeBytes =
+          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
+      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+    }
+  }
+  // Adjust the parameter area so that the stack is aligned.  It is
+  // assumed that the stack is already aligned at the start of the
+  // calling sequence.
+  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+  // Subtract the appropriate amount for the argument area.  This also
+  // takes care of setting the stack adjustment during emission.
+  //
+  // TODO: If for some reason the call instruction gets dead-code
+  // eliminated after lowering, we would need to ensure that the
+  // pre-call and the post-call esp adjustment get eliminated as well.
+  if (ParameterAreaSizeBytes) {
+    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
+                                  Legal_Reg | Legal_Flex);
+    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
+  }
+  // Copy arguments that are passed on the stack to the appropriate
+  // stack locations.
+  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
+  for (auto &StackArg : StackArgs) {
+    ConstantInteger32 *Loc =
+        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
+    Type Ty = StackArg.first->getType();
+    OperandARM32Mem *Addr;
+    constexpr bool SignExt = false;
+    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
+      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
+    } else {
+      Variable *NewBase = Func->makeVariable(SP->getType());
+      lowerArithmetic(
+          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
+      Addr = formMemoryOperand(NewBase, Ty);
+    }
+    lowerStore(InstStore::create(Func, StackArg.first, Addr));
+  }
+  // Copy arguments to be passed in registers to the appropriate registers.
+  for (auto &GPRArg : GPRArgs) {
+    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
+    // Generate a FakeUse of register arguments so that they do not get
+    // dead code eliminated as a result of the FakeKill of scratch
+    // registers after the call.
+    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction.  Assign its result to a temporary
@@ -1361,6 +1464,9 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
+  // TODO(jvoung): Handle sandboxing.
+  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
@@ -1372,6 +1478,15 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+  // Add the appropriate offset to SP.  The call instruction takes care
+  // of resetting the stack offset during emission.
+  if (ParameterAreaSizeBytes) {
+    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
+                                  Legal_Reg | Legal_Flex);
+    Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
+    _add(SP, SP, AddAmount);
+  }
  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -159,6 +159,10 @@ protected:
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred));
  }
+  void _adjust_stack(int32_t Amount, Operand *SrcAmount) { // Inserts an InstARM32AdjustStack that operates on sp.
+    Context.insert(InstARM32AdjustStack::create(
+        Func, getPhysicalRegister(RegARM32::Reg_sp), Amount, SrcAmount)); // NOTE(review): Amount is int32_t here but create() takes SizeT - confirm the conversion is intended.
+  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred));
@@ -309,6 +313,33 @@ protected:
  VarList PhysicalRegisters[IceType_NUM];
  static IceString RegNames[];
+  // Helper class that understands the Calling Convention and register
+  // assignments. The first few integer type parameters can use r0-r3,
+  // regardless of their position relative to the floating-point/vector
+  // arguments in the argument list. Floating-point and vector arguments
+  // can use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can
+  // start with registers but extend beyond the available registers can be
+  // split between the registers and the stack. However, this is typically
+  // for passing GPR structs by value, and PNaCl transforms expand this out.
+  //
+  // Also, at the point before the call, the stack must be aligned.
+  class CallingConv {
+    CallingConv(const CallingConv &) = delete; // Not copy-constructible.
+    CallingConv &operator=(const CallingConv &) = delete; // Not copy-assignable.
+  public:
+    CallingConv() : NumGPRRegsUsed(0) {} // Starts with no argument GPRs consumed.
+    ~CallingConv() = default;
+    bool I64InRegs(std::pair<int32_t, int32_t> *Regs); // On success fills *Regs with (lo, hi) register numbers and returns true.
+    bool I32InReg(int32_t *Reg); // On success fills *Reg with a register number and returns true.
+    static constexpr uint32_t ARM32_MAX_GPR_ARG = 4; // Number of GPRs (r0-r3) usable for arguments.
+  private:
+    uint32_t NumGPRRegsUsed; // Argument GPRs consumed so far, including any skipped as padding for i64 pairs.
+  };
 private:
  ~TargetARM32() override {}
 };

--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -82,11 +82,36 @@ entry:
 ; OPTM1:      mov     DWORD PTR [esp+0xc]
 ; OPTM1:      call {{.*}} R_{{.*}}    ignore64BitArgNoInline
+; ARM32-LABEL: pass64BitArg
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      str     {{.*}}, [sp, #4]
+; ARM32:      str     {{.*}}, [sp]
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      str     {{.*}}, [sp, #4]
+; ARM32:      str     {{.*}}, [sp]
+; ARM32:      mov     r0
+; ARM32:      mov     r1
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      str     {{.*}}, [sp, #4]
+; ARM32:      str     {{.*}}, [sp]
+; ARM32:      mov     r0
+; ARM32:      mov     r1
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
 declare i32 @ignore64BitArgNoInline(i64, i32, i64)
 define internal i32 @pass64BitConstArg(i64 %a, i64 %b) {
 entry:
-  %call = call i32 @ignore64BitArgNoInline(i64 %a, i32 123, i64 -2401053092306725256)
+  %call = call i32 @ignore64BitArgNoInline(i64 %b, i32 123, i64 -2401053092306725256)
  ret i32 %call
 }
 ; CHECK-LABEL: pass64BitConstArg
@@ -112,6 +137,20 @@ entry:
 ; OPTM1-NOT:  mov
 ; OPTM1:      call {{.*}} R_{{.*}}    ignore64BitArgNoInline
+; ARM32-LABEL: pass64BitConstArg
+; ARM32:      sub     sp, {{.*}} #16
+; ARM32:      movw    [[REG1:r.*]], {{.*}} ; 0xbeef
+; ARM32:      movt    [[REG1:r.*]], {{.*}} ; 0xdead
+; ARM32:      movw    [[REG2:r.*]], {{.*}} ; 0x5678
+; ARM32:      movt    [[REG2:r.*]], {{.*}} ; 0x1234
+; ARM32:      str     [[REG1]], [sp, #4]
+; ARM32:      str     [[REG2]], [sp]
+; ARM32:      mov     r0, r2
+; ARM32:      mov     r1, r3
+; ARM32:      movw    r2, #123
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
+; ARM32:      add     sp, {{.*}} #16
 define internal i64 @return64BitArg(i64 %a) {
 entry:
  ret i64 %a

--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -2,13 +2,22 @@
 ; should be to the same operand, whether it's in a register or on the
 ; stack.
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   | FileCheck %s
+; RUN:   --target x8632 -i %s --args -O2 \
-; RUN: %if --need=allow_dump --command %p2i --filetype=asm --assemble \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
-; RUN:     --disassemble -i %s --args -O2 \
+; RUN: %if --need=allow_dump --need=target_X8632 --command %p2i --filetype=asm \
-; RUN:   | %if --need=allow_dump --command FileCheck %s
+; RUN:     --assemble --disassemble -i %s --args -O2 \
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
+; RUN:   | %if --need=allow_dump --need=target_X8632 --command FileCheck %s
-; RUN:   | FileCheck --check-prefix=OPTM1 %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -Om1 \
+; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=OPTM1 %s
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
 @__init_array_start = internal constant [0 x i8] zeroinitializer, align 4
 @__fini_array_start = internal constant [0 x i8] zeroinitializer, align 4
@@ -38,6 +47,14 @@ entry:
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
+;
+; ARM32-LABEL: CallIndirect
+; ARM32: blx [[REGISTER:r.*]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
 @fp_v = internal global [4 x i8] zeroinitializer, align 4
@@ -63,6 +80,12 @@ entry:
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
+;
+; ARM32-LABEL: CallIndirectGlobal
+; ARM32: blx [[REGISTER:r.*]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
 ; Calling an absolute address is used for non-IRT PNaCl pexes to directly
 ; access syscall trampolines. This is not really an indirect call, but
@@ -85,3 +108,12 @@ entry:
 ; OPTM1: e8 bc 03 01 00 call {{[0-9a-f]+}} {{.*}} R_386_PC32 *ABS*
 ; OPTM1: e8 bc 03 01 00 call {{[0-9a-f]+}} {{.*}} R_386_PC32 *ABS*
 ; OPTM1: e8 bc 03 01 00 call {{[0-9a-f]+}} {{.*}} R_386_PC32 *ABS*
+;
+; ARM32-LABEL: CallConst
+; ARM32: movw [[REGISTER:r.*]], #960
+; ARM32: movt [[REGISTER]], #1
+; ARM32: blx [[REGISTER]]
+; The legalization of the constant could be shared, but it isn't.
+; ARM32: movw [[REGISTER:r.*]], #960
+; ARM32: blx [[REGISTER]]
+; ARM32: blx [[REGISTER]]
--- a/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
@@ -25,8 +25,14 @@
 ; RUN:   --dis-flags=-t --target x8632 -i %s --args --verbose none \
 ; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=SYMTAB %s
-; Only checking symtab for ARM for now. TODO(jvoung): Need to lower
+; This is not really IAS, but we can switch when that is implemented.
-; arguments at callsite.
+; For now we can at least see the instructions / relocations.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s \
+; RUN:   --args --verbose none --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck \
+; RUN:   --check-prefix=IASARM32 %s
 ; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
 ; RUN:   --disassemble --dis-flags=-t --target arm32 -i %s \
 ; RUN:   --args --verbose none --skip-unimplemented \
@@ -69,31 +75,51 @@ entry:
 ; SYMTAB-DAG: 00000000 {{.*}} .data {{.*}} PrimitiveInit
 ; IAS: mov {{.*}},0x0 {{.*}} .data
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveInit
+; IASARM32: movt {{.*}} PrimitiveInit
+; IASARM32: bl
 ; SYMTAB-DAG: 00000000 {{.*}} .rodata {{.*}} PrimitiveInitConst
 ; IAS: mov {{.*}},0x0 {{.*}} .rodata
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveInitConst
+; IASARM32: movt {{.*}} PrimitiveInitConst
+; IASARM32: bl
 ; SYMTAB-DAG: 00000000 {{.*}} .bss {{.*}} PrimitiveInitStatic
 ; IAS: mov {{.*}},0x0 {{.*}} .bss
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveInitStatic
+; IASARM32: movt {{.*}} PrimitiveInitStatic
+; IASARM32: bl
 ; SYMTAB-DAG: 00000004 {{.*}} .bss {{.*}} PrimitiveUninit
 ; IAS: mov {{.*}},0x4 {{.*}} .bss
 ; IAS: call
+; IASARM32: movw {{.*}} PrimitiveUninit
+; IASARM32: movt {{.*}} PrimitiveUninit
+; IASARM32: bl
 ; SYMTAB-DAG: 00000004{{.*}}.data{{.*}}ArrayInit
 ; IAS: mov {{.*}},0x4 {{.*}} .data
 ; IAS: call
+; IASARM32: movw {{.*}} ArrayInit
+; IASARM32: movt {{.*}} ArrayInit
+; IASARM32: bl
 ; SYMTAB-DAG: 00000018 {{.*}} .data {{.*}} ArrayInitPartial
 ; IAS: mov {{.*}},0x18 {{.*}} .data
 ; IAS: call
+; IASARM32: movw {{.*}} ArrayInitPartial
+; IASARM32: movt {{.*}} ArrayInitPartial
+; IASARM32: bl
 ; SYMTAB-DAG: 00000008 {{.*}} .bss {{.*}} ArrayUninit
 ; IAS: mov {{.*}},0x8 {{.*}} .bss
 ; IAS: call
+; IASARM32: movw {{.*}} ArrayUninit
+; IASARM32: movt {{.*}} ArrayUninit
+; IASARM32: bl
 declare void @use(i32)

--- a/tests_lit/llvm2ice_tests/int-arg.ll
+++ b/tests_lit/llvm2ice_tests/int-arg.ll
@@ -235,3 +235,40 @@ entry:
 ; Test interleaving float/double and integer (different register streams on ARM).
 ; TODO(jvoung): Test once the S/D/Q regs are modeled.
+; Test that integers are passed correctly as arguments to a function.
+declare void @IntArgs(i32, i32, i32, i32, i32, i32)
+declare void @killRegisters()
+define void @test_passing_integers(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6) {
+  call void @killRegisters()
+  call void @IntArgs(i32 %arg6, i32 %arg5, i32 %arg4, i32 %arg3, i32 %arg2, i32 %arg1)
+  ret void
+}
+; CHECK-LABEL: test_passing_integers
+; CHECK-DAG: mov [[REG1:e.*]],DWORD PTR [esp+0x24]
+; CHECK-DAG: mov [[REG2:e.*]],DWORD PTR [esp+0x28]
+; CHECK-DAG: mov [[REG3:e.*]],DWORD PTR [esp+0x2c]
+; CHECK-DAG: mov [[REG4:e.*]],DWORD PTR [esp+0x30]
+; CHECK: mov DWORD PTR [esp]
+; CHECK: mov DWORD PTR [esp+0x4]
+; CHECK-DAG: mov DWORD PTR [esp+0x8],[[REG4]]
+; CHECK-DAG: mov DWORD PTR [esp+0xc],[[REG3]]
+; CHECK-DAG: mov DWORD PTR [esp+0x10],[[REG2]]
+; CHECK-DAG: mov DWORD PTR [esp+0x14],[[REG1]]
+; CHECK: call
+; ARM32-LABEL: test_passing_integers
+; ARM32-DAG: mov [[REG1:.*]], r1
+; ARM32-DAG: mov [[REG2:.*]], r2
+; ARM32-DAG: mov [[REG3:.*]], r3
+; ARM32: str [[REG2]], [sp]
+; ARM32: str [[REG1]], [sp, #4]
+; ARM32-DAG: mov r0
+; ARM32-DAG: mov r1
+; ARM32-DAG: mov r2
+; ARM32-DAG: mov r3, [[REG3]]
+; ARM32: bl