Commit 3f6b47d5 by John Porto

Subzero. ARM32. Removes memory legalization warts.

This CL removes two warts from the ARM32 backend: 1) during argument lowering, if a stack parameter is assigned a register, the backend creates a new Variable that references the stack location with the incoming argument, and _mov() it to the parameter. 2) During stack slot legalization, all _mov(Mem(), Reg) are converted to stores; and all _mov(Reg, Mem()) are converted to loads. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=kschimpf@google.com Review URL: https://codereview.chromium.org/1457683004 .
parent 174531ec
...@@ -765,7 +765,7 @@ void InstARM32Mov::emitSingleDestMultiSource(const Cfg *Func) const { ...@@ -765,7 +765,7 @@ void InstARM32Mov::emitSingleDestMultiSource(const Cfg *Func) const {
namespace { namespace {
bool isVariableWithoutRegister(const Operand *Op) { bool isVariableWithoutRegister(const Operand *Op) {
if (const auto *OpV = llvm::dyn_cast<const Variable>(Op)) { if (const auto *OpV = llvm::dyn_cast<Variable>(Op)) {
return !OpV->hasReg(); return !OpV->hasReg();
} }
return false; return false;
...@@ -789,84 +789,57 @@ void InstARM32Mov::emitSingleDestSingleSource(const Cfg *Func) const { ...@@ -789,84 +789,57 @@ void InstARM32Mov::emitSingleDestSingleSource(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
Variable *Dest = getDest(); Variable *Dest = getDest();
if (Dest->hasReg()) { if (!Dest->hasReg()) {
Type Ty = Dest->getType(); llvm::report_fatal_error("mov can't store.");
Operand *Src0 = getSrc(0); }
const bool IsVector = isVectorType(Ty);
const bool IsScalarFP = isScalarFloatingType(Ty); Operand *Src0 = getSrc(0);
const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0); if (isMemoryAccess(Src0)) {
const char *LoadOpcode = IsVector ? "vld1" : (IsScalarFP ? "vldr" : "ldr"); llvm::report_fatal_error("mov can't load.");
const bool IsVMove = (IsVector || IsScalarFP || CoreVFPMove); }
const char *RegMovOpcode = IsVMove ? "vmov" : "mov";
const char *ActualOpcode = isMemoryAccess(Src0) ? LoadOpcode : RegMovOpcode; Type Ty = Dest->getType();
// when vmov{c}'ing, we need to emit a width string. Otherwise, the const bool IsVector = isVectorType(Ty);
// assembler might be tempted to assume we want a vector vmov{c}, and that const bool IsScalarFP = isScalarFloatingType(Ty);
// is disallowed because ARM. const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
const char *NoWidthString = ""; const bool IsVMove = (IsVector || IsScalarFP || CoreVFPMove);
const char *WidthString = const char *Opcode = IsVMove ? "vmov" : "mov";
isMemoryAccess(Src0) // when vmov{c}'ing, we need to emit a width string. Otherwise, the
? (IsVector ? ".64" : getWidthString(Ty)) // assembler might be tempted to assume we want a vector vmov{c}, and that
: (!CoreVFPMove ? getVecWidthString(Ty) : NoWidthString); // is disallowed because ARM.
Str << "\t" << ActualOpcode; const char *WidthString = !CoreVFPMove ? getVecWidthString(Ty) : "";
const bool IsVInst = IsVMove || IsVector || IsScalarFP; Str << "\t" << Opcode;
if (IsVInst) { if (IsVMove) {
Str << getPredicate() << WidthString; Str << getPredicate() << WidthString;
} else {
Str << WidthString << getPredicate();
}
Str << "\t";
Dest->emit(Func);
Str << ", ";
Src0->emit(Func);
} else { } else {
Variable *Src0 = llvm::cast<Variable>(getSrc(0)); Str << WidthString << getPredicate();
assert(Src0->hasReg());
Type Ty = Src0->getType();
const bool IsVector = isVectorType(Ty);
const bool IsScalarFP = isScalarFloatingType(Ty);
const char *ActualOpcode =
IsVector ? "vst1" : (IsScalarFP ? "vstr" : "str");
const char *WidthString = IsVector ? ".64" : getWidthString(Ty);
Str << "\t" << ActualOpcode;
const bool IsVInst = IsVector || IsScalarFP;
if (IsVInst) {
Str << getPredicate() << WidthString;
} else {
Str << WidthString << getPredicate();
}
Str << "\t";
Src0->emit(Func);
Str << ", ";
Dest->emit(Func);
} }
Str << "\t";
Dest->emit(Func);
Str << ", ";
Src0->emit(Func);
} }
void InstARM32Mov::emitIASSingleDestSingleSource(const Cfg *Func) const { void InstARM32Mov::emitIASSingleDestSingleSource(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
Variable *Dest = getDest(); Variable *Dest = getDest();
Operand *Src0 = getSrc(0); Operand *Src0 = getSrc(0);
if (Dest->hasReg()) {
const Type DestTy = Dest->getType(); if (!Dest->hasReg()) {
const bool DestIsVector = isVectorType(DestTy); llvm::report_fatal_error("mov can't store.");
const bool DestIsScalarFP = isScalarFloatingType(DestTy); }
const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
if (DestIsVector || DestIsScalarFP || CoreVFPMove) if (isMemoryAccess(Src0)) {
return Asm->setNeedsTextFixup(); llvm::report_fatal_error("mov can't load.");
if (isMemoryAccess(Src0)) {
// TODO(kschimpf) Figure out how to do ldr on CoreVPFMove? (see
// emitSingleDestSingleSource, local variable LoadOpcode).
return Asm->ldr(Dest, Src0, getPredicate(), Func->getTarget());
}
return Asm->mov(Dest, Src0, getPredicate());
} else {
const Type Src0Type = Src0->getType();
const bool Src0IsVector = isVectorType(Src0Type);
const bool Src0IsScalarFP = isScalarFloatingType(Src0Type);
const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
if (Src0IsVector || Src0IsScalarFP || CoreVFPMove)
return Asm->setNeedsTextFixup();
return Asm->str(Src0, Dest, getPredicate(), Func->getTarget());
} }
const Type DestTy = Dest->getType();
const bool DestIsVector = isVectorType(DestTy);
const bool DestIsScalarFP = isScalarFloatingType(DestTy);
const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
if (DestIsVector || DestIsScalarFP || CoreVFPMove)
return Asm->setNeedsTextFixup();
return Asm->mov(Dest, Src0, getPredicate());
} }
void InstARM32Mov::emit(const Cfg *Func) const { void InstARM32Mov::emit(const Cfg *Func) const {
......
...@@ -135,7 +135,7 @@ public: ...@@ -135,7 +135,7 @@ public:
Operand *loOperand(Operand *Operand); Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand); Operand *hiOperand(Operand *Operand);
void finishArgumentLowering(Variable *Arg, Variable *FramePtr, void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes); size_t BasicFrameOffset, size_t *InArgsSizeBytes);
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const { bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
return CPUFeatures.hasFeature(I); return CPUFeatures.hasFeature(I);
...@@ -384,6 +384,7 @@ protected: ...@@ -384,6 +384,7 @@ protected:
// an assert around just in case there is some untested code path where Dest // an assert around just in case there is some untested code path where Dest
// is nullptr. // is nullptr.
assert(Dest != nullptr); assert(Dest != nullptr);
assert(!llvm::isa<OperandARM32Mem>(Src0));
auto *Instr = InstARM32Mov::create(Func, Dest, Src0, Pred); auto *Instr = InstARM32Mov::create(Func, Dest, Src0, Pred);
Context.insert(Instr); Context.insert(Instr);
...@@ -794,15 +795,15 @@ protected: ...@@ -794,15 +795,15 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred)); Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred));
} }
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1));
}
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred)); Context.insert(InstARM32Vmrs::create(Func, Pred));
} }
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
} }
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1));
}
void _vsqrt(Variable *Dest, Variable *Src, void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred));
...@@ -821,17 +822,27 @@ protected: ...@@ -821,17 +822,27 @@ protected:
// [OrigBaseReg, +/- Offset+StackAdjust]. // [OrigBaseReg, +/- Offset+StackAdjust].
Variable *newBaseRegister(int32_t Offset, int32_t StackAdjust, Variable *newBaseRegister(int32_t Offset, int32_t StackAdjust,
Variable *OrigBaseReg); Variable *OrigBaseReg);
/// Creates a new, legal StackVariable w.r.t. ARM's Immediate requirements. /// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset +
/// This method is not very smart: it will always create and return a new /// StackAdjust. The returned mem operand is a legal operand for accessing
/// StackVariable, even if Offset + StackAdjust is encodable. /// memory that is of type Ty.
StackVariable *legalizeStackSlot(Type Ty, int32_t Offset, int32_t StackAdjust, ///
Variable *OrigBaseReg, Variable **NewBaseReg, /// If [OrigBaseReg, #Offset+StackAdjust] is encodable, then the method
int32_t *NewBaseOffset); /// returns a Mem operand expressing it. Otherwise,
/// Legalizes Mov if its Source (or Destination) contains an invalid ///
/// immediate. /// if [*NewBaseReg, #Offset+StackAdjust-*NewBaseOffset] is encodable, the
void legalizeMovStackAddrImm(InstARM32Mov *Mov, int32_t StackAdjust, /// method will return that. Otherwise,
Variable *OrigBaseReg, Variable **NewBaseReg, ///
int32_t *NewBaseOffset); /// a new base register ip=OrigBaseReg+Offset+StackAdjust is created, and the
/// method returns [ip, #0].
OperandARM32Mem *createMemOperand(Type Ty, int32_t Offset,
int32_t StackAdjust, Variable *OrigBaseReg,
Variable **NewBaseReg,
int32_t *NewBaseOffset);
/// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves
/// to memory become store instructions, and moves from memory, loads.
void legalizeMov(InstARM32Mov *Mov, int32_t StackAdjust,
Variable *OrigBaseReg, Variable **NewBaseReg,
int32_t *NewBaseOffset);
TargetARM32Features CPUFeatures; TargetARM32Features CPUFeatures;
bool UsesFramePointer = false; bool UsesFramePointer = false;
......
...@@ -24,7 +24,9 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) { ...@@ -24,7 +24,9 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) {
; ASM-NEXT: sub sp, sp, #12 ; ASM-NEXT: sub sp, sp, #12
; ASM-NEXT: str r0, [sp, #8] ; ASM-NEXT: str r0, [sp, #8]
; ASM-NEXT: # [sp, #8] = def.pseudo
; ASM-NEXT: str r1, [sp, #4] ; ASM-NEXT: str r1, [sp, #4]
; ASM-NEXT: # [sp, #4] = def.pseudo
; DIS-LABEL:00000000 <mult_fwd_branches>: ; DIS-LABEL:00000000 <mult_fwd_branches>:
...@@ -58,6 +60,7 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) { ...@@ -58,6 +60,7 @@ define internal void @mult_fwd_branches(i32 %a, i32 %b) {
; ASM-NEXT: cmp r1, r2 ; ASM-NEXT: cmp r1, r2
; ASM-NEXT: movlt r0, #1 ; ASM-NEXT: movlt r0, #1
; ASM-NEXT: strb r0, [sp] ; ASM-NEXT: strb r0, [sp]
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: c: e3a00000 ; DIS-NEXT: c: e3a00000
; DIS-NEXT: 10: e59d1008 ; DIS-NEXT: 10: e59d1008
...@@ -172,5 +175,4 @@ end: ...@@ -172,5 +175,4 @@ end:
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
} }
...@@ -35,6 +35,7 @@ define internal i32 @add1ToR0(i32 %p) { ...@@ -35,6 +35,7 @@ define internal i32 @add1ToR0(i32 %p) {
; IASM-NEXT: .byte 0xe2 ; IASM-NEXT: .byte 0xe2
; ASM-NEXT: str r0, [sp, #4] ; ASM-NEXT: str r0, [sp, #4]
; ASM-NEXT: # [sp, #4] = def.pseudo
; DIS-NEXT: 4: e58d0004 ; DIS-NEXT: 4: e58d0004
; IASM-NEXT: .byte 0x4 ; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
...@@ -56,6 +57,7 @@ define internal i32 @add1ToR0(i32 %p) { ...@@ -56,6 +57,7 @@ define internal i32 @add1ToR0(i32 %p) {
; IASM-NEXT: .byte 0xe2 ; IASM-NEXT: .byte 0xe2
; ASM-NEXT: str r0, [sp] ; ASM-NEXT: str r0, [sp]
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: 10: e58d0000 ; DIS-NEXT: 10: e58d0000
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
......
...@@ -29,7 +29,7 @@ entry: ...@@ -29,7 +29,7 @@ entry:
; ASM-NEXT: movw ip, #4092 ; ASM-NEXT: movw ip, #4092
; ASM-NEXT: sub sp, sp, ip ; ASM-NEXT: sub sp, sp, ip
; ASM-NEXT: str r0, [sp, #4088] ; ASM-NEXT: str r0, [sp, #4088]
; ASM-NEXT: # [sp, #4088] = def.pseudo
; DIS-LABEL: 00000000 <foo>: ; DIS-LABEL: 00000000 <foo>:
; DIS-NEXT: 0: e300cffc ; DIS-NEXT: 0: e300cffc
; DIS-NEXT: 4: e04dd00c ; DIS-NEXT: 4: e04dd00c
...@@ -59,6 +59,7 @@ entry: ...@@ -59,6 +59,7 @@ entry:
; ASM-NEXT: ldr r1, [sp, #4088] ; ASM-NEXT: ldr r1, [sp, #4088]
; ASM-NEXT: mul r0, r0, r1 ; ASM-NEXT: mul r0, r0, r1
; ASM-NEXT: str r0, [sp, #4084] ; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: c: e59d0ff8 ; DIS-NEXT: c: e59d0ff8
; DIS-NEXT: 10: e59d1ff8 ; DIS-NEXT: 10: e59d1ff8
......
...@@ -57,7 +57,7 @@ entry: ...@@ -57,7 +57,7 @@ entry:
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xb ; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [fp, #-4] = def.pseudo
br label %next br label %next
; ASM-NEXT: b .Ltest_vla_in_loop$next ; ASM-NEXT: b .Ltest_vla_in_loop$next
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment