Subzero: Fix srem.i8/urem.i8 lowering for x86-64.

The problem was that the lowering sequence might lead to the rem result %ah being directly moved into an 8-bit register other than al/bl/cl/dl/ah/bh/ch/dh. This is not encodable, and attempting to encode it leads to actually moving from %spl instead of %ah. The machinery to handle this was already available - copy through a temporary with register class RCX86_IsAhRcvr. So this was just a matter of hooking it up for srem/urem. This CL also requires relaxing some checks in the register allocator for when the -reg-reserve option is used. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370 R=jpp@chromium.org Review URL: https://codereview.chromium.org/1909853002 .

Subzero: Fix srem.i8/urem.i8 lowering for x86-64.
2655d962 · Jim Stichnoth · fd07ad08 · 2655d962 · 2655d962 · 2655d962
Commit 2655d962 authored Apr 21, 2016 by Jim Stichnoth
4 changed files
--- a/src/IceInstX8664.def
+++ b/src/IceInstX8664.def
@@ -81,13 +81,13 @@
  X(Reg_r15l, 15, "r15b", Reg_r15, 0,1,0,0,1, 1,0,0,0,1, 0, 0,0,0,1,0,         \
    REGLIST3(RegX8664, r15, r15d, r15w))                                       \
  /* High 8-bit registers.  None are allowed for register allocation. */       \
-  X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
    REGLIST3(RegX8664, rax, eax, ax))                                          \
-  X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
    REGLIST3(RegX8664, rcx, ecx, cx))                                          \
-  X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
    REGLIST3(RegX8664, rdx, edx, dx))                                          \
-  X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
    REGLIST3(RegX8664, rbx, ebx, bx))                                          \
  /* End of 8-bit register set */
 //#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,

--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -91,7 +91,7 @@ int32_t findMinWeightIndex(
    if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
      MinWeightIndex = i;
  }
-  assert(MinWeightIndex >= 0);
+  assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
  return MinWeightIndex;
 }

@@ -686,7 +686,8 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
  // All the weights are now calculated. Find the register with smallest weight.
  int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);

-  if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
+  if (MinWeightIndex < 0 ||
+      Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
    if (!Iter.Cur->mustHaveReg()) {
      // Iter.Cur doesn't have priority over any other live ranges, so don't
      // allocate any register to it, and move it to the Handled state.
@@ -870,7 +871,10 @@ void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
    Iter.Cur = Unhandled.back();
    Unhandled.pop_back();
    dumpLiveRangeTrace("\nConsidering  ", Iter.Cur);
-    assert(Target->getRegistersForVariable(Iter.Cur).any());
+    if (UseReserve)
+      assert(Target->getAllRegistersForVariable(Iter.Cur).any());
+    else
+      assert(Target->getRegistersForVariable(Iter.Cur).any());
    Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
    Iter.RegMaskUnfiltered =
        RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);

--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2308,6 +2308,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
    _mov(T_edx, Ctx->getConstantZero(Ty));
    _mov(T, Src0, Eax);
    _div(T_edx, Src1, T);
+    if (Ty == IceType_i8) {
+      // Register ah must be moved into one of {al,bl,cl,dl} before it can be
+      // moved into a general 8-bit register.
+      auto *T_AhRcvr = makeReg(Ty);
+      T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
+      _mov(T_AhRcvr, T_edx);
+      T_edx = T_AhRcvr;
+    }
    _mov(Dest, T_edx);
  } break;
  case InstArithmetic::Srem: {
@@ -2377,6 +2385,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
    _mov(T, Src0, Eax);
    _cbwdq(T_edx, T);
    _idiv(T_edx, Src1, T);
+    if (Ty == IceType_i8) {
+      // Register ah must be moved into one of {al,bl,cl,dl} before it can be
+      // moved into a general 8-bit register.
+      auto *T_AhRcvr = makeReg(Ty);
+      T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
+      _mov(T_AhRcvr, T_edx);
+      T_edx = T_AhRcvr;
+    }
    _mov(Dest, T_edx);
  } break;
  case InstArithmetic::Fadd:

--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -5,6 +5,15 @@
 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
 ; RUN:   -allow-externally-defined-symbols | FileCheck %s

+; The following tests i8 srem/urem lowering on x86-64, specifically that the %ah
+; result gets copied into %al/%bl/%cl/%dl before moved into its final register.
+; This extra copy is forced by excluding al/bl/cl/dl by default (-reg-exclude),
+; but allowing them to be used if absolutely necessary (-reg-reserve).
+
+; RUN: %p2i --target=x8664 --filetype=obj --disassemble -i %s --args -O2 \
+; RUN:   -reg-exclude=al,bl,cl,dl -reg-reserve \
+; RUN:   -allow-externally-defined-symbols | FileCheck %s --check-prefix=REM
+
 declare void @useInt(i32 %x)

 define internal i32 @add8Bit(i32 %a, i32 %b) {
@@ -103,6 +112,9 @@ entry:
 }
 ; CHECK-LABEL: urem8Bit
 ; CHECK: div {{[abcd]l|BYTE PTR}}
+; REM-LABEL: urem8Bit
+; REM: div
+; REM-NEXT: mov {{[abcd]}}l,ah

 define internal i32 @urem8BitConst(i32 %a) {
 entry:
@@ -113,6 +125,7 @@ entry:
 }
 ; CHECK-LABEL: urem8BitConst
 ; CHECK: div {{[abcd]l|BYTE PTR}}
+; REM-LABEL: urem8BitConst


 define internal i32 @sdiv8Bit(i32 %a, i32 %b) {
@@ -146,6 +159,9 @@ entry:
 }
 ; CHECK-LABEL: srem8Bit
 ; CHECK: idiv {{[abcd]l|BYTE PTR}}
+; REM-LABEL: srem8Bit
+; REM: idiv
+; REM-NEXT: mov {{[abcd]}}l,ah

 define internal i32 @srem8BitConst(i32 %a) {
 entry:
@@ -156,6 +172,7 @@ entry:
 }
 ; CHECK-LABEL: srem8BitConst
 ; CHECK: idiv {{[abcd]l|BYTE PTR}}
+; REM-LABEL: srem8BitConst

 define internal i32 @shl8Bit(i32 %a, i32 %b) {
 entry: