Commit 2655d962 by Jim Stichnoth

Subzero: Fix srem.i8/urem.i8 lowering for x86-64.

The problem was that the lowering sequence might lead to the rem result %ah being directly moved into an 8-bit register other than al/bl/cl/dl/ah/bh/ch/dh. This is not encodable, and attempting to encode it leads to actually moving from %spl instead of %ah. The machinery to handle this was already available - copy through a temporary with register class RCX86_IsAhRcvr. So this was just a matter of hooking it up for srem/urem. This CL also requires relaxing some checks in the register allocator for when the -reg-reserve option is used. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370 R=jpp@chromium.org Review URL: https://codereview.chromium.org/1909853002 .
parent fd07ad08
......@@ -81,13 +81,13 @@
X(Reg_r15l, 15, "r15b", Reg_r15, 0,1,0,0,1, 1,0,0,0,1, 0, 0,0,0,1,0, \
REGLIST3(RegX8664, r15, r15d, r15w)) \
/* High 8-bit registers. None are allowed for register allocation. */ \
X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rax, eax, ax)) \
X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rcx, ecx, cx)) \
X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rdx, edx, dx)) \
X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rbx, ebx, bx)) \
/* End of 8-bit register set */
//#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,
......
......@@ -91,7 +91,7 @@ int32_t findMinWeightIndex(
if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
MinWeightIndex = i;
}
assert(MinWeightIndex >= 0);
assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
return MinWeightIndex;
}
......@@ -686,7 +686,8 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
// All the weights are now calculated. Find the register with smallest weight.
int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
if (MinWeightIndex < 0 ||
Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
if (!Iter.Cur->mustHaveReg()) {
// Iter.Cur doesn't have priority over any other live ranges, so don't
// allocate any register to it, and move it to the Handled state.
......@@ -870,7 +871,10 @@ void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
Iter.Cur = Unhandled.back();
Unhandled.pop_back();
dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
assert(Target->getRegistersForVariable(Iter.Cur).any());
if (UseReserve)
assert(Target->getAllRegistersForVariable(Iter.Cur).any());
else
assert(Target->getRegistersForVariable(Iter.Cur).any());
Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
Iter.RegMaskUnfiltered =
RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
......
......@@ -2308,6 +2308,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
_mov(T_edx, Ctx->getConstantZero(Ty));
_mov(T, Src0, Eax);
_div(T_edx, Src1, T);
if (Ty == IceType_i8) {
// Register ah must be moved into one of {al,bl,cl,dl} before it can be
// moved into a general 8-bit register.
auto *T_AhRcvr = makeReg(Ty);
T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
_mov(T_AhRcvr, T_edx);
T_edx = T_AhRcvr;
}
_mov(Dest, T_edx);
} break;
case InstArithmetic::Srem: {
......@@ -2377,6 +2385,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
_mov(T, Src0, Eax);
_cbwdq(T_edx, T);
_idiv(T_edx, Src1, T);
if (Ty == IceType_i8) {
// Register ah must be moved into one of {al,bl,cl,dl} before it can be
// moved into a general 8-bit register.
auto *T_AhRcvr = makeReg(Ty);
T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
_mov(T_AhRcvr, T_edx);
T_edx = T_AhRcvr;
}
_mov(Dest, T_edx);
} break;
case InstArithmetic::Fadd:
......
......@@ -5,6 +5,15 @@
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols | FileCheck %s
; The following tests i8 srem/urem lowering on x86-64, specifically that the %ah
; result gets copied into %al/%bl/%cl/%dl before moved into its final register.
; This extra copy is forced by excluding al/bl/cl/dl by default (-reg-exclude),
; but allowing them to be used if absolutely necessary (-reg-reserve).
; RUN: %p2i --target=x8664 --filetype=obj --disassemble -i %s --args -O2 \
; RUN: -reg-exclude=al,bl,cl,dl -reg-reserve \
; RUN: -allow-externally-defined-symbols | FileCheck %s --check-prefix=REM
declare void @useInt(i32 %x)
define internal i32 @add8Bit(i32 %a, i32 %b) {
......@@ -103,6 +112,9 @@ entry:
}
; CHECK-LABEL: urem8Bit
; CHECK: div {{[abcd]l|BYTE PTR}}
; REM-LABEL: urem8Bit
; REM: div
; REM-NEXT: mov {{[abcd]}}l,ah
define internal i32 @urem8BitConst(i32 %a) {
entry:
......@@ -113,6 +125,7 @@ entry:
}
; CHECK-LABEL: urem8BitConst
; CHECK: div {{[abcd]l|BYTE PTR}}
; REM-LABEL: urem8BitConst
define internal i32 @sdiv8Bit(i32 %a, i32 %b) {
......@@ -146,6 +159,9 @@ entry:
}
; CHECK-LABEL: srem8Bit
; CHECK: idiv {{[abcd]l|BYTE PTR}}
; REM-LABEL: srem8Bit
; REM: idiv
; REM-NEXT: mov {{[abcd]}}l,ah
define internal i32 @srem8BitConst(i32 %a) {
entry:
......@@ -156,6 +172,7 @@ entry:
}
; CHECK-LABEL: srem8BitConst
; CHECK: idiv {{[abcd]l|BYTE PTR}}
; REM-LABEL: srem8BitConst
define internal i32 @shl8Bit(i32 %a, i32 %b) {
entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment