Commit 2655d962 by Jim Stichnoth

Subzero: Fix srem.i8/urem.i8 lowering for x86-64.

The problem was that the lowering sequence might lead to the rem result %ah being directly moved into an 8-bit register other than al/bl/cl/dl/ah/bh/ch/dh. This is not encodable, and attempting to encode it leads to actually moving from %spl instead of %ah. The machinery to handle this was already available - copy through a temporary with register class RCX86_IsAhRcvr. So this was just a matter of hooking it up for srem/urem. This CL also requires relaxing some checks in the register allocator for when the -reg-reserve option is used. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370 R=jpp@chromium.org Review URL: https://codereview.chromium.org/1909853002 .
parent fd07ad08
...@@ -81,13 +81,13 @@ ...@@ -81,13 +81,13 @@
X(Reg_r15l, 15, "r15b", Reg_r15, 0,1,0,0,1, 1,0,0,0,1, 0, 0,0,0,1,0, \ X(Reg_r15l, 15, "r15b", Reg_r15, 0,1,0,0,1, 1,0,0,0,1, 0, 0,0,0,1,0, \
REGLIST3(RegX8664, r15, r15d, r15w)) \ REGLIST3(RegX8664, r15, r15d, r15w)) \
/* High 8-bit registers. None are allowed for register allocation. */ \ /* High 8-bit registers. None are allowed for register allocation. */ \
X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rax, eax, ax)) \ REGLIST3(RegX8664, rax, eax, ax)) \
X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rcx, ecx, cx)) \ REGLIST3(RegX8664, rcx, ecx, cx)) \
X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rdx, edx, dx)) \ REGLIST3(RegX8664, rdx, edx, dx)) \
X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \ X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rbx, ebx, bx)) \ REGLIST3(RegX8664, rbx, ebx, bx)) \
/* End of 8-bit register set */ /* End of 8-bit register set */
//#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, //#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,
......
...@@ -91,7 +91,7 @@ int32_t findMinWeightIndex( ...@@ -91,7 +91,7 @@ int32_t findMinWeightIndex(
if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex]) if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
MinWeightIndex = i; MinWeightIndex = i;
} }
assert(MinWeightIndex >= 0); assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
return MinWeightIndex; return MinWeightIndex;
} }
...@@ -686,7 +686,8 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) { ...@@ -686,7 +686,8 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
// All the weights are now calculated. Find the register with smallest weight. // All the weights are now calculated. Find the register with smallest weight.
int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights); int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) { if (MinWeightIndex < 0 ||
Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
if (!Iter.Cur->mustHaveReg()) { if (!Iter.Cur->mustHaveReg()) {
// Iter.Cur doesn't have priority over any other live ranges, so don't // Iter.Cur doesn't have priority over any other live ranges, so don't
// allocate any register to it, and move it to the Handled state. // allocate any register to it, and move it to the Handled state.
...@@ -870,7 +871,10 @@ void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) { ...@@ -870,7 +871,10 @@ void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
Iter.Cur = Unhandled.back(); Iter.Cur = Unhandled.back();
Unhandled.pop_back(); Unhandled.pop_back();
dumpLiveRangeTrace("\nConsidering ", Iter.Cur); dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
assert(Target->getRegistersForVariable(Iter.Cur).any()); if (UseReserve)
assert(Target->getAllRegistersForVariable(Iter.Cur).any());
else
assert(Target->getRegistersForVariable(Iter.Cur).any());
Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur); Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
Iter.RegMaskUnfiltered = Iter.RegMaskUnfiltered =
RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur); RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
......
...@@ -2308,6 +2308,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -2308,6 +2308,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
_mov(T_edx, Ctx->getConstantZero(Ty)); _mov(T_edx, Ctx->getConstantZero(Ty));
_mov(T, Src0, Eax); _mov(T, Src0, Eax);
_div(T_edx, Src1, T); _div(T_edx, Src1, T);
if (Ty == IceType_i8) {
// Register ah must be moved into one of {al,bl,cl,dl} before it can be
// moved into a general 8-bit register.
auto *T_AhRcvr = makeReg(Ty);
T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
_mov(T_AhRcvr, T_edx);
T_edx = T_AhRcvr;
}
_mov(Dest, T_edx); _mov(Dest, T_edx);
} break; } break;
case InstArithmetic::Srem: { case InstArithmetic::Srem: {
...@@ -2377,6 +2385,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -2377,6 +2385,14 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
_mov(T, Src0, Eax); _mov(T, Src0, Eax);
_cbwdq(T_edx, T); _cbwdq(T_edx, T);
_idiv(T_edx, Src1, T); _idiv(T_edx, Src1, T);
if (Ty == IceType_i8) {
// Register ah must be moved into one of {al,bl,cl,dl} before it can be
// moved into a general 8-bit register.
auto *T_AhRcvr = makeReg(Ty);
T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
_mov(T_AhRcvr, T_edx);
T_edx = T_AhRcvr;
}
_mov(Dest, T_edx); _mov(Dest, T_edx);
} break; } break;
case InstArithmetic::Fadd: case InstArithmetic::Fadd:
......
...@@ -5,6 +5,15 @@ ...@@ -5,6 +5,15 @@
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \ ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols | FileCheck %s ; RUN: -allow-externally-defined-symbols | FileCheck %s
; The following tests i8 srem/urem lowering on x86-64, specifically that the %ah
; result gets copied into %al/%bl/%cl/%dl before moved into its final register.
; This extra copy is forced by excluding al/bl/cl/dl by default (-reg-exclude),
; but allowing them to be used if absolutely necessary (-reg-reserve).
; RUN: %p2i --target=x8664 --filetype=obj --disassemble -i %s --args -O2 \
; RUN: -reg-exclude=al,bl,cl,dl -reg-reserve \
; RUN: -allow-externally-defined-symbols | FileCheck %s --check-prefix=REM
declare void @useInt(i32 %x) declare void @useInt(i32 %x)
define internal i32 @add8Bit(i32 %a, i32 %b) { define internal i32 @add8Bit(i32 %a, i32 %b) {
...@@ -103,6 +112,9 @@ entry: ...@@ -103,6 +112,9 @@ entry:
} }
; CHECK-LABEL: urem8Bit ; CHECK-LABEL: urem8Bit
; CHECK: div {{[abcd]l|BYTE PTR}} ; CHECK: div {{[abcd]l|BYTE PTR}}
; REM-LABEL: urem8Bit
; REM: div
; REM-NEXT: mov {{[abcd]}}l,ah
define internal i32 @urem8BitConst(i32 %a) { define internal i32 @urem8BitConst(i32 %a) {
entry: entry:
...@@ -113,6 +125,7 @@ entry: ...@@ -113,6 +125,7 @@ entry:
} }
; CHECK-LABEL: urem8BitConst ; CHECK-LABEL: urem8BitConst
; CHECK: div {{[abcd]l|BYTE PTR}} ; CHECK: div {{[abcd]l|BYTE PTR}}
; REM-LABEL: urem8BitConst
define internal i32 @sdiv8Bit(i32 %a, i32 %b) { define internal i32 @sdiv8Bit(i32 %a, i32 %b) {
...@@ -146,6 +159,9 @@ entry: ...@@ -146,6 +159,9 @@ entry:
} }
; CHECK-LABEL: srem8Bit ; CHECK-LABEL: srem8Bit
; CHECK: idiv {{[abcd]l|BYTE PTR}} ; CHECK: idiv {{[abcd]l|BYTE PTR}}
; REM-LABEL: srem8Bit
; REM: idiv
; REM-NEXT: mov {{[abcd]}}l,ah
define internal i32 @srem8BitConst(i32 %a) { define internal i32 @srem8BitConst(i32 %a) {
entry: entry:
...@@ -156,6 +172,7 @@ entry: ...@@ -156,6 +172,7 @@ entry:
} }
; CHECK-LABEL: srem8BitConst ; CHECK-LABEL: srem8BitConst
; CHECK: idiv {{[abcd]l|BYTE PTR}} ; CHECK: idiv {{[abcd]l|BYTE PTR}}
; REM-LABEL: srem8BitConst
define internal i32 @shl8Bit(i32 %a, i32 %b) { define internal i32 @shl8Bit(i32 %a, i32 %b) {
entry: entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment