Commit 28df6bad by Jim Stichnoth

Subzero: Fix a mul lowering error.

The low-level mul instruction may produce results in a register pair where one register is the explicit dest of the instruction, and the other register is defined through a FakeDef. If the FakeDef portion is ultimately unused, the FakeDef gets dead-code eliminated, and the register allocator doesn't know that the mul instruction affects the other register. On x86, this can silently produce incorrect code. On ARM, the emitter complains that the explicitly represented second dest variable does not have a register. The fix is to add a FakeUse of the FakeDef'd register. Unfortunately, this prevents the low-level mul instruction from ever being dead-code eliminated, but that's probably OK because it should have been eliminated at the high level. BUG= none R=eholk@chromium.org Review URL: https://codereview.chromium.org/1678523002 .
parent 45bec54f
...@@ -763,6 +763,7 @@ protected: ...@@ -763,6 +763,7 @@ protected:
// Model the modification to the second dest as a fake def. Note that the // Model the modification to the second dest as a fake def. Note that the
// def is not predicated. // def is not predicated.
Context.insert<InstFakeDef>(DestHi, DestLo); Context.insert<InstFakeDef>(DestHi, DestLo);
Context.insert<InstFakeUse>(DestHi);
} }
void _uxt(Variable *Dest, Variable *Src0, void _uxt(Variable *Dest, Variable *Src0,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
......
...@@ -1954,6 +1954,7 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -1954,6 +1954,7 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
// The mul instruction produces two dest variables, edx:eax. We create a // The mul instruction produces two dest variables, edx:eax. We create a
// fake definition of edx to account for this. // fake definition of edx to account for this.
Context.insert<InstFakeDef>(T_4Hi, T_4Lo); Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
Context.insert<InstFakeUse>(T_4Hi);
_mov(DestLo, T_4Lo); _mov(DestLo, T_4Lo);
_add(T_4Hi, T_1); _add(T_4Hi, T_1);
_mov(T_2, Src1Hi); _mov(T_2, Src1Hi);
......
; This tests against a lowering error in a multiply instruction that produces
; results in a low and high register. This is usually lowered as a mul
; instruction whose dest contains the low portion, and a FakeDef of the high
; portion. The problem is that if the high portion is unused (e.g. the multiply
; is followed by a truncation), the FakeDef may be eliminated, and the register
; allocator may assign the high register to a variable that is live across the
; mul instruction. This is incorrect because the mul instruction smashes the
; register.
; REQUIRES: allow_dump
; RUN: %p2i --target x8632 -i %s --filetype=asm --args -O2 -asm-verbose \
; RUN: --reg-use=eax,edx -reg-reserve | FileCheck --check-prefix=X8632 %s
; RUN: %p2i --target arm32 -i %s --filetype=asm --args -O2 -asm-verbose \
; RUN: | FileCheck --check-prefix=ARM32 %s
define internal i32 @mul(i64 %a, i64 %b, i32 %c) {
; Force an early use of %c.
store i32 %c, i32* undef, align 1
%m = mul i64 %a, %b
%t = trunc i64 %m to i32
; Make many uses of %c to give it high weight.
%t1 = add i32 %t, %c
%t2 = add i32 %t1, %c
%t3 = add i32 %t2, %c
ret i32 %t3
}
; For x8632, we want asm-verbose to print the stack offset assignment for lv$c
; ("local variable 'c'") in the prolog, and then have at least one use of lv$c
; in the body, i.e. don't register-allocate edx to %c.
; X8632-LABEL: mul
; X8632: lv$c =
; X8632: lv$c
; For arm32, the failure would manifest as a translation error - no register
; being allocated to the high operand, so we just check for successful
; translation.
; ARM32-LABEL: mul
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment