Commit 5dfdf5fc by Jim Stichnoth

Subzero: Fix lowering of the fabs() intrinsic.

The pand instruction used to mask off the sign bit can operate on a register or an m128 memory location, but not on a 32-bit or 64-bit memory location. This means we need to make sure f32 and f64 operands are first loaded into a register.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1022123004
parent 927f7ccc
...@@ -3094,6 +3094,10 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -3094,6 +3094,10 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Type Ty = Src->getType(); Type Ty = Src->getType();
Variable *Dest = Instr->getDest(); Variable *Dest = Instr->getDest();
Variable *T = makeVectorOfFabsMask(Ty); Variable *T = makeVectorOfFabsMask(Ty);
// The pand instruction accepts a register or an m128 memory operand, but not
// a 32-bit or 64-bit memory operand, so if Src is an f32 or f64, we need to
// make sure it's in a register.
if (!isVectorType(Ty))
Src = legalizeToVar(Src);
_pand(T, Src); _pand(T, Src);
if (isVectorType(Ty)) if (isVectorType(Ty))
_movp(Dest, T); _movp(Dest, T);
......
...@@ -279,16 +279,18 @@ entry: ...@@ -279,16 +279,18 @@ entry:
%r4 = fadd float %r2, %r3 %r4 = fadd float %r2, %r3
ret float %r4 ret float %r4
} }
;;; Specially check that the pand instruction doesn't try to operate on a 32-bit
;;; (f32) memory operand, and instead uses two xmm registers.
; CHECK-LABEL: test_fabs_float ; CHECK-LABEL: test_fabs_float
; CHECK: pcmpeqd ; CHECK: pcmpeqd
; CHECK: psrld ; CHECK: psrld
; CHECK: pand ; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd ; CHECK: pcmpeqd
; CHECK: psrld ; CHECK: psrld
; CHECK: pand ; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd ; CHECK: pcmpeqd
; CHECK: psrld ; CHECK: psrld
; CHECK: pand ; CHECK: pand {{.*}}xmm{{.*}}xmm
define double @test_fabs_double(double %x) { define double @test_fabs_double(double %x) {
entry: entry:
...@@ -298,16 +300,18 @@ entry: ...@@ -298,16 +300,18 @@ entry:
%r4 = fadd double %r2, %r3 %r4 = fadd double %r2, %r3
ret double %r4 ret double %r4
} }
;;; Specially check that the pand instruction doesn't try to operate on a 64-bit
;;; (f64) memory operand, and instead uses two xmm registers.
; CHECK-LABEL: test_fabs_double ; CHECK-LABEL: test_fabs_double
; CHECK: pcmpeqd ; CHECK: pcmpeqd
; CHECK: psrlq ; CHECK: psrlq
; CHECK: pand ; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd ; CHECK: pcmpeqd
; CHECK: psrlq ; CHECK: psrlq
; CHECK: pand ; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd ; CHECK: pcmpeqd
; CHECK: psrlq ; CHECK: psrlq
; CHECK: pand ; CHECK: pand {{.*}}xmm{{.*}}xmm
define <4 x float> @test_fabs_v4f32(<4 x float> %x) { define <4 x float> @test_fabs_v4f32(<4 x float> %x) {
entry: entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment