Commit 99165667 by Jim Stichnoth

Subzero: Use "pxor reg,reg" to load a floating-point scalar 0.0 value.

BUG= none R=jpp@chromium.org, sehr@chromium.org Review URL: https://codereview.chromium.org/1439363002 .
parent b8e49c18
......@@ -271,6 +271,10 @@ protected:
Variable *copyToReg8(Operand *Src, int32_t RegNum = Variable::NoRegister);
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
/// Returns a register containing all zeros, without affecting the FLAGS
/// register, using the best instruction for the type.
Variable *makeZeroedRegister(Type Ty, int32_t RegNum = Variable::NoRegister);
/// \name Returns a vector in a register with the given constant entries.
/// @{
Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
......
......@@ -29,6 +29,7 @@
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"
#include <cmath> // signbit()
#include <stack>
namespace Ice {
......@@ -1916,7 +1917,7 @@ void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
// If Dest already has a physical register, then only basic legalization
// is needed, as the source operand can be a register, immediate, or
// memory.
Src0Legal = legalize(Src0);
Src0Legal = legalize(Src0, Legal_Reg, Dest->getRegNum());
} else {
// If Dest could be a stack operand, then RI must be a physical register
// or a scalar integer immediate.
......@@ -5307,6 +5308,34 @@ template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
this, Context.getNode(), Func);
}
/// Creates a register of type Ty (optionally pinned to RegNum) and emits the
/// instruction(s) that set it to all zeros without touching FLAGS. Scalar
/// integer types are zeroed with a mov of the constant zero; scalar
/// floating-point and vector types are zeroed with "pxor reg, reg".
template <class Machine>
Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {
  Variable *ZeroReg = makeReg(Ty, RegNum);

  // Classify the type first; the actual instruction emission happens below so
  // that the FakeDef + pxor sequence is written only once.
  bool ZeroWithMov = false;
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
  case IceType_i64:
    ZeroWithMov = true;
    break;
  case IceType_f32:
  case IceType_f64:
    // TODO(stichnot): Use xorps/xorpd instead of pxor.
    break;
  default:
    // Anything that is neither a scalar integer nor a scalar float must be a
    // vector; every vector type shares the same pxor instruction.
    assert(isVectorType(Ty));
    break;
  }

  if (ZeroWithMov) {
    // Conservatively do "mov reg, 0" (rather than an xor idiom) so that FLAGS
    // is left unmodified.
    _mov(ZeroReg, Ctx->getConstantZero(Ty));
    return ZeroReg;
  }

  // pxor reads its destination, so give the register a FakeDef first;
  // otherwise its live range might be overestimated.
  Context.insert(InstFakeDef::create(Func, ZeroReg));
  _pxor(ZeroReg, ZeroReg);
  return ZeroReg;
}
// There is no support for loading or emitting vector constants, so the vector
// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
// initialized with register operations.
......@@ -5316,12 +5345,7 @@ template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
/// Returns a register of vector type Ty (optionally pinned to RegNum) whose
/// contents are all zeros.
// NOTE(review): this span looks like a diff rendering in which the previous
// body (makeReg ... "return Reg;") and its one-line replacement (the call to
// makeZeroedRegister) are shown together — confirm which lines are live
// before editing.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *Reg = makeReg(Ty, RegNum);
// Insert a FakeDef, since otherwise the live range of Reg might be
// overestimated.
Context.insert(InstFakeDef::create(Func, Reg));
// Zero the register by xor-ing it with itself.
_pxor(Reg, Reg);
return Reg;
// NOTE(review): as written, this statement follows an unconditional return
// and is unreachable; presumably it is the intended new body, delegating to
// makeZeroedRegister — verify.
return makeZeroedRegister(Ty, RegNum);
}
template <class Machine>
......@@ -5471,6 +5495,16 @@ Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
return Reg;
}
namespace {

/// Returns true iff Val is exactly positive zero (+0.0).
///
/// \tparam T a floating-point type (enforced at compile time).
/// \param Val the value to test.
/// \return true for +0.0; false for -0.0, NaN, and every non-zero value.
template <typename T> bool isPositiveZero(T Val) {
  static_assert(std::is_floating_point<T>::value,
                "Input type must be floating point");
  // "Val == 0" also holds for -0.0, so the sign bit has to be checked as
  // well. The call is qualified because <cmath> only guarantees the std::
  // overloads of signbit; an unqualified call on a fundamental type gets no
  // help from argument-dependent lookup.
  return Val == 0 && !std::signbit(Val);
}

} // end of anonymous namespace
template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) {
......@@ -5563,6 +5597,13 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// Convert a scalar floating point constant into an explicit memory
// operand.
if (isScalarFloatingType(Ty)) {
if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
if (isPositiveZero(ConstFloat->getValue()))
return makeZeroedRegister(Ty, RegNum);
} else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
if (isPositiveZero(ConstDouble->getValue()))
return makeZeroedRegister(Ty, RegNum);
}
Variable *Base = nullptr;
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
......
......@@ -537,13 +537,12 @@ return: ; preds = %entry, %sw.bb65, %s
; Check for float pool
; X86-LABEL: .rodata.cst4
; X86: 00000041 0000c0ff 0000803f 00008040
; X86: 0000c07f 00000000 0000003f 0000803e
; X86: 00000040
; X86: 0000c07f 0000003f 0000803e 00000040
; Check for double pool
; X86-LABEL: .rodata.cst8
; X86: 00000000 0000f8ff 00000000 0000f87f
; X86: 00000000 0000e03f 00000000 00000000
; X86: 55555555 5555d53f 00000000 0000d03f
; X86: 00000000 0000e03f 55555555 5555d53f
; X86: 00000000 0000d03f
; X86-LABEL: .text
......@@ -254,5 +254,5 @@ entry:
; Check for pooled constant reordering
; POOLEDCONSTANTS-LABEL: .rodata.cst4
; POOLEDCONSTANTS: 0000803e 0000803f 00000000 0000003f
; POOLEDCONSTANTS: 00008040 00000040
; POOLEDCONSTANTS: 0000803e 0000803f 0000003f 00008040
; POOLEDCONSTANTS: 00000040
......@@ -38,7 +38,8 @@ define internal float @undef_float() {
entry:
ret float undef
; CHECK-LABEL: undef_float
; CHECK: fld DWORD PTR {{.*}} .L$float$00000000
; CHECK: pxor [[REG:xmm.]],[[REG]]
; CHECK: fld
}
define internal <4 x i1> @undef_v4i1() {
......@@ -191,7 +192,8 @@ entry:
%val = insertelement <4 x float> %arg, float undef, i32 0
ret <4 x float> %val
; CHECK-LABEL: vector_insertelement_arg2
; CHECK: {{movss|insertps}} {{.*}},DWORD PTR {{.*}} .L$float$00000000
; CHECK: pxor [[REG:xmm.]],[[REG]]
; CHECK: {{movss|insertps}} {{.*}},[[REG]]
}
define internal float @vector_extractelement_v4f32_index_0() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment