Commit ccea793f by John Porto

Subzero. ARM32. Improve constant lowering.

parent a98091d4
......@@ -284,6 +284,87 @@ bool OperandARM32FlexImm::canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,
return false;
}
/// Constructs a floating-point modified-immediate flex operand of type Ty.
/// The Cfg argument is unused. ModifiedImm is stored exactly as given; no
/// validation or re-encoding happens here.
OperandARM32FlexFpImm::OperandARM32FlexFpImm(Cfg * /*Func*/, Type Ty,
uint32_t ModifiedImm)
: OperandARM32Flex(kFlexFpImm, Ty), ModifiedImm(ModifiedImm) {}
/// Decides whether the floating-point constant C can be written as an 8-bit
/// modified immediate. On success the encoding is stored through ModifiedImm
/// and true is returned; on failure *ModifiedImm is left untouched. Any type
/// other than f32/f64 is a fatal error.
///
/// See "A7.5.1 Operation of modified immediate constants, Floating-point" in
/// the ARM ARM for the bit layout checked below.
///
/// NOTE(review): both cases read the constant's bits through a
/// reinterpret_cast'ed pointer, which is a strict-aliasing violation in
/// standard C++. Consider copying the bytes (e.g. with memcpy) instead.
bool OperandARM32FlexFpImm::canHoldImm(Operand *C, uint32_t *ModifiedImm) {
  switch (C->getType()) {
  default:
    llvm::report_fatal_error("Unhandled fp constant type.");
  case IceType_f32: {
    // Layout of an encodable f32: sign | B | bbbbb | cdefgh | 19 zero bits,
    // where the five b bits are all equal and B is their complement.
    static constexpr uint32_t SignMask = 0x80000000u;
    static constexpr uint32_t InvertedMask = 0x40000000;
    static constexpr uint32_t RepeatedMask = 0x3E000000;
    static constexpr uint32_t PayloadMask = 0x01F80000;
    static constexpr uint32_t AllowedBits =
        SignMask | InvertedMask | RepeatedMask | PayloadMask;
    static_assert(AllowedBits == 0xFFF80000u,
                  "Invalid mask for f32 modified immediates.");
    const float F32 = llvm::cast<ConstantFloat>(C)->getValue();
    const uint32_t I32 = *reinterpret_cast<const uint32_t *>(&F32);

    const uint32_t Repeated = I32 & RepeatedMask;
    const bool HasStrayBits = (I32 & ~AllowedBits) != 0;
    const bool RepeatedIsUniform = Repeated == 0 || Repeated == RepeatedMask;
    const bool InvertedSet = (I32 & InvertedMask) != 0;
    const bool RepeatedSet = Repeated != 0;
    // B must be the complement of b, so the two may never agree.
    if (HasStrayBits || !RepeatedIsUniform || InvertedSet == RepeatedSet)
      return false;

    uint32_t Encoding = (I32 & PayloadMask) >> 19;
    if (I32 & SignMask)
      Encoding |= 0x80;
    if (RepeatedSet)
      Encoding |= 0x40;
    *ModifiedImm = Encoding;
    return true;
  }
  case IceType_f64: {
    // Same scheme applied to the upper word of the double: sign | B |
    // bbbbbbbb | cdefgh | 16 zero bits. The lower word must be all zeros.
    static constexpr uint32_t SignMask = 0x80000000u;
    static constexpr uint32_t InvertedMask = 0x40000000;
    static constexpr uint32_t RepeatedMask = 0x3FC00000;
    static constexpr uint32_t PayloadMask = 0x003F0000;
    static constexpr uint32_t AllowedBits =
        SignMask | InvertedMask | RepeatedMask | PayloadMask;
    static_assert(AllowedBits == 0xFFFF0000u,
                  "Invalid mask for f64 modified immediates.");
    const double F64 = llvm::cast<ConstantDouble>(C)->getValue();
    const uint64_t I64 = *reinterpret_cast<const uint64_t *>(&F64);
    if ((I64 & 0xFFFFFFFFu) != 0) {
      // Low 32 bits of the mantissa are not encodable.
      return false;
    }

    const uint32_t I32 = I64 >> 32;
    const uint32_t Repeated = I32 & RepeatedMask;
    const bool HasStrayBits = (I32 & ~AllowedBits) != 0;
    const bool RepeatedIsUniform = Repeated == 0 || Repeated == RepeatedMask;
    const bool InvertedSet = (I32 & InvertedMask) != 0;
    const bool RepeatedSet = Repeated != 0;
    // B must be the complement of b, so the two may never agree.
    if (HasStrayBits || !RepeatedIsUniform || InvertedSet == RepeatedSet)
      return false;

    uint32_t Encoding = (I32 & PayloadMask) >> 16;
    if (I32 & SignMask)
      Encoding |= 0x80;
    if (RepeatedSet)
      Encoding |= 0x40;
    *ModifiedImm = Encoding;
    return true;
  }
  }
}
/// Constructs the flex operand that stands for the 0.0 immediate, typed as Ty.
/// The Cfg argument is unused.
OperandARM32FlexFpZero::OperandARM32FlexFpZero(Cfg * /*Func*/, Type Ty)
: OperandARM32Flex(kFlexFpZero, Ty) {}
OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
ShiftKind ShiftOp, Operand *ShiftAmt)
: OperandARM32Flex(kFlexReg, Ty), Reg(Reg), ShiftOp(ShiftOp),
......@@ -557,15 +638,18 @@ template <> void InstARM32Tst::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1,
InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) {
HasSideEffects = true;
addSource(Src0);
addSource(Src1);
}
InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {}
: InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {
HasSideEffects = true;
}
InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src,
CondARM32::Cond Predicate)
......@@ -605,6 +689,7 @@ template <> const char *InstARM32Lsr::Opcode = "lsr";
template <> const char *InstARM32Mul::Opcode = "mul";
template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Rsc::Opcode = "rsc";
template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub";
......@@ -613,11 +698,13 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls";
// Cmp-like ops
template <> const char *InstARM32Cmn::Opcode = "cmn";
template <> const char *InstARM32Cmp::Opcode = "cmp";
template <> const char *InstARM32Tst::Opcode = "tst";
......@@ -1701,6 +1788,67 @@ void OperandARM32FlexImm::dump(const Cfg * /* Func */, Ostream &Str) const {
Str << "#(" << Imm << " ror 2*" << RotateAmt << ")";
}
namespace {
// Bit fields of an 8-bit floating-point modified immediate (abcdefgh).
static constexpr uint32_t a = 0x80;
static constexpr uint32_t b = 0x40;
static constexpr uint32_t cdefgh = 0x3F;
static constexpr uint32_t AllowedBits = a | b | cdefgh;
static_assert(AllowedBits == 0xFF,
              "Invalid mask for f32/f64 constant rematerialization.");

/// Expands the 8-bit modified immediate ModifiedImm (abcdefgh) into the
/// floating-point value it denotes. Bits above the low eight are ignored.
///
/// The encoded value is (-1)^a * (16 + efgh) / 16 * 2^(E - 3), with
/// E = NOT(b):c:d. It is computed arithmetically instead of assembling the
/// IEEE-754 bit pattern and reading it through a reinterpret_cast'ed pointer,
/// which violates strict aliasing. Every intermediate is exactly representable
/// as a float, so the result is bit-identical to the pattern-based expansion.
///
/// There's no loss in always returning the modified immediate as float.
/// TODO(jpp): returning a double causes problems when outputting the constants
/// for filetype=asm. Why?
float materializeFloatImmediate(uint32_t ModifiedImm) {
  // 2^(E - 3) for E in [0, 7].
  static constexpr float Scale[8] = {0.125f, 0.25f, 0.5f, 1.0f,
                                     2.0f,   4.0f,  8.0f, 16.0f};
  const uint32_t cd = (ModifiedImm & cdefgh) >> 4;
  const uint32_t efgh = ModifiedImm & 0xF;
  // The top exponent bit is the complement of b.
  const uint32_t E = ((ModifiedImm & b) ? 0u : 4u) | cd;
  const float Magnitude = Scale[E] * static_cast<float>(16u + efgh) / 16.0f;
  return (ModifiedImm & a) ? -Magnitude : Magnitude;
}
} // end of anonymous namespace
/// Writes the operand to the function's emit stream as "#<value>", followed by
/// an "@ Modified: <imm>" annotation carrying the raw encoding. Does nothing
/// when BuildDefs::dump() is false; any type other than f32/f64 is fatal.
void OperandARM32FlexFpImm::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  const bool IsScalarFp = Ty == IceType_f32 || Ty == IceType_f64;
  if (!IsScalarFp)
    llvm::report_fatal_error("Invalid flex fp imm type.");
  const float Value = materializeFloatImmediate(ModifiedImm);
  Str << "#" << Value << " @ Modified: " << ModifiedImm;
}
/// Dumps the operand to Str as "#<value>" followed by the vector-width string
/// for its type. Does nothing when BuildDefs::dump() is false.
void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const {
  if (BuildDefs::dump()) {
    const float Value = materializeFloatImmediate(ModifiedImm);
    Str << "#" << Value << InstARM32::getVecWidthString(Ty);
  }
}
/// Writes the literal "#0.0" to the function's emit stream. Does nothing when
/// BuildDefs::dump() is false; any type other than f32/f64 is fatal.
void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  const bool IsScalarFp = Ty == IceType_f32 || Ty == IceType_f64;
  if (!IsScalarFp)
    llvm::report_fatal_error("Invalid flex fp imm type.");
  Str << "#0.0";
}
/// Dumps the operand to Str as "#0.0" followed by the vector-width string for
/// its type. Does nothing when BuildDefs::dump() is false.
void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const {
  if (BuildDefs::dump()) {
    Str << "#0.0" << InstARM32::getVecWidthString(Ty);
  }
}
void OperandARM32FlexReg::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......@@ -1741,6 +1889,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Lsr>;
template class InstARM32ThreeAddrGPR<InstARM32::Mul>;
template class InstARM32ThreeAddrGPR<InstARM32::Orr>;
template class InstARM32ThreeAddrGPR<InstARM32::Rsb>;
template class InstARM32ThreeAddrGPR<InstARM32::Rsc>;
template class InstARM32ThreeAddrGPR<InstARM32::Sbc>;
template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
template class InstARM32ThreeAddrGPR<InstARM32::Sub>;
......@@ -1749,6 +1898,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>;
......@@ -1768,6 +1918,7 @@ template class InstARM32UnaryopFP<InstARM32::Vsqrt>;
template class InstARM32FourAddrGPR<InstARM32::Mla>;
template class InstARM32FourAddrGPR<InstARM32::Mls>;
template class InstARM32CmpLike<InstARM32::Cmn>;
template class InstARM32CmpLike<InstARM32::Cmp>;
template class InstARM32CmpLike<InstARM32::Tst>;
......
......@@ -40,6 +40,8 @@ public:
kMem,
kFlexStart,
kFlexImm = kFlexStart,
kFlexFpImm,
kFlexFpZero,
kFlexReg,
kFlexEnd = kFlexReg
};
......@@ -205,6 +207,59 @@ private:
uint32_t RotateAmt;
};
/// Modified Floating-point constant: a flex operand holding a floating-point
/// immediate in its 8-bit "modified immediate" encoding. Instances are created
/// through create(); default construction and copying are disabled.
class OperandARM32FlexFpImm : public OperandARM32Flex {
OperandARM32FlexFpImm() = delete;
OperandARM32FlexFpImm(const OperandARM32FlexFpImm &) = delete;
OperandARM32FlexFpImm &operator=(const OperandARM32FlexFpImm &) = delete;
public:
/// Allocates storage from Func and constructs the operand in place.
/// ModifiedImm is the already-encoded immediate (see canHoldImm()).
static OperandARM32FlexFpImm *create(Cfg *Func, Type Ty,
uint32_t ModifiedImm) {
return new (Func->allocate<OperandARM32FlexFpImm>())
OperandARM32FlexFpImm(Func, Ty, ModifiedImm);
}
void emit(const Cfg *Func) const override;
using OperandARM32::dump;
void dump(const Cfg *Func, Ostream &Str) const override;
/// Returns true when Operand's kind is kFlexFpImm.
static bool classof(const Operand *Operand) {
return Operand->getKind() == static_cast<OperandKind>(kFlexFpImm);
}
/// Returns true if the constant C is encodable as a modified immediate, in
/// which case the encoding is written through ModifiedImm.
static bool canHoldImm(Operand *C, uint32_t *ModifiedImm);
private:
OperandARM32FlexFpImm(Cfg *Func, Type Ty, uint32_t ModifiedImm);
/// The encoded immediate, as passed to create().
uint32_t ModifiedImm;
};
/// An operand for representing the 0.0 immediate in vcmp. It carries no
/// payload beyond its type. Instances are created through create(); default
/// construction and copying are disabled.
class OperandARM32FlexFpZero : public OperandARM32Flex {
OperandARM32FlexFpZero() = delete;
OperandARM32FlexFpZero(const OperandARM32FlexFpZero &) = delete;
OperandARM32FlexFpZero &operator=(const OperandARM32FlexFpZero &) = delete;
public:
/// Allocates storage from Func and constructs the operand in place.
static OperandARM32FlexFpZero *create(Cfg *Func, Type Ty) {
return new (Func->allocate<OperandARM32FlexFpZero>())
OperandARM32FlexFpZero(Func, Ty);
}
void emit(const Cfg *Func) const override;
using OperandARM32::dump;
void dump(const Cfg *Func, Ostream &Str) const override;
/// Returns true when Operand's kind is kFlexFpZero.
static bool classof(const Operand *Operand) {
return Operand->getKind() == static_cast<OperandKind>(kFlexFpZero);
}
private:
OperandARM32FlexFpZero(Cfg *Func, Type Ty);
};
/// Shifted register variant.
class OperandARM32FlexReg : public OperandARM32Flex {
OperandARM32FlexReg() = delete;
......@@ -289,6 +344,7 @@ public:
Bic,
Br,
Call,
Cmn,
Cmp,
Clz,
Dmb,
......@@ -312,6 +368,7 @@ public:
Ret,
Rev,
Rsb,
Rsc,
Sbc,
Sdiv,
Str,
......@@ -328,6 +385,7 @@ public:
Vcmp,
Vcvt,
Vdiv,
Veor,
Vmrs,
Vmul,
Vsqrt,
......@@ -609,6 +667,7 @@ private:
/// Builds an instruction of kind K with destination Dest and two sources
/// (Src0, then Src1), guarded by Predicate. HasSideEffects is set to SetFlags.
/// NOTE(review): presumably this keeps a flag-setting instruction from being
/// treated as dead when Dest is unused — confirm against the liveness pass.
InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
Operand *Src1, CondARM32::Cond Predicate, bool SetFlags)
: InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) {
HasSideEffects = SetFlags;
addSource(Src0);
addSource(Src1);
}
......@@ -741,6 +800,7 @@ private:
/// Builds a compare-like instruction of kind K with no destination and two
/// sources (Src0, then Src1), guarded by Predicate. The instruction is always
/// marked as having side effects.
/// NOTE(review): presumably because its only result is the condition flags,
/// which are not modelled as a Dest — confirm.
InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, K, 2, nullptr, Predicate) {
HasSideEffects = true;
addSource(Src0);
addSource(Src1);
}
......@@ -759,6 +819,7 @@ using InstARM32Lsr = InstARM32ThreeAddrGPR<InstARM32::Lsr>;
using InstARM32Mul = InstARM32ThreeAddrGPR<InstARM32::Mul>;
using InstARM32Orr = InstARM32ThreeAddrGPR<InstARM32::Orr>;
using InstARM32Rsb = InstARM32ThreeAddrGPR<InstARM32::Rsb>;
using InstARM32Rsc = InstARM32ThreeAddrGPR<InstARM32::Rsc>;
using InstARM32Sbc = InstARM32ThreeAddrGPR<InstARM32::Sbc>;
using InstARM32Sdiv = InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
......@@ -766,6 +827,7 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......@@ -785,6 +847,7 @@ using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>;
using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>;
using InstARM32Mla = InstARM32FourAddrGPR<InstARM32::Mla>;
using InstARM32Mls = InstARM32FourAddrGPR<InstARM32::Mls>;
using InstARM32Cmn = InstARM32CmpLike<InstARM32::Cmn>;
using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>;
using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>;
......@@ -1178,12 +1241,18 @@ public:
return new (Func->allocate<InstARM32Vcmp>())
InstARM32Vcmp(Func, Src0, Src1, Predicate);
}
/// Creates a vcmp whose second operand is the 0.0 immediate. Storage comes
/// from Func and the instruction is constructed in place.
static InstARM32Vcmp *create(Cfg *Func, Variable *Src0,
                             OperandARM32FlexFpZero *Src1,
                             CondARM32::Cond Predicate) {
  auto *Storage = Func->allocate<InstARM32Vcmp>();
  return new (Storage) InstARM32Vcmp(Func, Src0, Src1, Predicate);
}
void emit(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); }
private:
InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1,
InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate);
};
......
......@@ -140,7 +140,23 @@ public:
/// Returns whether CPUFeatures reports the instruction-set feature I.
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
return CPUFeatures.hasFeature(I);
}
/// Bit flags naming the operand forms that are acceptable as a legalization
/// result. Each form occupies its own bit, so flags can be OR'ed together.
enum OperandLegalization {
Legal_None = 0, /// no form selected
Legal_Reg = 1 << 0, /// physical register, not stack location
Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
/// immediates, shifted registers, or modified fp imm.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None /// every bit set, i.e. any form
};
using LegalMask = uint32_t;
Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
GlobalContext *getCtx() const { return Ctx; }
protected:
explicit TargetARM32(Cfg *Func);
......@@ -154,6 +170,8 @@ protected:
void lowerAlloca(const InstAlloca *Inst) override;
SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
Operand *Src0, Operand *Src1);
void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override;
......@@ -192,6 +210,12 @@ protected:
CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
void lowerFcmp(const InstFcmp *Instr) override;
CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
Operand *Src0, Operand *Src1);
CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override;
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
......@@ -211,18 +235,6 @@ protected:
void randomlyInsertNop(float Probability,
RandomNumberGenerator &RNG) override;
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, /// physical register, not stack location
Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
/// immediates, or shifted registers.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None
};
using LegalMask = uint32_t;
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
Variable64On32 *makeI64RegPair();
......@@ -299,6 +311,10 @@ protected:
/// Inserts a branch to Label taken under Condition.
void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
  auto *Branch = InstARM32Br::create(Func, Label, Condition);
  Context.insert(Branch);
}
/// Inserts a cmn of Src0 against Src1, predicated on Pred.
void _cmn(Variable *Src0, Operand *Src1,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Cmn = InstARM32Cmn::create(Func, Src0, Src1, Pred);
  Context.insert(Cmn);
}
void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
......@@ -332,6 +348,12 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred));
}
/// Inserts the flag-setting form of lsl: Dest = Src0 << Src1, under Pred.
void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  constexpr bool UpdateFlags = true;
  auto *Lsls = InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred, UpdateFlags);
  Context.insert(Lsls);
}
void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred));
......@@ -654,6 +676,22 @@ protected:
/// Inserts a return through LR, optionally carrying Src0.
void _ret(Variable *LR, Variable *Src0 = nullptr) {
  auto *Ret = InstARM32Ret::create(Func, LR, Src0);
  Context.insert(Ret);
}
/// Inserts the flag-setting form of rsc on (Dest, Src0, Src1), under Pred.
void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  constexpr bool UpdateFlags = true;
  auto *Rscs = InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred, UpdateFlags);
  Context.insert(Rscs);
}
/// Inserts an rsc on (Dest, Src0, Src1), under Pred, using create()'s default
/// flag behaviour.
void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Rsc = InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Rsc);
}
/// Inserts the flag-setting form of rsb on (Dest, Src0, Src1), under Pred.
void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  constexpr bool UpdateFlags = true;
  auto *Rsbs = InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred, UpdateFlags);
  Context.insert(Rsbs);
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
......@@ -745,12 +783,19 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred));
}
/// Inserts a vcmp of Src0 against the 0.0 immediate operand, under Pred.
void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Vcmp = InstARM32Vcmp::create(Func, Src0, FpZero, Pred);
  Context.insert(Vcmp);
}
/// Inserts a vmrs predicated on Pred.
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
  auto *Vmrs = InstARM32Vmrs::create(Func, Pred);
  Context.insert(Vmrs);
}
/// Inserts a vmul on (Dest, Src0, Src1).
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmul = InstARM32Vmul::create(Func, Dest, Src0, Src1);
  Context.insert(Vmul);
}
/// Inserts a veor on (Dest, Src0, Src1).
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Veor = InstARM32Veor::create(Func, Dest, Src0, Src1);
  Context.insert(Veor);
}
void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred));
......
......@@ -29,7 +29,6 @@
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"
#include <cmath> // signbit()
#include <stack>
namespace Ice {
......@@ -5506,16 +5505,6 @@ Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
return Reg;
}
namespace {
template <typename T> bool isPositiveZero(T Val) {
static_assert(std::is_floating_point<T>::value,
"Input type must be floating point");
return Val == 0 && !std::signbit(Val);
}
} // end of anonymous namespace
template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) {
......@@ -5609,10 +5598,10 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// operand.
if (isScalarFloatingType(Ty)) {
if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
if (isPositiveZero(ConstFloat->getValue()))
if (Utils::isPositiveZero(ConstFloat->getValue()))
return makeZeroedRegister(Ty, RegNum);
} else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
if (isPositiveZero(ConstDouble->getValue()))
if (Utils::isPositiveZero(ConstDouble->getValue()))
return makeZeroedRegister(Ty, RegNum);
}
Variable *Base = nullptr;
......
......@@ -16,6 +16,7 @@
#define SUBZERO_SRC_ICEUTILS_H
#include <climits>
#include <cmath> // std::signbit()
namespace Ice {
......@@ -117,6 +118,13 @@ public:
return value;
return (value >> shift) | (value << (32 - shift));
}
/// Tells whether Val is positive zero (+0.0). Negative zero, non-zero values
/// and NaN all yield false. T must be a floating point type; anything else is
/// rejected at compile time.
template <typename T> static bool isPositiveZero(T Val) {
  static_assert(std::is_floating_point<T>::value,
                "Input type must be floating point");
  if (std::signbit(Val))
    return false;
  return Val == 0;
}
};
} // end of namespace Ice
......
......@@ -512,13 +512,13 @@ entry:
; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: shr64BitSigned
; ARM32: lsr [[T0:r[0-9]+]], r0, r2
; ARM32: rsb [[T1:r[0-9]+]], r2, #32
; ARM32: orr r0, [[T0]], r1, lsl [[T1]]
; ARM32: sub [[T2:r[0-9]+]], r2, #32
; ARM32: lsr [[T0:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: rsb [[T1:r[0-9]+]], r{{[0-9]+}}, #32
; ARM32: orr r{{[0-9]+}}, [[T0]], r{{[0-9]+}}, lsl [[T1]]
; ARM32: sub [[T2:r[0-9]+]], r{{[0-9]+}}, #32
; ARM32: cmp [[T2]], #0
; ARM32: asrge r0, r1, [[T2]]
; ARM32: asr r{{[0-9]+}}, r1, r2
; ARM32: asrge r{{[0-9]+}}, r{{[0-9]+}}, [[T2]]
; ARM32: asr r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) {
entry:
......
......@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm
; ARM32: mov {{.*}}, #99
; ARM32: movw {{.*}}, #99
; ARM32: mul r{{.*}}, r{{.*}}, r{{.*}}
; MIPS32-LABEL: MulImm
; MIPS32: mul
......
......@@ -169,8 +169,7 @@ entry:
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov
; ARM32: mov
; ARM32: cmp {{r[0-9]+}}, r0
; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: movlt
; ARM32: movlt
; ARM32: bx lr
......
......@@ -39,7 +39,7 @@ return: ; preds = %entry
; CHECK: ret
; ARM32-LABEL: divide
; ARM32: cmp
; ARM32: tst
; ARM32: .word 0xe7fedef0
; ARM32: bl {{.*}} __divsi3
; ARM32: bx lr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment