Commit ccea793f by John Porto

Subzero. ARM32. Improve constant lowering.

parent a98091d4
...@@ -284,6 +284,87 @@ bool OperandARM32FlexImm::canHoldImm(uint32_t Immediate, uint32_t *RotateAmt, ...@@ -284,6 +284,87 @@ bool OperandARM32FlexImm::canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,
return false; return false;
} }
/// Builds a kFlexFpImm flex operand of type Ty that carries the already
/// encoded modified immediate ModifiedImm (the value canHoldImm() produces).
/// The Cfg argument is accepted but not used.
OperandARM32FlexFpImm::OperandARM32FlexFpImm(Cfg * /*Func*/, Type Ty,
uint32_t ModifiedImm)
: OperandARM32Flex(kFlexFpImm, Ty), ModifiedImm(ModifiedImm) {}
/// Decides whether the scalar fp constant C can be written as a floating-point
/// modified immediate. When it can, the 8-bit encoding "abcdefgh" is stored in
/// *ModifiedImm and true is returned; otherwise false is returned and
/// *ModifiedImm is left untouched. Any type other than f32/f64 is a fatal
/// error.
bool OperandARM32FlexFpImm::canHoldImm(Operand *C, uint32_t *ModifiedImm) {
  switch (C->getType()) {
  case IceType_f32: {
    // The mask names intentionally ignore the llvm naming conventions: each
    // one is named after the bit field it selects. See "A7.5.1 Operation of
    // modified immediate constants, Floating-point" in the ARM ARM.
    static constexpr uint32_t a = 0x80000000u;
    static constexpr uint32_t B = 0x40000000;
    static constexpr uint32_t bbbbb = 0x3E000000;
    static constexpr uint32_t cdefgh = 0x01F80000;
    static constexpr uint32_t AllowedBits = a | B | bbbbb | cdefgh;
    static_assert(AllowedBits == 0xFFF80000u,
                  "Invalid mask for f32 modified immediates.");
    const float Value = llvm::cast<ConstantFloat>(C)->getValue();
    const uint32_t Bits = *reinterpret_cast<const uint32_t *>(&Value);
    // Nothing outside the a/B/bbbbb/cdefgh fields may be set.
    if ((Bits & ~AllowedBits) != 0)
      return false;
    // The replicated b field must be all zeros or all ones.
    const uint32_t Replicated = Bits & bbbbb;
    if (Replicated != 0 && Replicated != bbbbb)
      return false;
    // B must be the complement of b (B ^ b == 1).
    const bool UpperB = (Bits & B) != 0;
    const bool LowerB = Replicated != 0;
    if (UpperB == LowerB)
      return false;
    uint32_t Encoding = (Bits & cdefgh) >> 19;
    if (Bits & a)
      Encoding |= 0x80;
    if (LowerB)
      Encoding |= 0x40;
    *ModifiedImm = Encoding;
    return true;
  }
  case IceType_f64: {
    // Same field naming as above, applied to the upper 32 bits of the double.
    static constexpr uint32_t a = 0x80000000u;
    static constexpr uint32_t B = 0x40000000;
    static constexpr uint32_t bbbbbbbb = 0x3FC00000;
    static constexpr uint32_t cdefgh = 0x003F0000;
    static constexpr uint32_t AllowedBits = a | B | bbbbbbbb | cdefgh;
    static_assert(AllowedBits == 0xFFFF0000u,
                  "Invalid mask for f64 modified immediates.");
    const double Value = llvm::cast<ConstantDouble>(C)->getValue();
    const uint64_t Wide = *reinterpret_cast<const uint64_t *>(&Value);
    // The entire lower word must be zero.
    if ((Wide & 0xFFFFFFFFu) != 0)
      return false;
    const uint32_t Bits = Wide >> 32;
    // Nothing outside the a/B/bbbbbbbb/cdefgh fields may be set.
    if ((Bits & ~AllowedBits) != 0)
      return false;
    // The replicated b field (bbbbbbbb) must be all zeros or all ones.
    const uint32_t Replicated = Bits & bbbbbbbb;
    if (Replicated != 0 && Replicated != bbbbbbbb)
      return false;
    // B must be the complement of b (B ^ b == 1).
    const bool UpperB = (Bits & B) != 0;
    const bool LowerB = Replicated != 0;
    if (UpperB == LowerB)
      return false;
    uint32_t Encoding = (Bits & cdefgh) >> 16;
    if (Bits & a)
      Encoding |= 0x80;
    if (LowerB)
      Encoding |= 0x40;
    *ModifiedImm = Encoding;
    return true;
  }
  default:
    llvm::report_fatal_error("Unhandled fp constant type.");
  }
}
/// Builds a kFlexFpZero flex operand of type Ty. The operand carries no
/// payload, and the Cfg argument is accepted but not used.
OperandARM32FlexFpZero::OperandARM32FlexFpZero(Cfg * /*Func*/, Type Ty)
: OperandARM32Flex(kFlexFpZero, Ty) {}
OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg, OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
ShiftKind ShiftOp, Operand *ShiftAmt) ShiftKind ShiftOp, Operand *ShiftAmt)
: OperandARM32Flex(kFlexReg, Ty), Reg(Reg), ShiftOp(ShiftOp), : OperandARM32Flex(kFlexReg, Ty), Reg(Reg), ShiftOp(ShiftOp),
...@@ -557,15 +638,18 @@ template <> void InstARM32Tst::emitIAS(const Cfg *Func) const { ...@@ -557,15 +638,18 @@ template <> void InstARM32Tst::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1, InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) { : InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) {
HasSideEffects = true;
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
} }
InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate) InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {} : InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {
HasSideEffects = true;
}
InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src, InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
...@@ -605,6 +689,7 @@ template <> const char *InstARM32Lsr::Opcode = "lsr"; ...@@ -605,6 +689,7 @@ template <> const char *InstARM32Lsr::Opcode = "lsr";
template <> const char *InstARM32Mul::Opcode = "mul"; template <> const char *InstARM32Mul::Opcode = "mul";
template <> const char *InstARM32Orr::Opcode = "orr"; template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb"; template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Rsc::Opcode = "rsc";
template <> const char *InstARM32Sbc::Opcode = "sbc"; template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv"; template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub"; template <> const char *InstARM32Sub::Opcode = "sub";
...@@ -613,11 +698,13 @@ template <> const char *InstARM32Udiv::Opcode = "udiv"; ...@@ -613,11 +698,13 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
template <> const char *InstARM32Vadd::Opcode = "vadd"; template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv"; template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul"; template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls"; template <> const char *InstARM32Mls::Opcode = "mls";
// Cmp-like ops // Cmp-like ops
template <> const char *InstARM32Cmn::Opcode = "cmn";
template <> const char *InstARM32Cmp::Opcode = "cmp"; template <> const char *InstARM32Cmp::Opcode = "cmp";
template <> const char *InstARM32Tst::Opcode = "tst"; template <> const char *InstARM32Tst::Opcode = "tst";
...@@ -1701,6 +1788,67 @@ void OperandARM32FlexImm::dump(const Cfg * /* Func */, Ostream &Str) const { ...@@ -1701,6 +1788,67 @@ void OperandARM32FlexImm::dump(const Cfg * /* Func */, Ostream &Str) const {
Str << "#(" << Imm << " ror 2*" << RotateAmt << ")"; Str << "#(" << Imm << " ror 2*" << RotateAmt << ")";
} }
namespace {
// Fields of the 8-bit modified immediate "abcdefgh", named after the bits they
// select rather than after the llvm naming conventions.
static constexpr uint32_t a = 0x80;
static constexpr uint32_t b = 0x40;
static constexpr uint32_t cdefgh = 0x3F;
static constexpr uint32_t AllowedBits = a | b | cdefgh;
static_assert(AllowedBits == 0xFF,
              "Invalid mask for f32/f64 constant rematerialization.");

// Expands an encoded modified immediate back into the fp value it denotes.
// The value is always returned as a float; nothing is lost by doing so.
// TODO(jpp): returning a double causes problems when outputting the constants
// for filetype=asm. Why?
float materializeFloatImmediate(uint32_t ModifiedImm) {
  // cdefgh lands directly below the replicated-b field.
  uint32_t Bits = (ModifiedImm & cdefgh) << 19;
  // b set -> five replicated ones with B clear; b clear -> only B set.
  Bits |= (ModifiedImm & b) ? 0x3E000000 : 0x40000000;
  // a selects the top (sign) bit.
  if (ModifiedImm & a)
    Bits |= 0x80000000;
  return *reinterpret_cast<const float *>(&Bits);
}
} // end of anonymous namespace
/// Writes the operand to the context's emit stream as "#<value>", followed by
/// an "@ Modified: <encoding>" annotation. Does nothing when dumping is
/// compiled out; any type other than f32/f64 is a fatal error.
void OperandARM32FlexFpImm::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  const bool IsScalarFp = Ty == IceType_f32 || Ty == IceType_f64;
  if (!IsScalarFp)
    llvm::report_fatal_error("Invalid flex fp imm type.");
  const float Value = materializeFloatImmediate(ModifiedImm);
  Str << "#" << Value << " @ Modified: " << ModifiedImm;
}
/// Prints "#<value>" followed by the vector-width string for Ty to Str. Does
/// nothing when dumping is compiled out.
void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const {
  if (BuildDefs::dump()) {
    const float Value = materializeFloatImmediate(ModifiedImm);
    Str << "#" << Value;
    Str << InstARM32::getVecWidthString(Ty);
  }
}
/// Writes the literal "#0.0" to the context's emit stream. Does nothing when
/// dumping is compiled out; any type other than f32/f64 is a fatal error.
void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  const bool IsScalarFp = Ty == IceType_f32 || Ty == IceType_f64;
  if (!IsScalarFp)
    llvm::report_fatal_error("Invalid flex fp imm type.");
  Str << "#0.0";
}
/// Prints "#0.0" followed by the vector-width string for Ty to Str. Does
/// nothing when dumping is compiled out.
void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const {
  if (BuildDefs::dump()) {
    Str << "#0.0";
    Str << InstARM32::getVecWidthString(Ty);
  }
}
void OperandARM32FlexReg::emit(const Cfg *Func) const { void OperandARM32FlexReg::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -1741,6 +1889,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Lsr>; ...@@ -1741,6 +1889,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Lsr>;
template class InstARM32ThreeAddrGPR<InstARM32::Mul>; template class InstARM32ThreeAddrGPR<InstARM32::Mul>;
template class InstARM32ThreeAddrGPR<InstARM32::Orr>; template class InstARM32ThreeAddrGPR<InstARM32::Orr>;
template class InstARM32ThreeAddrGPR<InstARM32::Rsb>; template class InstARM32ThreeAddrGPR<InstARM32::Rsb>;
template class InstARM32ThreeAddrGPR<InstARM32::Rsc>;
template class InstARM32ThreeAddrGPR<InstARM32::Sbc>; template class InstARM32ThreeAddrGPR<InstARM32::Sbc>;
template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>; template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
template class InstARM32ThreeAddrGPR<InstARM32::Sub>; template class InstARM32ThreeAddrGPR<InstARM32::Sub>;
...@@ -1749,6 +1898,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>; ...@@ -1749,6 +1898,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>; template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>; template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>; template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
...@@ -1768,6 +1918,7 @@ template class InstARM32UnaryopFP<InstARM32::Vsqrt>; ...@@ -1768,6 +1918,7 @@ template class InstARM32UnaryopFP<InstARM32::Vsqrt>;
template class InstARM32FourAddrGPR<InstARM32::Mla>; template class InstARM32FourAddrGPR<InstARM32::Mla>;
template class InstARM32FourAddrGPR<InstARM32::Mls>; template class InstARM32FourAddrGPR<InstARM32::Mls>;
template class InstARM32CmpLike<InstARM32::Cmn>;
template class InstARM32CmpLike<InstARM32::Cmp>; template class InstARM32CmpLike<InstARM32::Cmp>;
template class InstARM32CmpLike<InstARM32::Tst>; template class InstARM32CmpLike<InstARM32::Tst>;
......
...@@ -40,6 +40,8 @@ public: ...@@ -40,6 +40,8 @@ public:
kMem, kMem,
kFlexStart, kFlexStart,
kFlexImm = kFlexStart, kFlexImm = kFlexStart,
kFlexFpImm,
kFlexFpZero,
kFlexReg, kFlexReg,
kFlexEnd = kFlexReg kFlexEnd = kFlexReg
}; };
...@@ -205,6 +207,59 @@ private: ...@@ -205,6 +207,59 @@ private:
uint32_t RotateAmt; uint32_t RotateAmt;
}; };
/// Modified Floating-point constant: a flex operand holding an fp immediate in
/// its encoded "modified immediate" form (see canHoldImm()).
class OperandARM32FlexFpImm : public OperandARM32Flex {
// Instances are only created through create(); default construction and
// copying are disabled.
OperandARM32FlexFpImm() = delete;
OperandARM32FlexFpImm(const OperandARM32FlexFpImm &) = delete;
OperandARM32FlexFpImm &operator=(const OperandARM32FlexFpImm &) = delete;
public:
/// Placement-constructs a new operand in storage obtained from
/// Func->allocate<>(). ModifiedImm is the already-encoded immediate.
static OperandARM32FlexFpImm *create(Cfg *Func, Type Ty,
uint32_t ModifiedImm) {
return new (Func->allocate<OperandARM32FlexFpImm>())
OperandARM32FlexFpImm(Func, Ty, ModifiedImm);
}
/// Emits the operand to the context's emit stream.
void emit(const Cfg *Func) const override;
// Keep the base-class dump overloads visible alongside the override below.
using OperandARM32::dump;
/// Prints the operand to Str.
void dump(const Cfg *Func, Ostream &Str) const override;
/// Kind test: true iff Operand's kind is kFlexFpImm.
/// NOTE(review): presumably consumed by llvm::isa/cast/dyn_cast -- confirm.
static bool classof(const Operand *Operand) {
return Operand->getKind() == static_cast<OperandKind>(kFlexFpImm);
}
/// Returns true and stores the encoding in *ModifiedImm when the fp constant
/// C can be represented as a modified immediate; returns false otherwise.
static bool canHoldImm(Operand *C, uint32_t *ModifiedImm);
private:
OperandARM32FlexFpImm(Cfg *Func, Type Ty, uint32_t ModifiedImm);
// Encoded immediate, as produced by canHoldImm().
uint32_t ModifiedImm;
};
/// An operand for representing the 0.0 immediate in vcmp. It carries no data
/// beyond its operand kind (kFlexFpZero) and type.
class OperandARM32FlexFpZero : public OperandARM32Flex {
// Instances are only created through create(); default construction and
// copying are disabled.
OperandARM32FlexFpZero() = delete;
OperandARM32FlexFpZero(const OperandARM32FlexFpZero &) = delete;
OperandARM32FlexFpZero &operator=(const OperandARM32FlexFpZero &) = delete;
public:
/// Placement-constructs a new operand in storage obtained from
/// Func->allocate<>().
static OperandARM32FlexFpZero *create(Cfg *Func, Type Ty) {
return new (Func->allocate<OperandARM32FlexFpZero>())
OperandARM32FlexFpZero(Func, Ty);
}
/// Emits the operand ("#0.0") to the context's emit stream.
void emit(const Cfg *Func) const override;
// Keep the base-class dump overloads visible alongside the override below.
using OperandARM32::dump;
/// Prints the operand to Str.
void dump(const Cfg *Func, Ostream &Str) const override;
/// Kind test: true iff Operand's kind is kFlexFpZero.
/// NOTE(review): presumably consumed by llvm::isa/cast/dyn_cast -- confirm.
static bool classof(const Operand *Operand) {
return Operand->getKind() == static_cast<OperandKind>(kFlexFpZero);
}
private:
OperandARM32FlexFpZero(Cfg *Func, Type Ty);
};
/// Shifted register variant. /// Shifted register variant.
class OperandARM32FlexReg : public OperandARM32Flex { class OperandARM32FlexReg : public OperandARM32Flex {
OperandARM32FlexReg() = delete; OperandARM32FlexReg() = delete;
...@@ -289,6 +344,7 @@ public: ...@@ -289,6 +344,7 @@ public:
Bic, Bic,
Br, Br,
Call, Call,
Cmn,
Cmp, Cmp,
Clz, Clz,
Dmb, Dmb,
...@@ -312,6 +368,7 @@ public: ...@@ -312,6 +368,7 @@ public:
Ret, Ret,
Rev, Rev,
Rsb, Rsb,
Rsc,
Sbc, Sbc,
Sdiv, Sdiv,
Str, Str,
...@@ -328,6 +385,7 @@ public: ...@@ -328,6 +385,7 @@ public:
Vcmp, Vcmp,
Vcvt, Vcvt,
Vdiv, Vdiv,
Veor,
Vmrs, Vmrs,
Vmul, Vmul,
Vsqrt, Vsqrt,
...@@ -609,6 +667,7 @@ private: ...@@ -609,6 +667,7 @@ private:
InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0, InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
Operand *Src1, CondARM32::Cond Predicate, bool SetFlags) Operand *Src1, CondARM32::Cond Predicate, bool SetFlags)
: InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) { : InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) {
HasSideEffects = SetFlags;
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
} }
...@@ -741,6 +800,7 @@ private: ...@@ -741,6 +800,7 @@ private:
InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1, InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
: InstARM32Pred(Func, K, 2, nullptr, Predicate) { : InstARM32Pred(Func, K, 2, nullptr, Predicate) {
HasSideEffects = true;
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
} }
...@@ -759,6 +819,7 @@ using InstARM32Lsr = InstARM32ThreeAddrGPR<InstARM32::Lsr>; ...@@ -759,6 +819,7 @@ using InstARM32Lsr = InstARM32ThreeAddrGPR<InstARM32::Lsr>;
using InstARM32Mul = InstARM32ThreeAddrGPR<InstARM32::Mul>; using InstARM32Mul = InstARM32ThreeAddrGPR<InstARM32::Mul>;
using InstARM32Orr = InstARM32ThreeAddrGPR<InstARM32::Orr>; using InstARM32Orr = InstARM32ThreeAddrGPR<InstARM32::Orr>;
using InstARM32Rsb = InstARM32ThreeAddrGPR<InstARM32::Rsb>; using InstARM32Rsb = InstARM32ThreeAddrGPR<InstARM32::Rsb>;
using InstARM32Rsc = InstARM32ThreeAddrGPR<InstARM32::Rsc>;
using InstARM32Sbc = InstARM32ThreeAddrGPR<InstARM32::Sbc>; using InstARM32Sbc = InstARM32ThreeAddrGPR<InstARM32::Sbc>;
using InstARM32Sdiv = InstARM32ThreeAddrGPR<InstARM32::Sdiv>; using InstARM32Sdiv = InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>; using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
...@@ -766,6 +827,7 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>; ...@@ -766,6 +827,7 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>; using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
...@@ -785,6 +847,7 @@ using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>; ...@@ -785,6 +847,7 @@ using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>;
using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>; using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>;
using InstARM32Mla = InstARM32FourAddrGPR<InstARM32::Mla>; using InstARM32Mla = InstARM32FourAddrGPR<InstARM32::Mla>;
using InstARM32Mls = InstARM32FourAddrGPR<InstARM32::Mls>; using InstARM32Mls = InstARM32FourAddrGPR<InstARM32::Mls>;
using InstARM32Cmn = InstARM32CmpLike<InstARM32::Cmn>;
using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>; using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>;
using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>; using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>;
...@@ -1178,12 +1241,18 @@ public: ...@@ -1178,12 +1241,18 @@ public:
return new (Func->allocate<InstARM32Vcmp>()) return new (Func->allocate<InstARM32Vcmp>())
InstARM32Vcmp(Func, Src0, Src1, Predicate); InstARM32Vcmp(Func, Src0, Src1, Predicate);
} }
/// Overload for comparing Src0 against the fp zero operand: allocates storage
/// from Func and placement-constructs the vcmp instruction in it.
static InstARM32Vcmp *create(Cfg *Func, Variable *Src0,
                             OperandARM32FlexFpZero *Src1,
                             CondARM32::Cond Predicate) {
  auto *Storage = Func->allocate<InstARM32Vcmp>();
  return new (Storage) InstARM32Vcmp(Func, Src0, Src1, Predicate);
}
void emit(const Cfg *Func) const override; void emit(const Cfg *Func) const override;
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); } static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); }
private: private:
InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1, InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate); CondARM32::Cond Predicate);
}; };
......
...@@ -140,7 +140,23 @@ public: ...@@ -140,7 +140,23 @@ public:
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const { bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
return CPUFeatures.hasFeature(I); return CPUFeatures.hasFeature(I);
} }
/// Bit flags naming the operand forms a legalized operand may take. The
/// enumerators are distinct bits so they can be OR-ed into a LegalMask.
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, ///< physical register, not stack location
Legal_Flex = 1 << 1, ///< A flexible operand2, which can hold rotated small
///< immediates, shifted registers, or modified fp imm.
Legal_Mem = 1 << 2, ///< includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None
};
/// Mask type passed as the Allowed argument of legalize(); presumably a
/// bitwise OR of OperandLegalization values.
using LegalMask = uint32_t;
Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister); Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
/// Returns the Ctx member (the GlobalContext pointer held by this object).
GlobalContext *getCtx() const { return Ctx; }
protected: protected:
explicit TargetARM32(Cfg *Func); explicit TargetARM32(Cfg *Func);
...@@ -154,6 +170,8 @@ protected: ...@@ -154,6 +170,8 @@ protected:
void lowerAlloca(const InstAlloca *Inst) override; void lowerAlloca(const InstAlloca *Inst) override;
SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst); SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
Operand *Src0, Operand *Src1);
void lowerArithmetic(const InstArithmetic *Inst) override; void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override; void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override; void lowerBr(const InstBr *Inst) override;
...@@ -192,6 +210,12 @@ protected: ...@@ -192,6 +210,12 @@ protected:
CondWhenTrue lowerFcmpCond(const InstFcmp *Instr); CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
void lowerFcmp(const InstFcmp *Instr) override; void lowerFcmp(const InstFcmp *Instr) override;
CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
Operand *Src0, Operand *Src1);
CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr); CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override; void lowerIcmp(const InstIcmp *Instr) override;
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
...@@ -211,18 +235,6 @@ protected: ...@@ -211,18 +235,6 @@ protected:
void randomlyInsertNop(float Probability, void randomlyInsertNop(float Probability,
RandomNumberGenerator &RNG) override; RandomNumberGenerator &RNG) override;
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, /// physical register, not stack location
Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
/// immediates, or shifted registers.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None
};
using LegalMask = uint32_t;
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty); OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
Variable64On32 *makeI64RegPair(); Variable64On32 *makeI64RegPair();
...@@ -299,6 +311,10 @@ protected: ...@@ -299,6 +311,10 @@ protected:
void _br(InstARM32Label *Label, CondARM32::Cond Condition) { void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
Context.insert(InstARM32Br::create(Func, Label, Condition)); Context.insert(InstARM32Br::create(Func, Label, Condition));
} }
/// Creates a cmn instruction on Src0/Src1 under predicate Pred (default AL)
/// and inserts it at the current lowering context.
void _cmn(Variable *Src0, Operand *Src1,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Instr = InstARM32Cmn::create(Func, Src0, Src1, Pred);
  Context.insert(Instr);
}
void _cmp(Variable *Src0, Operand *Src1, void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred)); Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
...@@ -332,6 +348,12 @@ protected: ...@@ -332,6 +348,12 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred));
} }
/// Flag-setting variant of _lsl: creates an lsl instruction with SetFlags
/// enabled and inserts it at the current lowering context.
void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Instr =
      InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred, /*SetFlags=*/true);
  Context.insert(Instr);
}
void _lsr(Variable *Dest, Variable *Src0, Operand *Src1, void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred));
...@@ -654,6 +676,22 @@ protected: ...@@ -654,6 +676,22 @@ protected:
void _ret(Variable *LR, Variable *Src0 = nullptr) { void _ret(Variable *LR, Variable *Src0 = nullptr) {
Context.insert(InstARM32Ret::create(Func, LR, Src0)); Context.insert(InstARM32Ret::create(Func, LR, Src0));
} }
/// Flag-setting variant of _rsc: creates an rsc instruction with SetFlags
/// enabled and inserts it at the current lowering context.
void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Instr =
      InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred, /*SetFlags=*/true);
  Context.insert(Instr);
}
/// Creates an rsc instruction (Dest, Src0, Src1) under predicate Pred
/// (default AL) and inserts it at the current lowering context.
void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
          CondARM32::Cond Pred = CondARM32::AL) {
  auto *Instr = InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred);
  Context.insert(Instr);
}
/// Flag-setting variant of _rsb: creates an rsb instruction with SetFlags
/// enabled and inserts it at the current lowering context.
void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Instr =
      InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred, /*SetFlags=*/true);
  Context.insert(Instr);
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1, void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
...@@ -745,12 +783,19 @@ protected: ...@@ -745,12 +783,19 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred)); Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred));
} }
/// Overload of _vcmp that compares Src0 against the fp zero operand: creates
/// the vcmp instruction and inserts it at the current lowering context.
void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Instr = InstARM32Vcmp::create(Func, Src0, FpZero, Pred);
  Context.insert(Instr);
}
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred)); Context.insert(InstARM32Vmrs::create(Func, Pred));
} }
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
} }
/// Creates a veor instruction (Dest, Src0, Src1) and inserts it at the current
/// lowering context.
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Instr = InstARM32Veor::create(Func, Dest, Src0, Src1);
  Context.insert(Instr);
}
void _vsqrt(Variable *Dest, Variable *Src, void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred));
......
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#include "IceUtils.h" #include "IceUtils.h"
#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"
#include <cmath> // signbit()
#include <stack> #include <stack>
namespace Ice { namespace Ice {
...@@ -5506,16 +5505,6 @@ Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { ...@@ -5506,16 +5505,6 @@ Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
return Reg; return Reg;
} }
namespace {
template <typename T> bool isPositiveZero(T Val) {
static_assert(std::is_floating_point<T>::value,
"Input type must be floating point");
return Val == 0 && !std::signbit(Val);
}
} // end of anonymous namespace
template <class Machine> template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) { int32_t RegNum) {
...@@ -5609,10 +5598,10 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, ...@@ -5609,10 +5598,10 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// operand. // operand.
if (isScalarFloatingType(Ty)) { if (isScalarFloatingType(Ty)) {
if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) { if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
if (isPositiveZero(ConstFloat->getValue())) if (Utils::isPositiveZero(ConstFloat->getValue()))
return makeZeroedRegister(Ty, RegNum); return makeZeroedRegister(Ty, RegNum);
} else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
if (isPositiveZero(ConstDouble->getValue())) if (Utils::isPositiveZero(ConstDouble->getValue()))
return makeZeroedRegister(Ty, RegNum); return makeZeroedRegister(Ty, RegNum);
} }
Variable *Base = nullptr; Variable *Base = nullptr;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define SUBZERO_SRC_ICEUTILS_H #define SUBZERO_SRC_ICEUTILS_H
#include <climits> #include <climits>
#include <cmath> // std::signbit()
namespace Ice { namespace Ice {
...@@ -117,6 +118,13 @@ public: ...@@ -117,6 +118,13 @@ public:
return value; return value;
return (value >> shift) | (value << (32 - shift)); return (value >> shift) | (value << (32 - shift));
} }
/// Tells whether Val is exactly +0.0 (negative zero, non-zero values and NaN
/// all yield false). T must be a floating point type; anything else is
/// rejected at compile time.
template <typename T> static bool isPositiveZero(T Val) {
  static_assert(std::is_floating_point<T>::value,
                "Input type must be floating point");
  if (Val != 0)
    return false;
  // Val compares equal to zero here; only the sign separates +0.0 from -0.0.
  return !std::signbit(Val);
}
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -512,13 +512,13 @@ entry: ...@@ -512,13 +512,13 @@ entry:
; OPTM1: sar {{.*}},0x1f ; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: shr64BitSigned ; ARM32-LABEL: shr64BitSigned
; ARM32: lsr [[T0:r[0-9]+]], r0, r2 ; ARM32: lsr [[T0:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: rsb [[T1:r[0-9]+]], r2, #32 ; ARM32: rsb [[T1:r[0-9]+]], r{{[0-9]+}}, #32
; ARM32: orr r0, [[T0]], r1, lsl [[T1]] ; ARM32: orr r{{[0-9]+}}, [[T0]], r{{[0-9]+}}, lsl [[T1]]
; ARM32: sub [[T2:r[0-9]+]], r2, #32 ; ARM32: sub [[T2:r[0-9]+]], r{{[0-9]+}}, #32
; ARM32: cmp [[T2]], #0 ; ARM32: cmp [[T2]], #0
; ARM32: asrge r0, r1, [[T2]] ; ARM32: asrge r{{[0-9]+}}, r{{[0-9]+}}, [[T2]]
; ARM32: asr r{{[0-9]+}}, r1, r2 ; ARM32: asr r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) { define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) {
entry: entry:
......
...@@ -117,7 +117,7 @@ entry: ...@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm ; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63 ; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm ; ARM32-LABEL: MulImm
; ARM32: mov {{.*}}, #99 ; ARM32: movw {{.*}}, #99
; ARM32: mul r{{.*}}, r{{.*}}, r{{.*}} ; ARM32: mul r{{.*}}, r{{.*}}, r{{.*}}
; MIPS32-LABEL: MulImm ; MIPS32-LABEL: MulImm
; MIPS32: mul ; MIPS32: mul
......
...@@ -169,8 +169,7 @@ entry: ...@@ -169,8 +169,7 @@ entry:
; CHECK: cmovl ; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef ; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov ; ARM32: mov
; ARM32: mov ; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: cmp {{r[0-9]+}}, r0
; ARM32: movlt ; ARM32: movlt
; ARM32: movlt ; ARM32: movlt
; ARM32: bx lr ; ARM32: bx lr
......
...@@ -39,7 +39,7 @@ return: ; preds = %entry ...@@ -39,7 +39,7 @@ return: ; preds = %entry
; CHECK: ret ; CHECK: ret
; ARM32-LABEL: divide ; ARM32-LABEL: divide
; ARM32: cmp ; ARM32: tst
; ARM32: .word 0xe7fedef0 ; ARM32: .word 0xe7fedef0
; ARM32: bl {{.*}} __divsi3 ; ARM32: bl {{.*}} __divsi3
; ARM32: bx lr ; ARM32: bx lr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment