Commit ccea793f by John Porto

Subzero. ARM32. Improve constant lowering.

parent a98091d4
...@@ -284,6 +284,87 @@ bool OperandARM32FlexImm::canHoldImm(uint32_t Immediate, uint32_t *RotateAmt, ...@@ -284,6 +284,87 @@ bool OperandARM32FlexImm::canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,
return false; return false;
} }
// OperandARM32FlexFpImm constructor. ModifiedImm is the 8-bit "abcdefgh"
// modified-immediate encoding (as produced by canHoldImm). The Cfg parameter
// is unused; instances are arena-allocated via the create() factory.
OperandARM32FlexFpImm::OperandARM32FlexFpImm(Cfg * /*Func*/, Type Ty,
                                             uint32_t ModifiedImm)
    : OperandARM32Flex(kFlexFpImm, Ty), ModifiedImm(ModifiedImm) {}
namespace {
// Copies the object representation of Src into a value of type D. This
// replaces the previous reads through reinterpret_cast'ed unrelated pointer
// types (e.g. *reinterpret_cast<const uint32_t *>(&F32)), which violate the
// C++ strict-aliasing rules; only char-typed accesses may alias arbitrary
// objects.
template <typename D, typename S> D bitCopy(const S &Src) {
  static_assert(sizeof(D) == sizeof(S), "bitCopy requires same-sized types.");
  D Dst;
  char *To = reinterpret_cast<char *>(&Dst);
  const char *From = reinterpret_cast<const char *>(&Src);
  for (unsigned i = 0; i < sizeof(D); ++i)
    To[i] = From[i];
  return Dst;
}
} // end of anonymous namespace

// Returns true if the f32/f64 constant C is representable as an ARM
// floating-point modified immediate, and stores the 8-bit "abcdefgh" encoding
// in *ModifiedImm on success. See "A7.5.1 Operation of modified immediate
// constants, Floating-point" in the ARM ARM.
bool OperandARM32FlexFpImm::canHoldImm(Operand *C, uint32_t *ModifiedImm) {
  switch (C->getType()) {
  default:
    llvm::report_fatal_error("Unhandled fp constant type.");
  case IceType_f32: {
    // We violate llvm naming conventions a bit here so that the constants are
    // named after the bit fields they represent. Bit layout of an encodable
    // f32: a:B:bbbbb:cdefgh:0^19, with B == NOT(b).
    static constexpr uint32_t a = 0x80000000u;
    static constexpr uint32_t B = 0x40000000;
    static constexpr uint32_t bbbbb = 0x3E000000;
    static constexpr uint32_t cdefgh = 0x01F80000;
    static constexpr uint32_t AllowedBits = a | B | bbbbb | cdefgh;
    static_assert(AllowedBits == 0xFFF80000u,
                  "Invalid mask for f32 modified immediates.");
    const float F32 = llvm::cast<ConstantFloat>(C)->getValue();
    const uint32_t I32 = bitCopy<uint32_t>(F32);
    if (I32 & ~AllowedBits) {
      // constant has disallowed bits.
      return false;
    }
    if ((I32 & bbbbb) != bbbbb && (I32 & bbbbb)) {
      // not all bbbbb bits are 0 or 1.
      return false;
    }
    if (((I32 & B) != 0) == ((I32 & bbbbb) != 0)) {
      // B ^ b = 0, but the encoding requires B == NOT(b).
      return false;
    }
    *ModifiedImm = ((I32 & a) ? 0x80 : 0x00) | ((I32 & bbbbb) ? 0x40 : 0x00) |
                   ((I32 & cdefgh) >> 19);
    return true;
  }
  case IceType_f64: {
    // Bit layout of an encodable f64 (upper word):
    // a:B:bbbbbbbb:cdefgh:0^16, with B == NOT(b); the lower word must be 0.
    static constexpr uint32_t a = 0x80000000u;
    static constexpr uint32_t B = 0x40000000;
    static constexpr uint32_t bbbbbbbb = 0x3FC00000;
    static constexpr uint32_t cdefgh = 0x003F0000;
    static constexpr uint32_t AllowedBits = a | B | bbbbbbbb | cdefgh;
    static_assert(AllowedBits == 0xFFFF0000u,
                  "Invalid mask for f64 modified immediates.");
    const double F64 = llvm::cast<ConstantDouble>(C)->getValue();
    const uint64_t I64 = bitCopy<uint64_t>(F64);
    if (I64 & 0xFFFFFFFFu) {
      // constant has disallowed bits.
      return false;
    }
    const uint32_t I32 = I64 >> 32;
    if (I32 & ~AllowedBits) {
      // constant has disallowed bits.
      return false;
    }
    if ((I32 & bbbbbbbb) != bbbbbbbb && (I32 & bbbbbbbb)) {
      // not all bbbbbbbb bits are 0 or 1.
      return false;
    }
    if (((I32 & B) != 0) == ((I32 & bbbbbbbb) != 0)) {
      // B ^ b = 0, but the encoding requires B == NOT(b).
      return false;
    }
    *ModifiedImm = ((I32 & a) ? 0x80 : 0x00) |
                   ((I32 & bbbbbbbb) ? 0x40 : 0x00) | ((I32 & cdefgh) >> 16);
    return true;
  }
  }
}
// OperandARM32FlexFpZero constructor: a flex operand standing for the literal
// #0.0 floating-point immediate. The Cfg parameter is unused; instances are
// arena-allocated via the create() factory.
OperandARM32FlexFpZero::OperandARM32FlexFpZero(Cfg * /*Func*/, Type Ty)
    : OperandARM32Flex(kFlexFpZero, Ty) {}
OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg, OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
ShiftKind ShiftOp, Operand *ShiftAmt) ShiftKind ShiftOp, Operand *ShiftAmt)
: OperandARM32Flex(kFlexReg, Ty), Reg(Reg), ShiftOp(ShiftOp), : OperandARM32Flex(kFlexReg, Ty), Reg(Reg), ShiftOp(ShiftOp),
...@@ -557,15 +638,18 @@ template <> void InstARM32Tst::emitIAS(const Cfg *Func) const { ...@@ -557,15 +638,18 @@ template <> void InstARM32Tst::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1, InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) { : InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) {
HasSideEffects = true;
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
} }
InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate) InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {} : InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {
HasSideEffects = true;
}
InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src, InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
...@@ -605,6 +689,7 @@ template <> const char *InstARM32Lsr::Opcode = "lsr"; ...@@ -605,6 +689,7 @@ template <> const char *InstARM32Lsr::Opcode = "lsr";
template <> const char *InstARM32Mul::Opcode = "mul"; template <> const char *InstARM32Mul::Opcode = "mul";
template <> const char *InstARM32Orr::Opcode = "orr"; template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb"; template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Rsc::Opcode = "rsc";
template <> const char *InstARM32Sbc::Opcode = "sbc"; template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv"; template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub"; template <> const char *InstARM32Sub::Opcode = "sub";
...@@ -613,11 +698,13 @@ template <> const char *InstARM32Udiv::Opcode = "udiv"; ...@@ -613,11 +698,13 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
template <> const char *InstARM32Vadd::Opcode = "vadd"; template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv"; template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul"; template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls"; template <> const char *InstARM32Mls::Opcode = "mls";
// Cmp-like ops // Cmp-like ops
template <> const char *InstARM32Cmn::Opcode = "cmn";
template <> const char *InstARM32Cmp::Opcode = "cmp"; template <> const char *InstARM32Cmp::Opcode = "cmp";
template <> const char *InstARM32Tst::Opcode = "tst"; template <> const char *InstARM32Tst::Opcode = "tst";
...@@ -1701,6 +1788,67 @@ void OperandARM32FlexImm::dump(const Cfg * /* Func */, Ostream &Str) const { ...@@ -1701,6 +1788,67 @@ void OperandARM32FlexImm::dump(const Cfg * /* Func */, Ostream &Str) const {
Str << "#(" << Imm << " ror 2*" << RotateAmt << ")"; Str << "#(" << Imm << " ror 2*" << RotateAmt << ")";
} }
namespace {
// Bit-field masks of the 8-bit "abcdefgh" modified immediate. See "A7.5.1
// Operation of modified immediate constants, Floating-point" in the ARM ARM.
static constexpr uint32_t a = 0x80;
static constexpr uint32_t b = 0x40;
static constexpr uint32_t cdefgh = 0x3F;
static constexpr uint32_t AllowedBits = a | b | cdefgh;
static_assert(AllowedBits == 0xFF,
              "Invalid mask for f32/f64 constant rematerialization.");

// Expands the 8-bit modified immediate back into the f32 value it encodes:
// sign = a, then NOT(b):bbbbb in the top exponent bits, then cdefgh, then
// zeros (i.e. the inverse of canHoldImm's f32 packing).
//
// There's no loss in always returning the modified immediate as float.
// TODO(jpp): returning a double causes problems when outputting the constants
// for filetype=asm. Why?
float materializeFloatImmediate(uint32_t ModifiedImm) {
  const uint32_t Ret = ((ModifiedImm & a) ? 0x80000000 : 0) |
                       ((ModifiedImm & b) ? 0x3E000000 : 0x40000000) |
                       ((ModifiedImm & cdefgh) << 19);
  // Byte-wise copy instead of *reinterpret_cast<const float *>(&Ret): reading
  // a uint32_t object through a float lvalue is strict-aliasing UB.
  float F;
  char *To = reinterpret_cast<char *>(&F);
  const char *From = reinterpret_cast<const char *>(&Ret);
  for (unsigned i = 0; i < sizeof(F); ++i)
    To[i] = From[i];
  return F;
}
} // end of anonymous namespace
// Emits the immediate in assembler syntax: the rematerialized float value
// plus the raw modified-immediate encoding as a trailing comment.
void OperandARM32FlexFpImm::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  // Guard clause instead of a switch: only f32/f64 flex fp imms are valid.
  if (Ty != IceType_f32 && Ty != IceType_f64)
    llvm::report_fatal_error("Invalid flex fp imm type.");
  Str << "#" << materializeFloatImmediate(ModifiedImm)
      << " @ Modified: " << ModifiedImm;
}
// Dumps the rematerialized immediate value followed by the type's vector
// width suffix. No-op in builds without dump support.
void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const {
  if (!BuildDefs::dump())
    return;
  Str << "#" << materializeFloatImmediate(ModifiedImm)
      << InstARM32::getVecWidthString(Ty);
}
// Emits the literal "#0.0" immediate in assembler syntax.
void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  // Guard clause instead of a switch: only f32/f64 flex fp zeros are valid.
  if (Ty != IceType_f32 && Ty != IceType_f64)
    llvm::report_fatal_error("Invalid flex fp imm type.");
  Str << "#0.0";
}
// Dumps "#0.0" followed by the type's vector width suffix. No-op in builds
// without dump support.
void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const {
  if (!BuildDefs::dump())
    return;
  Str << "#0.0" << InstARM32::getVecWidthString(Ty);
}
void OperandARM32FlexReg::emit(const Cfg *Func) const { void OperandARM32FlexReg::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -1741,6 +1889,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Lsr>; ...@@ -1741,6 +1889,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Lsr>;
template class InstARM32ThreeAddrGPR<InstARM32::Mul>; template class InstARM32ThreeAddrGPR<InstARM32::Mul>;
template class InstARM32ThreeAddrGPR<InstARM32::Orr>; template class InstARM32ThreeAddrGPR<InstARM32::Orr>;
template class InstARM32ThreeAddrGPR<InstARM32::Rsb>; template class InstARM32ThreeAddrGPR<InstARM32::Rsb>;
template class InstARM32ThreeAddrGPR<InstARM32::Rsc>;
template class InstARM32ThreeAddrGPR<InstARM32::Sbc>; template class InstARM32ThreeAddrGPR<InstARM32::Sbc>;
template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>; template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
template class InstARM32ThreeAddrGPR<InstARM32::Sub>; template class InstARM32ThreeAddrGPR<InstARM32::Sub>;
...@@ -1749,6 +1898,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>; ...@@ -1749,6 +1898,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>; template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>; template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>; template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
...@@ -1768,6 +1918,7 @@ template class InstARM32UnaryopFP<InstARM32::Vsqrt>; ...@@ -1768,6 +1918,7 @@ template class InstARM32UnaryopFP<InstARM32::Vsqrt>;
template class InstARM32FourAddrGPR<InstARM32::Mla>; template class InstARM32FourAddrGPR<InstARM32::Mla>;
template class InstARM32FourAddrGPR<InstARM32::Mls>; template class InstARM32FourAddrGPR<InstARM32::Mls>;
template class InstARM32CmpLike<InstARM32::Cmn>;
template class InstARM32CmpLike<InstARM32::Cmp>; template class InstARM32CmpLike<InstARM32::Cmp>;
template class InstARM32CmpLike<InstARM32::Tst>; template class InstARM32CmpLike<InstARM32::Tst>;
......
...@@ -40,6 +40,8 @@ public: ...@@ -40,6 +40,8 @@ public:
kMem, kMem,
kFlexStart, kFlexStart,
kFlexImm = kFlexStart, kFlexImm = kFlexStart,
kFlexFpImm,
kFlexFpZero,
kFlexReg, kFlexReg,
kFlexEnd = kFlexReg kFlexEnd = kFlexReg
}; };
...@@ -205,6 +207,59 @@ private: ...@@ -205,6 +207,59 @@ private:
uint32_t RotateAmt; uint32_t RotateAmt;
}; };
/// Modified Floating-point constant: a flex operand wrapping the 8-bit
/// modified immediate encoding that some ARM fp instructions can embed
/// directly (see "A7.5.1 Operation of modified immediate constants,
/// Floating-point" in the ARM ARM).
class OperandARM32FlexFpImm : public OperandARM32Flex {
  OperandARM32FlexFpImm() = delete;
  OperandARM32FlexFpImm(const OperandARM32FlexFpImm &) = delete;
  OperandARM32FlexFpImm &operator=(const OperandARM32FlexFpImm &) = delete;

public:
  /// Arena-allocating factory; all operands live in the Cfg's allocator.
  static OperandARM32FlexFpImm *create(Cfg *Func, Type Ty,
                                       uint32_t ModifiedImm) {
    return new (Func->allocate<OperandARM32FlexFpImm>())
        OperandARM32FlexFpImm(Func, Ty, ModifiedImm);
  }

  void emit(const Cfg *Func) const override;

  using OperandARM32::dump;
  void dump(const Cfg *Func, Ostream &Str) const override;

  static bool classof(const Operand *Operand) {
    return Operand->getKind() == static_cast<OperandKind>(kFlexFpImm);
  }

  /// Returns true if constant C is representable as a modified immediate,
  /// storing the 8-bit encoding in *ModifiedImm on success.
  static bool canHoldImm(Operand *C, uint32_t *ModifiedImm);

private:
  OperandARM32FlexFpImm(Cfg *Func, Type Ty, uint32_t ModifiedImm);

  // The 8-bit "abcdefgh" modified immediate encoding.
  uint32_t ModifiedImm;
};
/// An operand for representing the 0.0 immediate in vcmp.
class OperandARM32FlexFpZero : public OperandARM32Flex {
  OperandARM32FlexFpZero() = delete;
  OperandARM32FlexFpZero(const OperandARM32FlexFpZero &) = delete;
  OperandARM32FlexFpZero &operator=(const OperandARM32FlexFpZero &) = delete;

public:
  /// Arena-allocating factory; all operands live in the Cfg's allocator.
  static OperandARM32FlexFpZero *create(Cfg *Func, Type Ty) {
    return new (Func->allocate<OperandARM32FlexFpZero>())
        OperandARM32FlexFpZero(Func, Ty);
  }

  void emit(const Cfg *Func) const override;

  using OperandARM32::dump;
  void dump(const Cfg *Func, Ostream &Str) const override;

  static bool classof(const Operand *Operand) {
    return Operand->getKind() == static_cast<OperandKind>(kFlexFpZero);
  }

private:
  OperandARM32FlexFpZero(Cfg *Func, Type Ty);
};
/// Shifted register variant. /// Shifted register variant.
class OperandARM32FlexReg : public OperandARM32Flex { class OperandARM32FlexReg : public OperandARM32Flex {
OperandARM32FlexReg() = delete; OperandARM32FlexReg() = delete;
...@@ -289,6 +344,7 @@ public: ...@@ -289,6 +344,7 @@ public:
Bic, Bic,
Br, Br,
Call, Call,
Cmn,
Cmp, Cmp,
Clz, Clz,
Dmb, Dmb,
...@@ -312,6 +368,7 @@ public: ...@@ -312,6 +368,7 @@ public:
Ret, Ret,
Rev, Rev,
Rsb, Rsb,
Rsc,
Sbc, Sbc,
Sdiv, Sdiv,
Str, Str,
...@@ -328,6 +385,7 @@ public: ...@@ -328,6 +385,7 @@ public:
Vcmp, Vcmp,
Vcvt, Vcvt,
Vdiv, Vdiv,
Veor,
Vmrs, Vmrs,
Vmul, Vmul,
Vsqrt, Vsqrt,
...@@ -609,6 +667,7 @@ private: ...@@ -609,6 +667,7 @@ private:
InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0, InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
Operand *Src1, CondARM32::Cond Predicate, bool SetFlags) Operand *Src1, CondARM32::Cond Predicate, bool SetFlags)
: InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) { : InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) {
HasSideEffects = SetFlags;
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
} }
...@@ -741,6 +800,7 @@ private: ...@@ -741,6 +800,7 @@ private:
InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1, InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
: InstARM32Pred(Func, K, 2, nullptr, Predicate) { : InstARM32Pred(Func, K, 2, nullptr, Predicate) {
HasSideEffects = true;
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
} }
...@@ -759,6 +819,7 @@ using InstARM32Lsr = InstARM32ThreeAddrGPR<InstARM32::Lsr>; ...@@ -759,6 +819,7 @@ using InstARM32Lsr = InstARM32ThreeAddrGPR<InstARM32::Lsr>;
using InstARM32Mul = InstARM32ThreeAddrGPR<InstARM32::Mul>; using InstARM32Mul = InstARM32ThreeAddrGPR<InstARM32::Mul>;
using InstARM32Orr = InstARM32ThreeAddrGPR<InstARM32::Orr>; using InstARM32Orr = InstARM32ThreeAddrGPR<InstARM32::Orr>;
using InstARM32Rsb = InstARM32ThreeAddrGPR<InstARM32::Rsb>; using InstARM32Rsb = InstARM32ThreeAddrGPR<InstARM32::Rsb>;
using InstARM32Rsc = InstARM32ThreeAddrGPR<InstARM32::Rsc>;
using InstARM32Sbc = InstARM32ThreeAddrGPR<InstARM32::Sbc>; using InstARM32Sbc = InstARM32ThreeAddrGPR<InstARM32::Sbc>;
using InstARM32Sdiv = InstARM32ThreeAddrGPR<InstARM32::Sdiv>; using InstARM32Sdiv = InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>; using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
...@@ -766,6 +827,7 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>; ...@@ -766,6 +827,7 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>; using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
...@@ -785,6 +847,7 @@ using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>; ...@@ -785,6 +847,7 @@ using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>;
using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>; using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>;
using InstARM32Mla = InstARM32FourAddrGPR<InstARM32::Mla>; using InstARM32Mla = InstARM32FourAddrGPR<InstARM32::Mla>;
using InstARM32Mls = InstARM32FourAddrGPR<InstARM32::Mls>; using InstARM32Mls = InstARM32FourAddrGPR<InstARM32::Mls>;
using InstARM32Cmn = InstARM32CmpLike<InstARM32::Cmn>;
using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>; using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>;
using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>; using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>;
...@@ -1178,12 +1241,18 @@ public: ...@@ -1178,12 +1241,18 @@ public:
return new (Func->allocate<InstARM32Vcmp>()) return new (Func->allocate<InstARM32Vcmp>())
InstARM32Vcmp(Func, Src0, Src1, Predicate); InstARM32Vcmp(Func, Src0, Src1, Predicate);
} }
static InstARM32Vcmp *create(Cfg *Func, Variable *Src0,
OperandARM32FlexFpZero *Src1,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vcmp>())
InstARM32Vcmp(Func, Src0, Src1, Predicate);
}
void emit(const Cfg *Func) const override; void emit(const Cfg *Func) const override;
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); } static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); }
private: private:
InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1, InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate); CondARM32::Cond Predicate);
}; };
......
...@@ -1297,29 +1297,26 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { ...@@ -1297,29 +1297,26 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
Variable *SrcLoReg = legalizeToReg(SrcLo); Variable *SrcLoReg = legalizeToReg(SrcLo);
switch (Ty) { switch (Ty) {
default: default:
llvm_unreachable("Unexpected type"); llvm::report_fatal_error("Unexpected type");
case IceType_i8: { case IceType_i8:
Operand *Mask =
legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
_tst(SrcLoReg, Mask);
break;
}
case IceType_i16: { case IceType_i16: {
Operand *Mask = Operand *ShAmtF =
legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
_tst(SrcLoReg, Mask); Legal_Reg | Legal_Flex);
break; Variable *T = makeReg(IceType_i32);
} _lsls(T, SrcLoReg, ShAmtF);
Context.insert(InstFakeUse::create(Func, T));
} break;
case IceType_i32: { case IceType_i32: {
_tst(SrcLoReg, SrcLoReg); _tst(SrcLoReg, SrcLoReg);
break; break;
} }
case IceType_i64: { case IceType_i64: {
Variable *ScratchReg = makeReg(IceType_i32); Variable *T = makeReg(IceType_i32);
_orrs(ScratchReg, SrcLoReg, SrcHi); _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
// ScratchReg isn't going to be used, but we need the side-effect of // T isn't going to be used, but we need the side-effect of setting flags
// setting flags from this operation. // from this operation.
Context.insert(InstFakeUse::create(Func, ScratchReg)); Context.insert(InstFakeUse::create(Func, T));
} }
} }
InstARM32Label *Label = InstARM32Label::create(Func, this); InstARM32Label *Label = InstARM32Label::create(Func, this);
...@@ -1404,29 +1401,172 @@ TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) { ...@@ -1404,29 +1401,172 @@ TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) {
return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
} }
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { namespace {
Variable *Dest = Inst->getDest(); // NumericOperands is used during arithmetic/icmp lowering for constant folding.
if (Dest->getType() == IceType_i1) { // It holds the two sources operands, and maintains some state as to whether one
lowerInt1Arithmetic(Inst); // of them is a constant. If one of the operands is a constant, then it will be
return; // be stored as the operation's second source, with a bit indicating whether the
// operands were swapped.
//
// The class is split into a base class with operand type-independent methods,
// and a derived, templated class, for each type of operand we want to fold
// constants for:
//
// NumericOperandsBase --> NumericOperands<ConstantFloat>
// --> NumericOperands<ConstantDouble>
// --> NumericOperands<ConstantInt32>
//
// NumericOperands<ConstantInt32> also exposes helper methods for emitting
// inverted/negated immediates.
class NumericOperandsBase {
NumericOperandsBase() = delete;
NumericOperandsBase(const NumericOperandsBase &) = delete;
NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
public:
NumericOperandsBase(Operand *S0, Operand *S1)
: Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
Swapped(Src0 == S1 && S0 != S1) {
assert(Src0 != nullptr);
assert(Src1 != nullptr);
assert(Src0 != Src1 || S0 == S1);
} }
// TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to bool hasConstOperand() const {
// legalize Src0 to flex or Src1 to flex and there is a reversible return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
// instruction. E.g., reverse subtract with immediate, register vs register, }
// immediate.
// Or it may be the case that the operands aren't swapped, but the bits can bool swappedOperands() const { return Swapped; }
// be flipped and a different operation applied. E.g., use BIC (bit clear)
// instead of AND for some masks. Variable *src0R(TargetARM32 *Target) const {
Operand *Src0 = legalizeUndef(Inst->getSrc(0)); return legalizeToReg(Target, Src0);
Operand *Src1 = legalizeUndef(Inst->getSrc(1)); }
if (Dest->getType() == IceType_i64) {
Variable *unswappedSrc0R(TargetARM32 *Target) const {
return legalizeToReg(Target, Swapped ? Src1 : Src0);
}
Operand *src1RF(TargetARM32 *Target) const {
return legalizeToRegOrFlex(Target, Src1);
}
Variable *unswappedSrc1R(TargetARM32 *Target) const {
return legalizeToReg(Target, Swapped ? Src0 : Src1);
}
Operand *unswappedSrc1RF(TargetARM32 *Target) const {
return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1);
}
protected:
Operand *const Src0;
Operand *const Src1;
const bool Swapped;
static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
return Target->legalizeToReg(Src);
}
static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
return Target->legalize(Src,
TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
}
private:
static Operand *NonConstOperand(Operand *S0, Operand *S1) {
if (!llvm::isa<Constant>(S0))
return S0;
if (!llvm::isa<Constant>(S1))
return S1;
if (llvm::isa<ConstantRelocatable>(S1) &&
!llvm::isa<ConstantRelocatable>(S0))
return S1;
return S0;
}
static Operand *ConstOperand(Operand *S0, Operand *S1) {
if (!llvm::isa<Constant>(S0))
return S1;
if (!llvm::isa<Constant>(S1))
return S0;
if (llvm::isa<ConstantRelocatable>(S1) &&
!llvm::isa<ConstantRelocatable>(S0))
return S0;
return S1;
}
};
/// Typed refinement of NumericOperandsBase: when a constant operand is
/// present it is asserted to be of constant type C (ConstantFloat,
/// ConstantDouble, or ConstantInteger32), and getConstantValue() exposes its
/// primitive value.
template <typename C> class NumericOperands : public NumericOperandsBase {
  NumericOperands() = delete;
  NumericOperands(const NumericOperands &) = delete;
  NumericOperands &operator=(const NumericOperands &) = delete;

public:
  NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
    assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
  }

  /// Value of the constant operand; only meaningful when hasConstOperand().
  typename C::PrimType getConstantValue() const {
    return llvm::cast<C>(Src1)->getValue();
  }
};
// Type-specific instantiations used during fp arithmetic/icmp lowering.
using FloatOperands = NumericOperands<ConstantFloat>;
using DoubleOperands = NumericOperands<ConstantDouble>;
/// Int32 specialization with helpers for testing/emitting the negated and
/// inverted forms of the constant operand (used to flip e.g. add<->sub,
/// and<->bic when the transformed immediate is flex-encodable).
class Int32Operands : public NumericOperands<ConstantInteger32> {
  Int32Operands() = delete;
  Int32Operands(const Int32Operands &) = delete;
  Int32Operands &operator=(const Int32Operands &) = delete;

public:
  Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}

  /// True iff the constant operand is directly flex-encodable.
  bool immediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
  }

  /// True iff the two's-complement negation of the constant operand is
  /// flex-encodable. The negation is computed in unsigned arithmetic:
  /// -static_cast<int32_t>(v) is signed-overflow UB when v == INT32_MIN.
  bool negatedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        0u - static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  /// Legalizes the negated constant to a register-or-flex operand.
  Operand *negatedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(
        Target, Target->getCtx()->getConstantInt32(static_cast<int32_t>(
                    0u - static_cast<uint32_t>(getConstantValue()))));
  }

  /// True iff the bitwise inversion of the constant operand is
  /// flex-encodable.
  bool invertedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  /// Legalizes the inverted constant to a register-or-flex operand.
  Operand *invertedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   ~static_cast<uint32_t>(getConstantValue())));
  }
};
} // end of anonymous namespace
void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
Variable *Dest, Operand *Src0,
Operand *Src1) {
Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
// These helper-call-involved instructions are lowered in this separate // These helper-call-involved instructions are lowered in this separate
// switch. This is because we would otherwise assume that we need to // switch. This is because we would otherwise assume that we need to
// legalize Src0 to Src0RLo and Src0Hi. However, those go unused with // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
// helper calls, and such unused/redundant instructions will fail liveness // helper calls, and such unused/redundant instructions will fail liveness
// analysis under -Om1 setting. // analysis under -Om1 setting.
switch (Inst->getOp()) { switch (Op) {
default: default:
break; break;
case InstArithmetic::Udiv: case InstArithmetic::Udiv:
...@@ -1437,23 +1577,23 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1437,23 +1577,23 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
// register, which will hide a constant source operand. Instead, check // register, which will hide a constant source operand. Instead, check
// the not-yet-legalized Src1 to optimize-out a divide by 0 check. // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
if (C64->getValue() == 0) { if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) {
_trap(); _trap();
return; return;
} }
} else { } else {
Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); Operand *Src1Lo = SrcsLo.unswappedSrc1R(this);
Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); Operand *Src1Hi = SrcsHi.unswappedSrc1R(this);
div0Check(IceType_i64, Src1Lo, Src1Hi); div0Check(IceType_i64, Src1Lo, Src1Hi);
} }
// Technically, ARM has their own aeabi routines, but we can use the // Technically, ARM has its own aeabi routines, but we can use the
// non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
// the more standard __moddi3 for rem. // the more standard __moddi3 for rem.
const char *HelperName = ""; const char *HelperName = "";
switch (Inst->getOp()) { switch (Op) {
default: default:
llvm_unreachable("Should have only matched div ops."); llvm::report_fatal_error("Should have only matched div ops.");
break; break;
case InstArithmetic::Udiv: case InstArithmetic::Udiv:
HelperName = H_udiv_i64; HelperName = H_udiv_i64;
...@@ -1476,58 +1616,78 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1476,58 +1616,78 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return; return;
} }
} }
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *Src0RLo = legalizeToReg(loOperand(Src0));
Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
Operand *Src1Lo = loOperand(Src1);
Operand *Src1Hi = hiOperand(Src1);
Variable *T_Lo = makeReg(DestLo->getType()); Variable *T_Lo = makeReg(DestLo->getType());
Variable *T_Hi = makeReg(DestHi->getType()); Variable *T_Hi = makeReg(DestHi->getType());
switch (Inst->getOp()) {
switch (Op) {
case InstArithmetic::_num: case InstArithmetic::_num:
llvm_unreachable("Unknown arithmetic operator"); llvm::report_fatal_error("Unknown arithmetic operator");
return; return;
case InstArithmetic::Add: case InstArithmetic::Add: {
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); Variable *Src0LoR = SrcsLo.src0R(this);
Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); Operand *Src1LoRF = SrcsLo.src1RF(this);
_adds(T_Lo, Src0RLo, Src1Lo); Variable *Src0HiR = SrcsHi.src0R(this);
Operand *Src1HiRF = SrcsHi.src1RF(this);
_adds(T_Lo, Src0LoR, Src1LoRF);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
_adc(T_Hi, Src0RHi, Src1Hi); _adc(T_Hi, Src0HiR, Src1HiRF);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
return; return;
case InstArithmetic::And: }
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); case InstArithmetic::And: {
Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); Variable *Src0LoR = SrcsLo.src0R(this);
_and(T_Lo, Src0RLo, Src1Lo); Operand *Src1LoRF = SrcsLo.src1RF(this);
Variable *Src0HiR = SrcsHi.src0R(this);
Operand *Src1HiRF = SrcsHi.src1RF(this);
_and(T_Lo, Src0LoR, Src1LoRF);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
_and(T_Hi, Src0RHi, Src1Hi); _and(T_Hi, Src0HiR, Src1HiRF);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
return; return;
case InstArithmetic::Or: }
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); case InstArithmetic::Or: {
Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); Variable *Src0LoR = SrcsLo.src0R(this);
_orr(T_Lo, Src0RLo, Src1Lo); Operand *Src1LoRF = SrcsLo.src1RF(this);
Variable *Src0HiR = SrcsHi.src0R(this);
Operand *Src1HiRF = SrcsHi.src1RF(this);
_orr(T_Lo, Src0LoR, Src1LoRF);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
_orr(T_Hi, Src0RHi, Src1Hi); _orr(T_Hi, Src0HiR, Src1HiRF);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
return; return;
case InstArithmetic::Xor: }
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); case InstArithmetic::Xor: {
Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); Variable *Src0LoR = SrcsLo.src0R(this);
_eor(T_Lo, Src0RLo, Src1Lo); Operand *Src1LoRF = SrcsLo.src1RF(this);
Variable *Src0HiR = SrcsHi.src0R(this);
Operand *Src1HiRF = SrcsHi.src1RF(this);
_eor(T_Lo, Src0LoR, Src1LoRF);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
_eor(T_Hi, Src0RHi, Src1Hi); _eor(T_Hi, Src0HiR, Src1HiRF);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
return; return;
case InstArithmetic::Sub: }
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); case InstArithmetic::Sub: {
Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); Variable *Src0LoR = SrcsLo.src0R(this);
_subs(T_Lo, Src0RLo, Src1Lo); Operand *Src1LoRF = SrcsLo.src1RF(this);
Variable *Src0HiR = SrcsHi.src0R(this);
Operand *Src1HiRF = SrcsHi.src1RF(this);
if (SrcsLo.swappedOperands()) {
_rsbs(T_Lo, Src0LoR, Src1LoRF);
_mov(DestLo, T_Lo);
_rsc(T_Hi, Src0HiR, Src1HiRF);
_mov(DestHi, T_Hi);
} else {
_subs(T_Lo, Src0LoR, Src1LoRF);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
_sbc(T_Hi, Src0RHi, Src1Hi); _sbc(T_Hi, Src0HiR, Src1HiRF);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
}
return; return;
}
case InstArithmetic::Mul: { case InstArithmetic::Mul: {
// GCC 4.8 does: // GCC 4.8 does:
// a=b*c ==> // a=b*c ==>
...@@ -1551,8 +1711,10 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1551,8 +1711,10 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *T_Acc = makeReg(IceType_i32); Variable *T_Acc = makeReg(IceType_i32);
Variable *T_Acc1 = makeReg(IceType_i32); Variable *T_Acc1 = makeReg(IceType_i32);
Variable *T_Hi1 = makeReg(IceType_i32); Variable *T_Hi1 = makeReg(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo); Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
Variable *Src1RHi = legalizeToReg(Src1Hi); Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
_mul(T_Acc, Src0RLo, Src1RHi); _mul(T_Acc, Src0RLo, Src1RHi);
_mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
_umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
...@@ -1562,6 +1724,49 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1562,6 +1724,49 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return; return;
} }
case InstArithmetic::Shl: { case InstArithmetic::Shl: {
if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
Variable *Src0RLo = SrcsLo.src0R(this);
// Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
if (ShAmtImm == 0) {
_mov(DestLo, Src0RLo);
_mov(DestHi, SrcsHi.src0R(this));
return;
}
if (ShAmtImm >= 32) {
if (ShAmtImm == 32) {
_mov(DestHi, Src0RLo);
} else {
Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
Legal_Reg | Legal_Flex);
_lsl(T_Hi, Src0RLo, ShAmtOp);
_mov(DestHi, T_Hi);
}
Operand *_0 =
legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
_mov(T_Lo, _0);
_mov(DestLo, T_Lo);
return;
}
Variable *Src0RHi = SrcsHi.src0R(this);
Operand *ShAmtOp =
legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
Legal_Reg | Legal_Flex);
_lsl(T_Hi, Src0RHi, ShAmtOp);
_orr(T_Hi, T_Hi,
OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
OperandARM32::LSR, ComplShAmtOp));
_mov(DestHi, T_Hi);
_lsl(T_Lo, Src0RLo, ShAmtOp);
_mov(DestLo, T_Lo);
return;
}
// a=b<<c ==> // a=b<<c ==>
// pnacl-llc does: // pnacl-llc does:
// mov t_b.lo, b.lo // mov t_b.lo, b.lo
...@@ -1593,14 +1798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1593,14 +1798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
// ARM, shifts only take the lower 8 bits of the shift register, and // ARM, shifts only take the lower 8 bits of the shift register, and
// saturate to the range 0-32, so the negative value will saturate to 32. // saturate to the range 0-32, so the negative value will saturate to 32.
Constant *_32 = Ctx->getConstantInt32(32); Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
Constant *_0 = Ctx->getConstantZero(IceType_i32); Operand *_0 =
Variable *Src1RLo = legalizeToReg(Src1Lo); legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
Variable *T0 = makeReg(IceType_i32); Variable *T0 = makeReg(IceType_i32);
Variable *T1 = makeReg(IceType_i32); Variable *T1 = makeReg(IceType_i32);
Variable *T2 = makeReg(IceType_i32); Variable *T2 = makeReg(IceType_i32);
Variable *TA_Hi = makeReg(IceType_i32); Variable *TA_Hi = makeReg(IceType_i32);
Variable *TA_Lo = makeReg(IceType_i32); Variable *TA_Lo = makeReg(IceType_i32);
Variable *Src0RLo = SrcsLo.src0R(this);
Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
_rsb(T0, Src1RLo, _32); _rsb(T0, Src1RLo, _32);
_lsr(T1, Src0RLo, T0); _lsr(T1, Src0RLo, T0);
_orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
...@@ -1616,6 +1824,64 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1616,6 +1824,64 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
case InstArithmetic::Lshr: case InstArithmetic::Lshr:
case InstArithmetic::Ashr: { case InstArithmetic::Ashr: {
const bool ASR = Op == InstArithmetic::Ashr;
if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
Variable *Src0RHi = SrcsHi.src0R(this);
// Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
if (ShAmtImm == 0) {
_mov(DestHi, Src0RHi);
_mov(DestLo, SrcsLo.src0R(this));
return;
}
if (ShAmtImm >= 32) {
if (ShAmtImm == 32) {
_mov(DestLo, Src0RHi);
} else {
Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
Legal_Reg | Legal_Flex);
if (ASR) {
_asr(T_Lo, Src0RHi, ShAmtOp);
} else {
_lsr(T_Lo, Src0RHi, ShAmtOp);
}
_mov(DestLo, T_Lo);
}
if (ASR) {
Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
Legal_Reg | Legal_Flex);
_asr(T_Hi, Src0RHi, _31);
} else {
Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
Legal_Reg | Legal_Flex);
_mov(T_Hi, _0);
}
_mov(DestHi, T_Hi);
return;
}
Variable *Src0RLo = SrcsLo.src0R(this);
Operand *ShAmtOp =
legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
Legal_Reg | Legal_Flex);
_lsr(T_Lo, Src0RLo, ShAmtOp);
_orr(T_Lo, T_Lo,
OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
OperandARM32::LSL, ComplShAmtOp));
_mov(DestLo, T_Lo);
if (ASR) {
_asr(T_Hi, Src0RHi, ShAmtOp);
} else {
_lsr(T_Hi, Src0RHi, ShAmtOp);
}
_mov(DestHi, T_Hi);
return;
}
// a=b>>c // a=b>>c
// pnacl-llc does: // pnacl-llc does:
// mov t_b.lo, b.lo // mov t_b.lo, b.lo
...@@ -1642,22 +1908,24 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1642,22 +1908,24 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// mov a.hi, t_hi // mov a.hi, t_hi
// //
// These are incompatible, therefore we mimic pnacl-llc. // These are incompatible, therefore we mimic pnacl-llc.
const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
Constant *_32 = Ctx->getConstantInt32(32); Operand *_0 =
Constant *_0 = Ctx->getConstantZero(IceType_i32); legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
Variable *Src1RLo = legalizeToReg(Src1Lo);
Variable *T0 = makeReg(IceType_i32); Variable *T0 = makeReg(IceType_i32);
Variable *T1 = makeReg(IceType_i32); Variable *T1 = makeReg(IceType_i32);
Variable *T2 = makeReg(IceType_i32); Variable *T2 = makeReg(IceType_i32);
Variable *TA_Lo = makeReg(IceType_i32); Variable *TA_Lo = makeReg(IceType_i32);
Variable *TA_Hi = makeReg(IceType_i32); Variable *TA_Hi = makeReg(IceType_i32);
Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
_lsr(T0, Src0RLo, Src1RLo); _lsr(T0, Src0RLo, Src1RLo);
_rsb(T1, Src1RLo, _32); _rsb(T1, Src1RLo, _32);
_orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
OperandARM32::LSL, T1)); OperandARM32::LSL, T1));
_sub(T2, Src1RLo, _32); _sub(T2, Src1RLo, _32);
_cmp(T2, _0); _cmp(T2, _0);
if (IsAshr) { if (ASR) {
_asr(TA_Lo, Src0RHi, T2, CondARM32::GE); _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
_set_dest_redefined(); _set_dest_redefined();
_asr(TA_Hi, Src0RHi, Src1RLo); _asr(TA_Hi, Src0RHi, Src1RLo);
...@@ -1675,18 +1943,33 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1675,18 +1943,33 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Fmul: case InstArithmetic::Fmul:
case InstArithmetic::Fdiv: case InstArithmetic::Fdiv:
case InstArithmetic::Frem: case InstArithmetic::Frem:
llvm_unreachable("FP instruction with i64 type"); llvm::report_fatal_error("FP instruction with i64 type");
return; return;
case InstArithmetic::Udiv: case InstArithmetic::Udiv:
case InstArithmetic::Sdiv: case InstArithmetic::Sdiv:
case InstArithmetic::Urem: case InstArithmetic::Urem:
case InstArithmetic::Srem: case InstArithmetic::Srem:
llvm_unreachable("Call-helper-involved instruction for i64 type " llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
"should have already been handled before"); "should have already been handled before");
return; return;
} }
}
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->getType() == IceType_i1) {
lowerInt1Arithmetic(Inst);
return;
}
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
if (Dest->getType() == IceType_i64) {
lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
return; return;
} else if (isVectorType(Dest->getType())) { }
if (isVectorType(Dest->getType())) {
// Add a fake def to keep liveness consistent in the meantime. // Add a fake def to keep liveness consistent in the meantime.
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T)); Context.insert(InstFakeDef::create(Func, T));
...@@ -1694,41 +1977,49 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1694,41 +1977,49 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
// Dest->getType() is a non-i64 scalar. // Dest->getType() is a non-i64 scalar.
Variable *Src0R = legalizeToReg(Src0);
Variable *T = makeReg(Dest->getType()); Variable *T = makeReg(Dest->getType());
// Handle div/rem separately. They require a non-legalized Src1 to inspect
// * Handle div/rem separately. They require a non-legalized Src1 to inspect
// whether or not Src1 is a non-zero constant. Once legalized it is more // whether or not Src1 is a non-zero constant. Once legalized it is more
// difficult to determine (constant may be moved to a register). // difficult to determine (constant may be moved to a register).
// * Handle floating point arithmetic separately: they require Src1 to be
// legalized to a register.
switch (Inst->getOp()) { switch (Inst->getOp()) {
default: default:
break; break;
case InstArithmetic::Udiv: { case InstArithmetic::Udiv: {
constexpr bool NotRemainder = false; constexpr bool NotRemainder = false;
Variable *Src0R = legalizeToReg(Src0);
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
H_udiv_i32, NotRemainder); H_udiv_i32, NotRemainder);
return; return;
} }
case InstArithmetic::Sdiv: { case InstArithmetic::Sdiv: {
constexpr bool NotRemainder = false; constexpr bool NotRemainder = false;
Variable *Src0R = legalizeToReg(Src0);
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
H_sdiv_i32, NotRemainder); H_sdiv_i32, NotRemainder);
return; return;
} }
case InstArithmetic::Urem: { case InstArithmetic::Urem: {
constexpr bool IsRemainder = true; constexpr bool IsRemainder = true;
Variable *Src0R = legalizeToReg(Src0);
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
H_urem_i32, IsRemainder); H_urem_i32, IsRemainder);
return; return;
} }
case InstArithmetic::Srem: { case InstArithmetic::Srem: {
constexpr bool IsRemainder = true; constexpr bool IsRemainder = true;
Variable *Src0R = legalizeToReg(Src0);
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
H_srem_i32, IsRemainder); H_srem_i32, IsRemainder);
return; return;
} }
case InstArithmetic::Frem: { case InstArithmetic::Frem: {
const SizeT MaxSrcs = 2; constexpr SizeT MaxSrcs = 2;
Variable *Src0R = legalizeToReg(Src0);
Type Ty = Dest->getType(); Type Ty = Dest->getType();
InstCall *Call = makeHelperCall( InstCall *Call = makeHelperCall(
isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
...@@ -1737,32 +2028,29 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1737,32 +2028,29 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
lowerCall(Call); lowerCall(Call);
return; return;
} }
}
// Handle floating point arithmetic separately: they require Src1 to be
// legalized to a register.
switch (Inst->getOp()) {
default:
break;
case InstArithmetic::Fadd: { case InstArithmetic::Fadd: {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R); _vadd(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Fsub: { case InstArithmetic::Fsub: {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vsub(T, Src0R, Src1R); _vsub(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Fmul: { case InstArithmetic::Fmul: {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vmul(T, Src0R, Src1R); _vmul(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Fdiv: { case InstArithmetic::Fdiv: {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vdiv(T, Src0R, Src1R); _vdiv(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
...@@ -1770,67 +2058,136 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1770,67 +2058,136 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
} }
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); // Handle everything else here.
Int32Operands Srcs(Src0, Src1);
switch (Inst->getOp()) { switch (Inst->getOp()) {
case InstArithmetic::_num: case InstArithmetic::_num:
llvm_unreachable("Unknown arithmetic operator"); llvm::report_fatal_error("Unknown arithmetic operator");
return; return;
case InstArithmetic::Add: case InstArithmetic::Add: {
if (Srcs.hasConstOperand()) {
if (!Srcs.immediateIsFlexEncodable() &&
Srcs.negatedImmediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.negatedSrc1F(this);
if (!Srcs.swappedOperands()) {
_sub(T, Src0R, Src1F);
} else {
_rsb(T, Src0R, Src1F);
}
_mov(Dest, T);
return;
}
}
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
_add(T, Src0R, Src1RF); _add(T, Src0R, Src1RF);
_mov(Dest, T); _mov(Dest, T);
return; return;
case InstArithmetic::And: }
case InstArithmetic::And: {
if (Srcs.hasConstOperand()) {
if (!Srcs.immediateIsFlexEncodable() &&
Srcs.invertedImmediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.invertedSrc1F(this);
_bic(T, Src0R, Src1F);
_mov(Dest, T);
return;
}
}
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
_and(T, Src0R, Src1RF); _and(T, Src0R, Src1RF);
_mov(Dest, T); _mov(Dest, T);
return; return;
case InstArithmetic::Or: }
case InstArithmetic::Or: {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
_orr(T, Src0R, Src1RF); _orr(T, Src0R, Src1RF);
_mov(Dest, T); _mov(Dest, T);
return; return;
case InstArithmetic::Xor: }
case InstArithmetic::Xor: {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
_eor(T, Src0R, Src1RF); _eor(T, Src0R, Src1RF);
_mov(Dest, T); _mov(Dest, T);
return; return;
case InstArithmetic::Sub: }
case InstArithmetic::Sub: {
if (Srcs.hasConstOperand()) {
Variable *Src0R = Srcs.src0R(this);
if (Srcs.immediateIsFlexEncodable()) {
Operand *Src1RF = Srcs.src1RF(this);
if (Srcs.swappedOperands()) {
_rsb(T, Src0R, Src1RF);
} else {
_sub(T, Src0R, Src1RF); _sub(T, Src0R, Src1RF);
}
_mov(Dest, T);
return;
}
if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
Operand *Src1F = Srcs.negatedSrc1F(this);
_add(T, Src0R, Src1F);
_mov(Dest, T); _mov(Dest, T);
return; return;
}
}
Variable *Src0R = Srcs.unswappedSrc0R(this);
Variable *Src1R = Srcs.unswappedSrc1R(this);
_sub(T, Src0R, Src1R);
_mov(Dest, T);
return;
}
case InstArithmetic::Mul: { case InstArithmetic::Mul: {
Variable *Src1R = legalizeToReg(Src1RF); Variable *Src0R = Srcs.unswappedSrc0R(this);
Variable *Src1R = Srcs.unswappedSrc1R(this);
_mul(T, Src0R, Src1R); _mul(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Shl: case InstArithmetic::Shl: {
_lsl(T, Src0R, Src1RF); Variable *Src0R = Srcs.unswappedSrc0R(this);
Operand *Src1R = Srcs.unswappedSrc1RF(this);
_lsl(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
return; return;
case InstArithmetic::Lshr: }
case InstArithmetic::Lshr: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
if (Dest->getType() != IceType_i32) { if (Dest->getType() != IceType_i32) {
_uxt(Src0R, Src0R); _uxt(Src0R, Src0R);
} }
_lsr(T, Src0R, Src1RF); _lsr(T, Src0R, Srcs.unswappedSrc1RF(this));
_mov(Dest, T); _mov(Dest, T);
return; return;
case InstArithmetic::Ashr: }
case InstArithmetic::Ashr: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
if (Dest->getType() != IceType_i32) { if (Dest->getType() != IceType_i32) {
_sxt(Src0R, Src0R); _sxt(Src0R, Src0R);
} }
_asr(T, Src0R, Src1RF); _asr(T, Src0R, Srcs.unswappedSrc1RF(this));
_mov(Dest, T); _mov(Dest, T);
return; return;
}
case InstArithmetic::Udiv: case InstArithmetic::Udiv:
case InstArithmetic::Sdiv: case InstArithmetic::Sdiv:
case InstArithmetic::Urem: case InstArithmetic::Urem:
case InstArithmetic::Srem: case InstArithmetic::Srem:
llvm_unreachable("Integer div/rem should have been handled earlier."); llvm::report_fatal_error(
"Integer div/rem should have been handled earlier.");
return; return;
case InstArithmetic::Fadd: case InstArithmetic::Fadd:
case InstArithmetic::Fsub: case InstArithmetic::Fsub:
case InstArithmetic::Fmul: case InstArithmetic::Fmul:
case InstArithmetic::Fdiv: case InstArithmetic::Fdiv:
case InstArithmetic::Frem: case InstArithmetic::Frem:
llvm_unreachable("Floating point arith should have been handled earlier."); llvm::report_fatal_error(
"Floating point arith should have been handled earlier.");
return; return;
} }
} }
...@@ -1841,18 +2198,22 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { ...@@ -1841,18 +2198,22 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
assert(Dest->getType() == Src0->getType()); assert(Dest->getType() == Src0->getType());
if (Dest->getType() == IceType_i64) { if (Dest->getType() == IceType_i64) {
Src0 = legalizeUndef(Src0); Src0 = legalizeUndef(Src0);
Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = makeReg(IceType_i32);
Variable *T_Hi = makeReg(IceType_i32);
Variable *T_Lo = makeReg(IceType_i32);
auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
_mov(T_Lo, Src0Lo); _mov(T_Lo, Src0Lo);
_mov(DestLo, T_Lo); _mov(DestLo, T_Lo);
Variable *T_Hi = makeReg(IceType_i32);
auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
_mov(T_Hi, Src0Hi); _mov(T_Hi, Src0Hi);
_mov(DestHi, T_Hi); _mov(DestHi, T_Hi);
} else {
return;
}
Operand *NewSrc; Operand *NewSrc;
if (Dest->hasReg()) { if (Dest->hasReg()) {
// If Dest already has a physical register, then legalize the Src operand // If Dest already has a physical register, then legalize the Src operand
...@@ -1865,16 +2226,11 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { ...@@ -1865,16 +2226,11 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
// register. // register.
NewSrc = legalize(Src0, Legal_Reg); NewSrc = legalize(Src0, Legal_Reg);
} }
if (isVectorType(Dest->getType())) {
Variable *SrcR = legalizeToReg(NewSrc); if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
_mov(Dest, SrcR); NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
} else if (isFloatingType(Dest->getType())) {
Variable *SrcR = legalizeToReg(NewSrc);
_mov(Dest, SrcR);
} else {
_mov(Dest, NewSrc);
}
} }
_mov(Dest, NewSrc);
} }
TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
...@@ -2580,6 +2936,18 @@ struct { ...@@ -2580,6 +2936,18 @@ struct {
FCMPARM32_TABLE FCMPARM32_TABLE
#undef X #undef X
}; };
bool isFloatingPointZero(Operand *Src) {
if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
return Utils::isPositiveZero(F32->getValue());
}
if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
return Utils::isPositiveZero(F64->getValue());
}
return false;
}
} // end of anonymous namespace } // end of anonymous namespace
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
...@@ -2592,8 +2960,12 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { ...@@ -2592,8 +2960,12 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
break; break;
default: { default: {
Variable *Src0R = legalizeToReg(Instr->getSrc(0)); Variable *Src0R = legalizeToReg(Instr->getSrc(0));
Variable *Src1R = legalizeToReg(Instr->getSrc(1)); Operand *Src1 = Instr->getSrc(1);
_vcmp(Src0R, Src1R); if (isFloatingPointZero(Src1)) {
_vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
} else {
_vcmp(Src0R, legalizeToReg(Src1));
}
_vmrs(); _vmrs();
assert(Condition < llvm::array_lengthof(TableFcmp)); assert(Condition < llvm::array_lengthof(TableFcmp));
return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
...@@ -2642,12 +3014,87 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) { ...@@ -2642,12 +3014,87 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
_mov(Dest, T); _mov(Dest, T);
} }
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { TargetARM32::CondWhenTrue
assert(Inst->getSrc(0)->getType() != IceType_i1); TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
assert(Inst->getSrc(1)->getType() != IceType_i1); Operand *Src1) {
size_t Index = static_cast<size_t>(Condition);
assert(Index < llvm::array_lengthof(TableIcmp64));
Operand *Src0 = legalizeUndef(Inst->getSrc(0)); Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
Operand *Src1 = legalizeUndef(Inst->getSrc(1)); Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
if (SrcsLo.hasConstOperand()) {
const uint32_t ValueLo = SrcsLo.getConstantValue();
const uint32_t ValueHi = SrcsHi.getConstantValue();
const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
Value == 0) {
Variable *T = makeReg(IceType_i32);
Variable *Src0LoR = SrcsLo.src0R(this);
Variable *Src0HiR = SrcsHi.src0R(this);
_orrs(T, Src0LoR, Src0HiR);
Context.insert(InstFakeUse::create(Func, T));
return CondWhenTrue(TableIcmp64[Index].C1);
}
Variable *Src0RLo = SrcsLo.src0R(this);
Variable *Src0RHi = SrcsHi.src0R(this);
Operand *Src1RFLo = SrcsLo.src1RF(this);
Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands();
if (UseRsb) {
if (TableIcmp64[Index].IsSigned) {
Variable *T = makeReg(IceType_i32);
_rsbs(T, Src0RLo, Src1RFLo);
Context.insert(InstFakeUse::create(Func, T));
T = makeReg(IceType_i32);
_rscs(T, Src0RHi, Src1RFHi);
// We need to add a FakeUse here because liveness gets mad at us (Def
// without Use.) Note that flag-setting instructions are considered to
// have side effects and, therefore, are not DCE'ed.
Context.insert(InstFakeUse::create(Func, T));
} else {
Variable *T = makeReg(IceType_i32);
_rsbs(T, Src0RHi, Src1RFHi);
Context.insert(InstFakeUse::create(Func, T));
T = makeReg(IceType_i32);
_rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
Context.insert(InstFakeUse::create(Func, T));
}
} else {
if (TableIcmp64[Index].IsSigned) {
_cmp(Src0RLo, Src1RFLo);
Variable *T = makeReg(IceType_i32);
_sbcs(T, Src0RHi, Src1RFHi);
Context.insert(InstFakeUse::create(Func, T));
} else {
_cmp(Src0RHi, Src1RFHi);
_cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
}
}
return CondWhenTrue(TableIcmp64[Index].C1);
}
Variable *Src0RLo, *Src0RHi;
Operand *Src1RFLo, *Src1RFHi;
if (TableIcmp64[Index].Swapped) {
Src0RLo = legalizeToReg(loOperand(Src1));
Src0RHi = legalizeToReg(hiOperand(Src1));
Src1RFLo = legalizeToReg(loOperand(Src0));
Src1RFHi = legalizeToReg(hiOperand(Src0));
} else {
Src0RLo = legalizeToReg(loOperand(Src0));
Src0RHi = legalizeToReg(hiOperand(Src0));
Src1RFLo = legalizeToReg(loOperand(Src1));
Src1RFHi = legalizeToReg(hiOperand(Src1));
}
// a=icmp cond, b, c ==> // a=icmp cond, b, c ==>
// GCC does: // GCC does:
...@@ -2678,38 +3125,111 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { ...@@ -2678,38 +3125,111 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
// //
// So, we are going with the GCC version since it's usually better (except // So, we are going with the GCC version since it's usually better (except
// perhaps for eq/ne). We could revisit special-casing eq/ne later. // perhaps for eq/ne). We could revisit special-casing eq/ne later.
if (Src0->getType() == IceType_i64) {
InstIcmp::ICond Conditon = Inst->getCondition();
size_t Index = static_cast<size_t>(Conditon);
assert(Index < llvm::array_lengthof(TableIcmp64));
Variable *Src0Lo, *Src0Hi;
Operand *Src1LoRF, *Src1HiRF;
if (TableIcmp64[Index].Swapped) {
Src0Lo = legalizeToReg(loOperand(Src1));
Src0Hi = legalizeToReg(hiOperand(Src1));
Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
} else {
Src0Lo = legalizeToReg(loOperand(Src0));
Src0Hi = legalizeToReg(hiOperand(Src0));
Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
}
if (TableIcmp64[Index].IsSigned) { if (TableIcmp64[Index].IsSigned) {
Variable *ScratchReg = makeReg(IceType_i32); Variable *ScratchReg = makeReg(IceType_i32);
_cmp(Src0Lo, Src1LoRF); _cmp(Src0RLo, Src1RFLo);
_sbcs(ScratchReg, Src0Hi, Src1HiRF); _sbcs(ScratchReg, Src0RHi, Src1RFHi);
// ScratchReg isn't going to be used, but we need the side-effect of // ScratchReg isn't going to be used, but we need the side-effect of
// setting flags from this operation. // setting flags from this operation.
Context.insert(InstFakeUse::create(Func, ScratchReg)); Context.insert(InstFakeUse::create(Func, ScratchReg));
} else { } else {
_cmp(Src0Hi, Src1HiRF); _cmp(Src0RHi, Src1RFHi);
_cmp(Src0Lo, Src1LoRF, CondARM32::EQ); _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
} }
return CondWhenTrue(TableIcmp64[Index].C1); return CondWhenTrue(TableIcmp64[Index].C1);
}
TargetARM32::CondWhenTrue
TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1) {
Int32Operands Srcs(Src0, Src1);
if (!Srcs.hasConstOperand()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
_cmp(Src0R, Src1RF);
return CondWhenTrue(getIcmp32Mapping(Condition));
}
Variable *Src0R = Srcs.src0R(this);
const int32_t Value = Srcs.getConstantValue();
if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
_tst(Src0R, Src0R);
return CondWhenTrue(getIcmp32Mapping(Condition));
}
if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
Srcs.negatedImmediateIsFlexEncodable()) {
Operand *Src1F = Srcs.negatedSrc1F(this);
_cmn(Src0R, Src1F);
return CondWhenTrue(getIcmp32Mapping(Condition));
} }
Operand *Src1RF = Srcs.src1RF(this);
if (!Srcs.swappedOperands()) {
_cmp(Src0R, Src1RF);
} else {
Variable *T = makeReg(IceType_i32);
_rsbs(T, Src0R, Src1RF);
Context.insert(InstFakeUse::create(Func, T));
}
return CondWhenTrue(getIcmp32Mapping(Condition));
}
TargetARM32::CondWhenTrue
TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1) {
Int32Operands Srcs(Src0, Src1);
const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
assert(ShAmt >= 0);
if (!Srcs.hasConstOperand()) {
Variable *Src0R = makeReg(IceType_i32);
Operand *ShAmtF =
legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
_lsl(Src0R, legalizeToReg(Src0), ShAmtF);
Variable *Src1R = legalizeToReg(Src1);
OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
_cmp(Src0R, Src1F);
return CondWhenTrue(getIcmp32Mapping(Condition));
}
const int32_t Value = Srcs.getConstantValue();
if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
Variable *T = makeReg(IceType_i32);
_lsls(T, Srcs.src0R(this), ShAmtOp);
Context.insert(InstFakeUse::create(Func, T));
return CondWhenTrue(getIcmp32Mapping(Condition));
}
Variable *ConstR = makeReg(IceType_i32);
_mov(ConstR,
legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
Operand *NonConstF = OperandARM32FlexReg::create(
Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
Ctx->getConstantInt32(ShAmt));
if (Srcs.swappedOperands()) {
_cmp(ConstR, NonConstF);
} else {
Variable *T = makeReg(IceType_i32);
_rsbs(T, ConstR, NonConstF);
Context.insert(InstFakeUse::create(Func, T));
}
return CondWhenTrue(getIcmp32Mapping(Condition));
}
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
assert(Inst->getSrc(0)->getType() != IceType_i1);
assert(Inst->getSrc(1)->getType() != IceType_i1);
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
const InstIcmp::ICond Condition = Inst->getCondition();
// a=icmp cond b, c ==> // a=icmp cond b, c ==>
// GCC does: // GCC does:
// <u/s>xtb tb, b // <u/s>xtb tb, b
...@@ -2739,27 +3259,17 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { ...@@ -2739,27 +3259,17 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
// //
// We'll go with the LLVM way for now, since it's shorter and has just as few // We'll go with the LLVM way for now, since it's shorter and has just as few
// dependencies. // dependencies.
int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); switch (Src0->getType()) {
assert(ShiftAmt >= 0); default:
Constant *ShiftConst = nullptr; llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
Variable *Src0R = nullptr; case IceType_i8:
if (ShiftAmt) { case IceType_i16:
ShiftConst = Ctx->getConstantInt32(ShiftAmt); return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
Src0R = makeReg(IceType_i32); case IceType_i32:
_lsl(Src0R, legalizeToReg(Src0), ShiftConst); return lowerInt32IcmpCond(Condition, Src0, Src1);
} else { case IceType_i64:
Src0R = legalizeToReg(Src0); return lowerInt64IcmpCond(Condition, Src0, Src1);
}
if (ShiftAmt) {
Variable *Src1R = legalizeToReg(Src1);
OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
_cmp(Src0R, Src1RShifted);
} else {
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
_cmp(Src0R, Src1RF);
} }
return CondWhenTrue(getIcmp32Mapping(Inst->getCondition()));
} }
void TargetARM32::lowerIcmp(const InstIcmp *Inst) { void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
...@@ -4254,13 +4764,24 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -4254,13 +4764,24 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
return Reg; return Reg;
} else { } else {
assert(isScalarFloatingType(Ty)); assert(isScalarFloatingType(Ty));
uint32_t ModifiedImm;
if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
Variable *T = makeReg(Ty, RegNum);
_mov(T,
OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
return T;
}
if (Ty == IceType_f64 && isFloatingPointZero(From)) {
// Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
// because ARM does not have a veor instruction with S registers.
Variable *T = makeReg(IceType_f64, RegNum);
Context.insert(InstFakeDef::create(Func, T));
_veor(T, T, T);
return T;
}
// Load floats/doubles from literal pool. // Load floats/doubles from literal pool.
// TODO(jvoung): Allow certain immediates to be encoded directly in an
// operand. See Table A7-18 of the ARM manual: "Floating-point modified
// immediate constants". Or, for 32-bit floating point numbers, just
// encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
// instead of using a movw/movt pair to get the const-pool address then
// loading to SREG.
std::string Buffer; std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer); llvm::raw_string_ostream StrBuf(Buffer);
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
......
...@@ -140,7 +140,23 @@ public: ...@@ -140,7 +140,23 @@ public:
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const { bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
return CPUFeatures.hasFeature(I); return CPUFeatures.hasFeature(I);
} }
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, /// physical register, not stack location
Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
/// immediates, shifted registers, or modified fp imm.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None
};
using LegalMask = uint32_t;
Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister); Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
GlobalContext *getCtx() const { return Ctx; }
protected: protected:
explicit TargetARM32(Cfg *Func); explicit TargetARM32(Cfg *Func);
...@@ -154,6 +170,8 @@ protected: ...@@ -154,6 +170,8 @@ protected:
void lowerAlloca(const InstAlloca *Inst) override; void lowerAlloca(const InstAlloca *Inst) override;
SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst); SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
Operand *Src0, Operand *Src1);
void lowerArithmetic(const InstArithmetic *Inst) override; void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override; void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override; void lowerBr(const InstBr *Inst) override;
...@@ -192,6 +210,12 @@ protected: ...@@ -192,6 +210,12 @@ protected:
CondWhenTrue lowerFcmpCond(const InstFcmp *Instr); CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
void lowerFcmp(const InstFcmp *Instr) override; void lowerFcmp(const InstFcmp *Instr) override;
CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
Operand *Src0, Operand *Src1);
CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr); CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override; void lowerIcmp(const InstIcmp *Instr) override;
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
...@@ -211,18 +235,6 @@ protected: ...@@ -211,18 +235,6 @@ protected:
void randomlyInsertNop(float Probability, void randomlyInsertNop(float Probability,
RandomNumberGenerator &RNG) override; RandomNumberGenerator &RNG) override;
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, /// physical register, not stack location
Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
/// immediates, or shifted registers.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None
};
using LegalMask = uint32_t;
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty); OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
Variable64On32 *makeI64RegPair(); Variable64On32 *makeI64RegPair();
...@@ -299,6 +311,10 @@ protected: ...@@ -299,6 +311,10 @@ protected:
void _br(InstARM32Label *Label, CondARM32::Cond Condition) { void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
Context.insert(InstARM32Br::create(Func, Label, Condition)); Context.insert(InstARM32Br::create(Func, Label, Condition));
} }
void _cmn(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmn::create(Func, Src0, Src1, Pred));
}
void _cmp(Variable *Src0, Operand *Src1, void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred)); Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
...@@ -332,6 +348,12 @@ protected: ...@@ -332,6 +348,12 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred));
} }
void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert(
InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
void _lsr(Variable *Dest, Variable *Src0, Operand *Src1, void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred));
...@@ -654,6 +676,22 @@ protected: ...@@ -654,6 +676,22 @@ protected:
void _ret(Variable *LR, Variable *Src0 = nullptr) { void _ret(Variable *LR, Variable *Src0 = nullptr) {
Context.insert(InstARM32Ret::create(Func, LR, Src0)); Context.insert(InstARM32Ret::create(Func, LR, Src0));
} }
void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert(
InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsc::create(Func, Dest, Src0, Src1, Pred));
}
void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert(
InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1, void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
...@@ -745,12 +783,19 @@ protected: ...@@ -745,12 +783,19 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred)); Context.insert(InstARM32Vcmp::create(Func, Src0, Src1, Pred));
} }
void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred));
}
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred)); Context.insert(InstARM32Vmrs::create(Func, Pred));
} }
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
} }
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1));
}
void _vsqrt(Variable *Dest, Variable *Src, void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred));
......
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#include "IceUtils.h" #include "IceUtils.h"
#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"
#include <cmath> // signbit()
#include <stack> #include <stack>
namespace Ice { namespace Ice {
...@@ -5506,16 +5505,6 @@ Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { ...@@ -5506,16 +5505,6 @@ Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
return Reg; return Reg;
} }
namespace {
template <typename T> bool isPositiveZero(T Val) {
static_assert(std::is_floating_point<T>::value,
"Input type must be floating point");
return Val == 0 && !std::signbit(Val);
}
} // end of anonymous namespace
template <class Machine> template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) { int32_t RegNum) {
...@@ -5609,10 +5598,10 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, ...@@ -5609,10 +5598,10 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// operand. // operand.
if (isScalarFloatingType(Ty)) { if (isScalarFloatingType(Ty)) {
if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) { if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
if (isPositiveZero(ConstFloat->getValue())) if (Utils::isPositiveZero(ConstFloat->getValue()))
return makeZeroedRegister(Ty, RegNum); return makeZeroedRegister(Ty, RegNum);
} else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
if (isPositiveZero(ConstDouble->getValue())) if (Utils::isPositiveZero(ConstDouble->getValue()))
return makeZeroedRegister(Ty, RegNum); return makeZeroedRegister(Ty, RegNum);
} }
Variable *Base = nullptr; Variable *Base = nullptr;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define SUBZERO_SRC_ICEUTILS_H #define SUBZERO_SRC_ICEUTILS_H
#include <climits> #include <climits>
#include <cmath> // std::signbit()
namespace Ice { namespace Ice {
...@@ -117,6 +118,13 @@ public: ...@@ -117,6 +118,13 @@ public:
return value; return value;
return (value >> shift) | (value << (32 - shift)); return (value >> shift) | (value << (32 - shift));
} }
/// Returns true if Val is +0.0. It requires T to be a floating point type.
template <typename T> static bool isPositiveZero(T Val) {
static_assert(std::is_floating_point<T>::value,
"Input type must be floating point");
return Val == 0 && !std::signbit(Val);
}
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -512,13 +512,13 @@ entry: ...@@ -512,13 +512,13 @@ entry:
; OPTM1: sar {{.*}},0x1f ; OPTM1: sar {{.*}},0x1f
; ARM32-LABEL: shr64BitSigned ; ARM32-LABEL: shr64BitSigned
; ARM32: lsr [[T0:r[0-9]+]], r0, r2 ; ARM32: lsr [[T0:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: rsb [[T1:r[0-9]+]], r2, #32 ; ARM32: rsb [[T1:r[0-9]+]], r{{[0-9]+}}, #32
; ARM32: orr r0, [[T0]], r1, lsl [[T1]] ; ARM32: orr r{{[0-9]+}}, [[T0]], r{{[0-9]+}}, lsl [[T1]]
; ARM32: sub [[T2:r[0-9]+]], r2, #32 ; ARM32: sub [[T2:r[0-9]+]], r{{[0-9]+}}, #32
; ARM32: cmp [[T2]], #0 ; ARM32: cmp [[T2]], #0
; ARM32: asrge r0, r1, [[T2]] ; ARM32: asrge r{{[0-9]+}}, r{{[0-9]+}}, [[T2]]
; ARM32: asr r{{[0-9]+}}, r1, r2 ; ARM32: asr r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) { define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) {
entry: entry:
......
...@@ -117,7 +117,7 @@ entry: ...@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm ; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63 ; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm ; ARM32-LABEL: MulImm
; ARM32: mov {{.*}}, #99 ; ARM32: movw {{.*}}, #99
; ARM32: mul r{{.*}}, r{{.*}}, r{{.*}} ; ARM32: mul r{{.*}}, r{{.*}}, r{{.*}}
; MIPS32-LABEL: MulImm ; MIPS32-LABEL: MulImm
; MIPS32: mul ; MIPS32: mul
......
...@@ -169,8 +169,7 @@ entry: ...@@ -169,8 +169,7 @@ entry:
; CHECK: cmovl ; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef ; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov ; ARM32: mov
; ARM32: mov ; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: cmp {{r[0-9]+}}, r0
; ARM32: movlt ; ARM32: movlt
; ARM32: movlt ; ARM32: movlt
; ARM32: bx lr ; ARM32: bx lr
......
...@@ -62,234 +62,234 @@ declare void @float18(float %p0, float %p1, float %p2, float %p3, float %p4, ...@@ -62,234 +62,234 @@ declare void @float18(float %p0, float %p1, float %p2, float %p3, float %p4,
define internal void @floatHarness() nounwind { define internal void @floatHarness() nounwind {
; CHECK-LABEL: floatHarness ; CHECK-LABEL: floatHarness
call void @float1(float 1.0) call void @float1(float 1.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK: bl {{.*}} float1 ; CHECK: bl {{.*}} float1
call void @float2(float 1.0, float 2.0) call void @float2(float 1.0, float 2.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} float2 ; CHECK: bl {{.*}} float2
call void @float3(float 1.0, float 2.0, float 3.0) call void @float3(float 1.0, float 2.0, float 3.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK: bl {{.*}} float3 ; CHECK: bl {{.*}} float3
call void @float4(float 1.0, float 2.0, float 3.0, float 4.0) call void @float4(float 1.0, float 2.0, float 3.0, float 4.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK: bl {{.*}} float4 ; CHECK: bl {{.*}} float4
call void @float5(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0) call void @float5(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK: bl {{.*}} float5 ; CHECK: bl {{.*}} float5
call void @float6(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float6(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0) float 6.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK: bl {{.*}} float6 ; CHECK: bl {{.*}} float6
call void @float7(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float7(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0) float 6.0, float 7.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK: bl {{.*}} float7 ; CHECK: bl {{.*}} float7
call void @float8(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float8(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0) float 6.0, float 7.0, float 8.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK: bl {{.*}} float8 ; CHECK: bl {{.*}} float8
call void @float9(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float9(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0) float 6.0, float 7.0, float 8.0, float 9.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK: bl {{.*}} float9 ; CHECK: bl {{.*}} float9
call void @float10(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float10(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0) float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK: bl {{.*}} float10 ; CHECK: bl {{.*}} float10
call void @float11(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float11(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0) float 11.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK: bl {{.*}} float11 ; CHECK: bl {{.*}} float11
call void @float12(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float12(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0) float 11.0, float 12.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK: bl {{.*}} float12 ; CHECK: bl {{.*}} float12
call void @float13(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float13(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0) float 11.0, float 12.0, float 13.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vldr s12 ; CHECK-DAG: vmov.f32 s12
; CHECK: bl {{.*}} float13 ; CHECK: bl {{.*}} float13
call void @float14(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float14(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0) float 11.0, float 12.0, float 13.0, float 14.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vldr s12 ; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vldr s13 ; CHECK-DAG: vmov.f32 s13
; CHECK: bl {{.*}} float14 ; CHECK: bl {{.*}} float14
call void @float15(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float15(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0, float 11.0, float 12.0, float 13.0, float 14.0,
float 15.0) float 15.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vldr s12 ; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vldr s13 ; CHECK-DAG: vmov.f32 s13
; CHECK-DAG: vldr s14 ; CHECK-DAG: vmov.f32 s14
; CHECK: bl {{.*}} float15 ; CHECK: bl {{.*}} float15
call void @float16(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float16(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0, float 11.0, float 12.0, float 13.0, float 14.0,
float 15.0, float 16.0) float 15.0, float 16.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vldr s12 ; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vldr s13 ; CHECK-DAG: vmov.f32 s13
; CHECK-DAG: vldr s14 ; CHECK-DAG: vmov.f32 s14
; CHECK-DAG: vldr s15 ; CHECK-DAG: vmov.f32 s15
; CHECK: bl {{.*}} float16 ; CHECK: bl {{.*}} float16
call void @float17(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float17(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0, float 11.0, float 12.0, float 13.0, float 14.0,
float 15.0, float 16.0, float 17.0) float 15.0, float 16.0, float 17.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vldr s12 ; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vldr s13 ; CHECK-DAG: vmov.f32 s13
; CHECK-DAG: vldr s14 ; CHECK-DAG: vmov.f32 s14
; CHECK-DAG: vldr s15 ; CHECK-DAG: vmov.f32 s15
; CHECK-DAG: vstr s{{.*}}, [sp] ; CHECK-DAG: vstr s{{.*}}, [sp]
; CHECK: bl {{.*}} float17 ; CHECK: bl {{.*}} float17
call void @float18(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, call void @float18(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0, float 11.0, float 12.0, float 13.0, float 14.0,
float 15.0, float 16.0, float 17.0, float 18.0) float 15.0, float 16.0, float 17.0, float 18.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vldr s2 ; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vldr s3 ; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vldr s4 ; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vldr s5 ; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vldr s6 ; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vldr s7 ; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vldr s8 ; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vldr s9 ; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vldr s10 ; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vldr s11 ; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vldr s12 ; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vldr s13 ; CHECK-DAG: vmov.f32 s13
; CHECK-DAG: vldr s14 ; CHECK-DAG: vmov.f32 s14
; CHECK-DAG: vldr s15 ; CHECK-DAG: vmov.f32 s15
; CHECK-DAG: vstr s{{.*}}, [sp] ; CHECK-DAG: vstr s{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #4] ; CHECK-DAG: vstr s{{.*}}, [sp, #4]
; CHECK: bl {{.*}} float18 ; CHECK: bl {{.*}} float18
...@@ -317,85 +317,85 @@ declare void @double10(double %p0, double %p1, double %p2, double %p3, ...@@ -317,85 +317,85 @@ declare void @double10(double %p0, double %p1, double %p2, double %p3,
define internal void @doubleHarness() nounwind { define internal void @doubleHarness() nounwind {
; CHECK-LABEL: doubleHarness ; CHECK-LABEL: doubleHarness
call void @double1(double 1.0) call void @double1(double 1.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK: bl {{.*}} double1 ; CHECK: bl {{.*}} double1
call void @double2(double 1.0, double 2.0) call void @double2(double 1.0, double 2.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK: bl {{.*}} double2 ; CHECK: bl {{.*}} double2
call void @double3(double 1.0, double 2.0, double 3.0) call void @double3(double 1.0, double 2.0, double 3.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK: bl {{.*}} double3 ; CHECK: bl {{.*}} double3
call void @double4(double 1.0, double 2.0, double 3.0, double 4.0) call void @double4(double 1.0, double 2.0, double 3.0, double 4.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK: bl {{.*}} double4 ; CHECK: bl {{.*}} double4
call void @double5(double 1.0, double 2.0, double 3.0, double 4.0, call void @double5(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0) double 5.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK: bl {{.*}} double5 ; CHECK: bl {{.*}} double5
call void @double6(double 1.0, double 2.0, double 3.0, double 4.0, call void @double6(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0) double 5.0, double 6.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK: bl {{.*}} double6 ; CHECK: bl {{.*}} double6
call void @double7(double 1.0, double 2.0, double 3.0, double 4.0, call void @double7(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0) double 5.0, double 6.0, double 7.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK: bl {{.*}} double7 ; CHECK: bl {{.*}} double7
call void @double8(double 1.0, double 2.0, double 3.0, double 4.0, call void @double8(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0) double 5.0, double 6.0, double 7.0, double 8.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK: bl {{.*}} double8 ; CHECK: bl {{.*}} double8
call void @double9(double 1.0, double 2.0, double 3.0, double 4.0, call void @double9(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0) double 9.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK: bl {{.*}} double9 ; CHECK: bl {{.*}} double9
call void @double10(double 1.0, double 2.0, double 3.0, double 4.0, call void @double10(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, double 10.0) double 9.0, double 10.0)
; CHECK-DAG: vldr d0 ; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr d{{.*}}, [sp, #8] ; CHECK-DAG: vstr d{{.*}}, [sp, #8]
; CHECK: bl {{.*}} double10 ; CHECK: bl {{.*}} double10
...@@ -434,106 +434,106 @@ declare void @testFDDDDDDDDFDF(float %p0, double %p1, double %p2, double %p3, ...@@ -434,106 +434,106 @@ declare void @testFDDDDDDDDFDF(float %p0, double %p1, double %p2, double %p3,
define internal void @packsFloats() nounwind { define internal void @packsFloats() nounwind {
; CHECK-LABEL: packsFloats ; CHECK-LABEL: packsFloats
call void @testFDF(float 1.0, double 2.0, float 3.0) call void @testFDF(float 1.0, double 2.0, float 3.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDF ; CHECK: bl {{.*}} testFDF
call void @testFDDF(float 1.0, double 2.0, double 3.0, float 4.0) call void @testFDDF(float 1.0, double 2.0, double 3.0, float 4.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDF ; CHECK: bl {{.*}} testFDDF
call void @testFDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
float 5.0) float 5.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDDF ; CHECK: bl {{.*}} testFDDDF
call void @testFDDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, float 6.0) double 5.0, float 6.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDDDF ; CHECK: bl {{.*}} testFDDDDF
call void @testFDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, float 7.0) double 5.0, double 6.0, float 7.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDDDDF ; CHECK: bl {{.*}} testFDDDDDF
call void @testFDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, float 8.0) double 5.0, double 6.0, double 7.0, float 8.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDDDDDF ; CHECK: bl {{.*}} testFDDDDDDF
call void @testFDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
float 9.0) float 9.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDDDDDDF ; CHECK: bl {{.*}} testFDDDDDDDF
call void @testFDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
float 9.0, double 10.0) float 9.0, double 10.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vldr s1 ; CHECK-DAG: vmov.f32 s1
; CHECK: bl {{.*}} testFDDDDDDDFD ; CHECK: bl {{.*}} testFDDDDDDDFD
call void @testFDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, float 10.0) double 9.0, float 10.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #8] ; CHECK-DAG: vstr s{{.*}}, [sp, #8]
; CHECK: bl {{.*}} testFDDDDDDDDF ; CHECK: bl {{.*}} testFDDDDDDDDF
call void @testFDDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, double 10.0, float 11.0) double 9.0, double 10.0, float 11.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr d{{.*}}, [sp, #8] ; CHECK-DAG: vstr d{{.*}}, [sp, #8]
; CHECK-DAG: vstr s{{.*}}, [sp, #16] ; CHECK-DAG: vstr s{{.*}}, [sp, #16]
...@@ -541,14 +541,14 @@ define internal void @packsFloats() nounwind { ...@@ -541,14 +541,14 @@ define internal void @packsFloats() nounwind {
call void @testFDDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, float 10.0, double 11.0) double 9.0, float 10.0, double 11.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #8] ; CHECK-DAG: vstr s{{.*}}, [sp, #8]
; CHECK-DAG: vstr d{{.*}}, [sp, #16] ; CHECK-DAG: vstr d{{.*}}, [sp, #16]
...@@ -556,14 +556,14 @@ define internal void @packsFloats() nounwind { ...@@ -556,14 +556,14 @@ define internal void @packsFloats() nounwind {
call void @testFDDDDDDDDFDF(float 1.0, double 2.0, double 3.0, double 4.0, call void @testFDDDDDDDDFDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0, double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, float 10.0, double 11.0, float 12.0) double 9.0, float 10.0, double 11.0, float 12.0)
; CHECK-DAG: vldr s0 ; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vldr d1 ; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vldr d2 ; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vldr d3 ; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vldr d4 ; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vldr d5 ; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vldr d6 ; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vldr d7 ; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp] ; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #8] ; CHECK-DAG: vstr s{{.*}}, [sp, #8]
; CHECK-DAG: vstr d{{.*}}, [sp, #16] ; CHECK-DAG: vstr d{{.*}}, [sp, #16]
......
...@@ -39,7 +39,7 @@ return: ; preds = %entry ...@@ -39,7 +39,7 @@ return: ; preds = %entry
; CHECK: ret ; CHECK: ret
; ARM32-LABEL: divide ; ARM32-LABEL: divide
; ARM32: cmp ; ARM32: tst
; ARM32: .word 0xe7fedef0 ; ARM32: .word 0xe7fedef0
; ARM32: bl {{.*}} __divsi3 ; ARM32: bl {{.*}} __divsi3
; ARM32: bx lr ; ARM32: bx lr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment