Commit 86ebec12 by Jan Voung

Add the ARM32 FP register table entries, simple arith, and args.

Lower some instructions, without much guarantee of correctness. *Running* generated code will be risky because the register allocator isn't aware of register aliasing. Fill in v{add,div,mul,sub}.f{32,64}, vmov, vldr and vsqrt.f{32,64}. I tried to make the nacl-other-intrinsics test not explode, so I added vsqrt too. That was pretty easy for sqrt, but then fabs tests also exploded. Those are not truly fixed but are currently "fixed" by adding a FakeDef to satisfy liveness. Propagate float/double arguments to the right register in lowerArguments, lowerCall, and propagate to s0/d0/q0 for lowerReturn. May need to double check the calling convention. Currently can't test call-ret because vpush/vpop for prologues and epilogues isn't done. Legalize FP immediates to make the nacl-other-intrinsics sqrt test happy. Use the correct type of load (vldr (.32 and .64 are optional) instead of ldr{b,h,,d}). Whether or not the float/vector instructions can be predicated is a bit interesting. The float/double ones can, but the SIMD versions cannot. E.g., "vadd<cond>.f32 s0, s0, s1" is okay, but "vadd<cond>.f32 q0, q0, q1" is not okay. For now, just omit conditions from instructions that may end up being reused for SIMD. Split up the fp.pnacl.ll test into multiple ones so that parts of lowering can be tested incrementally. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1266263003 .
parent f4fbf7fd
...@@ -29,11 +29,12 @@ namespace { ...@@ -29,11 +29,12 @@ namespace {
const struct TypeARM32Attributes_ { const struct TypeARM32Attributes_ {
const char *WidthString; // b, h, <blank>, or d const char *WidthString; // b, h, <blank>, or d
const char *VecWidthString; // i8, i16, i32, f32, f64
int8_t SExtAddrOffsetBits; int8_t SExtAddrOffsetBits;
int8_t ZExtAddrOffsetBits; int8_t ZExtAddrOffsetBits;
} TypeARM32Attributes[] = { } TypeARM32Attributes[] = {
#define X(tag, elementty, width, sbits, ubits) \ #define X(tag, elementty, int_width, vec_width, sbits, ubits) \
{ width, sbits, ubits } \ { int_width, vec_width, sbits, ubits } \
, ,
ICETYPEARM32_TABLE ICETYPEARM32_TABLE
#undef X #undef X
...@@ -66,6 +67,10 @@ const char *InstARM32::getWidthString(Type Ty) { ...@@ -66,6 +67,10 @@ const char *InstARM32::getWidthString(Type Ty) {
return TypeARM32Attributes[Ty].WidthString; return TypeARM32Attributes[Ty].WidthString;
} }
/// Looks up the VecWidthString column of the TypeARM32Attributes table for
/// \p Ty. The FP emit helpers append this string directly after the opcode.
const char *InstARM32::getVecWidthString(Type Ty) {
  const auto &Attributes = TypeARM32Attributes[Ty];
  return Attributes.VecWidthString;
}
const char *InstARM32Pred::predString(CondARM32::Cond Pred) { const char *InstARM32Pred::predString(CondARM32::Cond Pred) {
return InstARM32CondAttributes[Pred].EmitString; return InstARM32CondAttributes[Pred].EmitString;
} }
...@@ -94,6 +99,18 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode, ...@@ -94,6 +99,18 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
Inst->getSrc(0)->emit(Func); Inst->getSrc(0)->emit(Func);
} }
void InstARM32Pred::emitUnaryopFP(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 1);
Type SrcTy = Inst->getSrc(0)->getType();
Str << "\t" << Opcode << Inst->getPredicate() << getVecWidthString(SrcTy)
<< "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
}
void InstARM32Pred::emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst, void InstARM32Pred::emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) { const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -123,6 +140,21 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst, ...@@ -123,6 +140,21 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
Inst->getSrc(1)->emit(Func); Inst->getSrc(1)->emit(Func);
} }
void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2);
Str << "\t" << Opcode << getVecWidthString(Inst->getDest()->getType())
<< "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst, void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) { const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -304,12 +336,6 @@ IceString InstARM32Label::getName(const Cfg *Func) const { ...@@ -304,12 +336,6 @@ IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number); return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
} }
InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Ldr, 1, Dest, Predicate) {
addSource(Mem);
}
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs. // Track modifications to Dests separately via FakeDefs.
...@@ -363,8 +389,14 @@ template <> const char *InstARM32Rbit::Opcode = "rbit"; ...@@ -363,8 +389,14 @@ template <> const char *InstARM32Rbit::Opcode = "rbit";
template <> const char *InstARM32Rev::Opcode = "rev"; template <> const char *InstARM32Rev::Opcode = "rev";
template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h
template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h
// FP
template <> const char *InstARM32Vsqrt::Opcode = "vsqrt";
// Mov-like ops // Mov-like ops
template <> const char *InstARM32Ldr::Opcode = "ldr";
template <> const char *InstARM32Mov::Opcode = "mov"; template <> const char *InstARM32Mov::Opcode = "mov";
// FP
template <> const char *InstARM32Vldr::Opcode = "vldr";
template <> const char *InstARM32Vmov::Opcode = "vmov";
// Three-addr ops // Three-addr ops
template <> const char *InstARM32Adc::Opcode = "adc"; template <> const char *InstARM32Adc::Opcode = "adc";
template <> const char *InstARM32Add::Opcode = "add"; template <> const char *InstARM32Add::Opcode = "add";
...@@ -381,6 +413,11 @@ template <> const char *InstARM32Sbc::Opcode = "sbc"; ...@@ -381,6 +413,11 @@ template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv"; template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub"; template <> const char *InstARM32Sub::Opcode = "sub";
template <> const char *InstARM32Udiv::Opcode = "udiv"; template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP
template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls"; template <> const char *InstARM32Mls::Opcode = "mls";
...@@ -403,19 +440,19 @@ template <> void InstARM32Mov::emit(const Cfg *Func) const { ...@@ -403,19 +440,19 @@ template <> void InstARM32Mov::emit(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
Variable *Dest = getDest(); Variable *Dest = getDest();
if (Dest->hasReg()) { if (Dest->hasReg()) {
IceString Opcode = "mov"; IceString ActualOpcode = Opcode;
Operand *Src0 = getSrc(0); Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
if (!Src0V->hasReg()) { if (!Src0V->hasReg()) {
// Always use the whole stack slot. A 32-bit load has a larger range // Always use the whole stack slot. A 32-bit load has a larger range
// of offsets than 16-bit, etc. // of offsets than 16-bit, etc.
Opcode = IceString("ldr"); ActualOpcode = IceString("ldr");
} }
} else { } else {
if (llvm::isa<OperandARM32Mem>(Src0)) if (llvm::isa<OperandARM32Mem>(Src0))
Opcode = IceString("ldr") + getWidthString(Dest->getType()); ActualOpcode = IceString("ldr") + getWidthString(Dest->getType());
} }
Str << "\t" << Opcode << getPredicate() << "\t"; Str << "\t" << ActualOpcode << getPredicate() << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
...@@ -436,6 +473,64 @@ template <> void InstARM32Mov::emitIAS(const Cfg *Func) const { ...@@ -436,6 +473,64 @@ template <> void InstARM32Mov::emitIAS(const Cfg *Func) const {
llvm_unreachable("Not yet implemented"); llvm_unreachable("Not yet implemented");
} }
/// Textual emission for vldr: "\t<Opcode><predicate>\t<dest>, <src>". No
/// width suffix is appended. The destination is asserted to have a register.
template <> void InstARM32Vldr::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Out = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 1);
  Variable *Dest = getDest();
  assert(Dest->hasReg());
  Out << "\t" << Opcode << getPredicate() << "\t";
  Dest->emit(Func);
  Out << ", ";
  getSrc(0)->emit(Func);
}
/// The emitIAS path for vldr is not implemented; reaching it is flagged via
/// llvm_unreachable.
template <> void InstARM32Vldr::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until this path is implemented.
  assert(getSrcSize() == 1);
  llvm_unreachable("Not yet implemented");
}
/// Textual emission for the vmov pseudo-instruction. The mnemonic actually
/// written depends on where the operands live:
///   - dest has no register:                 "vstr <src>, <dest>"
///   - src is a variable without a register: "vldr <dest>, <src>"
///   - src is an OperandARM32Mem:            "vldr <dest>, <src>"
///   - anything else:                        "<Opcode> <dest>, <src>"
/// The instruction must be unpredicated (AL); no predicate is printed.
template <> void InstARM32Vmov::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  assert(CondARM32::AL == getPredicate());
  Ostream &Out = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 1);
  Variable *Dest = getDest();
  Operand *Src = getSrc(0);

  if (!Dest->hasReg()) {
    // Store form: the source must be a variable that holds a register.
    Variable *SrcVar = llvm::cast<Variable>(Src);
    assert(SrcVar->hasReg());
    Out << "\t"
           "vstr"
           "\t";
    SrcVar->emit(Func);
    Out << ", ";
    Dest->emit(Func);
    return;
  }

  // Register destination: decide between a load and a plain move.
  bool IsLoad;
  if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src))
    IsLoad = !SrcVar->hasReg();
  else
    IsLoad = llvm::isa<OperandARM32Mem>(Src);
  const IceString ActualOpcode = IsLoad ? IceString("vldr") : IceString(Opcode);

  Out << "\t" << ActualOpcode << "\t";
  Dest->emit(Func);
  Out << ", ";
  Src->emit(Func);
}
/// The emitIAS path for vmov is not implemented; reaching it is flagged via
/// llvm_unreachable.
template <> void InstARM32Vmov::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until this path is implemented.
  assert(getSrcSize() == 1);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Br::emit(const Cfg *Func) const { void InstARM32Br::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -547,37 +642,25 @@ void InstARM32Label::dump(const Cfg *Func) const { ...@@ -547,37 +642,25 @@ void InstARM32Label::dump(const Cfg *Func) const {
Str << getName(Func) << ":"; Str << getName(Func) << ":";
} }
void InstARM32Ldr::emit(const Cfg *Func) const { template <> void InstARM32Ldr::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
assert(getDest()->hasReg()); assert(getDest()->hasReg());
Type Ty = getSrc(0)->getType(); Type Ty = getSrc(0)->getType();
Str << "\t" Str << "\t"<< Opcode << getWidthString(Ty) << getPredicate() << "\t";
<< "ldr" << getWidthString(Ty) << getPredicate() << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
} }
void InstARM32Ldr::emitIAS(const Cfg *Func) const { template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
(void)Func; (void)Func;
llvm_unreachable("Not yet implemented"); llvm_unreachable("Not yet implemented");
} }
void InstARM32Ldr::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
dumpOpcodePred(Str, "ldr", getDest()->getType());
Str << " ";
dumpSources(Func);
}
template <> void InstARM32Movw::emit(const Cfg *Func) const { template <> void InstARM32Movw::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
......
...@@ -320,12 +320,24 @@ public: ...@@ -320,12 +320,24 @@ public:
Tst, Tst,
Udiv, Udiv,
Umull, Umull,
Uxt Uxt,
Vadd,
Vdiv,
Vldr,
Vmov,
Vmul,
Vsqrt,
Vsub
}; };
static const char *getWidthString(Type Ty); static const char *getWidthString(Type Ty);
static const char *getVecWidthString(Type Ty);
static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond); static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond);
/// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
protected: protected:
...@@ -357,6 +369,8 @@ public: ...@@ -357,6 +369,8 @@ public:
/// Shared emit routines for common forms of instructions. /// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst, static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func, bool NeedsWidthSuffix); const Cfg *Func, bool NeedsWidthSuffix);
static void emitUnaryopFP(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func);
static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst, static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func); const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst, static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
...@@ -420,6 +434,50 @@ private: ...@@ -420,6 +434,50 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
/// Predicated vector/FP instructions with one destination and one source,
/// i.e. of the form x := op(y). The mnemonic is supplied per instruction kind
/// through the static Opcode member.
template <InstARM32::InstKindARM32 K>
class InstARM32UnaryopFP : public InstARM32Pred {
  InstARM32UnaryopFP() = delete;
  InstARM32UnaryopFP(const InstARM32UnaryopFP &) = delete;
  InstARM32UnaryopFP &operator=(const InstARM32UnaryopFP &) = delete;

public:
  /// Constructs the instruction in storage obtained from Func's allocator.
  static InstARM32UnaryopFP *create(Cfg *Func, Variable *Dest, Variable *Src,
                                    CondARM32::Cond Predicate) {
    auto *Storage = Func->allocate<InstARM32UnaryopFP>();
    return new (Storage) InstARM32UnaryopFP(Func, Dest, Src, Predicate);
  }
  /// Textual emission; delegates to the shared unary FP emitter.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitUnaryopFP(Opcode, this, Func);
  }
  /// Not implemented yet; always reports a fatal error.
  void emitIAS(const Cfg *Func) const override {
    (void)Func;
    llvm::report_fatal_error("Not yet implemented");
  }
  /// Dumps "<dest> = <opcode+predicate, keyed on dest type> <sources>".
  void dump(const Cfg *Func) const override {
    if (!BuildDefs::dump())
      return;
    Ostream &Out = Func->getContext()->getStrDump();
    dumpDest(Func);
    Out << " = ";
    dumpOpcodePred(Out, Opcode, getDest()->getType());
    Out << " ";
    dumpSources(Func);
  }
  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstARM32UnaryopFP(Cfg *Func, Variable *Dest, Operand *Src,
                     CondARM32::Cond Predicate)
      : InstARM32Pred(Func, K, 1, Dest, Predicate) {
    addSource(Src);
  }

  static const char *Opcode;
};
/// Instructions of the form x := x op y. /// Instructions of the form x := x op y.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32TwoAddrGPR : public InstARM32Pred { class InstARM32TwoAddrGPR : public InstARM32Pred {
...@@ -559,7 +617,56 @@ private: ...@@ -559,7 +617,56 @@ private:
bool SetFlags; bool SetFlags;
}; };
// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate. /// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided
/// by the Floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 {
  InstARM32ThreeAddrFP() = delete;
  InstARM32ThreeAddrFP(const InstARM32ThreeAddrFP &) = delete;
  InstARM32ThreeAddrFP &operator=(const InstARM32ThreeAddrFP &) = delete;

public:
  /// Create a vector/FP binary-op instruction like vadd, and vsub.
  /// Everything must be a register. The instruction is constructed in storage
  /// obtained from Func's allocator.
  static InstARM32ThreeAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
                                      Variable *Src1) {
    auto *Storage = Func->allocate<InstARM32ThreeAddrFP>();
    return new (Storage) InstARM32ThreeAddrFP(Func, Dest, Src0, Src1);
  }
  /// Textual emission; delegates to the shared three-address FP emitter.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitThreeAddrFP(Opcode, this, Func);
  }
  /// Not implemented yet; always reports a fatal error.
  void emitIAS(const Cfg *Func) const override {
    (void)Func;
    llvm::report_fatal_error("Not yet implemented");
  }
  /// Dumps "<dest> = <opcode>.<dest type> <sources>".
  void dump(const Cfg *Func) const override {
    if (!BuildDefs::dump())
      return;
    Ostream &Out = Func->getContext()->getStrDump();
    dumpDest(Func);
    Out << " = " << Opcode << "." << getDest()->getType() << " ";
    dumpSources(Func);
  }
  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0,
                       Variable *Src1)
      : InstARM32(Func, K, 2, Dest) {
    addSource(Src0);
    addSource(Src1);
  }

  static const char *Opcode;
};
/// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrGPR : public InstARM32Pred { class InstARM32FourAddrGPR : public InstARM32Pred {
InstARM32FourAddrGPR() = delete; InstARM32FourAddrGPR() = delete;
...@@ -608,7 +715,7 @@ private: ...@@ -608,7 +715,7 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
// Instructions of the form x cmpop y (setting flags). /// Instructions of the form x cmpop y (setting flags).
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred { class InstARM32CmpLike : public InstARM32Pred {
InstARM32CmpLike() = delete; InstARM32CmpLike() = delete;
...@@ -666,10 +773,19 @@ typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc; ...@@ -666,10 +773,19 @@ typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc;
typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv; typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv;
typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub; typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub;
typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv; typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv;
typedef InstARM32ThreeAddrFP<InstARM32::Vadd> InstARM32Vadd;
typedef InstARM32ThreeAddrFP<InstARM32::Vdiv> InstARM32Vdiv;
typedef InstARM32ThreeAddrFP<InstARM32::Vmul> InstARM32Vmul;
typedef InstARM32ThreeAddrFP<InstARM32::Vsub> InstARM32Vsub;
typedef InstARM32Movlike<InstARM32::Ldr> InstARM32Ldr;
/// Move instruction (variable <- flex). This is more of a pseudo-inst. /// Move instruction (variable <- flex). This is more of a pseudo-inst.
/// If var is a register, then we use "mov". If var is stack, then we use /// If var is a register, then we use "mov". If var is stack, then we use
/// "str" to store to the stack. /// "str" to store to the stack.
typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov; typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov;
/// Represents various vector mov instruction forms (simple single source,
/// single dest forms only, not the 2 GPR <-> 1 D reg forms, etc.).
typedef InstARM32Movlike<InstARM32::Vmov> InstARM32Vmov;
typedef InstARM32Movlike<InstARM32::Vldr> InstARM32Vldr;
/// MovT leaves the bottom bits alone so dest is also a source. /// MovT leaves the bottom bits alone so dest is also a source.
/// This helps indicate that a previous MovW setting dest is not dead code. /// This helps indicate that a previous MovW setting dest is not dead code.
typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt; typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt;
...@@ -683,6 +799,7 @@ typedef InstARM32UnaryopGPR<InstARM32::Rev, false> InstARM32Rev; ...@@ -683,6 +799,7 @@ typedef InstARM32UnaryopGPR<InstARM32::Rev, false> InstARM32Rev;
// but we aren't using that for now, so just model as a Unaryop. // but we aren't using that for now, so just model as a Unaryop.
typedef InstARM32UnaryopGPR<InstARM32::Sxt, true> InstARM32Sxt; typedef InstARM32UnaryopGPR<InstARM32::Sxt, true> InstARM32Sxt;
typedef InstARM32UnaryopGPR<InstARM32::Uxt, true> InstARM32Uxt; typedef InstARM32UnaryopGPR<InstARM32::Uxt, true> InstARM32Uxt;
typedef InstARM32UnaryopFP<InstARM32::Vsqrt> InstARM32Vsqrt;
typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla; typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla;
typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls; typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls;
typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp; typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp;
...@@ -838,29 +955,6 @@ private: ...@@ -838,29 +955,6 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget); InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
}; };
/// Load instruction.
class InstARM32Ldr : public InstARM32Pred {
InstARM32Ldr() = delete;
InstARM32Ldr(const InstARM32Ldr &) = delete;
InstARM32Ldr &operator=(const InstARM32Ldr &) = delete;
public:
/// Dest must be a register.
static InstARM32Ldr *create(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Ldr>())
InstARM32Ldr(Func, Dest, Mem, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Ldr); }
private:
InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
CondARM32::Cond Predicate);
};
/// Pop into a list of GPRs. Technically this can be predicated, but we don't /// Pop into a list of GPRs. Technically this can be predicated, but we don't
/// need that functionality. /// need that functionality.
class InstARM32Pop : public InstARM32 { class InstARM32Pop : public InstARM32 {
...@@ -1003,8 +1097,12 @@ private: ...@@ -1003,8 +1097,12 @@ private:
// already have default implementations. Without this, there is the // already have default implementations. Without this, there is the
// possibility of ODR violations and link errors. // possibility of ODR violations and link errors.
template <> void InstARM32Ldr::emit(const Cfg *Func) const;
template <> void InstARM32Mov::emit(const Cfg *Func) const;
template <> void InstARM32Movw::emit(const Cfg *Func) const; template <> void InstARM32Movw::emit(const Cfg *Func) const;
template <> void InstARM32Movt::emit(const Cfg *Func) const; template <> void InstARM32Movt::emit(const Cfg *Func) const;
template <> void InstARM32Vldr::emit(const Cfg *Func) const;
template <> void InstARM32Vmov::emit(const Cfg *Func) const;
} // end of namespace Ice } // end of namespace Ice
......
...@@ -21,13 +21,13 @@ ...@@ -21,13 +21,13 @@
namespace Ice { namespace Ice {
namespace RegARM32 { class RegARM32 {
public:
/// An enum of every register. The enum value may not match the encoding /// An enum of every register. The enum value may not match the encoding
/// used to binary encode register operands in instructions. /// used to binary encode register operands in instructions.
enum AllRegisters { enum AllRegisters {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
val, val,
REGARM32_TABLE REGARM32_TABLE
#undef X #undef X
...@@ -35,28 +35,76 @@ enum AllRegisters { ...@@ -35,28 +35,76 @@ enum AllRegisters {
#define X(val, init) val init, #define X(val, init) val init,
REGARM32_TABLE_BOUNDS REGARM32_TABLE_BOUNDS
#undef X #undef X
}; };
/// An enum of GPR Registers. The enum value does match the encoding used /// An enum of GPR Registers. The enum value does match the encoding used
/// to binary encode register operands in instructions. /// to binary encode register operands in instructions.
enum GPRRegister { enum GPRRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
Encoded_##val encode, Encoded_##val = encode,
REGARM32_GPR_TABLE REGARM32_GPR_TABLE
#undef X #undef X
Encoded_Not_GPR = -1 Encoded_Not_GPR = -1
}; };
/// An enum of FP32 S-Registers. The enum value does match the encoding used
/// to binary encode register operands in instructions.
///
/// Each row of REGARM32_FP32_TABLE expands to `Encoded_<val> = <encode>`; the
/// remaining X-macro columns are ignored by this expansion.
enum SRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128) \
Encoded_##val = encode,
REGARM32_FP32_TABLE
#undef X
// NOTE(review): presumably the "not an S register" sentinel -- confirm at
// the use sites.
Encoded_Not_SReg = -1
};
/// An enum of FP64 D-Registers. The enum value does match the encoding used
/// to binary encode register operands in instructions.
///
/// Each row of REGARM32_FP64_TABLE expands to `Encoded_<val> = <encode>`; the
/// remaining X-macro columns are ignored by this expansion.
enum DRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128) \
Encoded_##val = encode,
REGARM32_FP64_TABLE
#undef X
// NOTE(review): presumably the "not a D register" sentinel -- confirm at
// the use sites.
Encoded_Not_DReg = -1
};
// TODO(jvoung): Floating point and vector registers... /// An enum of 128-bit Q-Registers. The enum value does match the encoding
// Need to model overlap and difference in encoding too. /// used to binary encode register operands in instructions.
enum QRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128) \
Encoded_##val = encode,
REGARM32_VEC128_TABLE
#undef X
Encoded_Not_QReg = -1
};
static inline GPRRegister getEncodedGPR(int32_t RegNum) { static inline GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last); assert(Reg_GPR_First <= RegNum);
assert(RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First); return GPRRegister(RegNum - Reg_GPR_First);
} }
} // end of namespace RegARM32 static inline SRegister getEncodedSReg(int32_t RegNum) {
assert(Reg_SREG_First <= RegNum);
assert(RegNum <= Reg_SREG_Last);
return SRegister(RegNum - Reg_SREG_First);
}
/// Converts a register number in [Reg_DREG_First, Reg_DREG_Last] to the
/// matching DRegister value by rebasing it against Reg_DREG_First. The range
/// is checked with asserts only.
static inline DRegister getEncodedDReg(int32_t RegNum) {
  assert(RegNum >= Reg_DREG_First);
  assert(RegNum <= Reg_DREG_Last);
  const auto Rebased = RegNum - Reg_DREG_First;
  return DRegister(Rebased);
}
/// Converts a register number in [Reg_QREG_First, Reg_QREG_Last] to the
/// matching QRegister value by rebasing it against Reg_QREG_First. The range
/// is checked with asserts only.
static inline QRegister getEncodedQReg(int32_t RegNum) {
  assert(RegNum >= Reg_QREG_First);
  assert(RegNum <= Reg_QREG_Last);
  const auto Rebased = RegNum - Reg_QREG_First;
  return QRegister(Rebased);
}
};
} // end of namespace Ice } // end of namespace Ice
......
...@@ -190,7 +190,7 @@ protected: ...@@ -190,7 +190,7 @@ protected:
} }
void _adds(Variable *Dest, Variable *Src0, Operand *Src1, void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Add::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Add::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -300,7 +300,7 @@ protected: ...@@ -300,7 +300,7 @@ protected:
} }
void _orrs(Variable *Dest, Variable *Src0, Operand *Src1, void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -334,7 +334,7 @@ protected: ...@@ -334,7 +334,7 @@ protected:
} }
void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1, void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -352,7 +352,7 @@ protected: ...@@ -352,7 +352,7 @@ protected:
} }
void _subs(Variable *Dest, Variable *Src0, Operand *Src1, void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Sub::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Sub::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -381,6 +381,41 @@ protected: ...@@ -381,6 +381,41 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred)); Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
} }
/// Inserts an unpredicated vadd (Dest := Src0 + Src1) at the current context.
void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *NewInst = InstARM32Vadd::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
/// Inserts an unpredicated vdiv (Dest := Src0 / Src1) at the current context.
void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *NewInst = InstARM32Vdiv::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
/// Inserts a vldr loading Dest from the memory operand Src. The predicate
/// defaults to AL.
void _vldr(Variable *Dest, OperandARM32Mem *Src,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *NewInst = InstARM32Vldr::create(Func, Dest, Src, Pred);
  Context.insert(NewInst);
}
// vmov comes in many variants: transfers within S/D/Q registers, transfers
// between core integer registers and S/D, and moves of small immediates
// into S/D.
// For integer -> S/D/Q there is also a form that takes two integer
// registers to fill a D, or to fill two consecutive S registers.
// Vmov can additionally act as insert-element, e.g.,
// "vmov.8 d0[1], r0"
// but insert-element is a "two-address" operation where only part of the
// register is modified, and this helper cannot model that.
//
// Only the simple single-source, single-dest variants are represented here,
// and they are always created unpredicated (AL).
void _vmov(Variable *Dest, Operand *Src0) {
  constexpr CondARM32::Cond Pred = CondARM32::AL;
  auto *NewInst = InstARM32Vmov::create(Func, Dest, Src0, Pred);
  Context.insert(NewInst);
}
/// Inserts an unpredicated vmul (Dest := Src0 * Src1) at the current context.
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *NewInst = InstARM32Vmul::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
/// Inserts a vsqrt (Dest := sqrt(Src)) at the current context. The predicate
/// defaults to AL.
void _vsqrt(Variable *Dest, Variable *Src,
            CondARM32::Cond Pred = CondARM32::AL) {
  auto *NewInst = InstARM32Vsqrt::create(Func, Dest, Src, Pred);
  Context.insert(NewInst);
}
/// Inserts an unpredicated vsub (Dest := Src0 - Src1) at the current context.
void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *NewInst = InstARM32Vsub::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
/// Run a pass through stack variables and ensure that the offsets are legal. /// Run a pass through stack variables and ensure that the offsets are legal.
/// If the offset is not legal, use a new base register that accounts for /// If the offset is not legal, use a new base register that accounts for
...@@ -417,16 +452,20 @@ protected: ...@@ -417,16 +452,20 @@ protected:
CallingConv &operator=(const CallingConv &) = delete; CallingConv &operator=(const CallingConv &) = delete;
public: public:
CallingConv() : NumGPRRegsUsed(0) {} CallingConv() {}
~CallingConv() = default; ~CallingConv() = default;
bool I64InRegs(std::pair<int32_t, int32_t> *Regs); bool I64InRegs(std::pair<int32_t, int32_t> *Regs);
bool I32InReg(int32_t *Reg); bool I32InReg(int32_t *Reg);
bool FPInReg(Type Ty, int32_t *Reg);
static constexpr uint32_t ARM32_MAX_GPR_ARG = 4; static constexpr uint32_t ARM32_MAX_GPR_ARG = 4;
// Units of S registers still available to S/D/Q arguments.
static constexpr uint32_t ARM32_MAX_FP_REG_UNITS = 16;
private: private:
uint32_t NumGPRRegsUsed; uint32_t NumGPRRegsUsed = 0;
uint32_t NumFPRegUnits = 0;
}; };
private: private:
......
; This tries to be a comprehensive test of f32 and f64 arith operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; Adds the two f32 arguments with fadd and returns the result.
define internal float @addFloat(float %a, float %b) {
entry:
  %sum = fadd float %a, %b
  ret float %sum
}
; CHECK-LABEL: addFloat
; CHECK: addss
; CHECK: fld
; ARM32-LABEL: addFloat
; ARM32: vadd.f32 s{{[0-9]+}}, s
; Adds the two f64 arguments with fadd and returns the result.
define internal double @addDouble(double %a, double %b) {
entry:
  %sum = fadd double %a, %b
  ret double %sum
}
; CHECK-LABEL: addDouble
; CHECK: addsd
; CHECK: fld
; ARM32-LABEL: addDouble
; ARM32: vadd.f64 d{{[0-9]+}}, d
; Computes %a - %b on f32 values with fsub and returns the result.
define internal float @subFloat(float %a, float %b) {
entry:
  %diff = fsub float %a, %b
  ret float %diff
}
; CHECK-LABEL: subFloat
; CHECK: subss
; CHECK: fld
; ARM32-LABEL: subFloat
; ARM32: vsub.f32 s{{[0-9]+}}, s
; Computes %a - %b on f64 values with fsub and returns the result.
define internal double @subDouble(double %a, double %b) {
entry:
  %diff = fsub double %a, %b
  ret double %diff
}
; CHECK-LABEL: subDouble
; CHECK: subsd
; CHECK: fld
; ARM32-LABEL: subDouble
; ARM32: vsub.f64 d{{[0-9]+}}, d
; Multiplies the two f32 arguments with fmul and returns the result.
define internal float @mulFloat(float %a, float %b) {
entry:
  %prod = fmul float %a, %b
  ret float %prod
}
; CHECK-LABEL: mulFloat
; CHECK: mulss
; CHECK: fld
; ARM32-LABEL: mulFloat
; ARM32: vmul.f32 s{{[0-9]+}}, s
; Multiplies the two f64 arguments with fmul and returns the result.
define internal double @mulDouble(double %a, double %b) {
entry:
  %prod = fmul double %a, %b
  ret double %prod
}
; CHECK-LABEL: mulDouble
; CHECK: mulsd
; CHECK: fld
; ARM32-LABEL: mulDouble
; ARM32: vmul.f64 d{{[0-9]+}}, d
; Computes %a / %b on f32 values with fdiv and returns the result.
define internal float @divFloat(float %a, float %b) {
entry:
  %quot = fdiv float %a, %b
  ret float %quot
}
; CHECK-LABEL: divFloat
; CHECK: divss
; CHECK: fld
; ARM32-LABEL: divFloat
; ARM32: vdiv.f32 s{{[0-9]+}}, s
; Computes %a / %b on f64 values with fdiv and returns the result.
define internal double @divDouble(double %a, double %b) {
entry:
  %quot = fdiv double %a, %b
  ret double %quot
}
; CHECK-LABEL: divDouble
; CHECK: divsd
; CHECK: fld
; ARM32-LABEL: divDouble
; ARM32: vdiv.f64 d{{[0-9]+}}, d
; Computes the f32 remainder of %a and %b with frem and returns it. The
; directives that follow expect this to be lowered to a call to fmodf.
define internal float @remFloat(float %a, float %b) {
entry:
  %rem = frem float %a, %b
  ret float %rem
}
; CHECK-LABEL: remFloat
; CHECK: call {{.*}} R_{{.*}} fmodf
; ARM32-LABEL: remFloat
; ARM32: bl {{.*}} fmodf
; Computes the f64 remainder of %a and %b with frem and returns it. The
; directives that follow expect this to be lowered to a call to fmod.
define internal double @remDouble(double %a, double %b) {
entry:
  %rem = frem double %a, %b
  ret double %rem
}
; CHECK-LABEL: remDouble
; CHECK: call {{.*}} R_{{.*}} fmod
; ARM32-LABEL: remDouble
; ARM32: bl {{.*}} fmod
; This tries to be a comprehensive test of f32 and f64 call/return ops.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; Can't test on ARM yet. Need to use several vpush {contiguous FP regs},
; instead of push {any GPR list}.
; Takes (double, i32, double) and returns the i32 argument unchanged; both
; double arguments are unused. The integer sits between two f64 arguments, so
; the expected x86-32 output reads it from a stack slot past the first double.
define internal i32 @doubleArgs(double %a, i32 %b, double %c) {
entry:
ret i32 %b
}
; CHECK-LABEL: doubleArgs
; CHECK: mov eax,DWORD PTR [esp+0xc]
; CHECK-NEXT: ret
; ARM32-LABEL: doubleArgs
; Takes (float, i32, float) and returns the i32 argument unchanged; both
; float arguments are unused. The integer sits between two f32 arguments, so
; the expected x86-32 output reads it from a stack slot past the first float.
define internal i32 @floatArgs(float %a, i32 %b, float %c) {
entry:
ret i32 %b
}
; CHECK-LABEL: floatArgs
; CHECK: mov eax,DWORD PTR [esp+0x8]
; CHECK-NEXT: ret
; Forwards its six FP arguments, one (float, double) pair at a time, to three
; calls of @ignoreFpArgsNoInline. Each call also passes the integer constant
; 123 as the middle argument. The three i32 results are summed and returned.
define internal i32 @passFpArgs(float %a, double %b, float %c, double %d, float %e, double %f) {
entry:
  %res.ab = call i32 @ignoreFpArgsNoInline(float %a, i32 123, double %b)
  %res.cd = call i32 @ignoreFpArgsNoInline(float %c, i32 123, double %d)
  %res.ef = call i32 @ignoreFpArgsNoInline(float %e, i32 123, double %f)
  %sum.partial = add i32 %res.cd, %res.ab
  %sum.all = add i32 %sum.partial, %res.ef
  ret i32 %sum.all
}
; CHECK-LABEL: passFpArgs
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; Declaration only (no body) of the (float, i32, double) callee used by
; passFpArgs and passFpConstArg.
declare i32 @ignoreFpArgsNoInline(float %x, i32 %y, double %z)
; Calls @ignoreFpArgsNoInline with %a, the integer constant 123 and the f64
; constant 2.34, and returns the call's result. %b is unused.
define internal i32 @passFpConstArg(float %a, double %b) {
entry:
  %result = call i32 @ignoreFpArgsNoInline(float %a, i32 123, double 2.340000e+00)
  ret i32 %result
}
; CHECK-LABEL: passFpConstArg
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; Calls @ignoreFp32ArgsNoInline with %a, the integer constant 123 and the f32
; constant 2.0, and returns the call's result.
define internal i32 @passFp32ConstArg(float %a) {
entry:
  %result = call i32 @ignoreFp32ArgsNoInline(float %a, i32 123, float 2.0)
  ret i32 %result
}
; CHECK-LABEL: passFp32ConstArg
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: movss DWORD PTR [esp+0x8]
; CHECK: call {{.*}} R_{{.*}} ignoreFp32ArgsNoInline
; Declaration only (no body) of the (float, i32, float) callee used by
; passFp32ConstArg.
declare i32 @ignoreFp32ArgsNoInline(float %x, i32 %y, float %z)
; Returns its single f32 argument unchanged.
define internal float @returnFloatArg(float %a) {
entry:
ret float %a
}
; CHECK-LABEL: returnFloatArg
; CHECK: fld DWORD PTR [esp
; Returns its single f64 argument unchanged.
define internal double @returnDoubleArg(double %a) {
entry:
ret double %a
}
; CHECK-LABEL: returnDoubleArg
; CHECK: fld QWORD PTR [esp
; Returns an f32 constant written as a hexadecimal floating-point literal
; (approximately 1.23).
define internal float @returnFloatConst() {
entry:
ret float 0x3FF3AE1480000000
}
; CHECK-LABEL: returnFloatConst
; CHECK: fld
; Returns the f64 constant 1.23.
define internal double @returnDoubleConst() {
entry:
ret double 1.230000e+00
}
; CHECK-LABEL: returnDoubleConst
; CHECK: fld
; This tries to be a comprehensive test of f32 and f64 load/store operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s
; Treats the i32 argument as an address, loads an f32 from it (4-byte
; alignment) and returns the loaded value.
define internal float @loadFloat(i32 %a) {
entry:
  %addr = inttoptr i32 %a to float*
  %val = load float, float* %addr, align 4
  ret float %val
}
; CHECK-LABEL: loadFloat
; CHECK: movss
; CHECK: fld
; Treats the i32 argument as an address, loads an f64 from it (8-byte
; alignment) and returns the loaded value.
define internal double @loadDouble(i32 %a) {
entry:
  %addr = inttoptr i32 %a to double*
  %val = load double, double* %addr, align 8
  ret double %val
}
; CHECK-LABEL: loadDouble
; CHECK: movsd
; CHECK: fld
; Treats %a as an address and stores the f32 argument %value there (4-byte
; alignment).
define internal void @storeFloat(i32 %a, float %value) {
entry:
  %addr = inttoptr i32 %a to float*
  store float %value, float* %addr, align 4
  ret void
}
; CHECK-LABEL: storeFloat
; CHECK: movss
; CHECK: movss
; Treats %a as an address and stores the f64 argument %value there (8-byte
; alignment).
define internal void @storeDouble(i32 %a, double %value) {
entry:
  %addr = inttoptr i32 %a to double*
  store double %value, double* %addr, align 8
  ret void
}
; CHECK-LABEL: storeDouble
; CHECK: movsd
; CHECK: movsd
; Treats %a as an address and stores an f32 constant there (4-byte alignment).
; The constant is written as a hexadecimal floating-point literal,
; approximately 1.23.
define internal void @storeFloatConst(i32 %a) {
entry:
  %ptr = inttoptr i32 %a to float*
  store float 0x3FF3AE1480000000, float* %ptr, align 4
  ret void
}
; CHECK-LABEL: storeFloatConst
; CHECK: movss
; CHECK: movss
; Treats %a as an address and stores the f64 constant 1.23 there (8-byte
; alignment).
define internal void @storeDoubleConst(i32 %a) {
entry:
  %ptr = inttoptr i32 %a to double*
  store double 1.230000e+00, double* %ptr, align 8
  ret void
}
; CHECK-LABEL: storeDoubleConst
; CHECK: movsd
; CHECK: movsd
...@@ -150,6 +150,11 @@ entry: ...@@ -150,6 +150,11 @@ entry:
; CHECK: sqrtss xmm{{.*}} ; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}} ; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}},DWORD PTR ; CHECK: sqrtss xmm{{.*}},DWORD PTR
; ARM32-LABEL: test_sqrt_float
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vadd.f32
define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) {
entry: entry:
...@@ -164,6 +169,9 @@ entry: ...@@ -164,6 +169,9 @@ entry:
; current folding only handles load + arithmetic op. The sqrt inst ; current folding only handles load + arithmetic op. The sqrt inst
; is considered an intrinsic call and not an arithmetic op. ; is considered an intrinsic call and not an arithmetic op.
; CHECK: sqrtss xmm{{.*}} ; CHECK: sqrtss xmm{{.*}}
; ARM32-LABEL: test_sqrt_float_mergeable_load
; ARM32: vldr s{{.*}}
; ARM32: vsqrt.f32
define double @test_sqrt_double(double %x, i32 %iptr) { define double @test_sqrt_double(double %x, i32 %iptr) {
entry: entry:
...@@ -177,6 +185,11 @@ entry: ...@@ -177,6 +185,11 @@ entry:
; CHECK: sqrtsd xmm{{.*}} ; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}} ; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}},QWORD PTR ; CHECK: sqrtsd xmm{{.*}},QWORD PTR
; ARM32-LABEL: test_sqrt_double
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vadd.f64
define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) {
entry: entry:
...@@ -188,6 +201,9 @@ entry: ...@@ -188,6 +201,9 @@ entry:
} }
; CHECK-LABEL: test_sqrt_double_mergeable_load ; CHECK-LABEL: test_sqrt_double_mergeable_load
; CHECK: sqrtsd xmm{{.*}} ; CHECK: sqrtsd xmm{{.*}}
; ARM32-LABEL: test_sqrt_double_mergeable_load
; ARM32: vldr d{{.*}}
; ARM32: vsqrt.f64
define float @test_sqrt_ignored(float %x, double %y) { define float @test_sqrt_ignored(float %x, double %y) {
entry: entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment