Commit 86ebec12 by Jan Voung

Add the ARM32 FP register table entries, simple arith, and args.

Lower some instructions, without much guarantee of correctness. *Running* generated code will be risky because the register allocator isn't aware of register aliasing.

Fill in v{add,div,mul,sub}.f{32,64}, vmov, vldr and vsqrt.f{32,64}. I tried to make the nacl-other-intrinsics test not explode, so added vsqrt too. That was pretty easy for sqrt, but then fabs tests also exploded. Those are not truly fixed but are currently "fixed" by adding a FakeDef to satisfy liveness.

Propagate float/double arguments to the right register in lowerArguments, lowerCall, and propagate to s0/d0/q0 for lowerReturn. May need to double check the calling convention. Currently can't test call-ret because vpush/vpop for prologues and epilogues isn't done.

Legalize FP immediates to make the nacl-other-intrinsics sqrt test happy.

Use the correct type of load (vldr (.32 and .64 are optional) instead of ldr{b,h,,d}).

Whether or not the float/vector instructions can be predicated is a bit interesting. The float/double ones can, but the SIMD versions cannot. E.g.

    vadd<cond>.f32 s0, s0, s1 is okay
    vadd<cond>.f32 q0, q0, q1 is not okay.

For now, just omit conditions from instructions that may end up being reused for SIMD.

Split up the fp.pnacl.ll test into multiple ones so that parts of lowering can be tested incrementally.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1266263003 .
parent f4fbf7fd
...@@ -28,12 +28,13 @@ namespace Ice { ...@@ -28,12 +28,13 @@ namespace Ice {
namespace { namespace {
const struct TypeARM32Attributes_ { const struct TypeARM32Attributes_ {
const char *WidthString; // b, h, <blank>, or d const char *WidthString; // b, h, <blank>, or d
const char *VecWidthString; // i8, i16, i32, f32, f64
int8_t SExtAddrOffsetBits; int8_t SExtAddrOffsetBits;
int8_t ZExtAddrOffsetBits; int8_t ZExtAddrOffsetBits;
} TypeARM32Attributes[] = { } TypeARM32Attributes[] = {
#define X(tag, elementty, width, sbits, ubits) \ #define X(tag, elementty, int_width, vec_width, sbits, ubits) \
{ width, sbits, ubits } \ { int_width, vec_width, sbits, ubits } \
, ,
ICETYPEARM32_TABLE ICETYPEARM32_TABLE
#undef X #undef X
...@@ -66,6 +67,10 @@ const char *InstARM32::getWidthString(Type Ty) { ...@@ -66,6 +67,10 @@ const char *InstARM32::getWidthString(Type Ty) {
return TypeARM32Attributes[Ty].WidthString; return TypeARM32Attributes[Ty].WidthString;
} }
/// Looks up the FP/vector element-size suffix stored for \p Ty in the
/// TypeARM32Attributes table (the VecWidthString column). \p Ty is used
/// directly as the table index; no range check is performed here.
const char *InstARM32::getVecWidthString(Type Ty) {
  const auto &Attributes = TypeARM32Attributes[Ty];
  return Attributes.VecWidthString;
}
const char *InstARM32Pred::predString(CondARM32::Cond Pred) { const char *InstARM32Pred::predString(CondARM32::Cond Pred) {
return InstARM32CondAttributes[Pred].EmitString; return InstARM32CondAttributes[Pred].EmitString;
} }
...@@ -94,6 +99,18 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode, ...@@ -94,6 +99,18 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
Inst->getSrc(0)->emit(Func); Inst->getSrc(0)->emit(Func);
} }
void InstARM32Pred::emitUnaryopFP(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 1);
Type SrcTy = Inst->getSrc(0)->getType();
Str << "\t" << Opcode << Inst->getPredicate() << getVecWidthString(SrcTy)
<< "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
}
void InstARM32Pred::emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst, void InstARM32Pred::emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) { const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -123,6 +140,21 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst, ...@@ -123,6 +140,21 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
Inst->getSrc(1)->emit(Func); Inst->getSrc(1)->emit(Func);
} }
void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 2);
Str << "\t" << Opcode << getVecWidthString(Inst->getDest()->getType())
<< "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(0)->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst, void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) { const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -304,12 +336,6 @@ IceString InstARM32Label::getName(const Cfg *Func) const { ...@@ -304,12 +336,6 @@ IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number); return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
} }
/// Constructs a predicated load instruction. \p Dest and \p Predicate are
/// forwarded to the InstARM32Pred base together with the Ldr kind, and the
/// memory operand \p Mem is registered as the only source.
/// NOTE(review): the literal 1 passed to the base is presumably the
/// source-operand capacity (it matches the single addSource call) -- confirm.
InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Ldr, 1, Dest, Predicate) {
addSource(Mem);
}
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs. // Track modifications to Dests separately via FakeDefs.
...@@ -363,8 +389,14 @@ template <> const char *InstARM32Rbit::Opcode = "rbit"; ...@@ -363,8 +389,14 @@ template <> const char *InstARM32Rbit::Opcode = "rbit";
template <> const char *InstARM32Rev::Opcode = "rev"; template <> const char *InstARM32Rev::Opcode = "rev";
template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h
template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h
// FP: base mnemonic only, stored without any ".f32"/".f64" element-size
// suffix.
template <> const char *InstARM32Vsqrt::Opcode = "vsqrt";
// Mov-like ops // Mov-like ops
// Base mnemonic for integer loads; no width suffix is stored in the string.
template <> const char *InstARM32Ldr::Opcode = "ldr";
template <> const char *InstARM32Mov::Opcode = "mov"; template <> const char *InstARM32Mov::Opcode = "mov";
// FP mov-like ops. InstARM32Vmov::emit may print "vldr" or "vstr" instead of
// its own opcode when one side of the move is not in a register.
template <> const char *InstARM32Vldr::Opcode = "vldr";
template <> const char *InstARM32Vmov::Opcode = "vmov";
// Three-addr ops // Three-addr ops
template <> const char *InstARM32Adc::Opcode = "adc"; template <> const char *InstARM32Adc::Opcode = "adc";
template <> const char *InstARM32Add::Opcode = "add"; template <> const char *InstARM32Add::Opcode = "add";
...@@ -381,6 +413,11 @@ template <> const char *InstARM32Sbc::Opcode = "sbc"; ...@@ -381,6 +413,11 @@ template <> const char *InstARM32Sbc::Opcode = "sbc";
template <> const char *InstARM32Sdiv::Opcode = "sdiv"; template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub"; template <> const char *InstARM32Sub::Opcode = "sub";
template <> const char *InstARM32Udiv::Opcode = "udiv"; template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP three-address arithmetic (InstARM32ThreeAddrFP). Only the base mnemonic
// is stored; emitThreeAddrFP appends the element-size suffix from the
// destination type.
template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls"; template <> const char *InstARM32Mls::Opcode = "mls";
...@@ -403,19 +440,19 @@ template <> void InstARM32Mov::emit(const Cfg *Func) const { ...@@ -403,19 +440,19 @@ template <> void InstARM32Mov::emit(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
Variable *Dest = getDest(); Variable *Dest = getDest();
if (Dest->hasReg()) { if (Dest->hasReg()) {
IceString Opcode = "mov"; IceString ActualOpcode = Opcode;
Operand *Src0 = getSrc(0); Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
if (!Src0V->hasReg()) { if (!Src0V->hasReg()) {
// Always use the whole stack slot. A 32-bit load has a larger range // Always use the whole stack slot. A 32-bit load has a larger range
// of offsets than 16-bit, etc. // of offsets than 16-bit, etc.
Opcode = IceString("ldr"); ActualOpcode = IceString("ldr");
} }
} else { } else {
if (llvm::isa<OperandARM32Mem>(Src0)) if (llvm::isa<OperandARM32Mem>(Src0))
Opcode = IceString("ldr") + getWidthString(Dest->getType()); ActualOpcode = IceString("ldr") + getWidthString(Dest->getType());
} }
Str << "\t" << Opcode << getPredicate() << "\t"; Str << "\t" << ActualOpcode << getPredicate() << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
...@@ -436,6 +473,64 @@ template <> void InstARM32Mov::emitIAS(const Cfg *Func) const { ...@@ -436,6 +473,64 @@ template <> void InstARM32Mov::emitIAS(const Cfg *Func) const {
llvm_unreachable("Not yet implemented"); llvm_unreachable("Not yet implemented");
} }
/// Textual emission for vldr: "\t<Opcode><predicate>\t<dest>, <src>". No
/// element-size suffix is printed. The destination must already have a
/// register. Emits nothing when dump support is compiled out.
template <> void InstARM32Vldr::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Out = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 1);
  Variable *Target = getDest();
  assert(Target->hasReg());
  Out << "\t" << Opcode << getPredicate() << "\t";
  Target->emit(Func);
  Out << ", ";
  getSrc(0)->emit(Func);
}
/// Integrated-assembler emission for vldr is not implemented; reaching this
/// function is treated as unreachable after the operand-count sanity check.
template <> void InstARM32Vldr::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until an implementation exists.
  assert(getSrcSize() == 1);
  llvm_unreachable("Not yet implemented");
}
/// Textual emission for the FP move pseudo-instruction. The mnemonic printed
/// depends on where the operands live:
///   - dest has no register:            "vstr  <src>, <dest>" (src must be a
///                                      Variable with a register);
///   - source is a register-less Variable or an OperandARM32Mem:
///                                      "vldr  <dest>, <src>";
///   - otherwise:                       "<Opcode>  <dest>, <src>".
/// The instruction must be unconditional (predicate AL); no predicate is
/// printed. Emits nothing when dump support is compiled out.
template <> void InstARM32Vmov::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  assert(CondARM32::AL == getPredicate());
  Ostream &Out = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 1);
  Variable *Target = getDest();

  // Store form: the destination is not in a register.
  if (!Target->hasReg()) {
    Variable *SrcVar = llvm::cast<Variable>(getSrc(0));
    assert(SrcVar->hasReg());
    Out << "\tvstr\t";
    SrcVar->emit(Func);
    Out << ", ";
    Target->emit(Func);
    return;
  }

  // Register destination: decide between a load and a plain register move.
  Operand *Source = getSrc(0);
  bool LoadsFromMemory;
  if (const auto *SourceVar = llvm::dyn_cast<Variable>(Source))
    LoadsFromMemory = !SourceVar->hasReg();
  else
    LoadsFromMemory = llvm::isa<OperandARM32Mem>(Source);

  const IceString Mnemonic = LoadsFromMemory ? IceString("vldr") : Opcode;
  Out << "\t" << Mnemonic << "\t";
  Target->emit(Func);
  Out << ", ";
  Source->emit(Func);
}
/// Integrated-assembler emission for vmov is not implemented; reaching this
/// function is treated as unreachable after the operand-count sanity check.
template <> void InstARM32Vmov::emitIAS(const Cfg *Func) const {
  (void)Func; // Unused until an implementation exists.
  assert(getSrcSize() == 1);
  llvm_unreachable("Not yet implemented");
}
void InstARM32Br::emit(const Cfg *Func) const { void InstARM32Br::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -547,37 +642,25 @@ void InstARM32Label::dump(const Cfg *Func) const { ...@@ -547,37 +642,25 @@ void InstARM32Label::dump(const Cfg *Func) const {
Str << getName(Func) << ":"; Str << getName(Func) << ":";
} }
void InstARM32Ldr::emit(const Cfg *Func) const { template <> void InstARM32Ldr::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
assert(getDest()->hasReg()); assert(getDest()->hasReg());
Type Ty = getSrc(0)->getType(); Type Ty = getSrc(0)->getType();
Str << "\t" Str << "\t"<< Opcode << getWidthString(Ty) << getPredicate() << "\t";
<< "ldr" << getWidthString(Ty) << getPredicate() << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
} }
void InstARM32Ldr::emitIAS(const Cfg *Func) const { template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
(void)Func; (void)Func;
llvm_unreachable("Not yet implemented"); llvm_unreachable("Not yet implemented");
} }
void InstARM32Ldr::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
dumpOpcodePred(Str, "ldr", getDest()->getType());
Str << " ";
dumpSources(Func);
}
template <> void InstARM32Movw::emit(const Cfg *Func) const { template <> void InstARM32Movw::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
......
...@@ -27,99 +27,249 @@ ...@@ -27,99 +27,249 @@
// It is technically preserved, but save/restore is handled separately, // It is technically preserved, but save/restore is handled separately,
// based on whether or not the function MaybeLeafFunc. // based on whether or not the function MaybeLeafFunc.
#define REGARM32_GPR_TABLE \ #define REGARM32_GPR_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, isInt, isFP */ \ /* val, encode, name, scratch, preserved, stackptr, frameptr, \
X(Reg_r0, = 0, "r0", 1, 0, 0, 0, 1, 0) \ isInt, isFP32, isFP64, isVec128 */ \
X(Reg_r1, = Reg_r0 + 1, "r1", 1, 0, 0, 0, 1, 0) \ X(Reg_r0, 0, "r0", 1, 0, 0, 0, 1, 0, 0, 0) \
X(Reg_r2, = Reg_r0 + 2, "r2", 1, 0, 0, 0, 1, 0) \ X(Reg_r1, 1, "r1", 1, 0, 0, 0, 1, 0, 0, 0) \
X(Reg_r3, = Reg_r0 + 3, "r3", 1, 0, 0, 0, 1, 0) \ X(Reg_r2, 2, "r2", 1, 0, 0, 0, 1, 0, 0, 0) \
X(Reg_r4, = Reg_r0 + 4, "r4", 0, 1, 0, 0, 1, 0) \ X(Reg_r3, 3, "r3", 1, 0, 0, 0, 1, 0, 0, 0) \
X(Reg_r5, = Reg_r0 + 5, "r5", 0, 1, 0, 0, 1, 0) \ X(Reg_r4, 4, "r4", 0, 1, 0, 0, 1, 0, 0, 0) \
X(Reg_r6, = Reg_r0 + 6, "r6", 0, 1, 0, 0, 1, 0) \ X(Reg_r5, 5, "r5", 0, 1, 0, 0, 1, 0, 0, 0) \
X(Reg_r7, = Reg_r0 + 7, "r7", 0, 1, 0, 0, 1, 0) \ X(Reg_r6, 6, "r6", 0, 1, 0, 0, 1, 0, 0, 0) \
X(Reg_r8, = Reg_r0 + 8, "r8", 0, 1, 0, 0, 1, 0) \ X(Reg_r7, 7, "r7", 0, 1, 0, 0, 1, 0, 0, 0) \
X(Reg_r9, = Reg_r0 + 9, "r9", 0, 1, 0, 0, 0, 0) \ X(Reg_r8, 8, "r8", 0, 1, 0, 0, 1, 0, 0, 0) \
X(Reg_r10, = Reg_r0 + 10, "r10", 0, 1, 0, 0, 1, 0) \ X(Reg_r9, 9, "r9", 0, 1, 0, 0, 0, 0, 0, 0) \
X(Reg_fp, = Reg_r0 + 11, "fp", 0, 1, 0, 1, 1, 0) \ X(Reg_r10, 10, "r10", 0, 1, 0, 0, 1, 0, 0, 0) \
X(Reg_ip, = Reg_r0 + 12, "ip", 1, 0, 0, 0, 0, 0) \ X(Reg_fp, 11, "fp", 0, 1, 0, 1, 1, 0, 0, 0) \
X(Reg_sp, = Reg_r0 + 13, "sp", 0, 0, 1, 0, 0, 0) \ X(Reg_ip, 12, "ip", 1, 0, 0, 0, 0, 0, 0, 0) \
X(Reg_lr, = Reg_r0 + 14, "lr", 0, 0, 0, 0, 0, 0) \ X(Reg_sp, 13, "sp", 0, 0, 1, 0, 0, 0, 0, 0) \
X(Reg_pc, = Reg_r0 + 15, "pc", 0, 0, 0, 0, 0, 0) \ X(Reg_lr, 14, "lr", 0, 0, 0, 0, 0, 0, 0, 0) \
X(Reg_pc, 15, "pc", 0, 0, 0, 0, 0, 0, 0, 0)
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr, //#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP) // isInt, isFP32, isFP64, isVec128)
// TODO(jvoung): List FP registers and know S0 == D0 == Q0, etc. // TODO(jvoung): Be able to grab even registers, and the corresponding odd
// Be able to grab even registers, and the corresponding odd register // register for each even register. Want "register units" to encapsulate
// for each even register. // the aliasing/overlap.
//
// S registers 0-15 are scratch, but 16-31 are preserved.
// Regenerate this with the following python script:
//
// def print_sregs():
// for i in xrange(0, 32):
// is_scratch = 1 if i < 16 else 0
// is_preserved = 1 if i >= 16 else 0
// print ('X(Reg_s{regnum:<2}, {regnum:<2}, "s{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 1, 0, 0) \\').format(
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_sregs()
//
// One X(...) row per single-precision register s0-s31. The encode column is
// the register's own number; s0-s15 are flagged scratch, s16-s31 preserved,
// and isFP32 is the only register-class flag set.
#define REGARM32_FP32_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isFP32, isFP64, isVec128 */ \
X(Reg_s0, 0, "s0", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s1, 1, "s1", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s2, 2, "s2", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s3, 3, "s3", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s4, 4, "s4", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s5, 5, "s5", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s6, 6, "s6", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s7, 7, "s7", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s8, 8, "s8", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s9, 9, "s9", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s10, 10, "s10", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s11, 11, "s11", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s12, 12, "s12", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s13, 13, "s13", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s14, 14, "s14", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s15, 15, "s15", 1, 0, 0, 0, 0, 1, 0, 0) \
X(Reg_s16, 16, "s16", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s17, 17, "s17", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s18, 18, "s18", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s19, 19, "s19", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s20, 20, "s20", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s21, 21, "s21", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s22, 22, "s22", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s23, 23, "s23", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s24, 24, "s24", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s25, 25, "s25", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s26, 26, "s26", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s27, 27, "s27", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s28, 28, "s28", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s29, 29, "s29", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s30, 30, "s30", 0, 1, 0, 0, 0, 1, 0, 0) \
X(Reg_s31, 31, "s31", 0, 1, 0, 0, 0, 1, 0, 0)
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP32, isFP64, isVec128)
// D registers 0-7 are scratch, 8-15 are preserved, and 16-31
// are also scratch (if supported by the D32 feature vs D16).
//
// Regenerate this with the following python script:
// def print_dregs():
// for i in xrange(0, 32):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print ('X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 1, 0) \\').format(
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_dregs()
//
// One X(...) row per double-precision register d0-d31. The encode column is
// the register's own number; d8-d15 are flagged preserved, all others
// scratch, and isFP64 is the only register-class flag set.
#define REGARM32_FP64_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isFP32, isFP64, isVec128 */ \
X(Reg_d0, 0, "d0", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d1, 1, "d1", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d2, 2, "d2", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d3, 3, "d3", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d4, 4, "d4", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d5, 5, "d5", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d6, 6, "d6", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d7, 7, "d7", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d8, 8, "d8", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d9, 9, "d9", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d10, 10, "d10", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d11, 11, "d11", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d12, 12, "d12", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d13, 13, "d13", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d14, 14, "d14", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d15, 15, "d15", 0, 1, 0, 0, 0, 0, 1, 0) \
X(Reg_d16, 16, "d16", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d17, 17, "d17", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d18, 18, "d18", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d19, 19, "d19", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d20, 20, "d20", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d21, 21, "d21", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d22, 22, "d22", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d23, 23, "d23", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d24, 24, "d24", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d25, 25, "d25", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d26, 26, "d26", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d27, 27, "d27", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d28, 28, "d28", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d29, 29, "d29", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d30, 30, "d30", 1, 0, 0, 0, 0, 0, 1, 0) \
X(Reg_d31, 31, "d31", 1, 0, 0, 0, 0, 0, 1, 0)
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP32, isFP64, isVec128)
// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15
// are also scratch (if supported by the D32 feature).
//
// Regenerate this with the following python script:
// def print_qregs():
// for i in xrange(0, 16):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print ('X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1) \\').format(
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_qregs()
//
// One X(...) row per 128-bit vector register q0-q15. The encode column is the
// register's own number; q4-q7 are flagged preserved, all others scratch, and
// isVec128 is the only register-class flag set.
#define REGARM32_VEC128_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isFP32, isFP64, isVec128 */ \
X(Reg_q0, 0, "q0", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q1, 1, "q1", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q2, 2, "q2", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q3, 3, "q3", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q4, 4, "q4", 0, 1, 0, 0, 0, 0, 0, 1) \
X(Reg_q5, 5, "q5", 0, 1, 0, 0, 0, 0, 0, 1) \
X(Reg_q6, 6, "q6", 0, 1, 0, 0, 0, 0, 0, 1) \
X(Reg_q7, 7, "q7", 0, 1, 0, 0, 0, 0, 0, 1) \
X(Reg_q8, 8, "q8", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q9, 9, "q9", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q10, 10, "q10", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q11, 11, "q11", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q12, 12, "q12", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q13, 13, "q13", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q14, 14, "q14", 1, 0, 0, 0, 0, 0, 0, 1) \
X(Reg_q15, 15, "q15", 1, 0, 0, 0, 0, 0, 0, 1)
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP32, isFP64, isVec128)
// We also provide a combined table, so that there is a namespace where // We also provide a combined table, so that there is a namespace where
// all of the registers are considered and have distinct numberings. // all of the registers are considered and have distinct numberings.
// This is in contrast to the above, where the "encode" is based on how // This is in contrast to the above, where the "encode" is based on how
// the register numbers will be encoded in binaries and values can overlap. // the register numbers will be encoded in binaries and values can overlap.
#define REGARM32_TABLE \ #define REGARM32_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, isInt, isFP */ \ /* val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
REGARM32_GPR_TABLE isFP32, isFP64, isVec128 */ \
REGARM32_GPR_TABLE \
REGARM32_FP32_TABLE \
REGARM32_FP64_TABLE \
REGARM32_VEC128_TABLE
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr, //#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP) // isInt, isFP32, isFP64, isVec128)
#define REGARM32_TABLE_BOUNDS \
/* val, init */ \
X(Reg_GPR_First, = Reg_r0) \
X(Reg_GPR_Last, = Reg_pc)
//define X(val, init)
// TODO(jvoung): add condition code tables, etc. #define REGARM32_TABLE_BOUNDS \
/* val, init */ \
X(Reg_GPR_First, = Reg_r0) \
X(Reg_GPR_Last, = Reg_pc) \
X(Reg_SREG_First, = Reg_s0) \
X(Reg_SREG_Last, = Reg_s31) \
X(Reg_DREG_First, = Reg_d0) \
X(Reg_DREG_Last, = Reg_d31) \
X(Reg_QREG_First, = Reg_q0) \
X(Reg_QREG_Last, = Reg_q15)
// define X(val, init)
// Load/Store instruction width suffixes. // Load/Store instruction width suffixes and FP/Vector element size suffixes
#define ICETYPEARM32_TABLE \ // the # of offset bits allowed as part of an addressing mode (for sign or
/* tag, element type, width, addr off bits sext, zext */ \ // zero extending load/stores).
X(IceType_void, IceType_void, "", 0, 0) \ #define ICETYPEARM32_TABLE \
X(IceType_i1, IceType_void, "b", 8, 12) \ /* tag, element type, int_width, vec_width, addr bits sext, zext */ \
X(IceType_i8, IceType_void, "b", 8, 12) \ X(IceType_void, IceType_void, "", "", 0, 0) \
X(IceType_i16, IceType_void, "h", 8, 8) \ X(IceType_i1, IceType_void, "b", "", 8, 12) \
X(IceType_i32, IceType_void, "", 12, 12) \ X(IceType_i8, IceType_void, "b", "", 8, 12) \
X(IceType_i64, IceType_void, "d", 8, 8) \ X(IceType_i16, IceType_void, "h", "", 8, 8) \
X(IceType_f32, IceType_void, "", 10, 10) \ X(IceType_i32, IceType_void, "", "", 12, 12) \
X(IceType_f64, IceType_void, "", 10, 10) \ X(IceType_i64, IceType_void, "d", "", 8, 8) \
X(IceType_v4i1, IceType_i32 , "", 0, 0) \ X(IceType_f32, IceType_void, "", ".f32", 10, 10) \
X(IceType_v8i1, IceType_i16 , "", 0, 0) \ X(IceType_f64, IceType_void, "", ".f64", 10, 10) \
X(IceType_v16i1, IceType_i8 , "", 0, 0) \ X(IceType_v4i1, IceType_i32, "", ".i32", 0, 0) \
X(IceType_v16i8, IceType_i8 , "", 0, 0) \ X(IceType_v8i1, IceType_i16, "", ".i16", 0, 0) \
X(IceType_v8i16, IceType_i16 , "", 0, 0) \ X(IceType_v16i1, IceType_i8, "", ".i8", 0, 0) \
X(IceType_v4i32, IceType_i32 , "", 0, 0) \ X(IceType_v16i8, IceType_i8, "", ".i8", 0, 0) \
X(IceType_v4f32, IceType_f32 , "", 0, 0) \ X(IceType_v8i16, IceType_i16, "", ".i16", 0, 0) \
//#define X(tag, elementty, width, sbits, ubits) X(IceType_v4i32, IceType_i32, "", ".i32", 0, 0) \
X(IceType_v4f32, IceType_f32, "", ".f32", 0, 0)
//#define X(tag, elementty, int_width, vec_width, sbits, ubits)
// Shifter types for Data-processing operands as defined in section A5.1.2. // Shifter types for Data-processing operands as defined in section A5.1.2.
#define ICEINSTARM32SHIFT_TABLE \ #define ICEINSTARM32SHIFT_TABLE \
/* enum value, emit */ \ /* enum value, emit */ \
X(LSL, "lsl") \ X(LSL, "lsl") \
X(LSR, "lsr") \ X(LSR, "lsr") \
X(ASR, "asr") \ X(ASR, "asr") \
X(ROR, "ror") \ X(ROR, "ror") \
X(RRX, "rrx") \ X(RRX, "rrx")
//#define X(tag, emit) //#define X(tag, emit)
// Attributes for the condition code 4-bit encoding (that is independent // Attributes for the condition code 4-bit encoding (that is independent
// of the APSR's NZCV fields). For example, EQ is 0, but corresponds to // of the APSR's NZCV fields). For example, EQ is 0, but corresponds to
// Z = 1, and NE is 1, but corresponds to Z = 0. // Z = 1, and NE is 1, but corresponds to Z = 0.
#define ICEINSTARM32COND_TABLE \ #define ICEINSTARM32COND_TABLE \
/* enum value, encoding, opposite, emit */ \ /* enum value, encoding, opposite, emit */ \
X(EQ, 0, NE, "eq") /* equal */ \ X(EQ, 0, NE, "eq") /* equal */ \
X(NE, 1, EQ, "ne") /* not equal */ \ X(NE, 1, EQ, "ne") /* not equal */ \
X(CS, 2, CC, "cs") /* carry set/unsigned (AKA hs: higher or same) */ \ X(CS, 2, CC, "cs") /* carry set/unsigned (AKA hs: higher or same) */ \
X(CC, 3, CS, "cc") /* carry clear/unsigned (AKA lo: lower) */ \ X(CC, 3, CS, "cc") /* carry clear/unsigned (AKA lo: lower) */ \
X(MI, 4, PL, "mi") /* minus/negative */ \ X(MI, 4, PL, "mi") /* minus/negative */ \
X(PL, 5, MI, "pl") /* plus/positive or zero */ \ X(PL, 5, MI, "pl") /* plus/positive or zero */ \
X(VS, 6, VC, "vs") /* overflow (float unordered) */ \ X(VS, 6, VC, "vs") /* overflow (float unordered) */ \
X(VC, 7, VS, "vc") /* no overflow (float not unordered) */ \ X(VC, 7, VS, "vc") /* no overflow (float not unordered) */ \
X(HI, 8, LS, "hi") /* unsigned higher */ \ X(HI, 8, LS, "hi") /* unsigned higher */ \
X(LS, 9, HI, "ls") /* unsigned lower or same */ \ X(LS, 9, HI, "ls") /* unsigned lower or same */ \
X(GE, 10, LT, "ge") /* signed greater than or equal */ \ X(GE, 10, LT, "ge") /* signed greater than or equal */ \
X(LT, 11, GE, "lt") /* signed less than */ \ X(LT, 11, GE, "lt") /* signed less than */ \
X(GT, 12, LE, "gt") /* signed greater than */ \ X(GT, 12, LE, "gt") /* signed greater than */ \
X(LE, 13, GT, "le") /* signed less than or equal */ \ X(LE, 13, GT, "le") /* signed less than or equal */ \
X(AL, 14, kNone, "") /* always (unconditional) */ \ X(AL, 14, kNone, "") /* always (unconditional) */ \
X(kNone, 15, kNone, "??") /* special condition / none */ \ X(kNone, 15, kNone, "??") /* special condition / none */
//#define(tag, encode, opp, emit) //#define(tag, encode, opp, emit)
#endif // SUBZERO_SRC_ICEINSTARM32_DEF #endif // SUBZERO_SRC_ICEINSTARM32_DEF
...@@ -320,12 +320,24 @@ public: ...@@ -320,12 +320,24 @@ public:
Tst, Tst,
Udiv, Udiv,
Umull, Umull,
Uxt Uxt,
Vadd,
Vdiv,
Vldr,
Vmov,
Vmul,
Vsqrt,
Vsub
}; };
static const char *getWidthString(Type Ty); static const char *getWidthString(Type Ty);
static const char *getVecWidthString(Type Ty);
static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond); static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond);
/// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
protected: protected:
...@@ -357,6 +369,8 @@ public: ...@@ -357,6 +369,8 @@ public:
/// Shared emit routines for common forms of instructions. /// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst, static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func, bool NeedsWidthSuffix); const Cfg *Func, bool NeedsWidthSuffix);
static void emitUnaryopFP(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func);
static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst, static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func); const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst, static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
...@@ -420,6 +434,50 @@ private: ...@@ -420,6 +434,50 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
/// Predicated FP/vector instruction with a single source: x := op(y).
/// The instruction kind K identifies the concrete operation, and the matching
/// Opcode string is supplied per specialization.
template <InstARM32::InstKindARM32 K>
class InstARM32UnaryopFP : public InstARM32Pred {
  InstARM32UnaryopFP() = delete;
  InstARM32UnaryopFP(const InstARM32UnaryopFP &) = delete;
  InstARM32UnaryopFP &operator=(const InstARM32UnaryopFP &) = delete;

public:
  /// Allocates the instruction from \p Func and constructs it in place.
  static InstARM32UnaryopFP *create(Cfg *Func, Variable *Dest, Variable *Src,
                                    CondARM32::Cond Predicate) {
    auto *Storage = Func->allocate<InstARM32UnaryopFP>();
    return new (Storage) InstARM32UnaryopFP(Func, Dest, Src, Predicate);
  }

  /// Textual assembly output; delegates to the shared emitUnaryopFP helper
  /// when dump support is compiled in.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitUnaryopFP(Opcode, this, Func);
  }

  /// Integrated-assembler output is not available yet.
  void emitIAS(const Cfg *Func) const override {
    (void)Func;
    llvm::report_fatal_error("Not yet implemented");
  }

  /// Debug dump of the form "<dest> = <opcode/predicate/type> <sources>".
  void dump(const Cfg *Func) const override {
    if (BuildDefs::dump()) {
      Ostream &Out = Func->getContext()->getStrDump();
      dumpDest(Func);
      Out << " = ";
      dumpOpcodePred(Out, Opcode, getDest()->getType());
      Out << " ";
      dumpSources(Func);
    }
  }

  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstARM32UnaryopFP(Cfg *Func, Variable *Dest, Operand *Src,
                     CondARM32::Cond Predicate)
      : InstARM32Pred(Func, K, 1, Dest, Predicate) {
    addSource(Src);
  }

  static const char *Opcode;
};
/// Instructions of the form x := x op y. /// Instructions of the form x := x op y.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32TwoAddrGPR : public InstARM32Pred { class InstARM32TwoAddrGPR : public InstARM32Pred {
...@@ -559,7 +617,56 @@ private: ...@@ -559,7 +617,56 @@ private:
bool SetFlags; bool SetFlags;
}; };
// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate. /// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided
/// by the Floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 {
  InstARM32ThreeAddrFP() = delete;
  InstARM32ThreeAddrFP(const InstARM32ThreeAddrFP &) = delete;
  InstARM32ThreeAddrFP &operator=(const InstARM32ThreeAddrFP &) = delete;

public:
  /// Factory for a vector/FP binary operation such as vadd or vsub. All three
  /// operands are Variables (registers); the instruction carries no
  /// predicate.
  static InstARM32ThreeAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
                                      Variable *Src1) {
    auto *Storage = Func->allocate<InstARM32ThreeAddrFP>();
    return new (Storage) InstARM32ThreeAddrFP(Func, Dest, Src0, Src1);
  }

  /// Textual assembly output; delegates to the shared emitThreeAddrFP helper
  /// when dump support is compiled in.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitThreeAddrFP(Opcode, this, Func);
  }

  /// Integrated-assembler output is not available yet.
  void emitIAS(const Cfg *Func) const override {
    (void)Func;
    llvm::report_fatal_error("Not yet implemented");
  }

  /// Debug dump of the form "<dest> = <opcode>.<dest type> <sources>".
  void dump(const Cfg *Func) const override {
    if (BuildDefs::dump()) {
      Ostream &Out = Func->getContext()->getStrDump();
      dumpDest(Func);
      Out << " = ";
      Out << Opcode << "." << getDest()->getType() << " ";
      dumpSources(Func);
    }
  }

  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0,
                       Variable *Src1)
      : InstARM32(Func, K, 2, Dest) {
    addSource(Src0);
    addSource(Src1);
  }

  static const char *Opcode;
};
/// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrGPR : public InstARM32Pred { class InstARM32FourAddrGPR : public InstARM32Pred {
InstARM32FourAddrGPR() = delete; InstARM32FourAddrGPR() = delete;
...@@ -608,7 +715,7 @@ private: ...@@ -608,7 +715,7 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
// Instructions of the form x cmpop y (setting flags). /// Instructions of the form x cmpop y (setting flags).
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred { class InstARM32CmpLike : public InstARM32Pred {
InstARM32CmpLike() = delete; InstARM32CmpLike() = delete;
...@@ -666,10 +773,19 @@ typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc; ...@@ -666,10 +773,19 @@ typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc;
typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv; typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv;
typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub; typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub;
typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv; typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv;
/// Vector/FP arithmetic instructions of the form x := y op z, one
/// instantiation of InstARM32ThreeAddrFP per instruction kind.
typedef InstARM32ThreeAddrFP<InstARM32::Vadd> InstARM32Vadd;
typedef InstARM32ThreeAddrFP<InstARM32::Vdiv> InstARM32Vdiv;
typedef InstARM32ThreeAddrFP<InstARM32::Vmul> InstARM32Vmul;
typedef InstARM32ThreeAddrFP<InstARM32::Vsub> InstARM32Vsub;
/// Load instruction (kind Ldr), expressed through the generic
/// InstARM32Movlike template.
typedef InstARM32Movlike<InstARM32::Ldr> InstARM32Ldr;
/// Move instruction (variable <- flex). This is more of a pseudo-inst. /// Move instruction (variable <- flex). This is more of a pseudo-inst.
/// If var is a register, then we use "mov". If var is stack, then we use /// If var is a register, then we use "mov". If var is stack, then we use
/// "str" to store to the stack. /// "str" to store to the stack.
typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov; typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov;
/// Represents various vector mov instruction forms (simple single source,
/// single dest forms only, not the 2 GPR <-> 1 D reg forms, etc.).
typedef InstARM32Movlike<InstARM32::Vmov> InstARM32Vmov;
/// The vldr load instruction (kind Vldr), also expressed through
/// InstARM32Movlike.
typedef InstARM32Movlike<InstARM32::Vldr> InstARM32Vldr;
/// MovT leaves the bottom bits alone so dest is also a source. /// MovT leaves the bottom bits alone so dest is also a source.
/// This helps indicate that a previous MovW setting dest is not dead code. /// This helps indicate that a previous MovW setting dest is not dead code.
typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt; typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt;
...@@ -683,6 +799,7 @@ typedef InstARM32UnaryopGPR<InstARM32::Rev, false> InstARM32Rev; ...@@ -683,6 +799,7 @@ typedef InstARM32UnaryopGPR<InstARM32::Rev, false> InstARM32Rev;
// but we aren't using that for now, so just model as a Unaryop. // but we aren't using that for now, so just model as a Unaryop.
typedef InstARM32UnaryopGPR<InstARM32::Sxt, true> InstARM32Sxt; typedef InstARM32UnaryopGPR<InstARM32::Sxt, true> InstARM32Sxt;
typedef InstARM32UnaryopGPR<InstARM32::Uxt, true> InstARM32Uxt; typedef InstARM32UnaryopGPR<InstARM32::Uxt, true> InstARM32Uxt;
typedef InstARM32UnaryopFP<InstARM32::Vsqrt> InstARM32Vsqrt;
typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla; typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla;
typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls; typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls;
typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp; typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp;
...@@ -838,29 +955,6 @@ private: ...@@ -838,29 +955,6 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget); InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
}; };
/// Load instruction.
/// A predicated instruction (derives from InstARM32Pred) built from a
/// destination variable and an OperandARM32Mem source. Instances are created
/// only through create(); default construction and copying are disabled.
class InstARM32Ldr : public InstARM32Pred {
InstARM32Ldr() = delete;
InstARM32Ldr(const InstARM32Ldr &) = delete;
InstARM32Ldr &operator=(const InstARM32Ldr &) = delete;
public:
/// Dest must be a register.
/// Constructs the instruction in storage obtained from Func->allocate, with
/// Mem as the memory operand and Predicate as the condition.
static InstARM32Ldr *create(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Ldr>())
InstARM32Ldr(Func, Dest, Mem, Predicate);
}
// emit, emitIAS and dump are only declared here; their definitions live
// outside this class body.
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
/// LLVM-style RTTI hook: delegates to isClassof with instruction kind Ldr.
static bool classof(const Inst *Inst) { return isClassof(Inst, Ldr); }
private:
// Declared here, defined out of line; reachable only via create().
InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
CondARM32::Cond Predicate);
};
/// Pop into a list of GPRs. Technically this can be predicated, but we don't /// Pop into a list of GPRs. Technically this can be predicated, but we don't
/// need that functionality. /// need that functionality.
class InstARM32Pop : public InstARM32 { class InstARM32Pop : public InstARM32 {
...@@ -1003,8 +1097,12 @@ private: ...@@ -1003,8 +1097,12 @@ private:
// already have default implementations. Without this, there is the // already have default implementations. Without this, there is the
// possibility of ODR violations and link errors. // possibility of ODR violations and link errors.
template <> void InstARM32Ldr::emit(const Cfg *Func) const;
template <> void InstARM32Mov::emit(const Cfg *Func) const;
template <> void InstARM32Movw::emit(const Cfg *Func) const; template <> void InstARM32Movw::emit(const Cfg *Func) const;
template <> void InstARM32Movt::emit(const Cfg *Func) const; template <> void InstARM32Movt::emit(const Cfg *Func) const;
template <> void InstARM32Vldr::emit(const Cfg *Func) const;
template <> void InstARM32Vmov::emit(const Cfg *Func) const;
} // end of namespace Ice } // end of namespace Ice
......
...@@ -21,42 +21,90 @@ ...@@ -21,42 +21,90 @@
namespace Ice { namespace Ice {
namespace RegARM32 { class RegARM32 {
public:
/// An enum of every register. The enum value may not match the encoding /// An enum of every register. The enum value may not match the encoding
/// used to binary encode register operands in instructions. /// used to binary encode register operands in instructions.
enum AllRegisters { enum AllRegisters {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
val, val,
REGARM32_TABLE REGARM32_TABLE
#undef X #undef X
Reg_NUM, Reg_NUM,
#define X(val, init) val init, #define X(val, init) val init,
REGARM32_TABLE_BOUNDS REGARM32_TABLE_BOUNDS
#undef X #undef X
}; };
/// An enum of GPR Registers. The enum value does match the encoding used /// An enum of GPR Registers. The enum value does match the encoding used
/// to binary encode register operands in instructions. /// to binary encode register operands in instructions.
enum GPRRegister { enum GPRRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
Encoded_##val encode, Encoded_##val = encode,
REGARM32_GPR_TABLE REGARM32_GPR_TABLE
#undef X #undef X
Encoded_Not_GPR = -1 Encoded_Not_GPR = -1
}; };
/// An enum of FP32 S-Registers. The enum value does match the encoding used
/// to binary encode register operands in instructions.
enum SRegister {
// Each row of REGARM32_FP32_TABLE expands to one `Encoded_<val> = <encode>`
// enumerator; the remaining X-macro columns are ignored here.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128) \
Encoded_##val = encode,
REGARM32_FP32_TABLE
#undef X
// NOTE(review): presumably a sentinel for "not an S register" -- confirm
// against its users.
Encoded_Not_SReg = -1
};
/// An enum of FP64 D-Registers. The enum value does match the encoding used
/// to binary encode register operands in instructions.
enum DRegister {
// Each row of REGARM32_FP64_TABLE expands to one `Encoded_<val> = <encode>`
// enumerator; the remaining X-macro columns are ignored here.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128) \
Encoded_##val = encode,
REGARM32_FP64_TABLE
#undef X
// NOTE(review): presumably a sentinel for "not a D register" -- confirm
// against its users.
Encoded_Not_DReg = -1
};
// TODO(jvoung): Floating point and vector registers... /// An enum of 128-bit Q-Registers. The enum value does match the encoding
// Need to model overlap and difference in encoding too. /// used to binary encode register operands in instructions.
enum QRegister {
// Each row of REGARM32_VEC128_TABLE expands to one `Encoded_<val> = <encode>`
// enumerator; the remaining X-macro columns are ignored here.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128) \
Encoded_##val = encode,
REGARM32_VEC128_TABLE
#undef X
// NOTE(review): presumably a sentinel for "not a Q register" -- confirm
// against its users.
Encoded_Not_QReg = -1
};
static inline GPRRegister getEncodedGPR(int32_t RegNum) { static inline GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last); assert(Reg_GPR_First <= RegNum);
return GPRRegister(RegNum - Reg_GPR_First); assert(RegNum <= Reg_GPR_Last);
} return GPRRegister(RegNum - Reg_GPR_First);
}
} // end of namespace RegARM32 static inline SRegister getEncodedSReg(int32_t RegNum) {
assert(Reg_SREG_First <= RegNum);
assert(RegNum <= Reg_SREG_Last);
return SRegister(RegNum - Reg_SREG_First);
}
/// Translate a register number in the D-register range
/// [Reg_DREG_First, Reg_DREG_Last] into the corresponding DRegister value by
/// rebasing it against Reg_DREG_First. The range is checked with asserts only.
static inline DRegister getEncodedDReg(int32_t RegNum) {
  assert(RegNum >= Reg_DREG_First);
  assert(RegNum <= Reg_DREG_Last);
  const int32_t Encoding = RegNum - Reg_DREG_First;
  return static_cast<DRegister>(Encoding);
}
/// Translate a register number in the Q-register range
/// [Reg_QREG_First, Reg_QREG_Last] into the corresponding QRegister value by
/// rebasing it against Reg_QREG_First. The range is checked with asserts only.
static inline QRegister getEncodedQReg(int32_t RegNum) {
  assert(RegNum >= Reg_QREG_First);
  assert(RegNum <= Reg_QREG_Last);
  const int32_t Encoding = RegNum - Reg_QREG_First;
  return static_cast<QRegister>(Encoding);
}
};
} // end of namespace Ice } // end of namespace Ice
......
...@@ -174,16 +174,19 @@ TargetARM32::TargetARM32(Cfg *Func) ...@@ -174,16 +174,19 @@ TargetARM32::TargetARM32(Cfg *Func)
// TODO: Don't initialize IntegerRegisters and friends every time. // TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the // Instead, initialize in some sort of static initializer for the
// class. // class.
// Limit this size (or do all bitsets need to be the same width)???
llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM); llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
ScratchRegs.resize(RegARM32::Reg_NUM); ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
IntegerRegisters[RegARM32::val] = isInt; \ IntegerRegisters[RegARM32::val] = isInt; \
FloatRegisters[RegARM32::val] = isFP; \ Float32Registers[RegARM32::val] = isFP32; \
VectorRegisters[RegARM32::val] = isFP; \ Float64Registers[RegARM32::val] = isFP64; \
VectorRegisters[RegARM32::val] = isVec128; \
ScratchRegs[RegARM32::val] = scratch; ScratchRegs[RegARM32::val] = scratch;
REGARM32_TABLE; REGARM32_TABLE;
#undef X #undef X
...@@ -193,8 +196,8 @@ TargetARM32::TargetARM32(Cfg *Func) ...@@ -193,8 +196,8 @@ TargetARM32::TargetARM32(Cfg *Func)
TypeToRegisterSet[IceType_i16] = IntegerRegisters; TypeToRegisterSet[IceType_i16] = IntegerRegisters;
TypeToRegisterSet[IceType_i32] = IntegerRegisters; TypeToRegisterSet[IceType_i32] = IntegerRegisters;
TypeToRegisterSet[IceType_i64] = IntegerRegisters; TypeToRegisterSet[IceType_i64] = IntegerRegisters;
TypeToRegisterSet[IceType_f32] = FloatRegisters; TypeToRegisterSet[IceType_f32] = Float32Registers;
TypeToRegisterSet[IceType_f64] = FloatRegisters; TypeToRegisterSet[IceType_f64] = Float64Registers;
TypeToRegisterSet[IceType_v4i1] = VectorRegisters; TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
TypeToRegisterSet[IceType_v8i1] = VectorRegisters; TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
TypeToRegisterSet[IceType_v16i1] = VectorRegisters; TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
...@@ -363,7 +366,7 @@ IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const { ...@@ -363,7 +366,7 @@ IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
(void)Ty; (void)Ty;
static const char *RegNames[] = { static const char *RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
name, name,
REGARM32_TABLE REGARM32_TABLE
#undef X #undef X
...@@ -435,9 +438,7 @@ bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { ...@@ -435,9 +438,7 @@ bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
int32_t RegLo, RegHi; int32_t RegLo, RegHi;
// Always start i64 registers at an even register, so this may end // Always start i64 registers at an even register, so this may end
// up padding away a register. // up padding away a register.
if (NumGPRRegsUsed % 2 != 0) { NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
++NumGPRRegsUsed;
}
RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed; ++NumGPRRegsUsed;
RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
...@@ -459,6 +460,33 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { ...@@ -459,6 +460,33 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
return true; return true;
} }
/// Try to assign the next floating-point or vector argument of type Ty to a
/// register, storing the register number in *Reg.
///
/// Usage is tracked in NumFPRegUnits, where one unit corresponds to one
/// S-register slot: an f32 takes one unit, an f64 takes two units starting at
/// an even unit, and a vector takes four units starting at a multiple of four.
/// Units skipped for alignment are not reused by later arguments.
///
/// Returns true if the argument fits in the register file. Returns false when
/// the units are already exhausted (*Reg untouched) or when this argument
/// would cross the ARM32_MAX_FP_REG_UNITS boundary; in the latter case the
/// units stay consumed and *Reg has been written but must not be used.
bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
  if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
    return false;

  constexpr int UnitsPerQReg = 4;
  constexpr int UnitsPerDReg = 2;

  if (isVectorType(Ty)) {
    NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, UnitsPerQReg);
    *Reg = RegARM32::Reg_q0 + (NumFPRegUnits / UnitsPerQReg);
    NumFPRegUnits += UnitsPerQReg;
    // Past the boundary: report failure but keep the units marked as used.
    return NumFPRegUnits <= ARM32_MAX_FP_REG_UNITS;
  }

  if (Ty == IceType_f64) {
    NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, UnitsPerDReg);
    *Reg = RegARM32::Reg_d0 + (NumFPRegUnits / UnitsPerDReg);
    NumFPRegUnits += UnitsPerDReg;
    // Past the boundary: report failure but keep the units marked as used.
    return NumFPRegUnits <= ARM32_MAX_FP_REG_UNITS;
  }

  // Single-precision: the guard at the top already ensured one unit is free.
  assert(Ty == IceType_f32);
  *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
  ++NumFPRegUnits;
  return true;
}
void TargetARM32::lowerArguments() { void TargetARM32::lowerArguments() {
VarList &Args = Func->getArgs(); VarList &Args = Func->getArgs();
TargetARM32::CallingConv CC; TargetARM32::CallingConv CC;
...@@ -472,14 +500,7 @@ void TargetARM32::lowerArguments() { ...@@ -472,14 +500,7 @@ void TargetARM32::lowerArguments() {
for (SizeT I = 0, E = Args.size(); I < E; ++I) { for (SizeT I = 0, E = Args.size(); I < E; ++I) {
Variable *Arg = Args[I]; Variable *Arg = Args[I];
Type Ty = Arg->getType(); Type Ty = Arg->getType();
// TODO(jvoung): handle float/vector types. if (Ty == IceType_i64) {
if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
continue;
} else if (isFloatingType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
continue;
} else if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> RegPair; std::pair<int32_t, int32_t> RegPair;
if (!CC.I64InRegs(&RegPair)) if (!CC.I64InRegs(&RegPair))
continue; continue;
...@@ -503,10 +524,15 @@ void TargetARM32::lowerArguments() { ...@@ -503,10 +524,15 @@ void TargetARM32::lowerArguments() {
Context.insert(InstAssign::create(Func, Arg, RegisterArg)); Context.insert(InstAssign::create(Func, Arg, RegisterArg));
continue; continue;
} else { } else {
assert(Ty == IceType_i32);
int32_t RegNum; int32_t RegNum;
if (!CC.I32InReg(&RegNum)) if (isVectorType(Ty) || isFloatingType(Ty)) {
continue; if (!CC.FPInReg(Ty, &RegNum))
continue;
} else {
assert(Ty == IceType_i32);
if (!CC.I32InReg(&RegNum))
continue;
}
Variable *RegisterArg = Func->makeVariable(Ty); Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump()) { if (BuildDefs::dump()) {
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
...@@ -517,6 +543,7 @@ void TargetARM32::lowerArguments() { ...@@ -517,6 +543,7 @@ void TargetARM32::lowerArguments() {
Args[I] = RegisterArg; Args[I] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg)); Context.insert(InstAssign::create(Func, Arg, RegisterArg));
continue;
} }
} }
} }
...@@ -554,7 +581,10 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, ...@@ -554,7 +581,10 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
Ctx->getConstantInt32(Arg->getStackOffset()))); Ctx->getConstantInt32(Arg->getStackOffset())));
if (isVectorType(Arg->getType())) { if (isVectorType(Arg->getType())) {
// Use vld1.$elem or something?
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
} else if (isFloatingType(Arg->getType())) {
_vldr(Arg, Mem);
} else { } else {
_ldr(Arg, Mem); _ldr(Arg, Mem);
} }
...@@ -725,12 +755,9 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -725,12 +755,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
Type Ty = Arg->getType(); Type Ty = Arg->getType();
bool InRegs = false; bool InRegs = false;
// Skip arguments passed in registers. // Skip arguments passed in registers.
if (isVectorType(Ty)) { if (isVectorType(Ty) || isFloatingType(Ty)) {
UnimplementedError(Func->getContext()->getFlags()); int32_t DummyReg;
continue; InRegs = CC.FPInReg(Ty, &DummyReg);
} else if (isFloatingType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
continue;
} else if (Ty == IceType_i64) { } else if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> DummyRegs; std::pair<int32_t, int32_t> DummyRegs;
InRegs = CC.I64InRegs(&DummyRegs); InRegs = CC.I64InRegs(&DummyRegs);
...@@ -858,6 +885,8 @@ void TargetARM32::addEpilog(CfgNode *Node) { ...@@ -858,6 +885,8 @@ void TargetARM32::addEpilog(CfgNode *Node) {
bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
constexpr bool SignExt = false; constexpr bool SignExt = false;
// TODO(jvoung): vldr of FP stack slots has a different limit from the
// plain stackSlotType().
return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
} }
...@@ -1121,7 +1150,7 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, ...@@ -1121,7 +1150,7 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
llvm::SmallBitVector Registers(RegARM32::Reg_NUM); llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP) \ isFP32, isFP64, isVec128) \
if (scratch && (Include & RegSet_CallerSave)) \ if (scratch && (Include & RegSet_CallerSave)) \
Registers[RegARM32::val] = true; \ Registers[RegARM32::val] = true; \
if (preserved && (Include & RegSet_CalleeSave)) \ if (preserved && (Include & RegSet_CalleeSave)) \
...@@ -1518,6 +1547,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1518,6 +1547,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return; return;
} else if (isVectorType(Dest->getType())) { } else if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
// Add a fake def to keep liveness consistent in the meantime.
Context.insert(InstFakeDef::create(Func, Dest));
return; return;
} }
// Dest->getType() is a non-i64 scalar. // Dest->getType() is a non-i64 scalar.
...@@ -1553,6 +1584,47 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1553,6 +1584,47 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
H_srem_i32, IsRemainder); H_srem_i32, IsRemainder);
return; return;
} }
case InstArithmetic::Frem: {
const SizeT MaxSrcs = 2;
Type Ty = Dest->getType();
InstCall *Call = makeHelperCall(
isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
Call->addArg(Src0R);
Call->addArg(Src1);
lowerCall(Call);
return;
}
}
// Handle floating point arithmetic separately: they require Src1 to be
// legalized to a register.
switch (Inst->getOp()) {
default:
break;
case InstArithmetic::Fadd: {
Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R);
_vmov(Dest, T);
return;
}
case InstArithmetic::Fsub: {
Variable *Src1R = legalizeToReg(Src1);
_vsub(T, Src0R, Src1R);
_vmov(Dest, T);
return;
}
case InstArithmetic::Fmul: {
Variable *Src1R = legalizeToReg(Src1);
_vmul(T, Src0R, Src1R);
_vmov(Dest, T);
return;
}
case InstArithmetic::Fdiv: {
Variable *Src1R = legalizeToReg(Src1);
_vdiv(T, Src0R, Src1R);
_vmov(Dest, T);
return;
}
} }
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
...@@ -1605,19 +1677,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1605,19 +1677,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
llvm_unreachable("Integer div/rem should have been handled earlier."); llvm_unreachable("Integer div/rem should have been handled earlier.");
return; return;
case InstArithmetic::Fadd: case InstArithmetic::Fadd:
UnimplementedError(Func->getContext()->getFlags());
return;
case InstArithmetic::Fsub: case InstArithmetic::Fsub:
UnimplementedError(Func->getContext()->getFlags());
return;
case InstArithmetic::Fmul: case InstArithmetic::Fmul:
UnimplementedError(Func->getContext()->getFlags());
return;
case InstArithmetic::Fdiv: case InstArithmetic::Fdiv:
UnimplementedError(Func->getContext()->getFlags());
return;
case InstArithmetic::Frem: case InstArithmetic::Frem:
UnimplementedError(Func->getContext()->getFlags()); llvm_unreachable("Floating point arith should have been handled earlier.");
return; return;
} }
} }
...@@ -1652,6 +1716,9 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { ...@@ -1652,6 +1716,9 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
} }
if (isVectorType(Dest->getType())) { if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
} else if (isFloatingType(Dest->getType())) {
Variable *SrcR = legalizeToReg(NewSrc);
_vmov(Dest, SrcR);
} else { } else {
_mov(Dest, NewSrc); _mov(Dest, NewSrc);
} }
...@@ -1681,6 +1748,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1681,6 +1748,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// Pair of Arg Operand -> GPR number assignments. // Pair of Arg Operand -> GPR number assignments.
llvm::SmallVector<std::pair<Operand *, int32_t>, llvm::SmallVector<std::pair<Operand *, int32_t>,
TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs; TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
llvm::SmallVector<std::pair<Operand *, int32_t>,
TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
// Pair of Arg Operand -> stack offset. // Pair of Arg Operand -> stack offset.
llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs; llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
int32_t ParameterAreaSizeBytes = 0; int32_t ParameterAreaSizeBytes = 0;
...@@ -1691,11 +1760,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1691,11 +1760,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
Operand *Arg = legalizeUndef(Instr->getArg(i)); Operand *Arg = legalizeUndef(Instr->getArg(i));
Type Ty = Arg->getType(); Type Ty = Arg->getType();
bool InRegs = false; bool InRegs = false;
if (isVectorType(Ty)) { if (Ty == IceType_i64) {
UnimplementedError(Func->getContext()->getFlags());
} else if (isFloatingType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
} else if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> Regs; std::pair<int32_t, int32_t> Regs;
if (CC.I64InRegs(&Regs)) { if (CC.I64InRegs(&Regs)) {
InRegs = true; InRegs = true;
...@@ -1704,6 +1769,12 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1704,6 +1769,12 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
GPRArgs.push_back(std::make_pair(Lo, Regs.first)); GPRArgs.push_back(std::make_pair(Lo, Regs.first));
GPRArgs.push_back(std::make_pair(Hi, Regs.second)); GPRArgs.push_back(std::make_pair(Hi, Regs.second));
} }
} else if (isVectorType(Ty) || isFloatingType(Ty)) {
int32_t Reg;
if (CC.FPInReg(Ty, &Reg)) {
InRegs = true;
FPArgs.push_back(std::make_pair(Arg, Reg));
}
} else { } else {
assert(Ty == IceType_i32); assert(Ty == IceType_i32);
int32_t Reg; int32_t Reg;
...@@ -1766,6 +1837,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1766,6 +1837,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// registers after the call. // registers after the call.
Context.insert(InstFakeUse::create(Func, Reg)); Context.insert(InstFakeUse::create(Func, Reg));
} }
for (auto &FPArg : FPArgs) {
Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary // Generate the call instruction. Assign its result to a temporary
// with high register allocation weight. // with high register allocation weight.
...@@ -1791,9 +1866,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1791,9 +1866,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
break; break;
case IceType_f32: case IceType_f32:
ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
break;
case IceType_f64: case IceType_f64:
// Use S and D regs. ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
UnimplementedError(Func->getContext()->getFlags());
break; break;
case IceType_v4i1: case IceType_v4i1:
case IceType_v8i1: case IceType_v8i1:
...@@ -1802,8 +1878,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1802,8 +1878,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
case IceType_v8i16: case IceType_v8i16:
case IceType_v4i32: case IceType_v4i32:
case IceType_v4f32: case IceType_v4f32:
// Use Q regs. ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
UnimplementedError(Func->getContext()->getFlags());
break; break;
} }
} }
...@@ -1853,12 +1928,11 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -1853,12 +1928,11 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
_mov(DestLo, ReturnReg); _mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi); _mov(DestHi, ReturnRegHi);
} else { } else {
assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags()); _vmov(Dest, ReturnReg);
} else { } else {
assert(isIntegerType(Dest->getType()) &&
typeWidthInBytes(Dest->getType()) <= 4);
_mov(Dest, ReturnReg); _mov(Dest, ReturnReg);
} }
} }
...@@ -2291,6 +2365,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2291,6 +2365,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return; return;
} }
case Intrinsics::Fabs: { case Intrinsics::Fabs: {
// Add a fake def to keep liveness consistent in the meantime.
Context.insert(InstFakeDef::create(Func, Instr->getDest()));
UnimplementedError(Func->getContext()->getFlags()); UnimplementedError(Func->getContext()->getFlags());
return; return;
} }
...@@ -2352,7 +2428,11 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2352,7 +2428,11 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return; return;
} }
case Intrinsics::Sqrt: { case Intrinsics::Sqrt: {
UnimplementedError(Func->getContext()->getFlags()); Variable *Src = legalizeToReg(Instr->getArg(0));
Variable *Dest = Instr->getDest();
Variable *T = makeReg(Dest->getType());
_vsqrt(T, Src);
_vmov(Dest, T);
return; return;
} }
case Intrinsics::Stacksave: { case Intrinsics::Stacksave: {
...@@ -2440,16 +2520,22 @@ void TargetARM32::lowerRet(const InstRet *Inst) { ...@@ -2440,16 +2520,22 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr; Variable *Reg = nullptr;
if (Inst->hasRetValue()) { if (Inst->hasRetValue()) {
Operand *Src0 = Inst->getRetValue(); Operand *Src0 = Inst->getRetValue();
if (Src0->getType() == IceType_i64) { Type Ty = Src0->getType();
if (Ty == IceType_i64) {
Src0 = legalizeUndef(Src0); Src0 = legalizeUndef(Src0);
Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0); Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1); Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
Reg = R0; Reg = R0;
Context.insert(InstFakeUse::create(Func, R1)); Context.insert(InstFakeUse::create(Func, R1));
} else if (isScalarFloatingType(Src0->getType())) { } else if (Ty == IceType_f32) {
UnimplementedError(Func->getContext()->getFlags()); Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
Reg = S0;
} else if (Ty == IceType_f64) {
Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
Reg = D0;
} else if (isVectorType(Src0->getType())) { } else if (isVectorType(Src0->getType())) {
UnimplementedError(Func->getContext()->getFlags()); Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
Reg = Q0;
} else { } else {
Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
...@@ -2596,8 +2682,8 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { ...@@ -2596,8 +2682,8 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType(); Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
if (isVectorType(Ty)) { if (isVectorType(Ty) || isFloatingType(Ty)) {
UnimplementedError(Func->getContext()->getFlags()); _vmov(Reg, Src);
} else { } else {
// Mov's Src operand can really only be the flexible second operand type // Mov's Src operand can really only be the flexible second operand type
// or a register. Users should guarantee that. // or a register. Users should guarantee that.
...@@ -2646,7 +2732,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -2646,7 +2732,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
} }
if (!(Allowed & Legal_Mem)) { if (!(Allowed & Legal_Mem)) {
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
_ldr(Reg, Mem); if (isVectorType(Ty)) {
UnimplementedError(Func->getContext()->getFlags());
} else if (isFloatingType(Ty)) {
_vldr(Reg, Mem);
} else {
_ldr(Reg, Mem);
}
From = Reg; From = Reg;
} else { } else {
From = Mem; From = Mem;
...@@ -2716,11 +2808,25 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -2716,11 +2808,25 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
_movt(Reg, C); _movt(Reg, C);
return Reg; return Reg;
} else { } else {
assert(isScalarFloatingType(Ty));
// Load floats/doubles from literal pool. // Load floats/doubles from literal pool.
UnimplementedError(Func->getContext()->getFlags()); // TODO(jvoung): Allow certain immediates to be encoded directly in
From = copyToReg(From, RegNum); // an operand. See Table A7-18 of the ARM manual:
// "Floating-point modified immediate constants".
// Or, for 32-bit floating point numbers, just encode the raw bits
// into a movw/movt pair to GPR, and vmov to an SREG, instead of using
// a movw/movt pair to get the const-pool address then loading to SREG.
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
llvm::cast<Constant>(From)->setShouldBePooled(true);
Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
Variable *BaseReg = makeReg(getPointerType());
_movw(BaseReg, Offset);
_movt(BaseReg, Offset);
From = formMemoryOperand(BaseReg, Ty);
return copyToReg(From, RegNum);
} }
return From;
} }
if (auto Var = llvm::dyn_cast<Variable>(From)) { if (auto Var = llvm::dyn_cast<Variable>(From)) {
......
...@@ -190,7 +190,7 @@ protected: ...@@ -190,7 +190,7 @@ protected:
} }
void _adds(Variable *Dest, Variable *Src0, Operand *Src1, void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Add::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Add::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -300,7 +300,7 @@ protected: ...@@ -300,7 +300,7 @@ protected:
} }
void _orrs(Variable *Dest, Variable *Src0, Operand *Src1, void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -334,7 +334,7 @@ protected: ...@@ -334,7 +334,7 @@ protected:
} }
void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1, void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -352,7 +352,7 @@ protected: ...@@ -352,7 +352,7 @@ protected:
} }
void _subs(Variable *Dest, Variable *Src0, Operand *Src1, void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
const bool SetFlags = true; constexpr bool SetFlags = true;
Context.insert( Context.insert(
InstARM32Sub::create(Func, Dest, Src0, Src1, Pred, SetFlags)); InstARM32Sub::create(Func, Dest, Src0, Src1, Pred, SetFlags));
} }
...@@ -381,6 +381,41 @@ protected: ...@@ -381,6 +381,41 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred)); Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
} }
/// Builds an InstARM32Vadd for Dest = Src0 + Src1 and appends it to Context.
/// This helper takes no predicate argument.
void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vadd = InstARM32Vadd::create(Func, Dest, Src0, Src1);
  Context.insert(Vadd);
}
/// Builds an InstARM32Vdiv from Dest, Src0 and Src1 and appends it to Context.
/// This helper takes no predicate argument.
void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vdiv = InstARM32Vdiv::create(Func, Dest, Src0, Src1);
  Context.insert(Vdiv);
}
/// Builds an InstARM32Vldr that loads Dest from the memory operand Src and
/// appends it to Context.
/// \param Pred condition under which the load executes (default AL).
void _vldr(Variable *Dest, OperandARM32Mem *Src,
           CondARM32::Cond Pred = CondARM32::AL) {
  auto *Vldr = InstARM32Vldr::create(Func, Dest, Src, Pred);
  Context.insert(Vldr);
}
/// Emits the plain one-source, one-destination form of vmov, always using
/// the AL condition.
///
/// vmov comes in many flavors: transfers within S/D/Q registers, moves
/// between core integer registers and S/D registers, and moves of small
/// immediates into S/D. For integer -> S/D/Q there is also a form that takes
/// two integer registers to fill a D register or two consecutive S registers.
/// vmov can additionally act as insert-element, e.g. "vmov.8 d0[1], r0", but
/// insert-element is a "two-address" operation that modifies only part of the
/// destination register, and this helper cannot model that.
void _vmov(Variable *Dest, Operand *Src0) {
  constexpr CondARM32::Cond Always = CondARM32::AL;
  auto *Vmov = InstARM32Vmov::create(Func, Dest, Src0, Always);
  Context.insert(Vmov);
}
/// Builds an InstARM32Vmul from Dest, Src0 and Src1 and appends it to Context.
/// This helper takes no predicate argument.
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmul = InstARM32Vmul::create(Func, Dest, Src0, Src1);
  Context.insert(Vmul);
}
/// Builds an InstARM32Vsqrt from Dest and Src and appends it to Context.
/// \param Pred condition under which the instruction executes (default AL).
void _vsqrt(Variable *Dest, Variable *Src,
            CondARM32::Cond Pred = CondARM32::AL) {
  auto *Vsqrt = InstARM32Vsqrt::create(Func, Dest, Src, Pred);
  Context.insert(Vsqrt);
}
/// Builds an InstARM32Vsub from Dest, Src0 and Src1 and appends it to Context.
/// This helper takes no predicate argument.
void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vsub = InstARM32Vsub::create(Func, Dest, Src0, Src1);
  Context.insert(Vsub);
}
/// Run a pass through stack variables and ensure that the offsets are legal. /// Run a pass through stack variables and ensure that the offsets are legal.
/// If the offset is not legal, use a new base register that accounts for /// If the offset is not legal, use a new base register that accounts for
...@@ -417,16 +452,20 @@ protected: ...@@ -417,16 +452,20 @@ protected:
CallingConv &operator=(const CallingConv &) = delete; CallingConv &operator=(const CallingConv &) = delete;
public: public:
CallingConv() : NumGPRRegsUsed(0) {} CallingConv() {}
~CallingConv() = default; ~CallingConv() = default;
bool I64InRegs(std::pair<int32_t, int32_t> *Regs); bool I64InRegs(std::pair<int32_t, int32_t> *Regs);
bool I32InReg(int32_t *Reg); bool I32InReg(int32_t *Reg);
bool FPInReg(Type Ty, int32_t *Reg);
static constexpr uint32_t ARM32_MAX_GPR_ARG = 4; static constexpr uint32_t ARM32_MAX_GPR_ARG = 4;
// Units of S registers still available to S/D/Q arguments.
static constexpr uint32_t ARM32_MAX_FP_REG_UNITS = 16;
private: private:
uint32_t NumGPRRegsUsed; uint32_t NumGPRRegsUsed = 0;
uint32_t NumFPRegUnits = 0;
}; };
private: private:
......
; This tries to be a comprehensive test of f32 and f64 arith operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
; RUN: -i %s --args -Om1 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; Returns %a + %b using a single-precision fadd.
define internal float @addFloat(float %a, float %b) {
entry:
%add = fadd float %a, %b
ret float %add
}
; CHECK-LABEL: addFloat
; CHECK: addss
; CHECK: fld
; ARM32-LABEL: addFloat
; ARM32: vadd.f32 s{{[0-9]+}}, s
; Returns %a + %b using a double-precision fadd.
define internal double @addDouble(double %a, double %b) {
entry:
%add = fadd double %a, %b
ret double %add
}
; CHECK-LABEL: addDouble
; CHECK: addsd
; CHECK: fld
; ARM32-LABEL: addDouble
; ARM32: vadd.f64 d{{[0-9]+}}, d
; Returns %a - %b using a single-precision fsub.
define internal float @subFloat(float %a, float %b) {
entry:
%sub = fsub float %a, %b
ret float %sub
}
; CHECK-LABEL: subFloat
; CHECK: subss
; CHECK: fld
; ARM32-LABEL: subFloat
; ARM32: vsub.f32 s{{[0-9]+}}, s
; Returns %a - %b using a double-precision fsub.
define internal double @subDouble(double %a, double %b) {
entry:
%sub = fsub double %a, %b
ret double %sub
}
; CHECK-LABEL: subDouble
; CHECK: subsd
; CHECK: fld
; ARM32-LABEL: subDouble
; ARM32: vsub.f64 d{{[0-9]+}}, d
; Returns %a * %b using a single-precision fmul.
define internal float @mulFloat(float %a, float %b) {
entry:
%mul = fmul float %a, %b
ret float %mul
}
; CHECK-LABEL: mulFloat
; CHECK: mulss
; CHECK: fld
; ARM32-LABEL: mulFloat
; ARM32: vmul.f32 s{{[0-9]+}}, s
; Returns %a * %b using a double-precision fmul.
define internal double @mulDouble(double %a, double %b) {
entry:
%mul = fmul double %a, %b
ret double %mul
}
; CHECK-LABEL: mulDouble
; CHECK: mulsd
; CHECK: fld
; ARM32-LABEL: mulDouble
; ARM32: vmul.f64 d{{[0-9]+}}, d
; Returns %a / %b using a single-precision fdiv.
define internal float @divFloat(float %a, float %b) {
entry:
%div = fdiv float %a, %b
ret float %div
}
; CHECK-LABEL: divFloat
; CHECK: divss
; CHECK: fld
; ARM32-LABEL: divFloat
; ARM32: vdiv.f32 s{{[0-9]+}}, s
; Returns %a / %b using a double-precision fdiv.
define internal double @divDouble(double %a, double %b) {
entry:
%div = fdiv double %a, %b
ret double %div
}
; CHECK-LABEL: divDouble
; CHECK: divsd
; CHECK: fld
; ARM32-LABEL: divDouble
; ARM32: vdiv.f64 d{{[0-9]+}}, d
; Returns the single-precision remainder of %a by %b (frem). The result is
; named %div even though the operation is a remainder.
define internal float @remFloat(float %a, float %b) {
entry:
%div = frem float %a, %b
ret float %div
}
; CHECK-LABEL: remFloat
; CHECK: call {{.*}} R_{{.*}} fmodf
; ARM32-LABEL: remFloat
; ARM32: bl {{.*}} fmodf
; Returns the double-precision remainder of %a by %b (frem). The result is
; named %div even though the operation is a remainder.
define internal double @remDouble(double %a, double %b) {
entry:
%div = frem double %a, %b
ret double %div
}
; CHECK-LABEL: remDouble
; CHECK: call {{.*}} R_{{.*}} fmod
; ARM32-LABEL: remDouble
; ARM32: bl {{.*}} fmod
; This tries to be a comprehensive test of f32 and f64 call/return ops.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; Can't test on ARM yet. Need to use several vpush {contiguous FP regs},
; instead of push {any GPR list}.
; Takes an i32 placed between two double arguments and returns that i32;
; neither double is used.
define internal i32 @doubleArgs(double %a, i32 %b, double %c) {
entry:
ret i32 %b
}
; CHECK-LABEL: doubleArgs
; CHECK: mov eax,DWORD PTR [esp+0xc]
; CHECK-NEXT: ret
; ARM32-LABEL: doubleArgs
; Takes an i32 placed between two float arguments and returns that i32;
; neither float is used.
define internal i32 @floatArgs(float %a, i32 %b, float %c) {
entry:
ret i32 %b
}
; CHECK-LABEL: floatArgs
; CHECK: mov eax,DWORD PTR [esp+0x8]
; CHECK-NEXT: ret
; Calls @ignoreFpArgsNoInline three times, each time passing one float
; argument, the constant 123 and one double argument, then returns the sum
; of the three call results.
define internal i32 @passFpArgs(float %a, double %b, float %c, double %d, float %e, double %f) {
entry:
%call = call i32 @ignoreFpArgsNoInline(float %a, i32 123, double %b)
%call1 = call i32 @ignoreFpArgsNoInline(float %c, i32 123, double %d)
%call2 = call i32 @ignoreFpArgsNoInline(float %e, i32 123, double %f)
%add = add i32 %call1, %call
%add3 = add i32 %add, %call2
ret i32 %add3
}
; CHECK-LABEL: passFpArgs
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
declare i32 @ignoreFpArgsNoInline(float %x, i32 %y, double %z)
; Calls @ignoreFpArgsNoInline with %a, the constant 123 and the double
; constant 2.34, and returns the call result. %b is unused.
define internal i32 @passFpConstArg(float %a, double %b) {
entry:
%call = call i32 @ignoreFpArgsNoInline(float %a, i32 123, double 2.340000e+00)
ret i32 %call
}
; CHECK-LABEL: passFpConstArg
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; Calls @ignoreFp32ArgsNoInline with %a, the constant 123 and the float
; constant 2.0, and returns the call result.
define internal i32 @passFp32ConstArg(float %a) {
entry:
%call = call i32 @ignoreFp32ArgsNoInline(float %a, i32 123, float 2.0)
ret i32 %call
}
; CHECK-LABEL: passFp32ConstArg
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: movss DWORD PTR [esp+0x8]
; CHECK: call {{.*}} R_{{.*}} ignoreFp32ArgsNoInline
declare i32 @ignoreFp32ArgsNoInline(float %x, i32 %y, float %z)
; Returns its float argument unchanged.
define internal float @returnFloatArg(float %a) {
entry:
ret float %a
}
; CHECK-LABEL: returnFloatArg
; CHECK: fld DWORD PTR [esp
; Returns its double argument unchanged.
define internal double @returnDoubleArg(double %a) {
entry:
ret double %a
}
; CHECK-LABEL: returnDoubleArg
; CHECK: fld QWORD PTR [esp
; Returns a float constant written in LLVM's hexadecimal notation.
define internal float @returnFloatConst() {
entry:
ret float 0x3FF3AE1480000000
}
; CHECK-LABEL: returnFloatConst
; CHECK: fld
; Returns the double constant 1.23.
define internal double @returnDoubleConst() {
entry:
ret double 1.230000e+00
}
; CHECK-LABEL: returnDoubleConst
; CHECK: fld
; This tries to be a comprehensive test of f32 and f64 compare operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s
; Compares the two floats and then the two doubles with `fcmp oeq`
; (ordered and equal); @func is called after each comparison that is true.
define internal void @fcmpEq(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp oeq float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp oeq double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpEq
; CHECK: ucomiss
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
declare void @func()
; Compares the two floats and then the two doubles with `fcmp une`
; (unordered or not equal); @func is called after each comparison that is
; true.
define internal void @fcmpNe(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp une float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp une double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpNe
; CHECK: ucomiss
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; Compares the two floats and then the two doubles with `fcmp ogt`
; (ordered and greater than); @func is called after each comparison that is
; true.
define internal void @fcmpGt(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp ogt float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp ogt double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpGt
; CHECK: ucomiss
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; Greater-or-equal expressed through the inverse predicate: each comparison
; is `fcmp ult` (unordered or less than) with the branch targets swapped, so
; @func is called only when the comparison is false.
define internal void @fcmpGe(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp ult float %a, %b
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %entry, %if.then
%cmp1 = fcmp ult double %c, %d
br i1 %cmp1, label %if.end3, label %if.then2
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.end, %if.then2
ret void
}
; CHECK-LABEL: fcmpGe
; CHECK: ucomiss
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; Compares the two floats and then the two doubles with `fcmp olt`
; (ordered and less than); @func is called after each comparison that is
; true.
define internal void @fcmpLt(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp olt float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp olt double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpLt
; CHECK: ucomiss
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; Less-or-equal expressed through the inverse predicate: each comparison is
; `fcmp ugt` (unordered or greater than) with the branch targets swapped, so
; @func is called only when the comparison is false.
define internal void @fcmpLe(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp ugt float %a, %b
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %entry, %if.then
%cmp1 = fcmp ugt double %c, %d
br i1 %cmp1, label %if.end3, label %if.then2
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.end, %if.then2
ret void
}
; CHECK-LABEL: fcmpLe
; CHECK: ucomiss
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; Returns `fcmp false` on two floats (always false), zero-extended to i32.
define internal i32 @fcmpFalseFloat(float %a, float %b) {
entry:
%cmp = fcmp false float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpFalseFloat
; CHECK: mov {{.*}},0x0
; Returns `fcmp false` on two doubles (always false), zero-extended to i32.
define internal i32 @fcmpFalseDouble(double %a, double %b) {
entry:
%cmp = fcmp false double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpFalseDouble
; CHECK: mov {{.*}},0x0
; Returns `fcmp oeq` (ordered and equal) on two floats, zero-extended to i32.
define internal i32 @fcmpOeqFloat(float %a, float %b) {
entry:
%cmp = fcmp oeq float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOeqFloat
; CHECK: ucomiss
; CHECK: jne
; CHECK: jp
; Returns `fcmp oeq` (ordered and equal) on two doubles, zero-extended to i32.
define internal i32 @fcmpOeqDouble(double %a, double %b) {
entry:
%cmp = fcmp oeq double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOeqDouble
; CHECK: ucomisd
; CHECK: jne
; CHECK: jp
; Returns `fcmp ogt` (ordered and greater than) on two floats, zero-extended
; to i32.
define internal i32 @fcmpOgtFloat(float %a, float %b) {
entry:
%cmp = fcmp ogt float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgtFloat
; CHECK: ucomiss
; CHECK: seta
; Returns `fcmp ogt` (ordered and greater than) on two doubles, zero-extended
; to i32.
define internal i32 @fcmpOgtDouble(double %a, double %b) {
entry:
%cmp = fcmp ogt double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgtDouble
; CHECK: ucomisd
; CHECK: seta
; Returns `fcmp oge` (ordered and greater than or equal) on two floats,
; zero-extended to i32.
define internal i32 @fcmpOgeFloat(float %a, float %b) {
entry:
%cmp = fcmp oge float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgeFloat
; CHECK: ucomiss
; CHECK: setae
; Returns `fcmp oge` (ordered and greater than or equal) on two doubles,
; zero-extended to i32.
define internal i32 @fcmpOgeDouble(double %a, double %b) {
entry:
%cmp = fcmp oge double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgeDouble
; CHECK: ucomisd
; CHECK: setae
; Returns `fcmp olt` (ordered and less than) on two floats, zero-extended to
; i32.
define internal i32 @fcmpOltFloat(float %a, float %b) {
entry:
%cmp = fcmp olt float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOltFloat
; CHECK: ucomiss
; CHECK: seta
; Returns `fcmp olt` (ordered and less than) on two doubles, zero-extended to
; i32.
define internal i32 @fcmpOltDouble(double %a, double %b) {
entry:
%cmp = fcmp olt double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOltDouble
; CHECK: ucomisd
; CHECK: seta
; Returns `fcmp ole` (ordered and less than or equal) on two floats,
; zero-extended to i32.
define internal i32 @fcmpOleFloat(float %a, float %b) {
entry:
%cmp = fcmp ole float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOleFloat
; CHECK: ucomiss
; CHECK: setae
; Returns `fcmp ole` (ordered and less than or equal) on two doubles,
; zero-extended to i32.
define internal i32 @fcmpOleDouble(double %a, double %b) {
entry:
%cmp = fcmp ole double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOleDouble
; CHECK: ucomisd
; CHECK: setae
; Returns `fcmp one` (ordered and not equal) on two floats, zero-extended to
; i32.
define internal i32 @fcmpOneFloat(float %a, float %b) {
entry:
%cmp = fcmp one float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOneFloat
; CHECK: ucomiss
; CHECK: setne
; Returns `fcmp one` (ordered and not equal) on two doubles, zero-extended to
; i32.
define internal i32 @fcmpOneDouble(double %a, double %b) {
entry:
%cmp = fcmp one double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOneDouble
; CHECK: ucomisd
; CHECK: setne
; Returns `fcmp ord` (neither operand is a NaN) on two floats, zero-extended
; to i32.
define internal i32 @fcmpOrdFloat(float %a, float %b) {
entry:
%cmp = fcmp ord float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOrdFloat
; CHECK: ucomiss
; CHECK: setnp
; Returns `fcmp ord` (neither operand is a NaN) on two doubles, zero-extended
; to i32.
define internal i32 @fcmpOrdDouble(double %a, double %b) {
entry:
%cmp = fcmp ord double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOrdDouble
; CHECK: ucomisd
; CHECK: setnp
; Returns `fcmp ueq` (unordered or equal) on two floats, zero-extended to i32.
define internal i32 @fcmpUeqFloat(float %a, float %b) {
entry:
%cmp = fcmp ueq float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUeqFloat
; CHECK: ucomiss
; CHECK: sete
; Returns `fcmp ueq` (unordered or equal) on two doubles, zero-extended to
; i32.
define internal i32 @fcmpUeqDouble(double %a, double %b) {
entry:
%cmp = fcmp ueq double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUeqDouble
; CHECK: ucomisd
; CHECK: sete
; Returns `fcmp ugt` (unordered or greater than) on two floats, zero-extended
; to i32.
define internal i32 @fcmpUgtFloat(float %a, float %b) {
entry:
%cmp = fcmp ugt float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgtFloat
; CHECK: ucomiss
; CHECK: setb
; Returns `fcmp ugt` (unordered or greater than) on two doubles,
; zero-extended to i32.
define internal i32 @fcmpUgtDouble(double %a, double %b) {
entry:
%cmp = fcmp ugt double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgtDouble
; CHECK: ucomisd
; CHECK: setb
; Returns `fcmp uge` (unordered or greater than or equal) on two floats,
; zero-extended to i32.
define internal i32 @fcmpUgeFloat(float %a, float %b) {
entry:
%cmp = fcmp uge float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgeFloat
; CHECK: ucomiss
; CHECK: setbe
; Returns `fcmp uge` (unordered or greater than or equal) on two doubles,
; zero-extended to i32.
define internal i32 @fcmpUgeDouble(double %a, double %b) {
entry:
%cmp = fcmp uge double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgeDouble
; CHECK: ucomisd
; CHECK: setbe
; Returns `fcmp ult` (unordered or less than) on two floats, zero-extended to
; i32.
define internal i32 @fcmpUltFloat(float %a, float %b) {
entry:
%cmp = fcmp ult float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUltFloat
; CHECK: ucomiss
; CHECK: setb
; Returns `fcmp ult` (unordered or less than) on two doubles, zero-extended
; to i32.
define internal i32 @fcmpUltDouble(double %a, double %b) {
entry:
%cmp = fcmp ult double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUltDouble
; CHECK: ucomisd
; CHECK: setb
; Returns `fcmp ule` (unordered or less than or equal) on two floats,
; zero-extended to i32.
define internal i32 @fcmpUleFloat(float %a, float %b) {
entry:
%cmp = fcmp ule float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUleFloat
; CHECK: ucomiss
; CHECK: setbe
; Returns `fcmp ule` (unordered or less than or equal) on two doubles,
; zero-extended to i32.
define internal i32 @fcmpUleDouble(double %a, double %b) {
entry:
%cmp = fcmp ule double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUleDouble
; CHECK: ucomisd
; CHECK: setbe
; Returns `fcmp une` (unordered or not equal) on two floats, zero-extended to
; i32.
define internal i32 @fcmpUneFloat(float %a, float %b) {
entry:
%cmp = fcmp une float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUneFloat
; CHECK: ucomiss
; CHECK: jne
; CHECK: jp
; Returns `fcmp une` (unordered or not equal) on two doubles, zero-extended
; to i32.
define internal i32 @fcmpUneDouble(double %a, double %b) {
entry:
%cmp = fcmp une double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUneDouble
; CHECK: ucomisd
; CHECK: jne
; CHECK: jp
; Returns `fcmp uno` (either operand is a NaN) on two floats, zero-extended
; to i32.
define internal i32 @fcmpUnoFloat(float %a, float %b) {
entry:
%cmp = fcmp uno float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUnoFloat
; CHECK: ucomiss
; CHECK: setp
; Returns `fcmp uno` (either operand is a NaN) on two doubles, zero-extended
; to i32.
define internal i32 @fcmpUnoDouble(double %a, double %b) {
entry:
%cmp = fcmp uno double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUnoDouble
; CHECK: ucomisd
; CHECK: setp
; Returns `fcmp true` on two floats (always true), zero-extended to i32.
define internal i32 @fcmpTrueFloat(float %a, float %b) {
entry:
%cmp = fcmp true float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpTrueFloat
; CHECK: mov {{.*}},0x1
; Returns `fcmp true` on two doubles (always true), zero-extended to i32.
define internal i32 @fcmpTrueDouble(double %a, double %b) {
entry:
%cmp = fcmp true double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpTrueDouble
; CHECK: mov {{.*}},0x1
; Selects %a when `fcmp olt %a, %b` is true and %b otherwise, and returns
; the selected float.
define internal float @selectFloatVarVar(float %a, float %b) {
entry:
%cmp = fcmp olt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
; CHECK-LABEL: selectFloatVarVar
; CHECK: ucomiss
; CHECK: seta
; CHECK: fld
; Selects %a when `fcmp olt %a, %b` is true and %b otherwise, and returns
; the selected double.
define internal double @selectDoubleVarVar(double %a, double %b) {
entry:
%cmp = fcmp olt double %a, %b
%cond = select i1 %cmp, double %a, double %b
ret double %cond
}
; CHECK-LABEL: selectDoubleVarVar
; CHECK: ucomisd
; CHECK: seta
; CHECK: fld
; This tries to be a comprehensive test of f32 and f64 convert operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s
; Narrows a double to float with fptrunc.
define internal float @fptrunc(double %a) {
entry:
%conv = fptrunc double %a to float
ret float %conv
}
; CHECK-LABEL: fptrunc
; CHECK: cvtsd2ss
; CHECK: fld
; Widens a float to double with fpext.
define internal double @fpext(float %a) {
entry:
%conv = fpext float %a to double
ret double %conv
}
; CHECK-LABEL: fpext
; CHECK: cvtss2sd
; CHECK: fld
; Converts a double to a signed i64 with fptosi.
define internal i64 @doubleToSigned64(double %a) {
entry:
%conv = fptosi double %a to i64
ret i64 %conv
}
; CHECK-LABEL: doubleToSigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f64_i64
; Converts a float to a signed i64 with fptosi.
define internal i64 @floatToSigned64(float %a) {
entry:
%conv = fptosi float %a to i64
ret i64 %conv
}
; CHECK-LABEL: floatToSigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f32_i64
; Converts a double to an unsigned i64 with fptoui.
define internal i64 @doubleToUnsigned64(double %a) {
entry:
%conv = fptoui double %a to i64
ret i64 %conv
}
; CHECK-LABEL: doubleToUnsigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i64
; Converts a float to an unsigned i64 with fptoui.
define internal i64 @floatToUnsigned64(float %a) {
entry:
%conv = fptoui float %a to i64
ret i64 %conv
}
; CHECK-LABEL: floatToUnsigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i64
; Converts a double to a signed i32 with fptosi.
define internal i32 @doubleToSigned32(double %a) {
entry:
%conv = fptosi double %a to i32
ret i32 %conv
}
; CHECK-LABEL: doubleToSigned32
; CHECK: cvttsd2si
; Converts the double constant 867.5309 to a signed i32 with fptosi.
define internal i32 @doubleToSigned32Const() {
entry:
%conv = fptosi double 867.5309 to i32
ret i32 %conv
}
; CHECK-LABEL: doubleToSigned32Const
; CHECK: cvttsd2si
; Converts a float to a signed i32 with fptosi.
define internal i32 @floatToSigned32(float %a) {
entry:
%conv = fptosi float %a to i32
ret i32 %conv
}
; CHECK-LABEL: floatToSigned32
; CHECK: cvttss2si
; Converts a double to an unsigned i32 with fptoui.
define internal i32 @doubleToUnsigned32(double %a) {
entry:
%conv = fptoui double %a to i32
ret i32 %conv
}
; CHECK-LABEL: doubleToUnsigned32
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i32
; Converts a float to an unsigned i32 with fptoui.
define internal i32 @floatToUnsigned32(float %a) {
entry:
%conv = fptoui float %a to i32
ret i32 %conv
}
; CHECK-LABEL: floatToUnsigned32
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i32
; Converts a double to i16 with fptosi, then sign-extends the result to i32
; for the return.
define internal i32 @doubleToSigned16(double %a) {
entry:
%conv = fptosi double %a to i16
%conv.ret_ext = sext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToSigned16
; CHECK: cvttsd2si
; CHECK: movsx
; Converts a float to i16 with fptosi, then sign-extends the result to i32
; for the return.
define internal i32 @floatToSigned16(float %a) {
entry:
%conv = fptosi float %a to i16
%conv.ret_ext = sext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToSigned16
; CHECK: cvttss2si
; CHECK: movsx
; Converts a double to i16 with fptoui, then zero-extends the result to i32
; for the return.
define internal i32 @doubleToUnsigned16(double %a) {
entry:
%conv = fptoui double %a to i16
%conv.ret_ext = zext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToUnsigned16
; CHECK: cvttsd2si
; CHECK: movzx
; Converts a float to i16 with fptoui, then zero-extends the result to i32
; for the return.
define internal i32 @floatToUnsigned16(float %a) {
entry:
%conv = fptoui float %a to i16
%conv.ret_ext = zext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToUnsigned16
; CHECK: cvttss2si
; CHECK: movzx
; Converts a double to i8 with fptosi, then sign-extends the result to i32
; for the return.
define internal i32 @doubleToSigned8(double %a) {
entry:
%conv = fptosi double %a to i8
%conv.ret_ext = sext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToSigned8
; CHECK: cvttsd2si
; CHECK: movsx
; Converts a float to i8 with fptosi, then sign-extends the result to i32
; for the return.
define internal i32 @floatToSigned8(float %a) {
entry:
%conv = fptosi float %a to i8
%conv.ret_ext = sext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToSigned8
; CHECK: cvttss2si
; CHECK: movsx
; Converts a double to i8 with fptoui, then zero-extends the result to i32
; for the return.
define internal i32 @doubleToUnsigned8(double %a) {
entry:
%conv = fptoui double %a to i8
%conv.ret_ext = zext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToUnsigned8
; CHECK: cvttsd2si
; CHECK: movzx
; Converts a float to i8 with fptoui, then zero-extends the result to i32
; for the return.
define internal i32 @floatToUnsigned8(float %a) {
entry:
%conv = fptoui float %a to i8
%conv.ret_ext = zext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToUnsigned8
; CHECK: cvttss2si
; CHECK: movzx
; Converts a double to i1 with fptoui, then zero-extends the result to i32
; for the return.
define internal i32 @doubleToUnsigned1(double %a) {
entry:
%tobool = fptoui double %a to i1
%tobool.ret_ext = zext i1 %tobool to i32
ret i32 %tobool.ret_ext
}
; CHECK-LABEL: doubleToUnsigned1
; CHECK: cvttsd2si
; CHECK: and eax,0x1
; Converts a float to i1 with fptoui, then zero-extends the result to i32
; for the return.
define internal i32 @floatToUnsigned1(float %a) {
entry:
%tobool = fptoui float %a to i1
%tobool.ret_ext = zext i1 %tobool to i32
ret i32 %tobool.ret_ext
}
; CHECK-LABEL: floatToUnsigned1
; CHECK: cvttss2si
; CHECK: and eax,0x1
; Converts a signed i64 to double with sitofp.
define internal double @signed64ToDouble(i64 %a) {
entry:
%conv = sitofp i64 %a to double
ret double %conv
}
; CHECK-LABEL: signed64ToDouble
; CHECK: call {{.*}} R_{{.*}} __Sz_sitofp_i64_f64
; CHECK: fstp QWORD
; Converts a signed i64 to float with sitofp.
define internal float @signed64ToFloat(i64 %a) {
entry:
%conv = sitofp i64 %a to float
ret float %conv
}
; CHECK-LABEL: signed64ToFloat
; CHECK: call {{.*}} R_{{.*}} __Sz_sitofp_i64_f32
; CHECK: fstp DWORD
; Converts an unsigned i64 to double with uitofp.
define internal double @unsigned64ToDouble(i64 %a) {
entry:
%conv = uitofp i64 %a to double
ret double %conv
}
; CHECK-LABEL: unsigned64ToDouble
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f64
; CHECK: fstp
; Converts an unsigned i64 to float with uitofp.
define internal float @unsigned64ToFloat(i64 %a) {
entry:
%conv = uitofp i64 %a to float
ret float %conv
}
; CHECK-LABEL: unsigned64ToFloat
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f32
; CHECK: fstp
; Converts the i64 constant 12345678901234 (0xb3a73ce2ff2) to double with
; uitofp. The checks below expect the constant to be stored as two 32-bit
; halves before the helper call.
define internal double @unsigned64ToDoubleConst() {
entry:
%conv = uitofp i64 12345678901234 to double
ret double %conv
}
; CHECK-LABEL: unsigned64ToDoubleConst
; CHECK: mov DWORD PTR [esp+0x4],0xb3a
; CHECK: mov DWORD PTR [esp],0x73ce2ff2
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f64
; CHECK: fstp
; Converts a signed i32 to double with sitofp.
define internal double @signed32ToDouble(i32 %a) {
entry:
%conv = sitofp i32 %a to double
ret double %conv
}
; CHECK-LABEL: signed32ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
; Converts the i32 constant 123 to double with sitofp.
define internal double @signed32ToDoubleConst() {
entry:
%conv = sitofp i32 123 to double
ret double %conv
}
; CHECK-LABEL: signed32ToDoubleConst
; CHECK: cvtsi2sd {{.*[^1]}}
; CHECK: fld
; Converts a signed i32 to float with sitofp.
define internal float @signed32ToFloat(i32 %a) {
entry:
%conv = sitofp i32 %a to float
ret float %conv
}
; CHECK-LABEL: signed32ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
; Converts an unsigned i32 to double with uitofp.
define internal double @unsigned32ToDouble(i32 %a) {
entry:
%conv = uitofp i32 %a to double
ret double %conv
}
; CHECK-LABEL: unsigned32ToDouble
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i32_f64
; CHECK: fstp QWORD
; Converts an unsigned i32 to float with uitofp.
define internal float @unsigned32ToFloat(i32 %a) {
entry:
%conv = uitofp i32 %a to float
ret float %conv
}
; CHECK-LABEL: unsigned32ToFloat
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i32_f32
; CHECK: fstp DWORD
; Truncates the i32 argument to i16, then converts it to double with sitofp.
define internal double @signed16ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = sitofp i16 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: signed16ToDouble
; CHECK: cvtsi2sd
; CHECK: fld QWORD
; Truncates the i32 argument to i16, then converts it to float with sitofp.
define internal float @signed16ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = sitofp i16 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: signed16ToFloat
; CHECK: cvtsi2ss
; CHECK: fld DWORD
; Truncates the i32 argument to i16, then converts it to double with uitofp.
define internal double @unsigned16ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = uitofp i16 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: unsigned16ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
; Converts the i16 constant 12345 to double with uitofp.
define internal double @unsigned16ToDoubleConst() {
entry:
%conv = uitofp i16 12345 to double
ret double %conv
}
; CHECK-LABEL: unsigned16ToDoubleConst
; CHECK: cvtsi2sd
; CHECK: fld
; Truncates the i32 argument to i16, then converts it to float with uitofp.
define internal float @unsigned16ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = uitofp i16 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: unsigned16ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
; Truncates the i32 argument to i8, then converts it to double with sitofp.
define internal double @signed8ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = sitofp i8 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: signed8ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
; Truncates the i32 argument to i8, then converts it to float with sitofp.
define internal float @signed8ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = sitofp i8 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: signed8ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
; Truncates the i32 argument to i8, then converts it to double with uitofp.
define internal double @unsigned8ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = uitofp i8 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: unsigned8ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
; Truncates the i32 argument to i8, then converts it to float with uitofp.
define internal float @unsigned8ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = uitofp i8 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: unsigned8ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
; Truncates the i32 argument to i1, then converts it to double with uitofp.
define internal double @unsigned1ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i1
%conv = uitofp i1 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: unsigned1ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
; Truncates the i32 argument to i1, then converts it to float with uitofp.
define internal float @unsigned1ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i1
%conv = uitofp i1 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: unsigned1ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
; Reinterprets the bits of an i32 as a float with bitcast.
define internal float @int32BitcastToFloat(i32 %a) {
entry:
%conv = bitcast i32 %a to float
ret float %conv
}
; CHECK-LABEL: int32BitcastToFloat
; CHECK: mov
; Reinterprets the bits of the i32 constant 8675309 as a float with bitcast.
define internal float @int32BitcastToFloatConst() {
entry:
%conv = bitcast i32 8675309 to float
ret float %conv
}
; CHECK-LABEL: int32BitcastToFloatConst
; CHECK: mov
; Reinterprets the bits of an i64 as a double with bitcast.
define internal double @int64BitcastToDouble(i64 %a) {
entry:
%conv = bitcast i64 %a to double
ret double %conv
}
; CHECK-LABEL: int64BitcastToDouble
; CHECK: mov
; Reinterprets the bits of the i64 constant 9035768 as a double with bitcast.
define internal double @int64BitcastToDoubleConst() {
entry:
%conv = bitcast i64 9035768 to double
ret double %conv
}
; CHECK-LABEL: int64BitcastToDoubleConst
; CHECK: mov
; This tries to be a comprehensive test of f32 and f64 load/store operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s
; Converts the i32 argument to a float pointer and returns the float loaded
; through it (align 4).
define internal float @loadFloat(i32 %a) {
entry:
%__1 = inttoptr i32 %a to float*
%v0 = load float, float* %__1, align 4
ret float %v0
}
; CHECK-LABEL: loadFloat
; CHECK: movss
; CHECK: fld
; Converts the i32 argument to a double pointer and returns the double
; loaded through it (align 8).
define internal double @loadDouble(i32 %a) {
entry:
%__1 = inttoptr i32 %a to double*
%v0 = load double, double* %__1, align 8
ret double %v0
}
; CHECK-LABEL: loadDouble
; CHECK: movsd
; CHECK: fld
; Converts the i32 argument to a float pointer and stores %value through it
; (align 4).
define internal void @storeFloat(i32 %a, float %value) {
entry:
%__2 = inttoptr i32 %a to float*
store float %value, float* %__2, align 4
ret void
}
; CHECK-LABEL: storeFloat
; CHECK: movss
; CHECK: movss
; Converts the i32 argument to a double pointer and stores %value through it
; (align 8).
define internal void @storeDouble(i32 %a, double %value) {
entry:
%__2 = inttoptr i32 %a to double*
store double %value, double* %__2, align 8
ret void
}
; CHECK-LABEL: storeDouble
; CHECK: movsd
; CHECK: movsd
; Converts the i32 argument to a float pointer and stores a float constant
; (written in hexadecimal notation) through it (align 4).
define internal void @storeFloatConst(i32 %a) {
entry:
%a.asptr = inttoptr i32 %a to float*
store float 0x3FF3AE1480000000, float* %a.asptr, align 4
ret void
}
; CHECK-LABEL: storeFloatConst
; CHECK: movss
; CHECK: movss
; Converts the i32 argument to a double pointer and stores the double
; constant 1.23 through it (align 8).
define internal void @storeDoubleConst(i32 %a) {
entry:
%a.asptr = inttoptr i32 %a to double*
store double 1.230000e+00, double* %a.asptr, align 8
ret void
}
; CHECK-LABEL: storeDoubleConst
; CHECK: movsd
; CHECK: movsd
; This tries to be a comprehensive test of f32 and f64 operations.
; The CHECK lines are only checking for basic instruction patterns
; that should be present regardless of the optimization level, so
; there are no special OPTM1 match lines.
; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s
@__init_array_start = internal constant [0 x i8] zeroinitializer, align 4
@__fini_array_start = internal constant [0 x i8] zeroinitializer, align 4
@__tls_template_start = internal constant [0 x i8] zeroinitializer, align 8
@__tls_template_alignment = internal constant [4 x i8] c"\01\00\00\00", align 4
; Returns the i32 argument that sits between two double arguments, to verify
; where it is read from. The expected load is from [esp+0xc], which is
; consistent with a 4-byte return address followed by the 8-byte double %a
; (assumes the x86-32 stack-based calling convention; confirm).
define internal i32 @doubleArgs(double %a, i32 %b, double %c) {
entry:
ret i32 %b
}
; CHECK-LABEL: doubleArgs
; CHECK: mov eax,DWORD PTR [esp+0xc]
; CHECK-NEXT: ret
; Returns the i32 argument that sits between two float arguments. The expected
; load is from [esp+0x8], which is consistent with a 4-byte return address
; followed by the 4-byte float %a (assumes the x86-32 stack-based calling
; convention; confirm).
define internal i32 @floatArgs(float %a, i32 %b, float %c) {
entry:
ret i32 %b
}
; CHECK-LABEL: floatArgs
; CHECK: mov eax,DWORD PTR [esp+0x8]
; CHECK-NEXT: ret
; Forwards its six FP arguments, pairwise, to three calls of
; @ignoreFpArgsNoInline, each with the integer constant 123 as the middle
; argument, and returns the sum of the three results. For each call the checks
; expect 0x7b (123) to be written to [esp+0x4] before the call, i.e. the
; integer goes in the outgoing slot right after the 4-byte float.
define internal i32 @passFpArgs(float %a, double %b, float %c, double %d, float %e, double %f) {
entry:
%call = call i32 @ignoreFpArgsNoInline(float %a, i32 123, double %b)
%call1 = call i32 @ignoreFpArgsNoInline(float %c, i32 123, double %d)
%call2 = call i32 @ignoreFpArgsNoInline(float %e, i32 123, double %f)
%add = add i32 %call1, %call
%add3 = add i32 %add, %call2
ret i32 %add3
}
; CHECK-LABEL: passFpArgs
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
declare i32 @ignoreFpArgsNoInline(float %x, i32 %y, double %z)
define internal i32 @passFpConstArg(float %a, double %b) {
entry:
%call = call i32 @ignoreFpArgsNoInline(float %a, i32 123, double 2.340000e+00)
ret i32 %call
}
; CHECK-LABEL: passFpConstArg
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
; Calls @ignoreFp32ArgsNoInline with a float variable, the integer 123 and the
; float constant 2.0. The checks expect 0x7b (123) stored to [esp+0x4] and a
; movss store to [esp+0x8] for the constant float argument before the call.
define internal i32 @passFp32ConstArg(float %a) {
entry:
%call = call i32 @ignoreFp32ArgsNoInline(float %a, i32 123, float 2.0)
ret i32 %call
}
; CHECK-LABEL: passFp32ConstArg
; CHECK: mov DWORD PTR [esp+0x4],0x7b
; CHECK: movss DWORD PTR [esp+0x8]
; CHECK: call {{.*}} R_{{.*}} ignoreFp32ArgsNoInline
declare i32 @ignoreFp32ArgsNoInline(float %x, i32 %y, float %z)
define internal float @returnFloatArg(float %a) {
entry:
ret float %a
}
; CHECK-LABEL: returnFloatArg
; CHECK: fld DWORD PTR [esp
define internal double @returnDoubleArg(double %a) {
entry:
ret double %a
}
; CHECK-LABEL: returnDoubleArg
; CHECK: fld QWORD PTR [esp
define internal float @returnFloatConst() {
entry:
ret float 0x3FF3AE1480000000
}
; CHECK-LABEL: returnFloatConst
; CHECK: fld
define internal double @returnDoubleConst() {
entry:
ret double 1.230000e+00
}
; CHECK-LABEL: returnDoubleConst
; CHECK: fld
define internal float @addFloat(float %a, float %b) {
entry:
%add = fadd float %a, %b
ret float %add
}
; CHECK-LABEL: addFloat
; CHECK: addss
; CHECK: fld
define internal double @addDouble(double %a, double %b) {
entry:
%add = fadd double %a, %b
ret double %add
}
; CHECK-LABEL: addDouble
; CHECK: addsd
; CHECK: fld
define internal float @subFloat(float %a, float %b) {
entry:
%sub = fsub float %a, %b
ret float %sub
}
; CHECK-LABEL: subFloat
; CHECK: subss
; CHECK: fld
define internal double @subDouble(double %a, double %b) {
entry:
%sub = fsub double %a, %b
ret double %sub
}
; CHECK-LABEL: subDouble
; CHECK: subsd
; CHECK: fld
define internal float @mulFloat(float %a, float %b) {
entry:
%mul = fmul float %a, %b
ret float %mul
}
; CHECK-LABEL: mulFloat
; CHECK: mulss
; CHECK: fld
define internal double @mulDouble(double %a, double %b) {
entry:
%mul = fmul double %a, %b
ret double %mul
}
; CHECK-LABEL: mulDouble
; CHECK: mulsd
; CHECK: fld
define internal float @divFloat(float %a, float %b) {
entry:
%div = fdiv float %a, %b
ret float %div
}
; CHECK-LABEL: divFloat
; CHECK: divss
; CHECK: fld
define internal double @divDouble(double %a, double %b) {
entry:
%div = fdiv double %a, %b
ret double %div
}
; CHECK-LABEL: divDouble
; CHECK: divsd
; CHECK: fld
; Float remainder. Unlike the add/sub/mul/div tests, no single instruction is
; expected here: the checks require a call relocated against fmodf.
define internal float @remFloat(float %a, float %b) {
entry:
%div = frem float %a, %b
ret float %div
}
; CHECK-LABEL: remFloat
; CHECK: call {{.*}} R_{{.*}} fmodf
; Double remainder. The checks require a call relocated against fmod rather
; than an inline instruction sequence.
define internal double @remDouble(double %a, double %b) {
entry:
%div = frem double %a, %b
ret double %div
}
; CHECK-LABEL: remDouble
; CHECK: call {{.*}} R_{{.*}} fmod
define internal float @fptrunc(double %a) {
entry:
%conv = fptrunc double %a to float
ret float %conv
}
; CHECK-LABEL: fptrunc
; CHECK: cvtsd2ss
; CHECK: fld
define internal double @fpext(float %a) {
entry:
%conv = fpext float %a to double
ret double %conv
}
; CHECK-LABEL: fpext
; CHECK: cvtss2sd
; CHECK: fld
define internal i64 @doubleToSigned64(double %a) {
entry:
%conv = fptosi double %a to i64
ret i64 %conv
}
; CHECK-LABEL: doubleToSigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f64_i64
define internal i64 @floatToSigned64(float %a) {
entry:
%conv = fptosi float %a to i64
ret i64 %conv
}
; CHECK-LABEL: floatToSigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptosi_f32_i64
define internal i64 @doubleToUnsigned64(double %a) {
entry:
%conv = fptoui double %a to i64
ret i64 %conv
}
; CHECK-LABEL: doubleToUnsigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i64
define internal i64 @floatToUnsigned64(float %a) {
entry:
%conv = fptoui float %a to i64
ret i64 %conv
}
; CHECK-LABEL: floatToUnsigned64
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i64
define internal i32 @doubleToSigned32(double %a) {
entry:
%conv = fptosi double %a to i32
ret i32 %conv
}
; CHECK-LABEL: doubleToSigned32
; CHECK: cvttsd2si
define internal i32 @doubleToSigned32Const() {
entry:
%conv = fptosi double 867.5309 to i32
ret i32 %conv
}
; CHECK-LABEL: doubleToSigned32Const
; CHECK: cvttsd2si
define internal i32 @floatToSigned32(float %a) {
entry:
%conv = fptosi float %a to i32
ret i32 %conv
}
; CHECK-LABEL: floatToSigned32
; CHECK: cvttss2si
define internal i32 @doubleToUnsigned32(double %a) {
entry:
%conv = fptoui double %a to i32
ret i32 %conv
}
; CHECK-LABEL: doubleToUnsigned32
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f64_i32
define internal i32 @floatToUnsigned32(float %a) {
entry:
%conv = fptoui float %a to i32
ret i32 %conv
}
; CHECK-LABEL: floatToUnsigned32
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_f32_i32
define internal i32 @doubleToSigned16(double %a) {
entry:
%conv = fptosi double %a to i16
%conv.ret_ext = sext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToSigned16
; CHECK: cvttsd2si
; CHECK: movsx
define internal i32 @floatToSigned16(float %a) {
entry:
%conv = fptosi float %a to i16
%conv.ret_ext = sext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToSigned16
; CHECK: cvttss2si
; CHECK: movsx
define internal i32 @doubleToUnsigned16(double %a) {
entry:
%conv = fptoui double %a to i16
%conv.ret_ext = zext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToUnsigned16
; CHECK: cvttsd2si
; CHECK: movzx
define internal i32 @floatToUnsigned16(float %a) {
entry:
%conv = fptoui float %a to i16
%conv.ret_ext = zext i16 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToUnsigned16
; CHECK: cvttss2si
; CHECK: movzx
define internal i32 @doubleToSigned8(double %a) {
entry:
%conv = fptosi double %a to i8
%conv.ret_ext = sext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToSigned8
; CHECK: cvttsd2si
; CHECK: movsx
define internal i32 @floatToSigned8(float %a) {
entry:
%conv = fptosi float %a to i8
%conv.ret_ext = sext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToSigned8
; CHECK: cvttss2si
; CHECK: movsx
define internal i32 @doubleToUnsigned8(double %a) {
entry:
%conv = fptoui double %a to i8
%conv.ret_ext = zext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: doubleToUnsigned8
; CHECK: cvttsd2si
; CHECK: movzx
define internal i32 @floatToUnsigned8(float %a) {
entry:
%conv = fptoui float %a to i8
%conv.ret_ext = zext i8 %conv to i32
ret i32 %conv.ret_ext
}
; CHECK-LABEL: floatToUnsigned8
; CHECK: cvttss2si
; CHECK: movzx
; Converts a double to i1 with fptoui and zero-extends the result to i32 for
; the return value. The checks expect a cvttsd2si for the conversion and an
; `and eax,0x1` that keeps only the low bit.
define internal i32 @doubleToUnsigned1(double %a) {
entry:
%tobool = fptoui double %a to i1
%tobool.ret_ext = zext i1 %tobool to i32
ret i32 %tobool.ret_ext
}
; CHECK-LABEL: doubleToUnsigned1
; CHECK: cvttsd2si
; CHECK: and eax,0x1
; Converts a float to i1 with fptoui and zero-extends the result to i32 for
; the return value. The checks expect a cvttss2si for the conversion and an
; `and eax,0x1` that keeps only the low bit.
define internal i32 @floatToUnsigned1(float %a) {
entry:
%tobool = fptoui float %a to i1
%tobool.ret_ext = zext i1 %tobool to i32
ret i32 %tobool.ret_ext
}
; CHECK-LABEL: floatToUnsigned1
; CHECK: cvttss2si
; CHECK: and eax,0x1
define internal double @signed64ToDouble(i64 %a) {
entry:
%conv = sitofp i64 %a to double
ret double %conv
}
; CHECK-LABEL: signed64ToDouble
; CHECK: call {{.*}} R_{{.*}} __Sz_sitofp_i64_f64
; CHECK: fstp QWORD
define internal float @signed64ToFloat(i64 %a) {
entry:
%conv = sitofp i64 %a to float
ret float %conv
}
; CHECK-LABEL: signed64ToFloat
; CHECK: call {{.*}} R_{{.*}} __Sz_sitofp_i64_f32
; CHECK: fstp DWORD
define internal double @unsigned64ToDouble(i64 %a) {
entry:
%conv = uitofp i64 %a to double
ret double %conv
}
; CHECK-LABEL: unsigned64ToDouble
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f64
; CHECK: fstp
define internal float @unsigned64ToFloat(i64 %a) {
entry:
%conv = uitofp i64 %a to float
ret float %conv
}
; CHECK-LABEL: unsigned64ToFloat
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f32
; CHECK: fstp
; Converts the constant 12345678901234 (0x00000B3A73CE2FF2) from unsigned i64
; to double. The conversion is expected to go through the
; __Sz_uitofp_i64_f64 helper, with the constant passed as two 32-bit halves:
; the high word 0xb3a at [esp+0x4] and the low word 0x73ce2ff2 at [esp].
; The label below names this function exactly, so the checks are scoped to it
; rather than relying on a prefix match with the non-constant variant.
define internal double @unsigned64ToDoubleConst() {
entry:
%conv = uitofp i64 12345678901234 to double
ret double %conv
}
; CHECK-LABEL: unsigned64ToDoubleConst
; CHECK: mov DWORD PTR [esp+0x4],0xb3a
; CHECK: mov DWORD PTR [esp],0x73ce2ff2
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i64_f64
; CHECK: fstp
define internal double @signed32ToDouble(i32 %a) {
entry:
%conv = sitofp i32 %a to double
ret double %conv
}
; CHECK-LABEL: signed32ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
; Converts the constant 123 from signed i32 to double. The cvtsi2sd pattern
; additionally requires that the rest of the matched text end in a character
; other than '1'.
; NOTE(review): the purpose of the trailing [^1] is not evident from this
; file; confirm what operand form it is meant to exclude.
define internal double @signed32ToDoubleConst() {
entry:
%conv = sitofp i32 123 to double
ret double %conv
}
; CHECK-LABEL: signed32ToDoubleConst
; CHECK: cvtsi2sd {{.*[^1]}}
; CHECK: fld
define internal float @signed32ToFloat(i32 %a) {
entry:
%conv = sitofp i32 %a to float
ret float %conv
}
; CHECK-LABEL: signed32ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
define internal double @unsigned32ToDouble(i32 %a) {
entry:
%conv = uitofp i32 %a to double
ret double %conv
}
; CHECK-LABEL: unsigned32ToDouble
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i32_f64
; CHECK: fstp QWORD
define internal float @unsigned32ToFloat(i32 %a) {
entry:
%conv = uitofp i32 %a to float
ret float %conv
}
; CHECK-LABEL: unsigned32ToFloat
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_i32_f32
; CHECK: fstp DWORD
define internal double @signed16ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = sitofp i16 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: signed16ToDouble
; CHECK: cvtsi2sd
; CHECK: fld QWORD
define internal float @signed16ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = sitofp i16 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: signed16ToFloat
; CHECK: cvtsi2ss
; CHECK: fld DWORD
define internal double @unsigned16ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = uitofp i16 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: unsigned16ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
define internal double @unsigned16ToDoubleConst() {
entry:
%conv = uitofp i16 12345 to double
ret double %conv
}
; CHECK-LABEL: unsigned16ToDoubleConst
; CHECK: cvtsi2sd
; CHECK: fld
define internal float @unsigned16ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i16
%conv = uitofp i16 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: unsigned16ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
define internal double @signed8ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = sitofp i8 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: signed8ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
define internal float @signed8ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = sitofp i8 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: signed8ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
define internal double @unsigned8ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = uitofp i8 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: unsigned8ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
define internal float @unsigned8ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i8
%conv = uitofp i8 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: unsigned8ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
define internal double @unsigned1ToDouble(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i1
%conv = uitofp i1 %a.arg_trunc to double
ret double %conv
}
; CHECK-LABEL: unsigned1ToDouble
; CHECK: cvtsi2sd
; CHECK: fld
define internal float @unsigned1ToFloat(i32 %a) {
entry:
%a.arg_trunc = trunc i32 %a to i1
%conv = uitofp i1 %a.arg_trunc to float
ret float %conv
}
; CHECK-LABEL: unsigned1ToFloat
; CHECK: cvtsi2ss
; CHECK: fld
define internal float @int32BitcastToFloat(i32 %a) {
entry:
%conv = bitcast i32 %a to float
ret float %conv
}
; CHECK-LABEL: int32BitcastToFloat
; CHECK: mov
define internal float @int32BitcastToFloatConst() {
entry:
%conv = bitcast i32 8675309 to float
ret float %conv
}
; CHECK-LABEL: int32BitcastToFloatConst
; CHECK: mov
define internal double @int64BitcastToDouble(i64 %a) {
entry:
%conv = bitcast i64 %a to double
ret double %conv
}
; CHECK-LABEL: int64BitcastToDouble
; CHECK: mov
define internal double @int64BitcastToDoubleConst() {
entry:
%conv = bitcast i64 9035768 to double
ret double %conv
}
; CHECK-LABEL: int64BitcastToDoubleConst
; CHECK: mov
; Branches on an ordered-equal compare, first of two floats and then of two
; doubles, calling @func whenever the compare is true. For each compare the
; checks expect ucomiss/ucomisd followed by a jne and, immediately after it,
; a jp (presumably the parity jump handles the unordered/NaN case; confirm).
define internal void @fcmpEq(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp oeq float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp oeq double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpEq
; CHECK: ucomiss
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
declare void @func()
; Branches on an unordered-or-not-equal compare, first of two floats and then
; of two doubles, calling @func whenever the compare is true. The expected
; instruction pattern (ucomiss/ucomisd, jne, then jp) is the same as in the
; ordered-equal branch test; only the branch targets would differ, and the
; checks do not pin those.
define internal void @fcmpNe(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp une float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp une double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpNe
; CHECK: ucomiss
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: jne
; CHECK-NEXT: jp
; CHECK: call {{.*}} R_{{.*}} func
define internal void @fcmpGt(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp ogt float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp ogt double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpGt
; CHECK: ucomiss
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; "Greater or equal" expressed as a negated compare: the IR uses `fcmp ult`
; with the branch targets swapped, so @func is called when the unordered
; less-than compare is FALSE. This is why the checks expect setb (the ult
; condition) rather than a direct greater-or-equal condition.
define internal void @fcmpGe(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp ult float %a, %b
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %entry, %if.then
%cmp1 = fcmp ult double %c, %d
br i1 %cmp1, label %if.end3, label %if.then2
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.end, %if.then2
ret void
}
; CHECK-LABEL: fcmpGe
; CHECK: ucomiss
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
define internal void @fcmpLt(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp olt float %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = fcmp olt double %c, %d
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
ret void
}
; CHECK-LABEL: fcmpLt
; CHECK: ucomiss
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: seta
; CHECK: call {{.*}} R_{{.*}} func
; "Less or equal" expressed as a negated compare: the IR uses `fcmp ugt` with
; the branch targets swapped, so @func is called when the unordered
; greater-than compare is FALSE. The checks expect setb after each
; ucomiss/ucomisd (presumably the compare operands are swapped in the
; lowering so that ugt maps onto the "below" condition; confirm).
define internal void @fcmpLe(float %a, float %b, double %c, double %d) {
entry:
%cmp = fcmp ugt float %a, %b
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
call void @func()
br label %if.end
if.end: ; preds = %entry, %if.then
%cmp1 = fcmp ugt double %c, %d
br i1 %cmp1, label %if.end3, label %if.then2
if.then2: ; preds = %if.end
call void @func()
br label %if.end3
if.end3: ; preds = %if.end, %if.then2
ret void
}
; CHECK-LABEL: fcmpLe
; CHECK: ucomiss
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; CHECK: ucomisd
; CHECK: setb
; CHECK: call {{.*}} R_{{.*}} func
; The `false` predicate ignores its float operands and always yields 0. The
; check only requires a mov of the immediate 0x0; no compare instruction is
; expected.
define internal i32 @fcmpFalseFloat(float %a, float %b) {
entry:
%cmp = fcmp false float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpFalseFloat
; CHECK: mov {{.*}},0x0
define internal i32 @fcmpFalseDouble(double %a, double %b) {
entry:
%cmp = fcmp false double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpFalseDouble
; CHECK: mov {{.*}},0x0
define internal i32 @fcmpOeqFloat(float %a, float %b) {
entry:
%cmp = fcmp oeq float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOeqFloat
; CHECK: ucomiss
; CHECK: jne
; CHECK: jp
define internal i32 @fcmpOeqDouble(double %a, double %b) {
entry:
%cmp = fcmp oeq double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOeqDouble
; CHECK: ucomisd
; CHECK: jne
; CHECK: jp
define internal i32 @fcmpOgtFloat(float %a, float %b) {
entry:
%cmp = fcmp ogt float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgtFloat
; CHECK: ucomiss
; CHECK: seta
define internal i32 @fcmpOgtDouble(double %a, double %b) {
entry:
%cmp = fcmp ogt double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgtDouble
; CHECK: ucomisd
; CHECK: seta
define internal i32 @fcmpOgeFloat(float %a, float %b) {
entry:
%cmp = fcmp oge float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgeFloat
; CHECK: ucomiss
; CHECK: setae
define internal i32 @fcmpOgeDouble(double %a, double %b) {
entry:
%cmp = fcmp oge double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOgeDouble
; CHECK: ucomisd
; CHECK: setae
define internal i32 @fcmpOltFloat(float %a, float %b) {
entry:
%cmp = fcmp olt float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOltFloat
; CHECK: ucomiss
; CHECK: seta
define internal i32 @fcmpOltDouble(double %a, double %b) {
entry:
%cmp = fcmp olt double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOltDouble
; CHECK: ucomisd
; CHECK: seta
define internal i32 @fcmpOleFloat(float %a, float %b) {
entry:
%cmp = fcmp ole float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOleFloat
; CHECK: ucomiss
; CHECK: setae
define internal i32 @fcmpOleDouble(double %a, double %b) {
entry:
%cmp = fcmp ole double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOleDouble
; CHECK: ucomisd
; CHECK: setae
define internal i32 @fcmpOneFloat(float %a, float %b) {
entry:
%cmp = fcmp one float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOneFloat
; CHECK: ucomiss
; CHECK: setne
define internal i32 @fcmpOneDouble(double %a, double %b) {
entry:
%cmp = fcmp one double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOneDouble
; CHECK: ucomisd
; CHECK: setne
define internal i32 @fcmpOrdFloat(float %a, float %b) {
entry:
%cmp = fcmp ord float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOrdFloat
; CHECK: ucomiss
; CHECK: setnp
define internal i32 @fcmpOrdDouble(double %a, double %b) {
entry:
%cmp = fcmp ord double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpOrdDouble
; CHECK: ucomisd
; CHECK: setnp
define internal i32 @fcmpUeqFloat(float %a, float %b) {
entry:
%cmp = fcmp ueq float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUeqFloat
; CHECK: ucomiss
; CHECK: sete
define internal i32 @fcmpUeqDouble(double %a, double %b) {
entry:
%cmp = fcmp ueq double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUeqDouble
; CHECK: ucomisd
; CHECK: sete
define internal i32 @fcmpUgtFloat(float %a, float %b) {
entry:
%cmp = fcmp ugt float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgtFloat
; CHECK: ucomiss
; CHECK: setb
define internal i32 @fcmpUgtDouble(double %a, double %b) {
entry:
%cmp = fcmp ugt double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgtDouble
; CHECK: ucomisd
; CHECK: setb
define internal i32 @fcmpUgeFloat(float %a, float %b) {
entry:
%cmp = fcmp uge float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgeFloat
; CHECK: ucomiss
; CHECK: setbe
define internal i32 @fcmpUgeDouble(double %a, double %b) {
entry:
%cmp = fcmp uge double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUgeDouble
; CHECK: ucomisd
; CHECK: setbe
define internal i32 @fcmpUltFloat(float %a, float %b) {
entry:
%cmp = fcmp ult float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUltFloat
; CHECK: ucomiss
; CHECK: setb
define internal i32 @fcmpUltDouble(double %a, double %b) {
entry:
%cmp = fcmp ult double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUltDouble
; CHECK: ucomisd
; CHECK: setb
define internal i32 @fcmpUleFloat(float %a, float %b) {
entry:
%cmp = fcmp ule float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUleFloat
; CHECK: ucomiss
; CHECK: setbe
define internal i32 @fcmpUleDouble(double %a, double %b) {
entry:
%cmp = fcmp ule double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUleDouble
; CHECK: ucomisd
; CHECK: setbe
define internal i32 @fcmpUneFloat(float %a, float %b) {
entry:
%cmp = fcmp une float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUneFloat
; CHECK: ucomiss
; CHECK: jne
; CHECK: jp
define internal i32 @fcmpUneDouble(double %a, double %b) {
entry:
%cmp = fcmp une double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUneDouble
; CHECK: ucomisd
; CHECK: jne
; CHECK: jp
define internal i32 @fcmpUnoFloat(float %a, float %b) {
entry:
%cmp = fcmp uno float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUnoFloat
; CHECK: ucomiss
; CHECK: setp
define internal i32 @fcmpUnoDouble(double %a, double %b) {
entry:
%cmp = fcmp uno double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpUnoDouble
; CHECK: ucomisd
; CHECK: setp
; The `true` predicate ignores its float operands and always yields 1. The
; check only requires a mov of the immediate 0x1; no compare instruction is
; expected.
define internal i32 @fcmpTrueFloat(float %a, float %b) {
entry:
%cmp = fcmp true float %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpTrueFloat
; CHECK: mov {{.*}},0x1
define internal i32 @fcmpTrueDouble(double %a, double %b) {
entry:
%cmp = fcmp true double %a, %b
%cmp.ret_ext = zext i1 %cmp to i32
ret i32 %cmp.ret_ext
}
; CHECK-LABEL: fcmpTrueDouble
; CHECK: mov {{.*}},0x1
define internal float @loadFloat(i32 %a) {
entry:
%__1 = inttoptr i32 %a to float*
%v0 = load float, float* %__1, align 4
ret float %v0
}
; CHECK-LABEL: loadFloat
; CHECK: movss
; CHECK: fld
define internal double @loadDouble(i32 %a) {
entry:
%__1 = inttoptr i32 %a to double*
%v0 = load double, double* %__1, align 8
ret double %v0
}
; CHECK-LABEL: loadDouble
; CHECK: movsd
; CHECK: fld
define internal void @storeFloat(i32 %a, float %value) {
entry:
%__2 = inttoptr i32 %a to float*
store float %value, float* %__2, align 4
ret void
}
; CHECK-LABEL: storeFloat
; CHECK: movss
; CHECK: movss
define internal void @storeDouble(i32 %a, double %value) {
entry:
%__2 = inttoptr i32 %a to double*
store double %value, double* %__2, align 8
ret void
}
; CHECK-LABEL: storeDouble
; CHECK: movsd
; CHECK: movsd
define internal void @storeFloatConst(i32 %a) {
entry:
%a.asptr = inttoptr i32 %a to float*
store float 0x3FF3AE1480000000, float* %a.asptr, align 4
ret void
}
; CHECK-LABEL: storeFloatConst
; CHECK: movss
; CHECK: movss
define internal void @storeDoubleConst(i32 %a) {
entry:
%a.asptr = inttoptr i32 %a to double*
store double 1.230000e+00, double* %a.asptr, align 8
ret void
}
; CHECK-LABEL: storeDoubleConst
; CHECK: movsd
; CHECK: movsd
; Returns %a when %a is ordered-less-than %b, otherwise %b, via fcmp + select
; on two float variables. The checks expect a ucomiss, a seta, and an fld for
; the return value (presumably the compare operands are swapped in the
; lowering so that olt maps onto the "above" condition; confirm).
define internal float @selectFloatVarVar(float %a, float %b) {
entry:
%cmp = fcmp olt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
; CHECK-LABEL: selectFloatVarVar
; CHECK: ucomiss
; CHECK: seta
; CHECK: fld
; Returns %a when %a is ordered-less-than %b, otherwise %b, via fcmp + select
; on two double variables. The checks expect a ucomisd, a seta, and an fld
; for the return value (presumably the compare operands are swapped in the
; lowering so that olt maps onto the "above" condition; confirm).
define internal double @selectDoubleVarVar(double %a, double %b) {
entry:
%cmp = fcmp olt double %a, %b
%cond = select i1 %cmp, double %a, double %b
ret double %cond
}
; CHECK-LABEL: selectDoubleVarVar
; CHECK: ucomisd
; CHECK: seta
; CHECK: fld
...@@ -150,6 +150,11 @@ entry: ...@@ -150,6 +150,11 @@ entry:
; CHECK: sqrtss xmm{{.*}} ; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}} ; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}},DWORD PTR ; CHECK: sqrtss xmm{{.*}},DWORD PTR
; ARM32-LABEL: test_sqrt_float
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vadd.f32
define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) {
entry: entry:
...@@ -164,6 +169,9 @@ entry: ...@@ -164,6 +169,9 @@ entry:
; current folding only handles load + arithmetic op. The sqrt inst ; current folding only handles load + arithmetic op. The sqrt inst
; is considered an intrinsic call and not an arithmetic op. ; is considered an intrinsic call and not an arithmetic op.
; CHECK: sqrtss xmm{{.*}} ; CHECK: sqrtss xmm{{.*}}
; ARM32-LABEL: test_sqrt_float_mergeable_load
; ARM32: vldr s{{.*}}
; ARM32: vsqrt.f32
define double @test_sqrt_double(double %x, i32 %iptr) { define double @test_sqrt_double(double %x, i32 %iptr) {
entry: entry:
...@@ -177,6 +185,11 @@ entry: ...@@ -177,6 +185,11 @@ entry:
; CHECK: sqrtsd xmm{{.*}} ; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}} ; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}},QWORD PTR ; CHECK: sqrtsd xmm{{.*}},QWORD PTR
; ARM32-LABEL: test_sqrt_double
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vadd.f64
define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) {
entry: entry:
...@@ -188,6 +201,9 @@ entry: ...@@ -188,6 +201,9 @@ entry:
} }
; CHECK-LABEL: test_sqrt_double_mergeable_load ; CHECK-LABEL: test_sqrt_double_mergeable_load
; CHECK: sqrtsd xmm{{.*}} ; CHECK: sqrtsd xmm{{.*}}
; ARM32-LABEL: test_sqrt_double_mergeable_load
; ARM32: vldr d{{.*}}
; ARM32: vsqrt.f64
define float @test_sqrt_ignored(float %x, double %y) { define float @test_sqrt_ignored(float %x, double %y) {
entry: entry:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment