Commit eb13acc6 by John Porto

Subzero. ARM32. Show FP lowering some love.

After some time of being neglected, this CL improves FP lowering for ARM32. 1) It emits vpush {list}, and vpop {list} when possible. 2) It stops saving aliased Vfp registers multiple times (yes, sz used to save both D and S registers even when they aliased.) 3) Introduces Vmla (fp multiply and accumulate) and Vmls (multiply and subtract.) (1 + 2) minimally (but positively) affected SPEC. (3) caused a 2% geomean improvement. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1481133002 .
parent e293b5f4
...@@ -981,7 +981,7 @@ bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, ...@@ -981,7 +981,7 @@ bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
if (Printed) if (Printed)
Str << ","; Str << ",";
else else
Str << " \t# END="; Str << " \t@ END=";
Var->emit(Func); Var->emit(Func);
Printed = true; Printed = true;
} }
......
...@@ -185,6 +185,22 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst, ...@@ -185,6 +185,22 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
Inst->getSrc(1)->emit(Func); Inst->getSrc(1)->emit(Func);
} }
/// Writes the textual assembly for a four-address FP instruction to the emit
/// stream as "\t<Opcode><suffix>\t<Dest>, <Src1>, <Src2>", where <suffix> is
/// whatever getVecWidthString() returns for the destination's type.
///
/// The instruction must carry exactly three sources, and source 0 must be the
/// destination itself (both are asserted); source 0 is therefore not printed
/// a second time. Nothing is emitted when BuildDefs::dump() is false.
void InstARM32::emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
                               const Cfg *Func) {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  auto *Dest = Inst->getDest();
  assert(Inst->getSrcSize() == 3);
  assert(Inst->getSrc(0) == Dest);
  Str << "\t" << Opcode << getVecWidthString(Dest->getType()) << "\t";
  Dest->emit(Func);
  // Sources 1 and 2 are the remaining operands; source 0 aliases Dest.
  for (unsigned Idx = 1; Idx < 3; ++Idx) {
    Str << ", ";
    Inst->getSrc(Idx)->emit(Func);
  }
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst, void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) { const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -571,18 +587,43 @@ IceString InstARM32Label::getName(const Cfg *Func) const { ...@@ -571,18 +587,43 @@ IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number); return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
} }
namespace {
// Requirements for Push/Pop:
// 1) All the Variables have the same type;
// 2) All the variables have registers assigned to them.
void validatePushOrPopRegisterListOrDie(const VarList &RegList) {
Type PreviousTy = IceType_void;
for (Variable *Reg : RegList) {
if (PreviousTy != IceType_void && Reg->getType() != PreviousTy) {
llvm::report_fatal_error("Type mismatch when popping/pushing "
"registers.");
}
if (!Reg->hasReg()) {
llvm::report_fatal_error("Push/pop operand does not have a register "
"assigned to it.");
}
PreviousTy = Reg->getType();
}
}
} // end of anonymous namespace
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs. Also, a pop // Track modifications to Dests separately via FakeDefs. Also, a pop
// instruction affects the stack pointer and so it should not be allowed to // instruction affects the stack pointer and so it should not be allowed to
// be automatically dead-code eliminated. This is automatic since we leave // be automatically dead-code eliminated. This is automatic since we leave
// the Dest as nullptr. // the Dest as nullptr.
validatePushOrPopRegisterListOrDie(Dests);
} }
InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs) InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs)
: InstARM32(Func, InstARM32::Push, Srcs.size(), nullptr) { : InstARM32(Func, InstARM32::Push, Srcs.size(), nullptr) {
for (Variable *Source : Srcs) validatePushOrPopRegisterListOrDie(Srcs);
for (Variable *Source : Srcs) {
addSource(Source); addSource(Source);
}
} }
InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source) InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source)
...@@ -736,8 +777,10 @@ template <> const char *InstARM32Udiv::Opcode = "udiv"; ...@@ -736,8 +777,10 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP // FP
template <> const char *InstARM32Vadd::Opcode = "vadd"; template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv"; template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Veor::Opcode = "veor"; template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
...@@ -1216,51 +1259,74 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const { ...@@ -1216,51 +1259,74 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
namespace {
// Returns true when After's register number is exactly one greater than
// Before's. Both variables must already have a register (asserted).
bool isAssignedConsecutiveRegisters(Variable *Before, Variable *After) {
  assert(Before->hasReg());
  assert(After->hasReg());
  const auto ExpectedNext = Before->getRegNum() + 1;
  return ExpectedNext == After->getRegNum();
}
} // end of anonymous namespace
void InstARM32Pop::emit(const Cfg *Func) const { void InstARM32Pop::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
SizeT IntegerCount = 0;
for (const Operand *Op : Dests) { const SizeT DestSize = Dests.size();
if (isScalarIntegerType(Op->getType())) { if (DestSize == 0) {
++IntegerCount; assert(false && "Empty pop list");
} return;
} }
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
if (IntegerCount != 0) { Variable *Reg = Dests[0];
if (isScalarIntegerType(Reg->getType())) {
// GPR pop.
Str << "\t" Str << "\t"
<< "pop" "pop"
<< "\t{"; "\t{";
bool PrintComma = false; Reg->emit(Func);
for (const Operand *Op : Dests) { for (SizeT i = 1; i < DestSize; ++i) {
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", "; Str << ", ";
Op->emit(Func); Reg = Dests[i];
PrintComma = true; Reg->emit(Func);
}
} }
Str << "}"; Str << "}";
NeedNewline = true; return;
} }
for (const Operand *Op : Dests) { // VFP "s" reg push.
if (isScalarIntegerType(Op->getType())) SizeT End = DestSize - 1;
continue; SizeT Start = DestSize - 1;
if (NeedNewline) { Reg = Dests[DestSize - 1];
Str << "\n"; Str << "\t"
"vpop"
"\t{";
for (SizeT i = 2; i <= DestSize; ++i) {
Variable *PreviousReg = Dests[DestSize - i];
if (!isAssignedConsecutiveRegisters(PreviousReg, Reg)) {
Dests[Start]->emit(Func);
for (SizeT j = Start + 1; j <= End; ++j) {
Str << ", ";
Dests[j]->emit(Func);
}
startNextInst(Func); startNextInst(Func);
NeedNewline = false; Str << "}\n\t"
"vpop"
"\t{";
End = DestSize - i;
} }
Str << "\t" Reg = PreviousReg;
<< "vpop" Start = DestSize - i;
<< "\t{"; }
Op->emit(Func); Dests[Start]->emit(Func);
Str << "}"; for (SizeT j = Start + 1; j <= End; ++j) {
NeedNewline = true; Str << ", ";
Dests[j]->emit(Func);
} }
assert(NeedNewline); // caller will add the newline Str << "}";
} }
void InstARM32Pop::emitIAS(const Cfg *Func) const { void InstARM32Pop::emitIAS(const Cfg *Func) const {
...@@ -1310,56 +1376,55 @@ void InstARM32Pop::dump(const Cfg *Func) const { ...@@ -1310,56 +1376,55 @@ void InstARM32Pop::dump(const Cfg *Func) const {
} }
void InstARM32Push::emit(const Cfg *Func) const { void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
SizeT IntegerCount = 0;
for (SizeT i = 0; i < getSrcSize(); ++i) { // Push can't be emitted if there are no registers to save. This should never
if (isScalarIntegerType(getSrc(i)->getType())) { // happen, but if it does, we don't need to bring Subzero down -- we just skip
++IntegerCount; // emitting the push instruction (and maybe emit a nop?) The assert() is here
} // so that we can detect this error during development.
const SizeT SrcSize = getSrcSize();
if (SrcSize == 0) {
assert(false && "Empty push list");
return;
} }
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
for (SizeT i = getSrcSize(); i > 0; --i) { Variable *Reg = llvm::cast<Variable>(getSrc(0));
Operand *Op = getSrc(i - 1); if (isScalarIntegerType(Reg->getType())) {
if (isScalarIntegerType(Op->getType())) // GPR push.
continue;
if (NeedNewline) {
Str << "\n";
startNextInst(Func);
NeedNewline = false;
}
Str << "\t" Str << "\t"
<< "vpush" "push"
<< "\t{"; "\t{";
Op->emit(Func); Reg->emit(Func);
Str << "}"; for (SizeT i = 1; i < SrcSize; ++i) {
NeedNewline = true; Str << ", ";
getSrc(i)->emit(Func);
} }
if (IntegerCount != 0) { Str << "}";
if (NeedNewline) { return;
Str << "\n";
startNextInst(Func);
NeedNewline = false;
} }
// VFP "s" reg push.
Str << "\t" Str << "\t"
<< "push" "vpush"
<< "\t{"; "\t{";
bool PrintComma = false; Reg->emit(Func);
for (SizeT i = 0; i < getSrcSize(); ++i) { for (SizeT i = 1; i < SrcSize; ++i) {
Operand *Op = getSrc(i); Variable *NextReg = llvm::cast<Variable>(getSrc(i));
if (isScalarIntegerType(Op->getType())) { if (isAssignedConsecutiveRegisters(Reg, NextReg)) {
if (PrintComma)
Str << ", "; Str << ", ";
Op->emit(Func); } else {
PrintComma = true; startNextInst(Func);
Str << "}\n\t"
"vpush"
"\t{";
} }
Reg = NextReg;
Reg->emit(Func);
} }
Str << "}"; Str << "}";
NeedNewline = true;
}
assert(NeedNewline); // caller will add the newline
} }
void InstARM32Push::emitIAS(const Cfg *Func) const { void InstARM32Push::emitIAS(const Cfg *Func) const {
...@@ -1925,8 +1990,10 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>; ...@@ -1925,8 +1990,10 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>; template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>; template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>; template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Vmla>;
template class InstARM32ThreeAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
......
...@@ -28,6 +28,9 @@ ...@@ -28,6 +28,9 @@
// LR is not considered isInt to avoid being allocated as a register. It is // LR is not considered isInt to avoid being allocated as a register. It is
// technically preserved, but save/restore is handled separately, based on // technically preserved, but save/restore is handled separately, based on
// whether or not the function MaybeLeafFunc. // whether or not the function MaybeLeafFunc.
//
// The register tables can be generated using the gen_arm32_reg_tables.py
// script.
#define REGARM32_GPR_TABLE \ #define REGARM32_GPR_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \ /* val, encode, name, scratch,preserved,stackptr,frameptr, \
...@@ -69,21 +72,6 @@ ...@@ -69,21 +72,6 @@
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) // isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
// S registers 0-15 are scratch, but 16-31 are preserved. // S registers 0-15 are scratch, but 16-31 are preserved.
// Regenerate this with the following python script:
//
// def print_sregs():
// for i in xrange(0, 32):
// is_scratch = 1 if i < 16 else 0
// is_preserved = 1 if i >= 16 else 0
// print (' X(Reg_s{regnum:<2}, {regnum:<2}, "s{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 1, 0, 0, ' +
// 'REGLIST2(RegARM32, d{regnum:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum=i, regnum_d=i>>1,
// regnum_q=i>>2, scratch=is_scratch, preserved=is_preserved)
//
// print_sregs()
//
#define REGARM32_FP32_TABLE \ #define REGARM32_FP32_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \ /* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \ isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
...@@ -128,29 +116,6 @@ ...@@ -128,29 +116,6 @@
// registers. In processors supporting the D32 feature this will effectively // registers. In processors supporting the D32 feature this will effectively
// cause double allocation to bias towards allocating "high" D registers, which // cause double allocation to bias towards allocating "high" D registers, which
// do not alias any S registers. // do not alias any S registers.
//
// Regenerate this with the following python script:
// def print_dregs():
// for i in xrange(31, 15, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST1(RegARM32, q{regnum_q:<2}) \\').format(
// regnum=i, regnum_q=i>>1, scratch=is_scratch,
// preserved=is_preserved)
// for i in xrange(15, -1, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST3(RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum_s0 = (i<<1), regnum_s1 = (i<<1) + 1, regnum=i,
// regnum_q=i>>1, scratch=is_scratch, preserved=is_preserved)
//
// print_dregs()
//
#define REGARM32_FP64_TABLE \ #define REGARM32_FP64_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \ /* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \ isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
...@@ -192,31 +157,6 @@ ...@@ -192,31 +157,6 @@
// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch // Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch
// (if supported by the D32 feature). Q registers are defined in reverse order // (if supported by the D32 feature). Q registers are defined in reverse order
// for the same reason as D registers. // for the same reason as D registers.
//
// Regenerate this with the following python script:
// def print_qregs():
// for i in xrange(15, 7, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST2(' +
// 'RegARM32, d{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_d0=(i<<1), regnum_d1=(i<<1)+1, regnum=i,
// scratch=is_scratch, preserved=is_preserved)
// for i in xrange(7, -1, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST6(' +
// 'RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 's{regnum_s2:<2}, s{regnum_s3:<2}, ' +
// 'd{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_s0=(i<<2), regnum_s1=(i<<2)+1, regnum_s2=(i<<2)+2,
// regnum_s3=(i<<2)+3, regnum_d0=(i<<1), regnum_d1=(i<<1)+1,
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_qregs()
//
#define REGARM32_VEC128_TABLE \ #define REGARM32_VEC128_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \ /* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \ isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \
......
...@@ -415,6 +415,8 @@ public: ...@@ -415,6 +415,8 @@ public:
Vcvt, Vcvt,
Vdiv, Vdiv,
Veor, Veor,
Vmla,
Vmls,
Vmrs, Vmrs,
Vmul, Vmul,
Vsqrt, Vsqrt,
...@@ -436,6 +438,8 @@ public: ...@@ -436,6 +438,8 @@ public:
/// Shared emit routines for common forms of instructions. /// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst, static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func); const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
...@@ -708,7 +712,7 @@ private: ...@@ -708,7 +712,7 @@ private:
/// Instructions of the form x := y op z, for vector/FP. We leave these as /// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction /// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided /// encoding provided by the Advanced SIMD Extension that is not also provided
/// by the Floating-point (VFP) extension". They do not set flags. /// by the floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 { class InstARM32ThreeAddrFP : public InstARM32 {
InstARM32ThreeAddrFP() = delete; InstARM32ThreeAddrFP() = delete;
...@@ -796,6 +800,54 @@ private: ...@@ -796,6 +800,54 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
/// FP/vector instructions of the form x := x op1 (y op2 z), e.g. multiply
/// accumulate, where the destination is also an input.
///
/// These are left unconditional: "ARM deprecates the conditional execution of
/// any instruction encoding provided by the Advanced SIMD Extension that is
/// not also provided by the floating-point (VFP) extension". They do not set
/// flags.
///
/// The instruction records three sources, in this order: Dest, Src0, Src1.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrFP : public InstARM32 {
  InstARM32FourAddrFP() = delete;
  InstARM32FourAddrFP(const InstARM32FourAddrFP &) = delete;
  InstARM32FourAddrFP &operator=(const InstARM32FourAddrFP &) = delete;

public:
  /// Builds the instruction in storage obtained from Func's allocator.
  /// Every operand must be a register.
  static InstARM32FourAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
                                     Variable *Src1) {
    void *Storage = Func->allocate<InstARM32FourAddrFP>();
    return new (Storage) InstARM32FourAddrFP(Func, Dest, Src0, Src1);
  }

  /// Textual assembly emission; delegates to the shared four-address helper.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitFourAddrFP(Opcode, this, Func);
  }

  /// There is no dedicated integrated-assembler encoding in this class; it
  /// goes through emitUsingTextFixup().
  void emitIAS(const Cfg *Func) const override { emitUsingTextFixup(Func); }

  /// Dumps "<dest> = <opcode>.<type> <dest>, <sources>". Dest is also source
  /// 0, so it presumably shows up again in the dumpSources() output.
  void dump(const Cfg *Func) const override {
    if (!BuildDefs::dump())
      return;
    Ostream &Str = Func->getContext()->getStrDump();
    dumpDest(Func);
    Str << " = " << Opcode << "." << getDest()->getType() << " ";
    dumpDest(Func);
    Str << ", ";
    dumpSources(Func);
  }

  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstARM32FourAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1)
      : InstARM32(Func, K, 3, Dest) {
    // Dest goes first so it is tracked as an input as well as the output.
    addSource(Dest);
    addSource(Src0);
    addSource(Src1);
  }

  /// Mnemonic for this instruction kind; defined per specialization.
  static const char *Opcode;
};
/// Instructions of the form x cmpop y (setting flags). /// Instructions of the form x cmpop y (setting flags).
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred { class InstARM32CmpLike : public InstARM32Pred {
...@@ -855,8 +907,10 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>; ...@@ -855,8 +907,10 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>; using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>; using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>; using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
...@@ -1001,8 +1055,8 @@ private: ...@@ -1001,8 +1055,8 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget); InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
}; };
/// Pop into a list of GPRs. Technically this can be predicated, but we don't /// Pops a list of registers. It may be a list of GPRs, or a list of VFP "s"
/// need that functionality. /// regs, but not both. In any case, the list must be sorted.
class InstARM32Pop : public InstARM32 { class InstARM32Pop : public InstARM32 {
InstARM32Pop() = delete; InstARM32Pop() = delete;
InstARM32Pop(const InstARM32Pop &) = delete; InstARM32Pop(const InstARM32Pop &) = delete;
...@@ -1023,8 +1077,8 @@ private: ...@@ -1023,8 +1077,8 @@ private:
VarList Dests; VarList Dests;
}; };
/// Push a list of GPRs. Technically this can be predicated, but we don't need /// Pushes a list of registers. Just like Pop (see above), the list may be of
/// that functionality. /// GPRs, or VFP "s" registers, but not both.
class InstARM32Push : public InstARM32 { class InstARM32Push : public InstARM32 {
InstARM32Push() = delete; InstARM32Push() = delete;
InstARM32Push(const InstARM32Push &) = delete; InstARM32Push(const InstARM32Push &) = delete;
......
...@@ -60,9 +60,9 @@ public: ...@@ -60,9 +60,9 @@ public:
static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); } static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); }
void initNodeForLowering(CfgNode *Node) override { void initNodeForLowering(CfgNode *Node) override {
BoolComputations.forgetProducers(); Computations.forgetProducers();
BoolComputations.recordProducers(Node); Computations.recordProducers(Node);
BoolComputations.dump(Func); Computations.dump(Func);
} }
void translateOm1() override; void translateOm1() override;
...@@ -798,6 +798,12 @@ protected: ...@@ -798,6 +798,12 @@ protected:
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred)); Context.insert(InstARM32Vmrs::create(Func, Pred));
} }
/// Creates a Vmla (FP multiply and accumulate) instruction on Dest, Src0 and
/// Src1 and inserts it at the current lowering context.
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *NewInst = InstARM32Vmla::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
/// Creates a Vmls (FP multiply and subtract) instruction on Dest, Src0 and
/// Src1 and inserts it at the current lowering context.
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *NewInst = InstARM32Vmls::create(Func, Dest, Src0, Src1);
  Context.insert(NewInst);
}
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
} }
...@@ -1019,6 +1025,8 @@ protected: ...@@ -1019,6 +1025,8 @@ protected:
static llvm::SmallBitVector ScratchRegs; static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM]; VarList PhysicalRegisters[IceType_NUM];
VarList PreservedGPRs;
VarList PreservedSRegs;
/// Helper class that understands the Calling Convention and register /// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3, /// assignments. The first few integer type parameters can use r0-r3,
...@@ -1081,10 +1089,10 @@ private: ...@@ -1081,10 +1089,10 @@ private:
std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
ARM32HelpersPostamble; ARM32HelpersPostamble;
class BoolComputationTracker { class ComputationTracker {
public: public:
BoolComputationTracker() = default; ComputationTracker() = default;
~BoolComputationTracker() = default; ~ComputationTracker() = default;
void forgetProducers() { KnownComputations.clear(); } void forgetProducers() { KnownComputations.clear(); }
void recordProducers(CfgNode *Node); void recordProducers(CfgNode *Node);
...@@ -1118,9 +1126,9 @@ private: ...@@ -1118,9 +1126,9 @@ private:
} }
private: private:
class BoolComputationEntry { class ComputationEntry {
public: public:
explicit BoolComputationEntry(Inst *I) : Instr(I) {} ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
Inst *const Instr; Inst *const Instr;
// Boolean folding is disabled for variables whose live range is multi // Boolean folding is disabled for variables whose live range is multi
// block. We conservatively initialize IsLiveOut to true, and set it to // block. We conservatively initialize IsLiveOut to true, and set it to
...@@ -1130,13 +1138,16 @@ private: ...@@ -1130,13 +1138,16 @@ private:
// disabled. // disabled.
bool IsLiveOut = true; bool IsLiveOut = true;
int32_t NumUses = 0; int32_t NumUses = 0;
Type ComputationType;
}; };
using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; // ComputationMap maps a Variable number to a payload identifying which
BoolComputationMap KnownComputations; // instruction defined it.
using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
ComputationMap KnownComputations;
}; };
BoolComputationTracker BoolComputations; ComputationTracker Computations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound // without specifying a physical register. This is needed for creating unbound
......
...@@ -24,96 +24,92 @@ define internal i32 @foo(i32 %x) { ...@@ -24,96 +24,92 @@ define internal i32 @foo(i32 %x) {
entry: entry:
; ASM-LABEL: foo: ; ASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: sub sp, sp, ip
; ASM-NEXT: str r0, [sp, #4088]
; ASM-NEXT: # [sp, #4088] = def.pseudo
; DIS-LABEL: 00000000 <foo>: ; DIS-LABEL: 00000000 <foo>:
; DIS-NEXT: 0: e300cffc
; DIS-NEXT: 4: e04dd00c
; DIS-NEXT: 8: e58d0ff8
; IASM-LABEL: foo: ; IASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; IASM-NEXT: .Lfoo$entry: ; IASM-NEXT: .Lfoo$entry:
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 0: e300cffc
; IASM-NEXT: .byte 0xfc ; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 4: e04dd00c
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d ; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4088]
; DIS-NEXT: 8: e58d0ff8
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4088] = def.pseudo
%mul = mul i32 %x, %x %mul = mul i32 %x, %x
; ASM-NEXT: ldr r0, [sp, #4088] ; ASM-NEXT: ldr r0, [sp, #4088]
; ASM-NEXT: ldr r1, [sp, #4088]
; ASM-NEXT: mul r0, r0, r1
; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: c: e59d0ff8 ; DIS-NEXT: c: e59d0ff8
; DIS-NEXT: 10: e59d1ff8
; DIS-NEXT: 14: e0000190
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r1, [sp, #4088]
; DIS-NEXT: 10: e59d1ff8
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mul r0, r0, r1
; DIS-NEXT: 14: e0000190
; IASM-NEXT: .byte 0x90 ; IASM-NEXT: .byte 0x90
; IASM-NEXT: .byte 0x1 ; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084]
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4084] = def.pseudo
ret i32 %mul ret i32 %mul
; ASM-NEXT: ldr r0, [sp, #4084] ; ASM-NEXT: ldr r0, [sp, #4084]
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: add sp, sp, ip
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e59d0ff4 ; DIS-NEXT: 1c: e59d0ff4
; DIS-NEXT: 20: e300cffc
; DIS-NEXT: 24: e08dd00c
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 20: e300cffc
; IASM-NEXT: .byte 0xfc ; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 24: e08dd00c
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0x1e ; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x2f
...@@ -121,84 +117,88 @@ entry: ...@@ -121,84 +117,88 @@ entry:
} }
define internal void @saveMinus1(i32 %loc) { define internal void @saveConstI32(i32 %loc) {
; ASM-LABEL:saveMinus1: ; ASM-LABEL:saveConstI32:
; DIS-LABEL:00000030 <saveMinus1>: ; DIS-LABEL:00000030 <saveConstI32>:
; IASM-LABEL:saveMinus1: ; IASM-LABEL:saveConstI32:
entry: entry:
; ASM-NEXT:.LsaveMinus1$entry: ; ASM-NEXT:.LsaveConstI32$entry:
; IASM-NEXT:.LsaveConstI32$entry:
; ASM-NEXT: movw ip, #4088 ; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 30: e300cff8 ; DIS-NEXT: 30: e300cff8
; IASM-NEXT:.LsaveMinus1$entry:
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 34: e04dd00c
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: str r0, [sp, #4084] ; ASM-NEXT: sub sp, sp, ip
; ASM-NEXT: # [sp, #4084] = def.pseudo ; DIS-NEXT: 34: e04dd00c
; DIS-NEXT: 38: e58d0ff4
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d ; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
%loc.asptr = inttoptr i32 %loc to i32* ; ASM-NEXT: str r0, [sp, #4084]
store i32 -1, i32* %loc.asptr, align 1 ; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: 38: e58d0ff4
; ASM-NEXT: ldr r0, [sp, #4084]
; DIS-NEXT: 3c: e59d0ff4
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw r1, #65535 %loc.asptr = inttoptr i32 %loc to i32*
; DIS-NEXT: 40: e30f1fff store i32 524289, i32* %loc.asptr, align 1
; ASM-NEXT: ldr r0, [sp, #4084]
; DIS-NEXT: 3c: e59d0ff4
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movt r1, #65535 ; ASM-NEXT: movw r1, #1
; DIS-NEXT: 44: e34f1fff ; DIS-NEXT: 40: e3001001
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: str r1, [r0] ; ASM-NEXT: movt r1, #8
; DIS-NEXT: 48: e5801000 ; DIS-NEXT: 44: e3401008
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x4f ; IASM-NEXT: .byte 0x40
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
ret void ; ASM-NEXT: str r1, [r0]
; DIS-NEXT: 48: e5801000
; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 4c: e300cff8
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x10 ; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x80 ; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: add sp, sp, ip ret void
; DIS-NEXT: 50: e08dd00c
; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 4c: e300cff8
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: bx lr ; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 54: e12fff1e ; DIS-NEXT: 50: e08dd00c
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 54: e12fff1e
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1
} }
...@@ -9,7 +9,9 @@ ...@@ -9,7 +9,9 @@
; RUN: -ffunction-sections | FileCheck %s ; RUN: -ffunction-sections | FileCheck %s
declare void @call_target() declare void @call_target()
declare void @call_target1(i32 %arg) declare void @call_target1(i32 %arg0)
declare void @call_target2(i32 %arg0, i32 %arg1)
declare void @call_target3(i32 %arg0, i32 %arg1, i32 %arg2)
@global_short = internal global [2 x i8] zeroinitializer @global_short = internal global [2 x i8] zeroinitializer
; A direct call sequence uses the right mask and register-call sequence. ; A direct call sequence uses the right mask and register-call sequence.
...@@ -60,7 +62,7 @@ entry: ...@@ -60,7 +62,7 @@ entry:
; CHECK-LABEL: bundle_lock_without_padding ; CHECK-LABEL: bundle_lock_without_padding
; CHECK: 0: {{.*}} movw ; CHECK: 0: {{.*}} movw
; CHECK-NEXT: movt ; CHECK-NEXT: movt
; CHECK-NEXT: movw ; CHECK-NEXT: mov
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000 ; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]] ; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
...@@ -91,18 +93,16 @@ define internal void @bundle_lock_align_to_end_padding_0() { ...@@ -91,18 +93,16 @@ define internal void @bundle_lock_align_to_end_padding_0() {
entry: entry:
call void @call_target() call void @call_target()
; bundle boundary ; bundle boundary
store i16 0, i16* undef, align 1 call void @call_target3(i32 1, i32 2, i32 3)
call void @call_target()
; bundle boundary ; bundle boundary
ret void ret void
} }
; CHECK-LABEL: bundle_lock_align_to_end_padding_0 ; CHECK-LABEL: bundle_lock_align_to_end_padding_0
; CHECK: c: {{.*}} bl {{.*}} call_target ; CHECK: c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw ; CHECK-NEXT: mov
; CHECK-NEXT: movw ; CHECK-NEXT: mov
; CHECK-NEXT: bic [[REG:r[0-9]+]] ; CHECK-NEXT: mov
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]] ; CHECK-NEXT: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target3
; CHECK: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: add sp ; CHECK-NEXT: add sp
; CHECK-NEXT: bic sp, {{.*}} 0xc0000000 ; CHECK-NEXT: bic sp, {{.*}} 0xc0000000
; CHECK-NEXT: pop ; CHECK-NEXT: pop
...@@ -114,41 +114,29 @@ define internal void @bundle_lock_align_to_end_padding_1() { ...@@ -114,41 +114,29 @@ define internal void @bundle_lock_align_to_end_padding_1() {
entry: entry:
call void @call_target() call void @call_target()
; bundle boundary ; bundle boundary
store i32 65536, i32* undef, align 1 call void @call_target2(i32 1, i32 2)
; bundle boundary
call void @call_target()
; bundle boundary ; bundle boundary
ret void ret void
} }
; CHECK-LABEL: bundle_lock_align_to_end_padding_1 ; CHECK-LABEL: bundle_lock_align_to_end_padding_1
; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target ; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw [[BASE:r[0-9]+]] ; CHECK-NEXT: mov
; CHECK-NEXT: movw [[REG:r[0-9]+]], #0 ; CHECK-NEXT: mov
; CHECK-NEXT: movt [[REG]], #1
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: bic [[BASE]], [[BASE]], {{.*}} 0xc0000000 ; CHECK-NEXT: bl {{.*}} call_target2
; CHECK-NEXT: str [[REG]], {{[[]}}[[BASE]]
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f ; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
; CHECK-NEXT: {{.*}} bx lr ; CHECK-NEXT: {{.*}} bx lr
; Bundle lock align_to_end with two bunches of padding. ; Bundle lock align_to_end with two bunches of padding.
define internal void @bundle_lock_align_to_end_padding_2(i32 %target) { define internal void @bundle_lock_align_to_end_padding_2() {
entry: entry:
call void @call_target1(i32 1) call void @call_target2(i32 1, i32 2)
; bundle boundary ; bundle boundary
%__1 = inttoptr i32 %target to void (i32, i32, i32)*
call void %__1(i32 2, i32 3, i32 4)
ret void ret void
} }
; CHECK-LABEL: bundle_lock_align_to_end_padding_2 ; CHECK-LABEL: bundle_lock_align_to_end_padding_2
; CHECK: {{[0-9]+}}0: ; CHECK: mov
; CHECK-NEXT: nop ; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK: {{[0-9]+}}c: {{.*}} movw r2, #4
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f ; CHECK-NEXT: bl {{.*}} call_target2
; CHECK-NEXT: {{.*}} blx [[REG]]
...@@ -89,17 +89,17 @@ entry: ...@@ -89,17 +89,17 @@ entry:
; ARM32-LABEL: pass64BitArg ; ARM32-LABEL: pass64BitArg
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
...@@ -142,7 +142,7 @@ entry: ...@@ -142,7 +142,7 @@ entry:
; ARM32: str [[REG2]], [sp] ; ARM32: str [[REG2]], [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i32 @pass64BitUndefArg() { define internal i32 @pass64BitUndefArg() {
...@@ -162,9 +162,9 @@ entry: ...@@ -162,9 +162,9 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline ; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitUndefArg ; ARM32-LABEL: pass64BitUndefArg
; ARM32: sub sp ; ARM32: sub sp
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
; ARM32: str ; ARM32: str
; ARM32: movw {{.*}}, #123 ; ARM32: mov {{.*}}, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %padding, i64 %a) { define internal i64 @return64BitArg(i64 %padding, i64 %a) {
......
...@@ -117,7 +117,7 @@ entry: ...@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm ; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63 ; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm ; ARM32-LABEL: MulImm
; ARM32-OPTM1: movw {{.*}}, #99 ; ARM32-OPTM1: mov {{.*}}, #99
; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}} ; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}}
; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2 ; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2
; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7 ; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7
...@@ -141,8 +141,8 @@ entry: ...@@ -141,8 +141,8 @@ entry:
; CHECK-NOT: mul {{[0-9]+}} ; CHECK-NOT: mul {{[0-9]+}}
; ;
; ARM32-LABEL: MulImm64 ; ARM32-LABEL: MulImm64
; ARM32: movw {{.*}}, #99 ; ARM32: mov {{.*}}, #99
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
; ARM32: mul r ; ARM32: mul r
; ARM32: mla r ; ARM32: mla r
; ARM32: umull r ; ARM32: umull r
......
...@@ -358,7 +358,7 @@ entry: ...@@ -358,7 +358,7 @@ entry:
; CHECK: cvtsi2sd {{.*[^1]}} ; CHECK: cvtsi2sd {{.*[^1]}}
; CHECK: fld ; CHECK: fld
; ARM32-LABEL: signed32ToDoubleConst ; ARM32-LABEL: signed32ToDoubleConst
; ARM32-DAG: movw [[CONST:r[0-9]+]], #123 ; ARM32-DAG: mov [[CONST:r[0-9]+]], #123
; ARM32-DAG: vmov [[SRC:s[0-9]+]], [[CONST]] ; ARM32-DAG: vmov [[SRC:s[0-9]+]], [[CONST]]
; ARM32-DAG: vcvt.f64.s32 {{d[0-9]+}}, [[SRC]] ; ARM32-DAG: vcvt.f64.s32 {{d[0-9]+}}, [[SRC]]
......
...@@ -1329,7 +1329,7 @@ entry: ...@@ -1329,7 +1329,7 @@ entry:
; CHECK-LABEL: test_atomic_is_lock_free ; CHECK-LABEL: test_atomic_is_lock_free
; CHECK: mov {{.*}},0x1 ; CHECK: mov {{.*}},0x1
; ARM32-LABEL: test_atomic_is_lock_free ; ARM32-LABEL: test_atomic_is_lock_free
; ARM32: movw {{.*}}, #1 ; ARM32: mov {{.*}}, #1
define internal i32 @test_not_lock_free(i32 %iptr) { define internal i32 @test_not_lock_free(i32 %iptr) {
entry: entry:
......
...@@ -303,8 +303,8 @@ define internal i64 @ret_64bits_shift_left0() { ...@@ -303,8 +303,8 @@ define internal i64 @ret_64bits_shift_left0() {
; CHECK-NEXT: mov eax,0xff ; CHECK-NEXT: mov eax,0xff
; CHECK-NEXT: mov edx,0xff ; CHECK-NEXT: mov edx,0xff
; ARM32-LABEL: ret_64bits_shift_left0 ; ARM32-LABEL: ret_64bits_shift_left0
; ARM32-NEXT: movw r0, #255 ; ARM32-NEXT: mov r0, #255
; ARM32-NEXT: movw r1, #255 ; ARM32-NEXT: mov r1, #255
; MIPS32-LABEL: ret_64bits_shift_left0 ; MIPS32-LABEL: ret_64bits_shift_left0
; MIPS32-NEXT: li v0,255 ; MIPS32-NEXT: li v0,255
; MIPS32-NEXT: li v1,255 ; MIPS32-NEXT: li v1,255
......
...@@ -130,5 +130,5 @@ sw.default: ...@@ -130,5 +130,5 @@ sw.default:
ret i32 20 ret i32 20
} }
; ARM32-LABEL: testSwitchUndef64 ; ARM32-LABEL: testSwitchUndef64
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment