Commit eb13acc6 by John Porto

Subzero. ARM32. Show FP lowering some love.

After some time of being neglected, this CL improves FP lowering for ARM32. 1) It emits vpush {list} and vpop {list} when possible. 2) It stops saving aliased VFP registers multiple times (yes, sz used to save both D and S registers even when they aliased). 3) It introduces Vmla (FP multiply and accumulate) and Vmls (FP multiply and subtract). (1 + 2) minimally (but positively) affected SPEC. (3) caused a 2% geomean improvement. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1481133002 .
parent e293b5f4
......@@ -981,7 +981,7 @@ bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
if (Printed)
Str << ",";
else
Str << " \t# END=";
Str << " \t@ END=";
Var->emit(Func);
Printed = true;
}
......
......@@ -185,6 +185,22 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
Inst->getSrc(1)->emit(Func);
}
void InstARM32::emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 3);
assert(Inst->getSrc(0) == Inst->getDest());
Str << "\t" << Opcode << getVecWidthString(Inst->getDest()->getType())
<< "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
Str << ", ";
Inst->getSrc(2)->emit(Func);
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
......@@ -571,18 +587,43 @@ IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
}
namespace {
// Requirements for Push/Pop:
// 1) All the Variables have the same type;
// 2) All the variables have registers assigned to them.
void validatePushOrPopRegisterListOrDie(const VarList &RegList) {
Type PreviousTy = IceType_void;
for (Variable *Reg : RegList) {
if (PreviousTy != IceType_void && Reg->getType() != PreviousTy) {
llvm::report_fatal_error("Type mismatch when popping/pushing "
"registers.");
}
if (!Reg->hasReg()) {
llvm::report_fatal_error("Push/pop operand does not have a register "
"assigned to it.");
}
PreviousTy = Reg->getType();
}
}
} // end of anonymous namespace
/// Creates a pop of the registers listed in Dests. The list is copied into
/// the instruction and validated (uniform type, every entry has a register);
/// validation failure is fatal.
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
    : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
  // The base instruction is built with no sources and a null Dest on purpose:
  // modifications to the popped registers are tracked separately through
  // FakeDefs, and because a pop also changes the stack pointer it must not be
  // automatically dead-code eliminated -- leaving Dest as nullptr takes care
  // of that.
  validatePushOrPopRegisterListOrDie(Dests);
}
/// Creates a push of the registers listed in Srcs. The list is validated
/// (uniform type, every entry has a register; failure is fatal) and each
/// entry is then recorded as a source operand, in list order.
InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs)
    : InstARM32(Func, InstARM32::Push, Srcs.size(), nullptr) {
  // The validator already walks the entire list, so it is invoked exactly
  // once rather than once per element.
  validatePushOrPopRegisterListOrDie(Srcs);
  for (Variable *Source : Srcs) {
    addSource(Source);
  }
}
InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source)
......@@ -736,8 +777,10 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP
template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
......@@ -1216,51 +1259,74 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
namespace {
// Returns true when After's register number is exactly one greater than
// Before's. Both variables are asserted to have a register assigned.
bool isAssignedConsecutiveRegisters(Variable *Before, Variable *After) {
  assert(Before->hasReg());
  assert(After->hasReg());
  const auto ExpectedNext = Before->getRegNum() + 1;
  const bool IsConsecutive = ExpectedNext == After->getRegNum();
  return IsConsecutive;
}
} // end of anonymous namespace
void InstARM32Pop::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
return;
SizeT IntegerCount = 0;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
++IntegerCount;
}
const SizeT DestSize = Dests.size();
if (DestSize == 0) {
assert(false && "Empty pop list");
return;
}
Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
if (IntegerCount != 0) {
Variable *Reg = Dests[0];
if (isScalarIntegerType(Reg->getType())) {
// GPR pop.
Str << "\t"
<< "pop"
<< "\t{";
bool PrintComma = false;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
"pop"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < DestSize; ++i) {
Str << ", ";
Reg = Dests[i];
Reg->emit(Func);
}
Str << "}";
NeedNewline = true;
return;
}
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType()))
continue;
if (NeedNewline) {
Str << "\n";
// VFP "s" reg pop.
SizeT End = DestSize - 1;
SizeT Start = DestSize - 1;
Reg = Dests[DestSize - 1];
Str << "\t"
"vpop"
"\t{";
for (SizeT i = 2; i <= DestSize; ++i) {
Variable *PreviousReg = Dests[DestSize - i];
if (!isAssignedConsecutiveRegisters(PreviousReg, Reg)) {
Dests[Start]->emit(Func);
for (SizeT j = Start + 1; j <= End; ++j) {
Str << ", ";
Dests[j]->emit(Func);
}
startNextInst(Func);
NeedNewline = false;
Str << "}\n\t"
"vpop"
"\t{";
End = DestSize - i;
}
Str << "\t"
<< "vpop"
<< "\t{";
Op->emit(Func);
Str << "}";
NeedNewline = true;
Reg = PreviousReg;
Start = DestSize - i;
}
Dests[Start]->emit(Func);
for (SizeT j = Start + 1; j <= End; ++j) {
Str << ", ";
Dests[j]->emit(Func);
}
assert(NeedNewline); // caller will add the newline
Str << "}";
}
void InstARM32Pop::emitIAS(const Cfg *Func) const {
......@@ -1310,56 +1376,55 @@ void InstARM32Pop::dump(const Cfg *Func) const {
}
void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
return;
SizeT IntegerCount = 0;
for (SizeT i = 0; i < getSrcSize(); ++i) {
if (isScalarIntegerType(getSrc(i)->getType())) {
++IntegerCount;
}
// Push can't be emitted if there are no registers to save. This should never
// happen, but if it does, we don't need to bring Subzero down -- we just skip
// emitting the push instruction (and maybe emit a nop?) The assert() is here
// so that we can detect this error during development.
const SizeT SrcSize = getSrcSize();
if (SrcSize == 0) {
assert(false && "Empty push list");
return;
}
Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
for (SizeT i = getSrcSize(); i > 0; --i) {
Operand *Op = getSrc(i - 1);
if (isScalarIntegerType(Op->getType()))
continue;
if (NeedNewline) {
Str << "\n";
startNextInst(Func);
NeedNewline = false;
}
Variable *Reg = llvm::cast<Variable>(getSrc(0));
if (isScalarIntegerType(Reg->getType())) {
// GPR push.
Str << "\t"
<< "vpush"
<< "\t{";
Op->emit(Func);
"push"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < SrcSize; ++i) {
Str << ", ";
getSrc(i)->emit(Func);
}
Str << "}";
NeedNewline = true;
return;
}
if (IntegerCount != 0) {
if (NeedNewline) {
Str << "\n";
// VFP "s" reg push.
Str << "\t"
"vpush"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < SrcSize; ++i) {
Variable *NextReg = llvm::cast<Variable>(getSrc(i));
if (isAssignedConsecutiveRegisters(Reg, NextReg)) {
Str << ", ";
} else {
startNextInst(Func);
NeedNewline = false;
}
Str << "\t"
<< "push"
<< "\t{";
bool PrintComma = false;
for (SizeT i = 0; i < getSrcSize(); ++i) {
Operand *Op = getSrc(i);
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
Str << "}\n\t"
"vpush"
"\t{";
}
Str << "}";
NeedNewline = true;
Reg = NextReg;
Reg->emit(Func);
}
assert(NeedNewline); // caller will add the newline
Str << "}";
}
void InstARM32Push::emitIAS(const Cfg *Func) const {
......@@ -1925,8 +1990,10 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Vmla>;
template class InstARM32ThreeAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>;
......
......@@ -28,6 +28,9 @@
// LR is not considered isInt to avoid being allocated as a register. It is
// technically preserved, but save/restore is handled separately, based on
// whether or not the function MaybeLeafFunc.
//
// The register tables can be generated using the gen_arm32_reg_tables.py
// script.
#define REGARM32_GPR_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \
......@@ -69,21 +72,6 @@
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
// S registers 0-15 are scratch, but 16-31 are preserved.
// Regenerate this with the following python script:
//
// def print_sregs():
// for i in xrange(0, 32):
// is_scratch = 1 if i < 16 else 0
// is_preserved = 1 if i >= 16 else 0
// print (' X(Reg_s{regnum:<2}, {regnum:<2}, "s{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 1, 0, 0, ' +
// 'REGLIST2(RegARM32, d{regnum:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum=i, regnum_d=i>>1,
// regnum_q=i>>2, scratch=is_scratch, preserved=is_preserved)
//
// print_sregs()
//
#define REGARM32_FP32_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
......@@ -128,29 +116,6 @@
// registers. In processors supporting the D32 feature this will effectively
// cause double allocation to bias towards allocating "high" D registers, which
// do not alias any S registers.
//
// Regenerate this with the following python script:
// def print_dregs():
// for i in xrange(31, 15, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST1(RegARM32, q{regnum_q:<2}) \\').format(
// regnum=i, regnum_q=i>>1, scratch=is_scratch,
// preserved=is_preserved)
// for i in xrange(15, -1, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST3(RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum_s0 = (i<<1), regnum_s1 = (i<<1) + 1, regnum=i,
// regnum_q=i>>1, scratch=is_scratch, preserved=is_preserved)
//
// print_dregs()
//
#define REGARM32_FP64_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
......@@ -192,31 +157,6 @@
// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch
// (if supported by the D32 feature). Q registers are defined in reverse order
// for the same reason as D registers.
//
// Regenerate this with the following python script:
// def print_qregs():
// for i in xrange(15, 7, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST2(' +
// 'RegARM32, d{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_d0=(i<<1), regnum_d1=(i<<1)+1, regnum=i,
// scratch=is_scratch, preserved=is_preserved)
// for i in xrange(7, -1, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST6(' +
// 'RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 's{regnum_s2:<2}, s{regnum_s3:<2}, ' +
// 'd{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_s0=(i<<2), regnum_s1=(i<<2)+1, regnum_s2=(i<<2)+2,
// regnum_s3=(i<<2)+3, regnum_d0=(i<<1), regnum_d1=(i<<1)+1,
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_qregs()
//
#define REGARM32_VEC128_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \
......
......@@ -415,6 +415,8 @@ public:
Vcvt,
Vdiv,
Veor,
Vmla,
Vmls,
Vmrs,
Vmul,
Vsqrt,
......@@ -436,6 +438,8 @@ public:
/// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
void dump(const Cfg *Func) const override;
......@@ -708,7 +712,7 @@ private:
/// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided
/// by the Floating-point (VFP) extension". They do not set flags.
/// by the floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 {
InstARM32ThreeAddrFP() = delete;
......@@ -796,6 +800,54 @@ private:
static const char *Opcode;
};
/// FP instructions of the form x := x op1 (y op2 z), such as multiply
/// accumulate. The destination is also recorded as the first source operand.
/// These are left unconditional: "ARM deprecates the conditional execution of
/// any instruction encoding provided by the Advanced SIMD Extension that is
/// not also provided by the floating-point (VFP) extension". They do not set
/// flags.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrFP : public InstARM32 {
  InstARM32FourAddrFP() = delete;
  InstARM32FourAddrFP(const InstARM32FourAddrFP &) = delete;
  InstARM32FourAddrFP &operator=(const InstARM32FourAddrFP &) = delete;

public:
  /// Allocates the instruction from Func and constructs it in place. Every
  /// operand must be a register.
  static InstARM32FourAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
                                     Variable *Src1) {
    auto *Storage = Func->allocate<InstARM32FourAddrFP>();
    return new (Storage) InstARM32FourAddrFP(Func, Dest, Src0, Src1);
  }

  /// Textual emission; delegates to the shared four-address FP emitter.
  void emit(const Cfg *Func) const override {
    if (BuildDefs::dump())
      emitFourAddrFP(Opcode, this, Func);
  }

  /// Integrated-assembler emission goes through the text-fixup path.
  void emitIAS(const Cfg *Func) const override { emitUsingTextFixup(Func); }

  /// Writes "<dest> = <Opcode>.<type> <dest>, <sources>" to the dump stream.
  void dump(const Cfg *Func) const override {
    if (BuildDefs::dump()) {
      Ostream &Out = Func->getContext()->getStrDump();
      dumpDest(Func);
      Out << " = " << Opcode << "." << getDest()->getType() << " ";
      dumpDest(Func);
      Out << ", ";
      dumpSources(Func);
    }
  }

  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  /// Registers three source operands, in order: Dest (the instruction reads
  /// the destination as well as writing it), Src0, Src1.
  InstARM32FourAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1)
      : InstARM32(Func, K, 3, Dest) {
    addSource(Dest);
    addSource(Src0);
    addSource(Src1);
  }

  /// Mnemonic for kind K; defined per specialization.
  static const char *Opcode;
};
/// Instructions of the form x cmpop y (setting flags).
template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred {
......@@ -855,8 +907,10 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......@@ -1001,8 +1055,8 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
};
/// Pop into a list of GPRs. Technically this can be predicated, but we don't
/// need that functionality.
/// Pops a list of registers. It may be a list of GPRs, or a list of VFP "s"
/// regs, but not both. In any case, the list must be sorted.
class InstARM32Pop : public InstARM32 {
InstARM32Pop() = delete;
InstARM32Pop(const InstARM32Pop &) = delete;
......@@ -1023,8 +1077,8 @@ private:
VarList Dests;
};
/// Push a list of GPRs. Technically this can be predicated, but we don't need
/// that functionality.
/// Pushes a list of registers. Just like Pop (see above), the list may be of
/// GPRs, or VFP "s" registers, but not both.
class InstARM32Push : public InstARM32 {
InstARM32Push() = delete;
InstARM32Push(const InstARM32Push &) = delete;
......
......@@ -60,9 +60,9 @@ public:
static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); }
void initNodeForLowering(CfgNode *Node) override {
BoolComputations.forgetProducers();
BoolComputations.recordProducers(Node);
BoolComputations.dump(Func);
Computations.forgetProducers();
Computations.recordProducers(Node);
Computations.dump(Func);
}
void translateOm1() override;
......@@ -798,6 +798,12 @@ protected:
/// Creates a vmrs instruction with predicate Pred (CondARM32::AL unless the
/// caller says otherwise) and inserts it into the lowering context.
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
  auto *Vmrs = InstARM32Vmrs::create(Func, Pred);
  Context.insert(Vmrs);
}
/// Creates a vmla (FP multiply and accumulate) instruction on Dest, Src0 and
/// Src1 and inserts it into the lowering context.
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmla = InstARM32Vmla::create(Func, Dest, Src0, Src1);
  Context.insert(Vmla);
}
/// Creates a vmls (FP multiply and subtract) instruction on Dest, Src0 and
/// Src1 and inserts it into the lowering context.
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmls = InstARM32Vmls::create(Func, Dest, Src0, Src1);
  Context.insert(Vmls);
}
/// Creates a vmul instruction on Dest, Src0 and Src1 and inserts it into the
/// lowering context.
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmul = InstARM32Vmul::create(Func, Dest, Src0, Src1);
  Context.insert(Vmul);
}
......@@ -1019,6 +1025,8 @@ protected:
static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM];
VarList PreservedGPRs;
VarList PreservedSRegs;
/// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3,
......@@ -1081,10 +1089,10 @@ private:
std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
ARM32HelpersPostamble;
class BoolComputationTracker {
class ComputationTracker {
public:
BoolComputationTracker() = default;
~BoolComputationTracker() = default;
ComputationTracker() = default;
~ComputationTracker() = default;
void forgetProducers() { KnownComputations.clear(); }
void recordProducers(CfgNode *Node);
......@@ -1118,9 +1126,9 @@ private:
}
private:
class BoolComputationEntry {
class ComputationEntry {
public:
explicit BoolComputationEntry(Inst *I) : Instr(I) {}
ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
Inst *const Instr;
// Boolean folding is disabled for variables whose live range is multi
// block. We conservatively initialize IsLiveOut to true, and set it to
......@@ -1130,13 +1138,16 @@ private:
// disabled.
bool IsLiveOut = true;
int32_t NumUses = 0;
Type ComputationType;
};
using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>;
BoolComputationMap KnownComputations;
// ComputationMap maps a Variable number to a payload identifying which
// instruction defined it.
using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
ComputationMap KnownComputations;
};
BoolComputationTracker BoolComputations;
ComputationTracker Computations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
......
......@@ -24,96 +24,92 @@ define internal i32 @foo(i32 %x) {
entry:
; ASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: sub sp, sp, ip
; ASM-NEXT: str r0, [sp, #4088]
; ASM-NEXT: # [sp, #4088] = def.pseudo
; DIS-LABEL: 00000000 <foo>:
; DIS-NEXT: 0: e300cffc
; DIS-NEXT: 4: e04dd00c
; DIS-NEXT: 8: e58d0ff8
; IASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; IASM-NEXT: .Lfoo$entry:
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 0: e300cffc
; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 4: e04dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4088]
; DIS-NEXT: 8: e58d0ff8
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4088] = def.pseudo
%mul = mul i32 %x, %x
; ASM-NEXT: ldr r0, [sp, #4088]
; ASM-NEXT: ldr r1, [sp, #4088]
; ASM-NEXT: mul r0, r0, r1
; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: c: e59d0ff8
; DIS-NEXT: 10: e59d1ff8
; DIS-NEXT: 14: e0000190
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r1, [sp, #4088]
; DIS-NEXT: 10: e59d1ff8
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mul r0, r0, r1
; DIS-NEXT: 14: e0000190
; IASM-NEXT: .byte 0x90
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084]
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4084] = def.pseudo
ret i32 %mul
; ASM-NEXT: ldr r0, [sp, #4084]
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: add sp, sp, ip
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e59d0ff4
; DIS-NEXT: 20: e300cffc
; DIS-NEXT: 24: e08dd00c
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 20: e300cffc
; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 24: e08dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
......@@ -121,84 +117,88 @@ entry:
}
define internal void @saveMinus1(i32 %loc) {
; ASM-LABEL:saveMinus1:
; DIS-LABEL:00000030 <saveMinus1>:
; IASM-LABEL:saveMinus1:
define internal void @saveConstI32(i32 %loc) {
; ASM-LABEL:saveConstI32:
; DIS-LABEL:00000030 <saveConstI32>:
; IASM-LABEL:saveConstI32:
entry:
; ASM-NEXT:.LsaveMinus1$entry:
; ASM-NEXT:.LsaveConstI32$entry:
; IASM-NEXT:.LsaveConstI32$entry:
; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 30: e300cff8
; IASM-NEXT:.LsaveMinus1$entry:
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 34: e04dd00c
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: 38: e58d0ff4
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
%loc.asptr = inttoptr i32 %loc to i32*
store i32 -1, i32* %loc.asptr, align 1
store i32 524289, i32* %loc.asptr, align 1
; ASM-NEXT: ldr r0, [sp, #4084]
; DIS-NEXT: 3c: e59d0ff4
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw r1, #65535
; DIS-NEXT: 40: e30f1fff
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movt r1, #65535
; DIS-NEXT: 44: e34f1fff
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw r1, #1
; DIS-NEXT: 40: e3001001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: movt r1, #8
; DIS-NEXT: 44: e3401008
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x40
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: str r1, [r0]
; DIS-NEXT: 48: e5801000
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xe5
ret void
; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 4c: e300cff8
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 50: e08dd00c
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 54: e12fff1e
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1
}
......@@ -9,7 +9,9 @@
; RUN: -ffunction-sections | FileCheck %s
declare void @call_target()
declare void @call_target1(i32 %arg)
declare void @call_target1(i32 %arg0)
declare void @call_target2(i32 %arg0, i32 %arg1)
declare void @call_target3(i32 %arg0, i32 %arg1, i32 %arg2)
@global_short = internal global [2 x i8] zeroinitializer
; A direct call sequence uses the right mask and register-call sequence.
......@@ -60,7 +62,7 @@ entry:
; CHECK-LABEL: bundle_lock_without_padding
; CHECK: 0: {{.*}} movw
; CHECK-NEXT: movt
; CHECK-NEXT: movw
; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
......@@ -91,18 +93,16 @@ define internal void @bundle_lock_align_to_end_padding_0() {
entry:
call void @call_target()
; bundle boundary
store i16 0, i16* undef, align 1
call void @call_target()
call void @call_target3(i32 1, i32 2, i32 3)
; bundle boundary
ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_0
; CHECK: c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw
; CHECK-NEXT: movw
; CHECK-NEXT: bic [[REG:r[0-9]+]]
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
; CHECK: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target3
; CHECK-NEXT: add sp
; CHECK-NEXT: bic sp, {{.*}} 0xc0000000
; CHECK-NEXT: pop
......@@ -114,41 +114,29 @@ define internal void @bundle_lock_align_to_end_padding_1() {
entry:
call void @call_target()
; bundle boundary
store i32 65536, i32* undef, align 1
; bundle boundary
call void @call_target()
call void @call_target2(i32 1, i32 2)
; bundle boundary
ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_1
; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw [[BASE:r[0-9]+]]
; CHECK-NEXT: movw [[REG:r[0-9]+]], #0
; CHECK-NEXT: movt [[REG]], #1
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: bic [[BASE]], [[BASE]], {{.*}} 0xc0000000
; CHECK-NEXT: str [[REG]], {{[[]}}[[BASE]]
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK-NEXT: bl {{.*}} call_target2
; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
; CHECK-NEXT: {{.*}} bx lr
; Bundle lock align_to_end with two bunches of padding.
define internal void @bundle_lock_align_to_end_padding_2(i32 %target) {
define internal void @bundle_lock_align_to_end_padding_2() {
entry:
call void @call_target1(i32 1)
call void @call_target2(i32 1, i32 2)
; bundle boundary
%__1 = inttoptr i32 %target to void (i32, i32, i32)*
call void %__1(i32 2, i32 3, i32 4)
ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_2
; CHECK: {{[0-9]+}}0:
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK: {{[0-9]+}}c: {{.*}} movw r2, #4
; CHECK: mov
; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f
; CHECK-NEXT: {{.*}} blx [[REG]]
; CHECK-NEXT: bl {{.*}} call_target2
......@@ -89,17 +89,17 @@ entry:
; ARM32-LABEL: pass64BitArg
; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
......@@ -142,7 +142,7 @@ entry:
; ARM32: str [[REG2]], [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i32 @pass64BitUndefArg() {
......@@ -162,9 +162,9 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitUndefArg
; ARM32: sub sp
; ARM32: movw {{.*}}, #0
; ARM32: mov {{.*}}, #0
; ARM32: str
; ARM32: movw {{.*}}, #123
; ARM32: mov {{.*}}, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %padding, i64 %a) {
......
......@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm
; ARM32-OPTM1: movw {{.*}}, #99
; ARM32-OPTM1: mov {{.*}}, #99
; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}}
; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2
; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7
......@@ -141,8 +141,8 @@ entry:
; CHECK-NOT: mul {{[0-9]+}}
;
; ARM32-LABEL: MulImm64
; ARM32: movw {{.*}}, #99
; ARM32: movw {{.*}}, #0
; ARM32: mov {{.*}}, #99
; ARM32: mov {{.*}}, #0
; ARM32: mul r
; ARM32: mla r
; ARM32: umull r
......
......@@ -358,7 +358,7 @@ entry:
; CHECK: cvtsi2sd {{.*[^1]}}
; CHECK: fld
; ARM32-LABEL: signed32ToDoubleConst
; ARM32-DAG: movw [[CONST:r[0-9]+]], #123
; ARM32-DAG: mov [[CONST:r[0-9]+]], #123
; ARM32-DAG: vmov [[SRC:s[0-9]+]], [[CONST]]
; ARM32-DAG: vcvt.f64.s32 {{d[0-9]+}}, [[SRC]]
......
......@@ -1329,7 +1329,7 @@ entry:
; CHECK-LABEL: test_atomic_is_lock_free
; CHECK: mov {{.*}},0x1
; ARM32-LABEL: test_atomic_is_lock_free
; ARM32: movw {{.*}}, #1
; ARM32: mov {{.*}}, #1
define internal i32 @test_not_lock_free(i32 %iptr) {
entry:
......
......@@ -303,8 +303,8 @@ define internal i64 @ret_64bits_shift_left0() {
; CHECK-NEXT: mov eax,0xff
; CHECK-NEXT: mov edx,0xff
; ARM32-LABEL: ret_64bits_shift_left0
; ARM32-NEXT: movw r0, #255
; ARM32-NEXT: movw r1, #255
; ARM32-NEXT: mov r0, #255
; ARM32-NEXT: mov r1, #255
; MIPS32-LABEL: ret_64bits_shift_left0
; MIPS32-NEXT: li v0,255
; MIPS32-NEXT: li v1,255
......
......@@ -130,5 +130,5 @@ sw.default:
ret i32 20
}
; ARM32-LABEL: testSwitchUndef64
; ARM32: movw {{.*}}, #0
; ARM32: movw {{.*}}, #0
; ARM32: mov {{.*}}, #0
; ARM32: mov {{.*}}, #0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment