Commit eb13acc6 by John Porto

Subzero. ARM32. Show FP lowering some love.

After some time of being neglected, this CL improves FP lowering for ARM32. 1) It emits vpush {list}, and vpop {list} when possible. 2) It stops saving aliased VFP registers multiple times (yes, sz used to save both D and S registers even when they aliased.) 3) It introduces Vmla (fp multiply and accumulate) and Vmls (fp multiply and subtract.) (1 + 2) minimally (but positively) affected SPEC. (3) caused a 2% geomean improvement. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1481133002 .
parent e293b5f4
class RegAliases(object):
    """Ordered collection of the register names that alias one register.

    Rendered as a REGLISTn(RegARM32, ...) macro invocation, where n is the
    number of names in the collection.
    """

    def __init__(self, *Aliases):
        self.Aliases = [Name for Name in Aliases]

    def __str__(self):
        Count = len(self.Aliases)
        Joined = ', '.join(self.Aliases)
        return 'REGLIST{AliasCount}(RegARM32, {Aliases})'.format(
            AliasCount=Count, Aliases=Joined)
def _ArgumentNames(Method):
import inspect
return (ArgName for ArgName in inspect.getargspec(Method).args
if ArgName != 'self')
class RegFeatures(object):
    """Feature flags (plus the alias list) of a single register.

    str() renders the values in constructor-argument order, separated by
    ', ', which forms the trailing columns of a register table row.
    """

    def __init__(self, IsScratch=0, IsPreserved=0, IsStackPtr=0, IsFramePtr=0,
                 IsInt=0, IsI64Pair=0, IsFP32=0, IsFP64=0, IsVec128=0,
                 Aliases=None):
        # Snapshot the arguments before any other local name is created.
        Given = dict(locals())
        HoldsInteger = IsInt or IsI64Pair
        HoldsFloat = IsFP32 or IsFP64 or IsVec128

        # A register belongs to at most one value class.
        assert not (IsInt and IsI64Pair)
        assert not (IsFP32 and IsFP64)
        assert not (IsFP32 and IsVec128)
        assert not (IsFP64 and IsVec128)
        assert not (HoldsInteger and HoldsFloat)
        # The frame pointer must be an integer register.
        assert IsInt or not IsFramePtr
        # The stack pointer is in no value class at all.
        assert not (IsStackPtr and (HoldsInteger or HoldsFloat))
        # Scratch and preserved are mutually exclusive.
        assert not (IsScratch and IsPreserved)

        self.Features = list(_ArgumentNames(self.__init__))
        self.FeaturesDict = dict(
            (Name, Given[Name]) for Name in self.Features)

    def __str__(self):
        Rendered = [str(self.FeaturesDict[Name]) for Name in self.Features]
        return '%s' % (', '.join(Rendered))

    def Aliases(self):
        """Return the value that was passed as the Aliases argument."""
        return self.FeaturesDict['Aliases']

    def LivesInGPR(self):
        """True if any integer/pointer flag is set, or if no VFP flag is."""
        GprFlags = ('IsInt', 'IsI64Pair', 'IsStackPtr', 'IsFramePtr')
        if any(self.FeaturesDict[Flag] for Flag in GprFlags):
            return True
        return not self.LivesInVFP()

    def LivesInVFP(self):
        """True if the register is an FP32, FP64 or Vec128 register."""
        VfpFlags = ('IsFP32', 'IsFP64', 'IsVec128')
        return any(self.FeaturesDict[Flag] for Flag in VfpFlags)
class Reg(object):
    """One ARM32 register: its name, its encoding and its feature flags."""

    def __init__(self, Name, Encode, **Features):
        self.Name = Name
        self.Encode = Encode
        self.Features = RegFeatures(**Features)

    def __str__(self):
        return 'Reg_{Name}, {Encode}, {Features}'.format(Name=self.Name,
            Encode=self.Encode, Features=self.Features)

    def IsAnAliasOf(self, Other):
        """Return True if Other's name appears in this register's alias list.

        The test must look at Other.Name: every register lists itself among
        its own aliases, so testing self.Name would always succeed and would
        make any alias-symmetry check built on this method vacuous.
        """
        return Other.Name in self.Features.Aliases().Aliases


# Note: The following tables break the usual 80-col on purpose -- it is easier
# to read the register tables if each register entry is contained on a single
# line.
GPRs = [
    Reg('r0', 0, IsScratch=1, IsInt=1, Aliases=RegAliases('r0', 'r0r1')),
    Reg('r1', 1, IsScratch=1, IsInt=1, Aliases=RegAliases('r1', 'r0r1')),
    Reg('r2', 2, IsScratch=1, IsInt=1, Aliases=RegAliases('r2', 'r2r3')),
    Reg('r3', 3, IsScratch=1, IsInt=1, Aliases=RegAliases('r3', 'r2r3')),
    Reg('r4', 4, IsPreserved=1, IsInt=1, Aliases=RegAliases('r4', 'r4r5')),
    Reg('r5', 5, IsPreserved=1, IsInt=1, Aliases=RegAliases('r5', 'r4r5')),
    Reg('r6', 6, IsPreserved=1, IsInt=1, Aliases=RegAliases('r6', 'r6r7')),
    Reg('r7', 7, IsPreserved=1, IsInt=1, Aliases=RegAliases('r7', 'r6r7')),
    Reg('r8', 8, IsPreserved=1, IsInt=1, Aliases=RegAliases('r8', 'r8r9')),
    Reg('r9', 9, IsPreserved=1, IsInt=0, Aliases=RegAliases('r9', 'r8r9')),
    Reg('r10', 10, IsPreserved=1, IsInt=1, Aliases=RegAliases('r10', 'r10fp')),
    Reg('fp', 11, IsPreserved=1, IsInt=1, IsFramePtr=1, Aliases=RegAliases('fp', 'r10fp')),
    Reg('ip', 12, IsScratch=1, IsInt=1, Aliases=RegAliases('ip')),
    Reg('sp', 13, IsScratch=0, IsInt=0, IsStackPtr=1, Aliases=RegAliases('sp')),
    Reg('lr', 14, IsScratch=0, IsInt=0, Aliases=RegAliases('lr')),
    Reg('pc', 15, IsScratch=0, IsInt=0, Aliases=RegAliases('pc')),
]

I64Pairs = [
    Reg('r0r1', 0, IsScratch=1, IsI64Pair=1, Aliases=RegAliases('r0r1', 'r0', 'r1')),
    Reg('r2r3', 2, IsScratch=1, IsI64Pair=1, Aliases=RegAliases('r2r3', 'r2', 'r3')),
    Reg('r4r5', 4, IsPreserved=1, IsI64Pair=1, Aliases=RegAliases('r4r5', 'r4', 'r5')),
    Reg('r6r7', 6, IsPreserved=1, IsI64Pair=1, Aliases=RegAliases('r6r7', 'r6', 'r7')),
    Reg('r8r9', 8, IsPreserved=1, IsI64Pair=0, Aliases=RegAliases('r8r9', 'r8', 'r9')),
    Reg('r10fp', 10, IsPreserved=1, IsI64Pair=1, Aliases=RegAliases('r10fp', 'r10', 'fp')),
]

# Each S register s<n> aliases d<n / 2> and q<n / 4>.
FP32 = [
    Reg('s0', 0, IsScratch=1, IsFP32=1, Aliases=RegAliases('s0', 'd0', 'q0')),
    Reg('s1', 1, IsScratch=1, IsFP32=1, Aliases=RegAliases('s1', 'd0', 'q0')),
    Reg('s2', 2, IsScratch=1, IsFP32=1, Aliases=RegAliases('s2', 'd1', 'q0')),
    Reg('s3', 3, IsScratch=1, IsFP32=1, Aliases=RegAliases('s3', 'd1', 'q0')),
    Reg('s4', 4, IsScratch=1, IsFP32=1, Aliases=RegAliases('s4', 'd2', 'q1')),
    Reg('s5', 5, IsScratch=1, IsFP32=1, Aliases=RegAliases('s5', 'd2', 'q1')),
    Reg('s6', 6, IsScratch=1, IsFP32=1, Aliases=RegAliases('s6', 'd3', 'q1')),
    Reg('s7', 7, IsScratch=1, IsFP32=1, Aliases=RegAliases('s7', 'd3', 'q1')),
    Reg('s8', 8, IsScratch=1, IsFP32=1, Aliases=RegAliases('s8', 'd4', 'q2')),
    Reg('s9', 9, IsScratch=1, IsFP32=1, Aliases=RegAliases('s9', 'd4', 'q2')),
    Reg('s10', 10, IsScratch=1, IsFP32=1, Aliases=RegAliases('s10', 'd5', 'q2')),
    Reg('s11', 11, IsScratch=1, IsFP32=1, Aliases=RegAliases('s11', 'd5', 'q2')),
    Reg('s12', 12, IsScratch=1, IsFP32=1, Aliases=RegAliases('s12', 'd6', 'q3')),
    Reg('s13', 13, IsScratch=1, IsFP32=1, Aliases=RegAliases('s13', 'd6', 'q3')),
    Reg('s14', 14, IsScratch=1, IsFP32=1, Aliases=RegAliases('s14', 'd7', 'q3')),
    Reg('s15', 15, IsScratch=1, IsFP32=1, Aliases=RegAliases('s15', 'd7', 'q3')),
    Reg('s16', 16, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s16', 'd8', 'q4')),
    Reg('s17', 17, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s17', 'd8', 'q4')),
    Reg('s18', 18, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s18', 'd9', 'q4')),
    Reg('s19', 19, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s19', 'd9', 'q4')),
    Reg('s20', 20, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s20', 'd10', 'q5')),
    Reg('s21', 21, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s21', 'd10', 'q5')),
    Reg('s22', 22, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s22', 'd11', 'q5')),
    Reg('s23', 23, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s23', 'd11', 'q5')),
    Reg('s24', 24, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s24', 'd12', 'q6')),
    Reg('s25', 25, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s25', 'd12', 'q6')),
    Reg('s26', 26, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s26', 'd13', 'q6')),
    Reg('s27', 27, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s27', 'd13', 'q6')),
    Reg('s28', 28, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s28', 'd14', 'q7')),
    Reg('s29', 29, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s29', 'd14', 'q7')),
    Reg('s30', 30, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s30', 'd15', 'q7')),
    Reg('s31', 31, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s31', 'd15', 'q7')),
]

# Each D register d<n> aliases q<n / 2>; d0-d15 also alias s<2n> and s<2n + 1>.
FP64 = [
    Reg('d0', 0, IsScratch=1, IsFP64=1, Aliases=RegAliases('d0', 'q0', 's0', 's1')),
    Reg('d1', 1, IsScratch=1, IsFP64=1, Aliases=RegAliases('d1', 'q0', 's2', 's3')),
    Reg('d2', 2, IsScratch=1, IsFP64=1, Aliases=RegAliases('d2', 'q1', 's4', 's5')),
    Reg('d3', 3, IsScratch=1, IsFP64=1, Aliases=RegAliases('d3', 'q1', 's6', 's7')),
    Reg('d4', 4, IsScratch=1, IsFP64=1, Aliases=RegAliases('d4', 'q2', 's8', 's9')),
    Reg('d5', 5, IsScratch=1, IsFP64=1, Aliases=RegAliases('d5', 'q2', 's10', 's11')),
    Reg('d6', 6, IsScratch=1, IsFP64=1, Aliases=RegAliases('d6', 'q3', 's12', 's13')),
    Reg('d7', 7, IsScratch=1, IsFP64=1, Aliases=RegAliases('d7', 'q3', 's14', 's15')),
    Reg('d8', 8, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d8', 'q4', 's16', 's17')),
    Reg('d9', 9, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d9', 'q4', 's18', 's19')),
    Reg('d10', 10, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d10', 'q5', 's20', 's21')),
    Reg('d11', 11, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d11', 'q5', 's22', 's23')),
    Reg('d12', 12, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d12', 'q6', 's24', 's25')),
    Reg('d13', 13, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d13', 'q6', 's26', 's27')),
    Reg('d14', 14, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d14', 'q7', 's28', 's29')),
    Reg('d15', 15, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d15', 'q7', 's30', 's31')),
    Reg('d16', 16, IsScratch=1, IsFP64=1, Aliases=RegAliases('d16', 'q8')),
    Reg('d17', 17, IsScratch=1, IsFP64=1, Aliases=RegAliases('d17', 'q8')),
    Reg('d18', 18, IsScratch=1, IsFP64=1, Aliases=RegAliases('d18', 'q9')),
    Reg('d19', 19, IsScratch=1, IsFP64=1, Aliases=RegAliases('d19', 'q9')),
    Reg('d20', 20, IsScratch=1, IsFP64=1, Aliases=RegAliases('d20', 'q10')),
    Reg('d21', 21, IsScratch=1, IsFP64=1, Aliases=RegAliases('d21', 'q10')),
    Reg('d22', 22, IsScratch=1, IsFP64=1, Aliases=RegAliases('d22', 'q11')),
    Reg('d23', 23, IsScratch=1, IsFP64=1, Aliases=RegAliases('d23', 'q11')),
    Reg('d24', 24, IsScratch=1, IsFP64=1, Aliases=RegAliases('d24', 'q12')),
    Reg('d25', 25, IsScratch=1, IsFP64=1, Aliases=RegAliases('d25', 'q12')),
    Reg('d26', 26, IsScratch=1, IsFP64=1, Aliases=RegAliases('d26', 'q13')),
    Reg('d27', 27, IsScratch=1, IsFP64=1, Aliases=RegAliases('d27', 'q13')),
    Reg('d28', 28, IsScratch=1, IsFP64=1, Aliases=RegAliases('d28', 'q14')),
    Reg('d29', 29, IsScratch=1, IsFP64=1, Aliases=RegAliases('d29', 'q14')),
    Reg('d30', 30, IsScratch=1, IsFP64=1, Aliases=RegAliases('d30', 'q15')),
    Reg('d31', 31, IsScratch=1, IsFP64=1, Aliases=RegAliases('d31', 'q15')),
]

Vec128 = [
    Reg('q0', 0, IsScratch=1, IsVec128=1, Aliases=RegAliases('q0', 'd0', 'd1', 's0', 's1', 's2', 's3')),
    Reg('q1', 1, IsScratch=1, IsVec128=1, Aliases=RegAliases('q1', 'd2', 'd3', 's4', 's5', 's6', 's7')),
    Reg('q2', 2, IsScratch=1, IsVec128=1, Aliases=RegAliases('q2', 'd4', 'd5', 's8', 's9', 's10', 's11')),
    Reg('q3', 3, IsScratch=1, IsVec128=1, Aliases=RegAliases('q3', 'd6', 'd7', 's12', 's13', 's14', 's15')),
    Reg('q4', 4, IsPreserved=1, IsVec128=1, Aliases=RegAliases('q4', 'd8', 'd9', 's16', 's17', 's18', 's19')),
    Reg('q5', 5, IsPreserved=1, IsVec128=1, Aliases=RegAliases('q5', 'd10', 'd11', 's20', 's21', 's22', 's23')),
    Reg('q6', 6, IsPreserved=1, IsVec128=1, Aliases=RegAliases('q6', 'd12', 'd13', 's24', 's25', 's26', 's27')),
    Reg('q7', 7, IsPreserved=1, IsVec128=1, Aliases=RegAliases('q7', 'd14', 'd15', 's28', 's29', 's30', 's31')),
    Reg('q8', 8, IsScratch=1, IsVec128=1, Aliases=RegAliases('q8', 'd16', 'd17')),
    Reg('q9', 9, IsScratch=1, IsVec128=1, Aliases=RegAliases('q9', 'd18', 'd19')),
    Reg('q10', 10, IsScratch=1, IsVec128=1, Aliases=RegAliases('q10', 'd20', 'd21')),
    Reg('q11', 11, IsScratch=1, IsVec128=1, Aliases=RegAliases('q11', 'd22', 'd23')),
    Reg('q12', 12, IsScratch=1, IsVec128=1, Aliases=RegAliases('q12', 'd24', 'd25')),
    Reg('q13', 13, IsScratch=1, IsVec128=1, Aliases=RegAliases('q13', 'd26', 'd27')),
    Reg('q14', 14, IsScratch=1, IsVec128=1, Aliases=RegAliases('q14', 'd28', 'd29')),
    Reg('q15', 15, IsScratch=1, IsVec128=1, Aliases=RegAliases('q15', 'd30', 'd31')),
]
def _reverse(x):
return sorted(x, key=lambda x: x.Encode, reverse=True)
# Emission order of the register classes. FP64 and Vec128 are emitted with
# the highest encoding first.
RegClasses = [GPRs, I64Pairs, FP32, _reverse(FP64), _reverse(Vec128)]

# Index every register by name; a name may appear in only one class.
# The loop variable is deliberately not called "Reg": at module level that
# would rebind the Reg class to the last register instance visited.
AllRegs = {}
for RegClass in RegClasses:
    for Register in RegClass:
        assert Register.Name not in AllRegs
        AllRegs[Register.Name] = Register

# Consistency checks: every register named in an alias list must in turn
# satisfy IsAnAliasOf() for the listing register, and both must live in the
# same register file.
for RegClass in RegClasses:
    for Register in RegClass:
        for Alias in AllRegs[Register.Name].Features.Aliases().Aliases:
            Aliased = AllRegs[Alias]
            assert Aliased.IsAnAliasOf(Register), (
                '%s VS %s' % (Register, Aliased))
            assert (Aliased.Features.LivesInGPR() ==
                    Register.Features.LivesInGPR()), (
                '%s VS %s' % (Register, Aliased))
            assert (Aliased.Features.LivesInVFP() ==
                    Register.Features.LivesInVFP()), (
                '%s VS %s' % (Register, Aliased))

# Emit one X(...) row per register. The parenthesized single-argument form
# of print produces the same output under Python 2 and Python 3.
for RegClass in RegClasses:
    for Register in RegClass:
        print('X({Reg})'.format(Reg=Register))
    # Blank line after each register class.
    print('')
......@@ -981,7 +981,7 @@ bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
if (Printed)
Str << ",";
else
Str << " \t# END=";
Str << " \t@ END=";
Var->emit(Func);
Printed = true;
}
......
......@@ -185,6 +185,22 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
Inst->getSrc(1)->emit(Func);
}
void InstARM32::emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 3);
assert(Inst->getSrc(0) == Inst->getDest());
Str << "\t" << Opcode << getVecWidthString(Inst->getDest()->getType())
<< "\t";
Inst->getDest()->emit(Func);
Str << ", ";
Inst->getSrc(1)->emit(Func);
Str << ", ";
Inst->getSrc(2)->emit(Func);
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) {
if (!BuildDefs::dump())
......@@ -571,18 +587,43 @@ IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
}
namespace {
// Requirements for Push/Pop:
// 1) All the Variables have the same type;
// 2) All the variables have registers assigned to them.
void validatePushOrPopRegisterListOrDie(const VarList &RegList) {
Type PreviousTy = IceType_void;
for (Variable *Reg : RegList) {
if (PreviousTy != IceType_void && Reg->getType() != PreviousTy) {
llvm::report_fatal_error("Type mismatch when popping/pushing "
"registers.");
}
if (!Reg->hasReg()) {
llvm::report_fatal_error("Push/pop operand does not have a register "
"assigned to it.");
}
PreviousTy = Reg->getType();
}
}
} // end of anonymous namespace
/// Builds a pop instruction for the registers in Dests. The instruction is
/// created with no sources and a null Dest; the popped registers are kept in
/// the Dests member instead.
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs. Also, a pop
// instruction affects the stack pointer and so it should not be allowed to
// be automatically dead-code eliminated. This is automatic since we leave
// the Dest as nullptr.
// Reports a fatal error unless all entries share one type and each has a
// register assigned.
validatePushOrPopRegisterListOrDie(Dests);
}
/// Builds a push instruction whose sources are the registers in Srcs, in
/// list order. The instruction has no Dest.
InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs)
    : InstARM32(Func, InstARM32::Push, Srcs.size(), nullptr) {
  // Validate the list exactly once. The validator already walks every
  // element, so calling it from inside a per-element loop only repeats the
  // same check Srcs.size() times.
  validatePushOrPopRegisterListOrDie(Srcs);
  for (Variable *Source : Srcs) {
    addSource(Source);
  }
}
InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source)
......@@ -736,8 +777,10 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP
// Exactly one Opcode definition per FP instruction, in alphabetical order. A
// second definition of the same specialization is a redefinition error.
template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
......@@ -1216,51 +1259,74 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
namespace {
/// Returns true when the register number assigned to After is exactly one
/// greater than the one assigned to Before. Both variables are asserted to
/// have a register.
bool isAssignedConsecutiveRegisters(Variable *Before, Variable *After) {
  assert(Before->hasReg());
  assert(After->hasReg());
  const auto ExpectedNext = Before->getRegNum() + 1;
  return ExpectedNext == After->getRegNum();
}
} // end of anonymous namespace
void InstARM32Pop::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
return;
SizeT IntegerCount = 0;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
++IntegerCount;
}
const SizeT DestSize = Dests.size();
if (DestSize == 0) {
assert(false && "Empty pop list");
return;
}
Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
if (IntegerCount != 0) {
Variable *Reg = Dests[0];
if (isScalarIntegerType(Reg->getType())) {
// GPR push.
Str << "\t"
<< "pop"
<< "\t{";
bool PrintComma = false;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
"pop"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < DestSize; ++i) {
Str << ", ";
Reg = Dests[i];
Reg->emit(Func);
}
Str << "}";
NeedNewline = true;
return;
}
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType()))
continue;
if (NeedNewline) {
Str << "\n";
// VFP "s" reg push.
SizeT End = DestSize - 1;
SizeT Start = DestSize - 1;
Reg = Dests[DestSize - 1];
Str << "\t"
"vpop"
"\t{";
for (SizeT i = 2; i <= DestSize; ++i) {
Variable *PreviousReg = Dests[DestSize - i];
if (!isAssignedConsecutiveRegisters(PreviousReg, Reg)) {
Dests[Start]->emit(Func);
for (SizeT j = Start + 1; j <= End; ++j) {
Str << ", ";
Dests[j]->emit(Func);
}
startNextInst(Func);
NeedNewline = false;
Str << "}\n\t"
"vpop"
"\t{";
End = DestSize - i;
}
Str << "\t"
<< "vpop"
<< "\t{";
Op->emit(Func);
Str << "}";
NeedNewline = true;
Reg = PreviousReg;
Start = DestSize - i;
}
Dests[Start]->emit(Func);
for (SizeT j = Start + 1; j <= End; ++j) {
Str << ", ";
Dests[j]->emit(Func);
}
assert(NeedNewline); // caller will add the newline
Str << "}";
}
void InstARM32Pop::emitIAS(const Cfg *Func) const {
......@@ -1310,56 +1376,55 @@ void InstARM32Pop::dump(const Cfg *Func) const {
}
void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
return;
SizeT IntegerCount = 0;
for (SizeT i = 0; i < getSrcSize(); ++i) {
if (isScalarIntegerType(getSrc(i)->getType())) {
++IntegerCount;
}
// Push can't be emitted if there are no registers to save. This should never
// happen, but if it does, we don't need to bring Subzero down -- we just skip
// emitting the push instruction (and maybe emit a nop?) The assert() is here
// so that we can detect this error during development.
const SizeT SrcSize = getSrcSize();
if (SrcSize == 0) {
assert(false && "Empty push list");
return;
}
Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
for (SizeT i = getSrcSize(); i > 0; --i) {
Operand *Op = getSrc(i - 1);
if (isScalarIntegerType(Op->getType()))
continue;
if (NeedNewline) {
Str << "\n";
startNextInst(Func);
NeedNewline = false;
}
Variable *Reg = llvm::cast<Variable>(getSrc(0));
if (isScalarIntegerType(Reg->getType())) {
// GPR push.
Str << "\t"
<< "vpush"
<< "\t{";
Op->emit(Func);
"push"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < SrcSize; ++i) {
Str << ", ";
getSrc(i)->emit(Func);
}
Str << "}";
NeedNewline = true;
return;
}
if (IntegerCount != 0) {
if (NeedNewline) {
Str << "\n";
// VFP "s" reg push.
Str << "\t"
"vpush"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < SrcSize; ++i) {
Variable *NextReg = llvm::cast<Variable>(getSrc(i));
if (isAssignedConsecutiveRegisters(Reg, NextReg)) {
Str << ", ";
} else {
startNextInst(Func);
NeedNewline = false;
}
Str << "\t"
<< "push"
<< "\t{";
bool PrintComma = false;
for (SizeT i = 0; i < getSrcSize(); ++i) {
Operand *Op = getSrc(i);
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
Str << "}\n\t"
"vpush"
"\t{";
}
Str << "}";
NeedNewline = true;
Reg = NextReg;
Reg->emit(Func);
}
assert(NeedNewline); // caller will add the newline
Str << "}";
}
void InstARM32Push::emitIAS(const Cfg *Func) const {
......@@ -1925,8 +1990,10 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Vmla>;
template class InstARM32ThreeAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>;
......
......@@ -28,6 +28,9 @@
// LR is not considered isInt to avoid being allocated as a register. It is
// technically preserved, but save/restore is handled separately, based on
// whether or not the function MaybeLeafFunc.
//
// The register tables can be generated using the gen_arm32_reg_tables.py
// script.
#define REGARM32_GPR_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \
......@@ -69,21 +72,6 @@
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
// S registers 0-15 are scratch, but 16-31 are preserved.
// Regenerate this with the following python script:
//
// def print_sregs():
// for i in xrange(0, 32):
// is_scratch = 1 if i < 16 else 0
// is_preserved = 1 if i >= 16 else 0
// print (' X(Reg_s{regnum:<2}, {regnum:<2}, "s{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 1, 0, 0, ' +
// 'REGLIST2(RegARM32, d{regnum:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum=i, regnum_d=i>>1,
// regnum_q=i>>2, scratch=is_scratch, preserved=is_preserved)
//
// print_sregs()
//
#define REGARM32_FP32_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
......@@ -128,29 +116,6 @@
// registers. In processors supporting the D32 feature this will effectively
// cause double allocation to bias towards allocating "high" D registers, which
// do not alias any S registers.
//
// Regenerate this with the following python script:
// def print_dregs():
// for i in xrange(31, 15, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST1(RegARM32, q{regnum_q:<2}) \\').format(
// regnum=i, regnum_q=i>>1, scratch=is_scratch,
// preserved=is_preserved)
// for i in xrange(15, -1, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST3(RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum_s0 = (i<<1), regnum_s1 = (i<<1) + 1, regnum=i,
// regnum_q=i>>1, scratch=is_scratch, preserved=is_preserved)
//
// print_dregs()
//
#define REGARM32_FP64_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
......@@ -192,31 +157,6 @@
// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch
// (if supported by the D32 feature). Q registers are defined in reverse order
// for the same reason as D registers.
//
// Regenerate this with the following python script:
// def print_qregs():
// for i in xrange(15, 7, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST2(' +
// 'RegARM32, d{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_d0=(i<<1), regnum_d1=(i<<1)+1, regnum=i,
// scratch=is_scratch, preserved=is_preserved)
// for i in xrange(7, -1, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST6(' +
// 'RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 's{regnum_s2:<2}, s{regnum_s3:<2}, ' +
// 'd{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_s0=(i<<2), regnum_s1=(i<<2)+1, regnum_s2=(i<<2)+2,
// regnum_s3=(i<<2)+3, regnum_d0=(i<<1), regnum_d1=(i<<1)+1,
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_qregs()
//
#define REGARM32_VEC128_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \
......
......@@ -415,6 +415,8 @@ public:
Vcvt,
Vdiv,
Veor,
Vmla,
Vmls,
Vmrs,
Vmul,
Vsqrt,
......@@ -436,6 +438,8 @@ public:
/// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
void dump(const Cfg *Func) const override;
......@@ -708,7 +712,7 @@ private:
/// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided
/// by the Floating-point (VFP) extension". They do not set flags.
/// by the floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 {
InstARM32ThreeAddrFP() = delete;
......@@ -796,6 +800,54 @@ private:
static const char *Opcode;
};
/// Instructions of the form x := x op1 (y op2 z). E.g., multiply accumulate.
/// We leave these as unconditional: "ARM deprecates the conditional execution
/// of any instruction encoding provided by the Advanced SIMD Extension that is
/// not also provided by the floating-point (VFP) extension". They do not set
/// flags.
///
/// The destination is also recorded as source 0, so the instruction has three
/// sources: Dest, Src0 and Src1, in that order.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrFP : public InstARM32 {
// Instances are only created through create(); default construction, copy
// construction and copy assignment are deleted.
InstARM32FourAddrFP() = delete;
InstARM32FourAddrFP(const InstARM32FourAddrFP &) = delete;
InstARM32FourAddrFP &operator=(const InstARM32FourAddrFP &) = delete;
public:
// Every operand must be a register.
/// Constructs the instruction with placement new in storage obtained from
/// Func->allocate.
static InstARM32FourAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1) {
return new (Func->allocate<InstARM32FourAddrFP>())
InstARM32FourAddrFP(Func, Dest, Src0, Src1);
}
/// Emits the textual form via the shared emitFourAddrFP routine. Does nothing
/// when BuildDefs::dump() is false.
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitFourAddrFP(Opcode, this, Func);
}
/// Integrated-assembler emission is delegated to emitUsingTextFixup.
void emitIAS(const Cfg *Func) const override { emitUsingTextFixup(Func); }
/// Dumps "<dest> = <opcode>.<type> <dest>, <sources>" to the dump stream.
/// Does nothing when BuildDefs::dump() is false.
void dump(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
Str << Opcode << "." << getDest()->getType() << " ";
dumpDest(Func);
Str << ", ";
dumpSources(Func);
}
/// Kind check: delegates to isClassof with this template's kind K.
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
InstARM32FourAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1)
: InstARM32(Func, K, 3, Dest) {
// Dest is added first so that it is source 0; emitFourAddrFP asserts that
// getSrc(0) == getDest().
addSource(Dest);
addSource(Src0);
addSource(Src1);
}
// Assembly mnemonic; declared here and defined outside the class.
static const char *Opcode;
};
/// Instructions of the form x cmpop y (setting flags).
template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred {
......@@ -855,8 +907,10 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......@@ -1001,8 +1055,8 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
};
/// Pop into a list of GPRs. Technically this can be predicated, but we don't
/// need that functionality.
/// Pops a list of registers. It may be a list of GPRs, or a list of VFP "s"
/// regs, but not both. In any case, the list must be sorted.
class InstARM32Pop : public InstARM32 {
InstARM32Pop() = delete;
InstARM32Pop(const InstARM32Pop &) = delete;
......@@ -1023,8 +1077,8 @@ private:
VarList Dests;
};
/// Push a list of GPRs. Technically this can be predicated, but we don't need
/// that functionality.
/// Pushes a list of registers. Just like Pop (see above), the list may be of
/// GPRs, or VFP "s" registers, but not both.
class InstARM32Push : public InstARM32 {
InstARM32Push() = delete;
InstARM32Push(const InstARM32Push &) = delete;
......
......@@ -876,6 +876,54 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
return true;
}
// The calling convention helper class (TargetARM32::CallingConv) expects the
// following registers to be declared in a certain order, so we have these
// sanity checks to ensure nothing breaks unknowingly.
// TODO(jpp): modify the CallingConv class so it does not rely on any register
// declaration order.
// Q registers: each check requires Reg_<_1> + 1 == Reg_<_0>, i.e. the
// higher-numbered Q register has the enum value immediately below its
// predecessor's (descending declaration order).
#define SANITY_CHECK_QS(_0, _1) \
static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
"ARM32 " #_0 " and " #_1 " registers are declared " \
"incorrectly.")
SANITY_CHECK_QS(q0, q1);
SANITY_CHECK_QS(q1, q2);
SANITY_CHECK_QS(q2, q3);
SANITY_CHECK_QS(q3, q4);
#undef SANITY_CHECK_QS
// D registers: same descending-order requirement as for the Q registers.
#define SANITY_CHECK_DS(_0, _1) \
static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
"ARM32 " #_0 " and " #_1 " registers are declared " \
"incorrectly.")
SANITY_CHECK_DS(d0, d1);
SANITY_CHECK_DS(d1, d2);
SANITY_CHECK_DS(d2, d3);
SANITY_CHECK_DS(d3, d4);
SANITY_CHECK_DS(d4, d5);
SANITY_CHECK_DS(d5, d6);
SANITY_CHECK_DS(d6, d7);
SANITY_CHECK_DS(d7, d8);
#undef SANITY_CHECK_DS
// S registers: the opposite direction. Each check requires
// Reg_<_0> + 1 == Reg_<_1>, i.e. ascending declaration order.
#define SANITY_CHECK_SS(_0, _1) \
static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \
"ARM32 " #_0 " and " #_1 " registers are declared " \
"incorrectly.")
SANITY_CHECK_SS(s0, s1);
SANITY_CHECK_SS(s1, s2);
SANITY_CHECK_SS(s2, s3);
SANITY_CHECK_SS(s3, s4);
SANITY_CHECK_SS(s4, s5);
SANITY_CHECK_SS(s5, s6);
SANITY_CHECK_SS(s6, s7);
SANITY_CHECK_SS(s7, s8);
SANITY_CHECK_SS(s8, s9);
SANITY_CHECK_SS(s9, s10);
SANITY_CHECK_SS(s10, s11);
SANITY_CHECK_SS(s11, s12);
SANITY_CHECK_SS(s12, s13);
SANITY_CHECK_SS(s13, s14);
SANITY_CHECK_SS(s14, s15);
#undef SANITY_CHECK_SS
bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
if (!VFPRegsFree.any()) {
return false;
......@@ -885,9 +933,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
// Q registers are declared in reverse order, so RegARM32::Reg_q0 >
// RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
// Same thing goes for D registers.
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
"ARM32 Q registers are possibly declared incorrectly.");
int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
if (QRegStart >= 0) {
VFPRegsFree.reset(QRegStart, QRegStart + 4);
......@@ -895,9 +940,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
return true;
}
} else if (Ty == IceType_f64) {
static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
"ARM32 D registers are possibly declared incorrectly.");
int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
if (DRegStart >= 0) {
VFPRegsFree.reset(DRegStart, DRegStart + 2);
......@@ -905,9 +947,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
return true;
}
} else {
static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
"ARM32 S registers are possibly declared incorrectly.");
assert(Ty == IceType_f32);
int32_t SReg = VFPRegsFree.find_first();
assert(SReg >= 0);
......@@ -1096,44 +1135,78 @@ void TargetARM32::addProlog(CfgNode *Node) {
// Add push instructions for preserved registers. On ARM, "push" can push a
// whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
// callee-saved float/vector registers. The "vpush" instruction can handle a
// whole list of float/vector registers, but it only handles contiguous
// sequences of registers by specifying the start and the length.
VarList GPRsToPreserve;
GPRsToPreserve.reserve(CalleeSaves.size());
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
// callee-saved float/vector registers.
//
// The "vpush" instruction can handle a whole list of float/vector registers,
// but it only handles contiguous sequences of registers by specifying the
// start and the length.
PreservedGPRs.reserve(CalleeSaves.size());
PreservedSRegs.reserve(CalleeSaves.size());
// Consider FP and LR as callee-save / used as needed.
if (UsesFramePointer) {
if (RegsUsed[RegARM32::Reg_fp]) {
llvm::report_fatal_error("Frame pointer has been used.");
}
CalleeSaves[RegARM32::Reg_fp] = true;
assert(RegsUsed[RegARM32::Reg_fp] == false);
RegsUsed[RegARM32::Reg_fp] = true;
}
if (!MaybeLeafFunc) {
CalleeSaves[RegARM32::Reg_lr] = true;
RegsUsed[RegARM32::Reg_lr] = true;
}
// Make two passes over the used registers. The first pass records all the
// used registers -- and their aliases. Then, we figure out which GPRs and
// VFP S registers should be saved. We don't bother saving D/Q registers
// because their uses are recorded as S regs uses.
llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (RegARM32::isI64RegisterPair(i)) {
// We don't save register pairs explicitly. Instead, we rely on the code
// fake-defing/fake-using each register in the pair.
if (NeedSandboxing && i == RegARM32::Reg_r9) {
// r9 is never updated in sandboxed code.
continue;
}
if (CalleeSaves[i] && RegsUsed[i]) {
if (NeedSandboxing && i == RegARM32::Reg_r9) {
// r9 is never updated in sandboxed code.
ToPreserve |= RegisterAliases[i];
}
}
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
// Each element of RegClasses is a tuple of
//
// <First Register in Class, Last Register in Class, Vector of Save Registers>
//
// We use this tuple to figure out which register we should push/pop during
// prolog/epilog.
using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
const RegClassType RegClasses[] = {
RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
&PreservedGPRs),
RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
&PreservedSRegs)};
for (const auto &RegClass : RegClasses) {
const uint32_t FirstRegInClass = std::get<0>(RegClass);
const uint32_t LastRegInClass = std::get<1>(RegClass);
VarList *const PreservedRegsInClass = std::get<2>(RegClass);
for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
if (!ToPreserve[Reg]) {
continue;
}
++NumCallee;
Variable *PhysicalRegister = getPhysicalRegister(i);
Variable *PhysicalRegister = getPhysicalRegister(Reg);
PreservedRegsSizeBytes +=
typeWidthInBytesOnStack(PhysicalRegister->getType());
GPRsToPreserve.push_back(getPhysicalRegister(i));
PreservedRegsInClass->push_back(PhysicalRegister);
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
if (!GPRsToPreserve.empty())
_push(GPRsToPreserve);
if (!PreservedSRegs.empty())
_push(PreservedSRegs);
if (!PreservedGPRs.empty())
_push(PreservedGPRs);
// Generate "mov FP, SP" if needed.
if (UsesFramePointer) {
......@@ -1160,13 +1233,13 @@ void TargetARM32::addProlog(CfgNode *Node) {
GlobalsSize + LocalsSlotsPaddingBytes;
// Adds the out args space to the stack, and align SP if necessary.
if (NeedsStackAlignment) {
if (!NeedsStackAlignment) {
SpillAreaSizeBytes += MaxOutArgsSizeBytes;
} else {
uint32_t StackOffset = PreservedRegsSizeBytes;
uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset;
} else {
SpillAreaSizeBytes += MaxOutArgsSizeBytes;
}
// Combine fixed alloca with SpillAreaSize.
......@@ -1285,43 +1358,21 @@ void TargetARM32::addEpilog(CfgNode *Node) {
}
}
// Add pop instructions for preserved registers.
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
VarList GPRsToRestore;
GPRsToRestore.reserve(CalleeSaves.size());
// Consider FP and LR as callee-save / used as needed.
if (UsesFramePointer) {
CalleeSaves[RegARM32::Reg_fp] = true;
}
if (!MaybeLeafFunc) {
CalleeSaves[RegARM32::Reg_lr] = true;
}
// Pop registers in ascending order just like push (instead of in reverse
// order).
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (RegARM32::isI64RegisterPair(i)) {
continue;
}
if (CalleeSaves[i] && RegsUsed[i]) {
if (NeedSandboxing && i == RegARM32::Reg_r9) {
continue;
}
GPRsToRestore.push_back(getPhysicalRegister(i));
}
}
if (!GPRsToRestore.empty())
_pop(GPRsToRestore);
if (!PreservedGPRs.empty())
_pop(PreservedGPRs);
if (!PreservedSRegs.empty())
_pop(PreservedSRegs);
if (!Ctx->getFlags().getUseSandboxing())
return;
// Change the original ret instruction into a sandboxed return sequence.
//
// bundle_lock
// bic lr, #0xc000000f
// bx lr
// bundle_unlock
//
// This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
// restrict to the lower 1GB as well.
Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
......@@ -2641,8 +2692,8 @@ bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
} // end of namespace StrengthReduction
} // end of anonymous namespace
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
Variable *Dest = Instr->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
......@@ -2651,14 +2702,14 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Type DestTy = Dest->getType();
if (DestTy == IceType_i1) {
lowerInt1Arithmetic(Inst);
lowerInt1Arithmetic(Instr);
return;
}
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
Operand *Src0 = legalizeUndef(Instr->getSrc(0));
Operand *Src1 = legalizeUndef(Instr->getSrc(1));
if (DestTy == IceType_i64) {
lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
return;
}
......@@ -2679,7 +2730,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// difficult to determine (constant may be moved to a register).
// * Handle floating point arithmetic separately: they require Src1 to be
// legalized to a register.
switch (Inst->getOp()) {
switch (Instr->getOp()) {
default:
break;
case InstArithmetic::Udiv: {
......@@ -2718,6 +2769,14 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
}
case InstArithmetic::Fadd: {
Variable *Src0R = legalizeToReg(Src0);
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_vmla(Src0R, Src1R, Src2R);
_mov(Dest, Src0R);
return;
}
Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R);
_mov(Dest, T);
......@@ -2725,6 +2784,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
}
case InstArithmetic::Fsub: {
Variable *Src0R = legalizeToReg(Src0);
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_vmls(Src0R, Src1R, Src2R);
_mov(Dest, Src0R);
return;
}
Variable *Src1R = legalizeToReg(Src1);
_vsub(T, Src0R, Src1R);
_mov(Dest, T);
......@@ -2748,11 +2814,20 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// Handle everything else here.
Int32Operands Srcs(Src0, Src1);
switch (Inst->getOp()) {
switch (Instr->getOp()) {
case InstArithmetic::_num:
llvm::report_fatal_error("Unknown arithmetic operator");
return;
case InstArithmetic::Add: {
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_mla(T, Src1R, Src2R, Src0R);
_mov(Dest, T);
return;
}
if (Srcs.hasConstOperand()) {
if (!Srcs.immediateIsFlexEncodable() &&
Srcs.negatedImmediateIsFlexEncodable()) {
......@@ -2805,6 +2880,15 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return;
}
case InstArithmetic::Sub: {
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_mls(T, Src1R, Src2R, Src0R);
_mov(Dest, T);
return;
}
if (Srcs.hasConstOperand()) {
if (Srcs.immediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
......@@ -3013,7 +3097,7 @@ TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
InstARM32Label *NewShortCircuitLabel = nullptr;
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
const Inst *Producer = BoolComputations.getProducerOf(Boolean);
const Inst *Producer = Computations.getProducerOf(Boolean);
if (Producer == nullptr) {
// No producer, no problem: just emit code to perform (Boolean & 1) and
......@@ -3234,7 +3318,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
case IceType_void:
break;
case IceType_i1:
assert(BoolComputations.getProducerOf(Dest) == nullptr);
assert(Computations.getProducerOf(Dest) == nullptr);
// Fall-through intended.
case IceType_i8:
case IceType_i16:
......@@ -5309,6 +5393,7 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
return Reg;
}
// TODO(jpp): remove unneeded else clauses in legalize.
Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) {
Type Ty = From->getType();
......@@ -5412,24 +5497,27 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
}
// There should be no constants of vector type (other than undef).
assert(!isVectorType(Ty));
bool CanBeFlex = Allowed & Legal_Flex;
if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
uint32_t RotateAmt;
uint32_t Immed_8;
uint32_t Value = static_cast<uint32_t>(C32->getValue());
// Check if the immediate will fit in a Flexible second operand, if a
// Flexible second operand is allowed. We need to know the exact value,
// so that rules out relocatable constants. Also try the inverse and use
// MVN if possible.
if (CanBeFlex &&
OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
} else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
~Value, &RotateAmt, &Immed_8)) {
auto InvertedFlex =
if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
// The immediate can be encoded as a Flex immediate. We may return the
// Flex operand if the caller has Allow'ed it.
auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
const bool CanBeFlex = Allowed & Legal_Flex;
if (CanBeFlex)
return OpF;
return copyToReg(OpF, RegNum);
} else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
&Immed_8)) {
// Even though the immediate can't be encoded as a Flex operand, its
// inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
// constant with a single instruction.
auto *InvOpF =
OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
Variable *Reg = makeReg(Ty, RegNum);
_mvn(Reg, InvertedFlex);
_mvn(Reg, InvOpF);
return Reg;
} else {
// Do a movw/movt to a register.
......@@ -5486,8 +5574,6 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
return From;
}
// TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
// for a Variable in a Mem operand.
Variable *T = makeReg(Var->getType(), RegNum);
_mov(T, Var);
return T;
......@@ -5688,7 +5774,7 @@ void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
// FlagsWereSet is used to determine whether Boolean was folded or not. If not,
// add an explicit _tst instruction below.
bool FlagsWereSet = false;
if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) {
if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
switch (Producer->getKind()) {
default:
llvm::report_fatal_error("Unexpected producer.");
......@@ -5772,7 +5858,7 @@ TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
SafeBoolChain Safe = SBC_Yes;
if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) {
if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
switch (Producer->getKind()) {
default:
llvm::report_fatal_error("Unexpected producer.");
......@@ -5884,9 +5970,75 @@ bool isValidConsumer(const Inst &Instr) {
}
}
} // end of namespace BoolFolding
namespace FpFolding {
// Returns true only when Instr is an arithmetic instruction whose operation
// is Fmul; every other instruction kind or operation yields false.
bool shouldTrackProducer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Fmul;
}

// Returns true only when Instr is an arithmetic instruction whose operation
// is Fadd or Fsub; every other instruction kind or operation yields false.
bool isValidConsumer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Fadd || Op == InstArithmetic::Fsub;
}
} // end of namespace FpFolding
namespace IntFolding {
// Returns true only when Instr is an arithmetic instruction whose operation
// is Mul; every other instruction kind or operation yields false.
bool shouldTrackProducer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Mul;
}

// Returns true only when Instr is an arithmetic instruction whose operation
// is Add or Sub; every other instruction kind or operation yields false.
bool isValidConsumer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Add || Op == InstArithmetic::Sub;
}
} // end of namespace IntFolding
} // end of anonymous namespace
void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
for (Inst &Instr : Node->getInsts()) {
// Check whether Instr is a valid producer.
Variable *Dest = Instr.getDest();
......@@ -5894,7 +6046,22 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
&& Dest // only instructions with an actual dest var; and
&& Dest->getType() == IceType_i1 // only bool-type dest vars; and
&& BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr));
KnownComputations.emplace(Dest->getIndex(),
ComputationEntry(&Instr, IceType_i1));
}
if (!Instr.isDeleted() // only consider non-deleted instructions; and
&& Dest // only instructions with an actual dest var; and
&& isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
&& FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
KnownComputations.emplace(Dest->getIndex(),
ComputationEntry(&Instr, Dest->getType()));
}
if (!Instr.isDeleted() // only consider non-deleted instructions; and
&& Dest // only instructions with an actual dest var; and
&& Dest->getType() == IceType_i32 // i32 only dest vars; and
&& IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
KnownComputations.emplace(Dest->getIndex(),
ComputationEntry(&Instr, IceType_i32));
}
// Check each src variable against the map.
FOREACH_VAR_IN_INST(Var, Instr) {
......@@ -5905,9 +6072,29 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
}
++ComputationIter->second.NumUses;
if (!BoolFolding::isValidConsumer(Instr)) {
switch (ComputationIter->second.ComputationType) {
default:
KnownComputations.erase(VarNum);
continue;
case IceType_i1:
if (!BoolFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
break;
case IceType_i32:
if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
break;
case IceType_f32:
case IceType_f64:
if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
break;
}
if (Instr.isLastUse(Var)) {
......
......@@ -60,9 +60,9 @@ public:
/// Factory: allocates a new TargetARM32 for Func with `new` and returns the
/// resulting pointer.
static TargetARM32 *create(Cfg *Func) {
  auto *Target = new TargetARM32(Func);
  return Target;
}
void initNodeForLowering(CfgNode *Node) override {
BoolComputations.forgetProducers();
BoolComputations.recordProducers(Node);
BoolComputations.dump(Func);
Computations.forgetProducers();
Computations.recordProducers(Node);
Computations.dump(Func);
}
void translateOm1() override;
......@@ -798,6 +798,12 @@ protected:
/// Creates an InstARM32Vmrs predicated on Pred (CondARM32::AL when omitted)
/// and inserts it into Context.
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
  auto *Vmrs = InstARM32Vmrs::create(Func, Pred);
  Context.insert(Vmrs);
}
/// Creates an InstARM32Vmla (FP multiply and accumulate) over Dest, Src0 and
/// Src1, and inserts it into Context.
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmla = InstARM32Vmla::create(Func, Dest, Src0, Src1);
  Context.insert(Vmla);
}
/// Creates an InstARM32Vmls (FP multiply and subtract) over Dest, Src0 and
/// Src1, and inserts it into Context.
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmls = InstARM32Vmls::create(Func, Dest, Src0, Src1);
  Context.insert(Vmls);
}
/// Creates an InstARM32Vmul over Dest, Src0 and Src1, and inserts it into
/// Context.
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
  auto *Vmul = InstARM32Vmul::create(Func, Dest, Src0, Src1);
  Context.insert(Vmul);
}
......@@ -1019,6 +1025,8 @@ protected:
static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM];
VarList PreservedGPRs;
VarList PreservedSRegs;
/// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3,
......@@ -1081,10 +1089,10 @@ private:
std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
ARM32HelpersPostamble;
class BoolComputationTracker {
class ComputationTracker {
public:
BoolComputationTracker() = default;
~BoolComputationTracker() = default;
ComputationTracker() = default;
~ComputationTracker() = default;
void forgetProducers() { KnownComputations.clear(); }
void recordProducers(CfgNode *Node);
......@@ -1118,9 +1126,9 @@ private:
}
private:
class BoolComputationEntry {
class ComputationEntry {
public:
explicit BoolComputationEntry(Inst *I) : Instr(I) {}
ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
Inst *const Instr;
// Boolean folding is disabled for variables whose live range is multi
// block. We conservatively initialize IsLiveOut to true, and set it to
......@@ -1130,13 +1138,16 @@ private:
// disabled.
bool IsLiveOut = true;
int32_t NumUses = 0;
Type ComputationType;
};
using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>;
BoolComputationMap KnownComputations;
// ComputationMap maps a Variable number to a payload identifying which
// instruction defined it.
using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
ComputationMap KnownComputations;
};
BoolComputationTracker BoolComputations;
ComputationTracker Computations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
......
......@@ -24,96 +24,92 @@ define internal i32 @foo(i32 %x) {
entry:
; ASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: sub sp, sp, ip
; ASM-NEXT: str r0, [sp, #4088]
; ASM-NEXT: # [sp, #4088] = def.pseudo
; DIS-LABEL: 00000000 <foo>:
; DIS-NEXT: 0: e300cffc
; DIS-NEXT: 4: e04dd00c
; DIS-NEXT: 8: e58d0ff8
; IASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; IASM-NEXT: .Lfoo$entry:
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 0: e300cffc
; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 4: e04dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4088]
; DIS-NEXT: 8: e58d0ff8
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4088] = def.pseudo
%mul = mul i32 %x, %x
; ASM-NEXT: ldr r0, [sp, #4088]
; ASM-NEXT: ldr r1, [sp, #4088]
; ASM-NEXT: mul r0, r0, r1
; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: c: e59d0ff8
; DIS-NEXT: 10: e59d1ff8
; DIS-NEXT: 14: e0000190
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r1, [sp, #4088]
; DIS-NEXT: 10: e59d1ff8
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mul r0, r0, r1
; DIS-NEXT: 14: e0000190
; IASM-NEXT: .byte 0x90
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084]
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4084] = def.pseudo
ret i32 %mul
; ASM-NEXT: ldr r0, [sp, #4084]
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: add sp, sp, ip
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e59d0ff4
; DIS-NEXT: 20: e300cffc
; DIS-NEXT: 24: e08dd00c
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 20: e300cffc
; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 24: e08dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
......@@ -121,84 +117,88 @@ entry:
}
define internal void @saveMinus1(i32 %loc) {
; ASM-LABEL:saveMinus1:
; DIS-LABEL:00000030 <saveMinus1>:
; IASM-LABEL:saveMinus1:
define internal void @saveConstI32(i32 %loc) {
; ASM-LABEL:saveConstI32:
; DIS-LABEL:00000030 <saveConstI32>:
; IASM-LABEL:saveConstI32:
entry:
; ASM-NEXT:.LsaveMinus1$entry:
; ASM-NEXT:.LsaveConstI32$entry:
; IASM-NEXT:.LsaveConstI32$entry:
; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 30: e300cff8
; IASM-NEXT:.LsaveMinus1$entry:
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 34: e04dd00c
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: 38: e58d0ff4
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
%loc.asptr = inttoptr i32 %loc to i32*
store i32 -1, i32* %loc.asptr, align 1
store i32 524289, i32* %loc.asptr, align 1
; ASM-NEXT: ldr r0, [sp, #4084]
; DIS-NEXT: 3c: e59d0ff4
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw r1, #65535
; DIS-NEXT: 40: e30f1fff
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movt r1, #65535
; DIS-NEXT: 44: e34f1fff
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw r1, #1
; DIS-NEXT: 40: e3001001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: movt r1, #8
; DIS-NEXT: 44: e3401008
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x40
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: str r1, [r0]
; DIS-NEXT: 48: e5801000
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xe5
ret void
; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 4c: e300cff8
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 50: e08dd00c
; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 54: e12fff1e
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1
}
......@@ -9,7 +9,9 @@
; RUN: -ffunction-sections | FileCheck %s
declare void @call_target()
declare void @call_target1(i32 %arg)
declare void @call_target1(i32 %arg0)
declare void @call_target2(i32 %arg0, i32 %arg1)
declare void @call_target3(i32 %arg0, i32 %arg1, i32 %arg2)
@global_short = internal global [2 x i8] zeroinitializer
; A direct call sequence uses the right mask and register-call sequence.
......@@ -60,7 +62,7 @@ entry:
; CHECK-LABEL: bundle_lock_without_padding
; CHECK: 0: {{.*}} movw
; CHECK-NEXT: movt
; CHECK-NEXT: movw
; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
......@@ -91,18 +93,16 @@ define internal void @bundle_lock_align_to_end_padding_0() {
entry:
call void @call_target()
; bundle boundary
store i16 0, i16* undef, align 1
call void @call_target()
call void @call_target3(i32 1, i32 2, i32 3)
; bundle boundary
ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_0
; CHECK: c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw
; CHECK-NEXT: movw
; CHECK-NEXT: bic [[REG:r[0-9]+]]
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
; CHECK: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target3
; CHECK-NEXT: add sp
; CHECK-NEXT: bic sp, {{.*}} 0xc0000000
; CHECK-NEXT: pop
......@@ -114,41 +114,29 @@ define internal void @bundle_lock_align_to_end_padding_1() {
entry:
call void @call_target()
; bundle boundary
store i32 65536, i32* undef, align 1
; bundle boundary
call void @call_target()
call void @call_target2(i32 1, i32 2)
; bundle boundary
ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_1
; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw [[BASE:r[0-9]+]]
; CHECK-NEXT: movw [[REG:r[0-9]+]], #0
; CHECK-NEXT: movt [[REG]], #1
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: bic [[BASE]], [[BASE]], {{.*}} 0xc0000000
; CHECK-NEXT: str [[REG]], {{[[]}}[[BASE]]
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK-NEXT: bl {{.*}} call_target2
; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
; CHECK-NEXT: {{.*}} bx lr
; Bundle lock align_to_end with two bunches of padding.
define internal void @bundle_lock_align_to_end_padding_2(i32 %target) {
define internal void @bundle_lock_align_to_end_padding_2() {
entry:
call void @call_target1(i32 1)
call void @call_target2(i32 1, i32 2)
; bundle boundary
%__1 = inttoptr i32 %target to void (i32, i32, i32)*
call void %__1(i32 2, i32 3, i32 4)
ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_2
; CHECK: {{[0-9]+}}0:
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK: {{[0-9]+}}c: {{.*}} movw r2, #4
; CHECK: mov
; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f
; CHECK-NEXT: {{.*}} blx [[REG]]
; CHECK-NEXT: bl {{.*}} call_target2
......@@ -89,17 +89,17 @@ entry:
; ARM32-LABEL: pass64BitArg
; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
......@@ -142,7 +142,7 @@ entry:
; ARM32: str [[REG2]], [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i32 @pass64BitUndefArg() {
......@@ -162,9 +162,9 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitUndefArg
; ARM32: sub sp
; ARM32: movw {{.*}}, #0
; ARM32: mov {{.*}}, #0
; ARM32: str
; ARM32: movw {{.*}}, #123
; ARM32: mov {{.*}}, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %padding, i64 %a) {
......
......@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm
; ARM32-OPTM1: movw {{.*}}, #99
; ARM32-OPTM1: mov {{.*}}, #99
; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}}
; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2
; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7
......@@ -141,8 +141,8 @@ entry:
; CHECK-NOT: mul {{[0-9]+}}
;
; ARM32-LABEL: MulImm64
; ARM32: movw {{.*}}, #99
; ARM32: movw {{.*}}, #0
; ARM32: mov {{.*}}, #99
; ARM32: mov {{.*}}, #0
; ARM32: mul r
; ARM32: mla r
; ARM32: umull r
......
......@@ -358,7 +358,7 @@ entry:
; CHECK: cvtsi2sd {{.*[^1]}}
; CHECK: fld
; ARM32-LABEL: signed32ToDoubleConst
; ARM32-DAG: movw [[CONST:r[0-9]+]], #123
; ARM32-DAG: mov [[CONST:r[0-9]+]], #123
; ARM32-DAG: vmov [[SRC:s[0-9]+]], [[CONST]]
; ARM32-DAG: vcvt.f64.s32 {{d[0-9]+}}, [[SRC]]
......
......@@ -1329,7 +1329,7 @@ entry:
; CHECK-LABEL: test_atomic_is_lock_free
; CHECK: mov {{.*}},0x1
; ARM32-LABEL: test_atomic_is_lock_free
; ARM32: movw {{.*}}, #1
; ARM32: mov {{.*}}, #1
define internal i32 @test_not_lock_free(i32 %iptr) {
entry:
......
......@@ -303,8 +303,8 @@ define internal i64 @ret_64bits_shift_left0() {
; CHECK-NEXT: mov eax,0xff
; CHECK-NEXT: mov edx,0xff
; ARM32-LABEL: ret_64bits_shift_left0
; ARM32-NEXT: movw r0, #255
; ARM32-NEXT: movw r1, #255
; ARM32-NEXT: mov r0, #255
; ARM32-NEXT: mov r1, #255
; MIPS32-LABEL: ret_64bits_shift_left0
; MIPS32-NEXT: li v0,255
; MIPS32-NEXT: li v1,255
......
......@@ -130,5 +130,5 @@ sw.default:
ret i32 20
}
; ARM32-LABEL: testSwitchUndef64
; ARM32: movw {{.*}}, #0
; ARM32: movw {{.*}}, #0
; ARM32: mov {{.*}}, #0
; ARM32: mov {{.*}}, #0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment