Commit eb13acc6 by John Porto

Subzero. ARM32. Show FP lowering some love.

After some time of being neglected, this CL improves FP lowering for ARM32. 1) It emits vpush {list} and vpop {list} when possible. 2) It stops saving aliased VFP registers multiple times (yes, sz used to save both D and S registers even when they aliased). 3) It introduces Vmla (FP multiply and accumulate) and Vmls (FP multiply and subtract). (1 + 2) minimally (but positively) affected SPEC. (3) caused a 2% geomean improvement. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1481133002 .
parent e293b5f4
class RegAliases(object):
    """Ordered list of register names that alias one another.

    The string form is a REGLIST<N>(RegARM32, name, ...) expression, where
    <N> is the number of names held.
    """

    def __init__(self, *Aliases):
        # Keep a private list copy of the variadic names.
        self.Aliases = [Name for Name in Aliases]

    def __str__(self):
        Joined = ', '.join(self.Aliases)
        return 'REGLIST%d(RegARM32, %s)' % (len(self.Aliases), Joined)
def _ArgumentNames(Method):
import inspect
return (ArgName for ArgName in inspect.getargspec(Method).args
if ArgName != 'self')
class RegFeatures(object):
    """Flag set (plus alias list) describing a single register.

    Features holds the constructor's argument names in declaration order and
    FeaturesDict maps each of those names to the value that was passed in.
    The string form is the values joined with ', ' in that same order.
    """

    def __init__(self, IsScratch=0, IsPreserved=0, IsStackPtr=0, IsFramePtr=0,
                 IsInt=0, IsI64Pair=0, IsFP32=0, IsFP64=0, IsVec128=0,
                 Aliases=None):
        # Capture the argument values before any helper locals exist.
        Args = locals()

        InGPRClass = IsInt or IsI64Pair
        InVFPClass = IsFP32 or IsFP64 or IsVec128

        # A register belongs to at most one register class.
        assert not IsInt or not IsI64Pair
        assert not IsFP32 or not IsFP64
        assert not IsFP32 or not IsVec128
        assert not IsFP64 or not IsVec128
        assert not (InGPRClass and InVFPClass)
        # The frame pointer must be an integer register; the stack pointer
        # must not belong to any register class at all.
        assert IsInt or not IsFramePtr
        assert not IsStackPtr or not (InGPRClass or InVFPClass)
        # Scratch and preserved are mutually exclusive.
        assert not IsScratch or not IsPreserved

        self.Features = list(_ArgumentNames(self.__init__))
        self.FeaturesDict = dict(
            (Feature, Args[Feature]) for Feature in self.Features)

    def __str__(self):
        Rendered = [str(self.FeaturesDict[Feature])
                    for Feature in self.Features]
        return ', '.join(Rendered)

    def Aliases(self):
        """Return the value that was passed as the Aliases argument."""
        return self.FeaturesDict['Aliases']

    def LivesInGPR(self):
        """True if any integer-side flag is set, or if no VFP flag is set."""
        for IntFeature in ('IsInt', 'IsI64Pair', 'IsStackPtr', 'IsFramePtr'):
            if self.FeaturesDict[IntFeature]:
                return True
        return not self.LivesInVFP()

    def LivesInVFP(self):
        """True if any of IsFP32, IsFP64 or IsVec128 is set."""
        Flags = self.FeaturesDict
        return bool(Flags['IsFP32'] or Flags['IsFP64'] or Flags['IsVec128'])
class Reg(object):
    """One register: its name, its encoding and its RegFeatures.

    Name is the register name (e.g. 'r0'), Encode is its numeric encoding and
    the remaining keyword arguments are forwarded to RegFeatures. The string
    form is 'Reg_<Name>, <Encode>, <Features>'.
    """

    def __init__(self, Name, Encode, **Features):
        self.Name = Name
        self.Encode = Encode
        self.Features = RegFeatures(**Features)

    def __str__(self):
        return 'Reg_{Name}, {Encode}, {Features}'.format(Name=self.Name,
            Encode=self.Encode, Features=self.Features)

    def IsAnAliasOf(self, Other):
        """Return True if Other's name appears in this register's alias list.

        The lookup must use Other.Name: testing self.Name would always
        succeed, because every register lists itself among its own aliases,
        and the check would then verify nothing.
        """
        return Other.Name in self.Features.Aliases().Aliases
# Note: The following tables break the usual 80-col on purpose -- it is easier
# to read the register tables if each register entry is contained on a single
# line.
# Core integer registers, one entry per register, listed in encoding order
# (0-15). Each alias list names the register itself and, where one exists, the
# 64-bit register pair that contains it.
GPRs = [
Reg( 'r0', 0, IsScratch=1, IsInt=1, Aliases=RegAliases( 'r0', 'r0r1')),
Reg( 'r1', 1, IsScratch=1, IsInt=1, Aliases=RegAliases( 'r1', 'r0r1')),
Reg( 'r2', 2, IsScratch=1, IsInt=1, Aliases=RegAliases( 'r2', 'r2r3')),
Reg( 'r3', 3, IsScratch=1, IsInt=1, Aliases=RegAliases( 'r3', 'r2r3')),
Reg( 'r4', 4, IsPreserved=1, IsInt=1, Aliases=RegAliases( 'r4', 'r4r5')),
Reg( 'r5', 5, IsPreserved=1, IsInt=1, Aliases=RegAliases( 'r5', 'r4r5')),
Reg( 'r6', 6, IsPreserved=1, IsInt=1, Aliases=RegAliases( 'r6', 'r6r7')),
Reg( 'r7', 7, IsPreserved=1, IsInt=1, Aliases=RegAliases( 'r7', 'r6r7')),
Reg( 'r8', 8, IsPreserved=1, IsInt=1, Aliases=RegAliases( 'r8', 'r8r9')),
# NOTE(review): r9 is the only register in r0-r10 with IsInt=0 (and r8r9 has
# IsI64Pair=0) -- presumably r9 is deliberately kept out of allocation; confirm.
Reg( 'r9', 9, IsPreserved=1, IsInt=0, Aliases=RegAliases( 'r9', 'r8r9')),
Reg('r10', 10, IsPreserved=1, IsInt=1, Aliases=RegAliases('r10', 'r10fp')),
Reg( 'fp', 11, IsPreserved=1, IsInt=1, IsFramePtr=1, Aliases=RegAliases( 'fp', 'r10fp')),
Reg( 'ip', 12, IsScratch=1, IsInt=1, Aliases=RegAliases( 'ip')),
# sp, lr and pc carry IsInt=0 and alias only themselves.
Reg( 'sp', 13, IsScratch=0, IsInt=0, IsStackPtr=1, Aliases=RegAliases( 'sp')),
Reg( 'lr', 14, IsScratch=0, IsInt=0, Aliases=RegAliases( 'lr')),
Reg( 'pc', 15, IsScratch=0, IsInt=0, Aliases=RegAliases( 'pc')),
]
# 64-bit integer register pairs. Each pair is encoded with the number of its
# first (even) register and aliases itself plus both of its halves.
I64Pairs = [
Reg( 'r0r1', 0, IsScratch=1, IsI64Pair=1, Aliases=RegAliases( 'r0r1', 'r0', 'r1')),
Reg( 'r2r3', 2, IsScratch=1, IsI64Pair=1, Aliases=RegAliases( 'r2r3', 'r2', 'r3')),
Reg( 'r4r5', 4, IsPreserved=1, IsI64Pair=1, Aliases=RegAliases( 'r4r5', 'r4', 'r5')),
Reg( 'r6r7', 6, IsPreserved=1, IsI64Pair=1, Aliases=RegAliases( 'r6r7', 'r6', 'r7')),
# NOTE(review): r8r9 is the only pair with IsI64Pair=0, matching r9's IsInt=0
# in the GPR table -- presumably intentional; confirm.
Reg( 'r8r9', 8, IsPreserved=1, IsI64Pair=0, Aliases=RegAliases( 'r8r9', 'r8', 'r9')),
Reg('r10fp', 10, IsPreserved=1, IsI64Pair=1, Aliases=RegAliases('r10fp', 'r10', 'fp')),
]
# Single-precision VFP registers s0-s31. s0-s15 are scratch and s16-s31 are
# preserved. Register s<n> aliases itself, d<n/2> and q<n/4>.
#
# The s31 entry previously named d14 as its D alias; s30/s31 are the two halves
# of d15 (see the d15 and q7 entries in the other tables), so it is d15 here.
FP32 = [
    Reg( 's0', 0, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's0', 'd0' , 'q0')),
    Reg( 's1', 1, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's1', 'd0' , 'q0')),
    Reg( 's2', 2, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's2', 'd1' , 'q0')),
    Reg( 's3', 3, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's3', 'd1' , 'q0')),
    Reg( 's4', 4, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's4', 'd2' , 'q1')),
    Reg( 's5', 5, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's5', 'd2' , 'q1')),
    Reg( 's6', 6, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's6', 'd3' , 'q1')),
    Reg( 's7', 7, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's7', 'd3' , 'q1')),
    Reg( 's8', 8, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's8', 'd4' , 'q2')),
    Reg( 's9', 9, IsScratch=1, IsFP32=1, Aliases=RegAliases( 's9', 'd4' , 'q2')),
    Reg('s10', 10, IsScratch=1, IsFP32=1, Aliases=RegAliases('s10', 'd5' , 'q2')),
    Reg('s11', 11, IsScratch=1, IsFP32=1, Aliases=RegAliases('s11', 'd5' , 'q2')),
    Reg('s12', 12, IsScratch=1, IsFP32=1, Aliases=RegAliases('s12', 'd6' , 'q3')),
    Reg('s13', 13, IsScratch=1, IsFP32=1, Aliases=RegAliases('s13', 'd6' , 'q3')),
    Reg('s14', 14, IsScratch=1, IsFP32=1, Aliases=RegAliases('s14', 'd7' , 'q3')),
    Reg('s15', 15, IsScratch=1, IsFP32=1, Aliases=RegAliases('s15', 'd7' , 'q3')),
    Reg('s16', 16, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s16', 'd8' , 'q4')),
    Reg('s17', 17, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s17', 'd8' , 'q4')),
    Reg('s18', 18, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s18', 'd9' , 'q4')),
    Reg('s19', 19, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s19', 'd9' , 'q4')),
    Reg('s20', 20, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s20', 'd10', 'q5')),
    Reg('s21', 21, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s21', 'd10', 'q5')),
    Reg('s22', 22, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s22', 'd11', 'q5')),
    Reg('s23', 23, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s23', 'd11', 'q5')),
    Reg('s24', 24, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s24', 'd12', 'q6')),
    Reg('s25', 25, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s25', 'd12', 'q6')),
    Reg('s26', 26, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s26', 'd13', 'q6')),
    Reg('s27', 27, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s27', 'd13', 'q6')),
    Reg('s28', 28, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s28', 'd14', 'q7')),
    Reg('s29', 29, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s29', 'd14', 'q7')),
    Reg('s30', 30, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s30', 'd15', 'q7')),
    Reg('s31', 31, IsPreserved=1, IsFP32=1, Aliases=RegAliases('s31', 'd15', 'q7')),
]
# Double-precision VFP registers d0-d31. d0-d7 and d16-d31 are scratch, d8-d15
# are preserved. Register d<n> aliases itself and q<n/2>; d0-d15 additionally
# alias their two halves s<2n> and s<2n+1>, while d16-d31 list no S aliases.
#
# Two entries previously carried wrong S halves: d11 listed (s22, s24) and d14
# listed (s28, s28). They are (s22, s23) and (s28, s29), which is what the
# s23/s29 entries of the FP32 table and the q5/q7 entries of the Vec128 table
# already say.
FP64 = [
    Reg( 'd0', 0, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd0', 'q0', 's0', 's1')),
    Reg( 'd1', 1, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd1', 'q0', 's2', 's3')),
    Reg( 'd2', 2, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd2', 'q1', 's4', 's5')),
    Reg( 'd3', 3, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd3', 'q1', 's6', 's7')),
    Reg( 'd4', 4, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd4', 'q2', 's8', 's9')),
    Reg( 'd5', 5, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd5', 'q2', 's10', 's11')),
    Reg( 'd6', 6, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd6', 'q3', 's12', 's13')),
    Reg( 'd7', 7, IsScratch=1, IsFP64=1, Aliases=RegAliases( 'd7', 'q3', 's14', 's15')),
    Reg( 'd8', 8, IsPreserved=1, IsFP64=1, Aliases=RegAliases( 'd8', 'q4', 's16', 's17')),
    Reg( 'd9', 9, IsPreserved=1, IsFP64=1, Aliases=RegAliases( 'd9', 'q4', 's18', 's19')),
    Reg('d10', 10, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d10', 'q5', 's20', 's21')),
    Reg('d11', 11, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d11', 'q5', 's22', 's23')),
    Reg('d12', 12, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d12', 'q6', 's24', 's25')),
    Reg('d13', 13, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d13', 'q6', 's26', 's27')),
    Reg('d14', 14, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d14', 'q7', 's28', 's29')),
    Reg('d15', 15, IsPreserved=1, IsFP64=1, Aliases=RegAliases('d15', 'q7', 's30', 's31')),
    Reg('d16', 16, IsScratch=1, IsFP64=1, Aliases=RegAliases('d16', 'q8')),
    Reg('d17', 17, IsScratch=1, IsFP64=1, Aliases=RegAliases('d17', 'q8')),
    Reg('d18', 18, IsScratch=1, IsFP64=1, Aliases=RegAliases('d18', 'q9')),
    Reg('d19', 19, IsScratch=1, IsFP64=1, Aliases=RegAliases('d19', 'q9')),
    Reg('d20', 20, IsScratch=1, IsFP64=1, Aliases=RegAliases('d20', 'q10')),
    Reg('d21', 21, IsScratch=1, IsFP64=1, Aliases=RegAliases('d21', 'q10')),
    Reg('d22', 22, IsScratch=1, IsFP64=1, Aliases=RegAliases('d22', 'q11')),
    Reg('d23', 23, IsScratch=1, IsFP64=1, Aliases=RegAliases('d23', 'q11')),
    Reg('d24', 24, IsScratch=1, IsFP64=1, Aliases=RegAliases('d24', 'q12')),
    Reg('d25', 25, IsScratch=1, IsFP64=1, Aliases=RegAliases('d25', 'q12')),
    Reg('d26', 26, IsScratch=1, IsFP64=1, Aliases=RegAliases('d26', 'q13')),
    Reg('d27', 27, IsScratch=1, IsFP64=1, Aliases=RegAliases('d27', 'q13')),
    Reg('d28', 28, IsScratch=1, IsFP64=1, Aliases=RegAliases('d28', 'q14')),
    Reg('d29', 29, IsScratch=1, IsFP64=1, Aliases=RegAliases('d29', 'q14')),
    Reg('d30', 30, IsScratch=1, IsFP64=1, Aliases=RegAliases('d30', 'q15')),
    Reg('d31', 31, IsScratch=1, IsFP64=1, Aliases=RegAliases('d31', 'q15')),
]
# 128-bit vector registers q0-q15. q0-q3 and q8-q15 are scratch, q4-q7 are
# preserved. Register q<n> aliases itself and its two halves d<2n>, d<2n+1>;
# q0-q7 additionally alias the four S registers s<4n>..s<4n+3>, while q8-q15
# list no S aliases.
Vec128 = [
Reg( 'q0', 0, IsScratch=1, IsVec128=1, Aliases=RegAliases( 'q0', 'd0', 'd1', 's0', 's1', 's2', 's3')),
Reg( 'q1', 1, IsScratch=1, IsVec128=1, Aliases=RegAliases( 'q1', 'd2', 'd3', 's4', 's5', 's6', 's7')),
Reg( 'q2', 2, IsScratch=1, IsVec128=1, Aliases=RegAliases( 'q2', 'd4', 'd5', 's8', 's9', 's10', 's11')),
Reg( 'q3', 3, IsScratch=1, IsVec128=1, Aliases=RegAliases( 'q3', 'd6', 'd7', 's12', 's13', 's14', 's15')),
Reg( 'q4', 4, IsPreserved=1, IsVec128=1, Aliases=RegAliases( 'q4', 'd8', 'd9', 's16', 's17', 's18', 's19')),
Reg( 'q5', 5, IsPreserved=1, IsVec128=1, Aliases=RegAliases( 'q5', 'd10', 'd11', 's20', 's21', 's22', 's23')),
Reg( 'q6', 6, IsPreserved=1, IsVec128=1, Aliases=RegAliases( 'q6', 'd12', 'd13', 's24', 's25', 's26', 's27')),
Reg( 'q7', 7, IsPreserved=1, IsVec128=1, Aliases=RegAliases( 'q7', 'd14', 'd15', 's28', 's29', 's30', 's31')),
Reg( 'q8', 8, IsScratch=1, IsVec128=1, Aliases=RegAliases( 'q8', 'd16', 'd17')),
Reg( 'q9', 9, IsScratch=1, IsVec128=1, Aliases=RegAliases( 'q9', 'd18', 'd19')),
Reg('q10', 10, IsScratch=1, IsVec128=1, Aliases=RegAliases('q10', 'd20', 'd21')),
Reg('q11', 11, IsScratch=1, IsVec128=1, Aliases=RegAliases('q11', 'd22', 'd23')),
Reg('q12', 12, IsScratch=1, IsVec128=1, Aliases=RegAliases('q12', 'd24', 'd25')),
Reg('q13', 13, IsScratch=1, IsVec128=1, Aliases=RegAliases('q13', 'd26', 'd27')),
Reg('q14', 14, IsScratch=1, IsVec128=1, Aliases=RegAliases('q14', 'd28', 'd29')),
Reg('q15', 15, IsScratch=1, IsVec128=1, Aliases=RegAliases('q15', 'd30', 'd31')),
]
def _reverse(x):
return sorted(x, key=lambda x: x.Encode, reverse=True)
# The register classes in emission order. The D and Q tables are emitted in
# descending encoding order (see _reverse).
RegClasses = [GPRs, I64Pairs, FP32, _reverse(FP64), _reverse(Vec128)]

# Index every register by name; a name may appear in only one class. The loop
# variable is deliberately not called `Reg`, so the Reg class is not shadowed.
AllRegs = {}
for RegClass in RegClasses:
    for Register in RegClass:
        assert Register.Name not in AllRegs
        AllRegs[Register.Name] = Register

# Sanity-check the alias tables: every alias must name a known register, must
# list this register back, and must live in the same register file.
for RegClass in RegClasses:
    for Register in RegClass:
        for Alias in Register.Features.Aliases().Aliases:
            assert Alias in AllRegs, '%s has unknown alias %s' % (
                Register, Alias)
            Other = AllRegs[Alias]
            assert Other.IsAnAliasOf(Register), '%s VS %s' % (Register, Other)
            assert (Other.Features.LivesInGPR() ==
                    Register.Features.LivesInGPR()), '%s VS %s' % (
                        Register, Other)
            assert (Other.Features.LivesInVFP() ==
                    Register.Features.LivesInVFP()), '%s VS %s' % (
                        Register, Other)

# Emit one X(...) row per register, with a blank line after each class. The
# single-argument parenthesised form of print behaves identically as a
# Python 2 statement and as a Python 3 function call.
for RegClass in RegClasses:
    for Register in RegClass:
        print('X({Reg})'.format(Reg=Register))
    print('')
...@@ -981,7 +981,7 @@ bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, ...@@ -981,7 +981,7 @@ bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
if (Printed) if (Printed)
Str << ","; Str << ",";
else else
Str << " \t# END="; Str << " \t@ END=";
Var->emit(Func); Var->emit(Func);
Printed = true; Printed = true;
} }
......
...@@ -185,6 +185,22 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst, ...@@ -185,6 +185,22 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
Inst->getSrc(1)->emit(Func); Inst->getSrc(1)->emit(Func);
} }
// Emits the textual assembly for a four-address FP instruction as
// "<Opcode><width>\t<dest>, <src1>, <src2>". Source 0 must be the destination
// itself, so it is not printed a second time. Does nothing unless
// BuildDefs::dump() is enabled.
void InstARM32::emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
                               const Cfg *Func) {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(Inst->getSrcSize() == 3);
  assert(Inst->getSrc(0) == Inst->getDest());
  auto *Dest = Inst->getDest();
  Str << "\t" << Opcode << getVecWidthString(Dest->getType()) << "\t";
  Dest->emit(Func);
  // The remaining two operands, each preceded by a separator.
  for (int Index = 1; Index <= 2; ++Index) {
    Str << ", ";
    Inst->getSrc(Index)->emit(Func);
  }
}
void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst, void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func) { const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -571,18 +587,43 @@ IceString InstARM32Label::getName(const Cfg *Func) const { ...@@ -571,18 +587,43 @@ IceString InstARM32Label::getName(const Cfg *Func) const {
return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number); return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
} }
namespace {
// Requirements for Push/Pop:
// 1) All the Variables have the same type;
// 2) All the variables have registers assigned to them.
void validatePushOrPopRegisterListOrDie(const VarList &RegList) {
Type PreviousTy = IceType_void;
for (Variable *Reg : RegList) {
if (PreviousTy != IceType_void && Reg->getType() != PreviousTy) {
llvm::report_fatal_error("Type mismatch when popping/pushing "
"registers.");
}
if (!Reg->hasReg()) {
llvm::report_fatal_error("Push/pop operand does not have a register "
"assigned to it.");
}
PreviousTy = Reg->getType();
}
}
} // end of anonymous namespace
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs. Also, a pop // Track modifications to Dests separately via FakeDefs. Also, a pop
// instruction affects the stack pointer and so it should not be allowed to // instruction affects the stack pointer and so it should not be allowed to
// be automatically dead-code eliminated. This is automatic since we leave // be automatically dead-code eliminated. This is automatic since we leave
// the Dest as nullptr. // the Dest as nullptr.
validatePushOrPopRegisterListOrDie(Dests);
} }
InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs) InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs)
: InstARM32(Func, InstARM32::Push, Srcs.size(), nullptr) { : InstARM32(Func, InstARM32::Push, Srcs.size(), nullptr) {
for (Variable *Source : Srcs) validatePushOrPopRegisterListOrDie(Srcs);
for (Variable *Source : Srcs) {
addSource(Source); addSource(Source);
}
} }
InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source) InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source)
...@@ -736,8 +777,10 @@ template <> const char *InstARM32Udiv::Opcode = "udiv"; ...@@ -736,8 +777,10 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP // FP
template <> const char *InstARM32Vadd::Opcode = "vadd"; template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vdiv::Opcode = "vdiv"; template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Veor::Opcode = "veor"; template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
...@@ -1216,51 +1259,74 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const { ...@@ -1216,51 +1259,74 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
namespace {
// Returns true when After's register number is exactly one greater than
// Before's. Both variables must already have a register assigned.
bool isAssignedConsecutiveRegisters(Variable *Before, Variable *After) {
  assert(Before->hasReg());
  assert(After->hasReg());
  const auto ExpectedNext = Before->getRegNum() + 1;
  return ExpectedNext == After->getRegNum();
}
} // end of anonymous namespace
void InstARM32Pop::emit(const Cfg *Func) const { void InstARM32Pop::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
SizeT IntegerCount = 0;
for (const Operand *Op : Dests) { const SizeT DestSize = Dests.size();
if (isScalarIntegerType(Op->getType())) { if (DestSize == 0) {
++IntegerCount; assert(false && "Empty pop list");
} return;
} }
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
if (IntegerCount != 0) { Variable *Reg = Dests[0];
if (isScalarIntegerType(Reg->getType())) {
// GPR push.
Str << "\t" Str << "\t"
<< "pop" "pop"
<< "\t{"; "\t{";
bool PrintComma = false; Reg->emit(Func);
for (const Operand *Op : Dests) { for (SizeT i = 1; i < DestSize; ++i) {
if (isScalarIntegerType(Op->getType())) { Str << ", ";
if (PrintComma) Reg = Dests[i];
Str << ", "; Reg->emit(Func);
Op->emit(Func);
PrintComma = true;
}
} }
Str << "}"; Str << "}";
NeedNewline = true; return;
} }
for (const Operand *Op : Dests) { // VFP "s" reg push.
if (isScalarIntegerType(Op->getType())) SizeT End = DestSize - 1;
continue; SizeT Start = DestSize - 1;
if (NeedNewline) { Reg = Dests[DestSize - 1];
Str << "\n"; Str << "\t"
"vpop"
"\t{";
for (SizeT i = 2; i <= DestSize; ++i) {
Variable *PreviousReg = Dests[DestSize - i];
if (!isAssignedConsecutiveRegisters(PreviousReg, Reg)) {
Dests[Start]->emit(Func);
for (SizeT j = Start + 1; j <= End; ++j) {
Str << ", ";
Dests[j]->emit(Func);
}
startNextInst(Func); startNextInst(Func);
NeedNewline = false; Str << "}\n\t"
"vpop"
"\t{";
End = DestSize - i;
} }
Str << "\t" Reg = PreviousReg;
<< "vpop" Start = DestSize - i;
<< "\t{"; }
Op->emit(Func); Dests[Start]->emit(Func);
Str << "}"; for (SizeT j = Start + 1; j <= End; ++j) {
NeedNewline = true; Str << ", ";
Dests[j]->emit(Func);
} }
assert(NeedNewline); // caller will add the newline Str << "}";
} }
void InstARM32Pop::emitIAS(const Cfg *Func) const { void InstARM32Pop::emitIAS(const Cfg *Func) const {
...@@ -1310,56 +1376,55 @@ void InstARM32Pop::dump(const Cfg *Func) const { ...@@ -1310,56 +1376,55 @@ void InstARM32Pop::dump(const Cfg *Func) const {
} }
void InstARM32Push::emit(const Cfg *Func) const { void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
SizeT IntegerCount = 0;
for (SizeT i = 0; i < getSrcSize(); ++i) { // Push can't be emitted if there are no registers to save. This should never
if (isScalarIntegerType(getSrc(i)->getType())) { // happen, but if it does, we don't need to bring Subzero down -- we just skip
++IntegerCount; // emitting the push instruction (and maybe emit a nop?) The assert() is here
} // so that we can detect this error during development.
const SizeT SrcSize = getSrcSize();
if (SrcSize == 0) {
assert(false && "Empty push list");
return;
} }
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
bool NeedNewline = false;
for (SizeT i = getSrcSize(); i > 0; --i) { Variable *Reg = llvm::cast<Variable>(getSrc(0));
Operand *Op = getSrc(i - 1); if (isScalarIntegerType(Reg->getType())) {
if (isScalarIntegerType(Op->getType())) // GPR push.
continue;
if (NeedNewline) {
Str << "\n";
startNextInst(Func);
NeedNewline = false;
}
Str << "\t" Str << "\t"
<< "vpush" "push"
<< "\t{"; "\t{";
Op->emit(Func); Reg->emit(Func);
for (SizeT i = 1; i < SrcSize; ++i) {
Str << ", ";
getSrc(i)->emit(Func);
}
Str << "}"; Str << "}";
NeedNewline = true; return;
} }
if (IntegerCount != 0) {
if (NeedNewline) { // VFP "s" reg push.
Str << "\n"; Str << "\t"
"vpush"
"\t{";
Reg->emit(Func);
for (SizeT i = 1; i < SrcSize; ++i) {
Variable *NextReg = llvm::cast<Variable>(getSrc(i));
if (isAssignedConsecutiveRegisters(Reg, NextReg)) {
Str << ", ";
} else {
startNextInst(Func); startNextInst(Func);
NeedNewline = false; Str << "}\n\t"
} "vpush"
Str << "\t" "\t{";
<< "push"
<< "\t{";
bool PrintComma = false;
for (SizeT i = 0; i < getSrcSize(); ++i) {
Operand *Op = getSrc(i);
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
} }
Str << "}"; Reg = NextReg;
NeedNewline = true; Reg->emit(Func);
} }
assert(NeedNewline); // caller will add the newline Str << "}";
} }
void InstARM32Push::emitIAS(const Cfg *Func) const { void InstARM32Push::emitIAS(const Cfg *Func) const {
...@@ -1925,8 +1990,10 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>; ...@@ -1925,8 +1990,10 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>; template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>; template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>; template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32ThreeAddrFP<InstARM32::Vmla>;
template class InstARM32ThreeAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
......
...@@ -28,6 +28,9 @@ ...@@ -28,6 +28,9 @@
// LR is not considered isInt to avoid being allocated as a register. It is // LR is not considered isInt to avoid being allocated as a register. It is
// technically preserved, but save/restore is handled separately, based on // technically preserved, but save/restore is handled separately, based on
// whether or not the function MaybeLeafFunc. // whether or not the function MaybeLeafFunc.
//
// The register tables can be generated using the gen_arm32_reg_tables.py
// script.
#define REGARM32_GPR_TABLE \ #define REGARM32_GPR_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \ /* val, encode, name, scratch,preserved,stackptr,frameptr, \
...@@ -69,21 +72,6 @@ ...@@ -69,21 +72,6 @@
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) // isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
// S registers 0-15 are scratch, but 16-31 are preserved. // S registers 0-15 are scratch, but 16-31 are preserved.
// Regenerate this with the following python script:
//
// def print_sregs():
// for i in xrange(0, 32):
// is_scratch = 1 if i < 16 else 0
// is_preserved = 1 if i >= 16 else 0
// print (' X(Reg_s{regnum:<2}, {regnum:<2}, "s{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 1, 0, 0, ' +
// 'REGLIST2(RegARM32, d{regnum:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum=i, regnum_d=i>>1,
// regnum_q=i>>2, scratch=is_scratch, preserved=is_preserved)
//
// print_sregs()
//
#define REGARM32_FP32_TABLE \ #define REGARM32_FP32_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \ /* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \ isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
...@@ -128,29 +116,6 @@ ...@@ -128,29 +116,6 @@
// registers. In processors supporting the D32 feature this will effectively // registers. In processors supporting the D32 feature this will effectively
// cause double allocation to bias towards allocating "high" D registers, which // cause double allocation to bias towards allocating "high" D registers, which
// do not alias any S registers. // do not alias any S registers.
//
// Regenerate this with the following python script:
// def print_dregs():
// for i in xrange(31, 15, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST1(RegARM32, q{regnum_q:<2}) \\').format(
// regnum=i, regnum_q=i>>1, scratch=is_scratch,
// preserved=is_preserved)
// for i in xrange(15, -1, -1):
// is_scratch = 1 if (i < 8 or i >= 16) else 0
// is_preserved = 1 if (8 <= i and i < 16) else 0
// print (' X(Reg_d{regnum:<2}, {regnum:<2}, "d{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 1, 0, ' +
// 'REGLIST3(RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 'q{regnum_q:<2})) \\').format(
// regnum_s0 = (i<<1), regnum_s1 = (i<<1) + 1, regnum=i,
// regnum_q=i>>1, scratch=is_scratch, preserved=is_preserved)
//
// print_dregs()
//
#define REGARM32_FP64_TABLE \ #define REGARM32_FP64_TABLE \
/* val, encode, name, scratch,preserved,stackptr,frameptr, \ /* val, encode, name, scratch,preserved,stackptr,frameptr, \
isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \ isInt,isI64Pair,isFP32,isFP64,isVec128, alias_init */ \
...@@ -192,31 +157,6 @@ ...@@ -192,31 +157,6 @@
// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch // Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch
// (if supported by the D32 feature). Q registers are defined in reverse order // (if supported by the D32 feature). Q registers are defined in reverse order
// for the same reason as D registers. // for the same reason as D registers.
//
// Regenerate this with the following python script:
// def print_qregs():
// for i in xrange(15, 7, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST2(' +
// 'RegARM32, d{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_d0=(i<<1), regnum_d1=(i<<1)+1, regnum=i,
// scratch=is_scratch, preserved=is_preserved)
// for i in xrange(7, -1, -1):
// is_scratch = 1 if (i < 4 or i >= 8) else 0
// is_preserved = 1 if (4 <= i and i < 8) else 0
// print (' X(Reg_q{regnum:<2}, {regnum:<2}, "q{regnum}", ' +
// '{scratch}, {preserved}, 0, 0, 0, 0, 0, 0, 1, REGLIST6(' +
// 'RegARM32, s{regnum_s0:<2}, s{regnum_s1:<2}, ' +
// 's{regnum_s2:<2}, s{regnum_s3:<2}, ' +
// 'd{regnum_d0:<2}, d{regnum_d1:<2})) \\').format(
// regnum_s0=(i<<2), regnum_s1=(i<<2)+1, regnum_s2=(i<<2)+2,
// regnum_s3=(i<<2)+3, regnum_d0=(i<<1), regnum_d1=(i<<1)+1,
// regnum=i, scratch=is_scratch, preserved=is_preserved)
//
// print_qregs()
//
#define REGARM32_VEC128_TABLE \ #define REGARM32_VEC128_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \ /* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \ isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \
......
...@@ -415,6 +415,8 @@ public: ...@@ -415,6 +415,8 @@ public:
Vcvt, Vcvt,
Vdiv, Vdiv,
Veor, Veor,
Vmla,
Vmls,
Vmrs, Vmrs,
Vmul, Vmul,
Vsqrt, Vsqrt,
...@@ -436,6 +438,8 @@ public: ...@@ -436,6 +438,8 @@ public:
/// Shared emit routines for common forms of instructions. /// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst, static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func); const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Inst,
const Cfg *Func);
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
...@@ -708,7 +712,7 @@ private: ...@@ -708,7 +712,7 @@ private:
/// Instructions of the form x := y op z, for vector/FP. We leave these as /// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction /// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided /// encoding provided by the Advanced SIMD Extension that is not also provided
/// by the Floating-point (VFP) extension". They do not set flags. /// by the floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 { class InstARM32ThreeAddrFP : public InstARM32 {
InstARM32ThreeAddrFP() = delete; InstARM32ThreeAddrFP() = delete;
...@@ -796,6 +800,54 @@ private: ...@@ -796,6 +800,54 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
/// Instructions of the form x := x op1 (y op2 z). E.g., multiply accumulate.
/// We leave these as unconditional: "ARM deprecates the conditional execution
/// of any instruction encoding provided by the Advanced SIMD Extension that is
/// not also provided by the floating-point (VFP) extension". They do not set
/// flags.
///
/// The destination is both read and written, so it is recorded as source 0 in
/// addition to being the Dest; the two explicit operands are sources 1 and 2.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrFP : public InstARM32 {
// Instances are only made through create(); default construction, copying and
// assignment are disabled.
InstARM32FourAddrFP() = delete;
InstARM32FourAddrFP(const InstARM32FourAddrFP &) = delete;
InstARM32FourAddrFP &operator=(const InstARM32FourAddrFP &) = delete;
public:
// Every operand must be a register.
/// Constructs the instruction in storage obtained from Func->allocate<>().
static InstARM32FourAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1) {
return new (Func->allocate<InstARM32FourAddrFP>())
InstARM32FourAddrFP(Func, Dest, Src0, Src1);
}
/// Emits the textual form through the shared emitFourAddrFP() routine; does
/// nothing when BuildDefs::dump() is disabled.
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitFourAddrFP(Opcode, this, Func);
}
/// The integrated-assembler path defers to emitUsingTextFixup().
void emitIAS(const Cfg *Func) const override { emitUsingTextFixup(Func); }
/// Dumps "<dest> = <opcode>.<type> <dest>, <sources>"; does nothing when
/// BuildDefs::dump() is disabled.
void dump(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
Str << Opcode << "." << getDest()->getType() << " ";
dumpDest(Func);
Str << ", ";
dumpSources(Func);
}
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
// Reserves three source slots: Dest itself (the value being accumulated
// into), followed by Src0 and Src1.
InstARM32FourAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1)
: InstARM32(Func, K, 3, Dest) {
addSource(Dest);
addSource(Src0);
addSource(Src1);
}
// Assembly mnemonic; declared here and defined elsewhere per instantiation.
static const char *Opcode;
};
/// Instructions of the form x cmpop y (setting flags). /// Instructions of the form x cmpop y (setting flags).
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32CmpLike : public InstARM32Pred { class InstARM32CmpLike : public InstARM32Pred {
...@@ -855,8 +907,10 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>; ...@@ -855,8 +907,10 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>; using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>; using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>; using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
...@@ -1001,8 +1055,8 @@ private: ...@@ -1001,8 +1055,8 @@ private:
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget); InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
}; };
/// Pop into a list of GPRs. Technically this can be predicated, but we don't /// Pops a list of registers. It may be a list of GPRs, or a list of VFP "s"
/// need that functionality. /// regs, but not both. In any case, the list must be sorted.
class InstARM32Pop : public InstARM32 { class InstARM32Pop : public InstARM32 {
InstARM32Pop() = delete; InstARM32Pop() = delete;
InstARM32Pop(const InstARM32Pop &) = delete; InstARM32Pop(const InstARM32Pop &) = delete;
...@@ -1023,8 +1077,8 @@ private: ...@@ -1023,8 +1077,8 @@ private:
VarList Dests; VarList Dests;
}; };
/// Push a list of GPRs. Technically this can be predicated, but we don't need /// Pushes a list of registers. Just like Pop (see above), the list may be of
/// that functionality. /// GPRs, or VFP "s" registers, but not both.
class InstARM32Push : public InstARM32 { class InstARM32Push : public InstARM32 {
InstARM32Push() = delete; InstARM32Push() = delete;
InstARM32Push(const InstARM32Push &) = delete; InstARM32Push(const InstARM32Push &) = delete;
......
...@@ -876,6 +876,54 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { ...@@ -876,6 +876,54 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
return true; return true;
} }
// The calling convention helper class (TargetARM32::CallingConv) expects the
// following registers to be declared in a certain order, so we have these
// sanity checks to ensure nothing breaks unknowingly.
// TODO(jpp): modify the CallingConv class so it does not rely on any register
// declaration order.

// Q registers: each check requires Reg_<_1> + 1 == Reg_<_0>, i.e. the
// higher-numbered Q register has the smaller enum value (descending order).
#define SANITY_CHECK_QS(_0, _1) \
static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
"ARM32 " #_0 " and " #_1 " registers are declared " \
"incorrectly.")
SANITY_CHECK_QS(q0, q1);
SANITY_CHECK_QS(q1, q2);
SANITY_CHECK_QS(q2, q3);
SANITY_CHECK_QS(q3, q4);
#undef SANITY_CHECK_QS
// D registers: same condition as for the Q registers, Reg_<_1> + 1 ==
// Reg_<_0> (descending order).
#define SANITY_CHECK_DS(_0, _1) \
static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
"ARM32 " #_0 " and " #_1 " registers are declared " \
"incorrectly.")
SANITY_CHECK_DS(d0, d1);
SANITY_CHECK_DS(d1, d2);
SANITY_CHECK_DS(d2, d3);
SANITY_CHECK_DS(d3, d4);
SANITY_CHECK_DS(d4, d5);
SANITY_CHECK_DS(d5, d6);
SANITY_CHECK_DS(d6, d7);
SANITY_CHECK_DS(d7, d8);
#undef SANITY_CHECK_DS
// S registers: the condition is reversed, Reg_<_0> + 1 == Reg_<_1>, i.e. the
// S registers are declared in ascending order.
#define SANITY_CHECK_SS(_0, _1) \
static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \
"ARM32 " #_0 " and " #_1 " registers are declared " \
"incorrectly.")
SANITY_CHECK_SS(s0, s1);
SANITY_CHECK_SS(s1, s2);
SANITY_CHECK_SS(s2, s3);
SANITY_CHECK_SS(s3, s4);
SANITY_CHECK_SS(s4, s5);
SANITY_CHECK_SS(s5, s6);
SANITY_CHECK_SS(s6, s7);
SANITY_CHECK_SS(s7, s8);
SANITY_CHECK_SS(s8, s9);
SANITY_CHECK_SS(s9, s10);
SANITY_CHECK_SS(s10, s11);
SANITY_CHECK_SS(s11, s12);
SANITY_CHECK_SS(s12, s13);
SANITY_CHECK_SS(s13, s14);
SANITY_CHECK_SS(s14, s15);
#undef SANITY_CHECK_SS
bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
if (!VFPRegsFree.any()) { if (!VFPRegsFree.any()) {
return false; return false;
...@@ -885,9 +933,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { ...@@ -885,9 +933,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
// Q registers are declared in reverse order, so RegARM32::Reg_q0 > // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
// RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
// Same thing goes for D registers. // Same thing goes for D registers.
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
"ARM32 Q registers are possibly declared incorrectly.");
int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
if (QRegStart >= 0) { if (QRegStart >= 0) {
VFPRegsFree.reset(QRegStart, QRegStart + 4); VFPRegsFree.reset(QRegStart, QRegStart + 4);
...@@ -895,9 +940,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { ...@@ -895,9 +940,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
return true; return true;
} }
} else if (Ty == IceType_f64) { } else if (Ty == IceType_f64) {
static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
"ARM32 D registers are possibly declared incorrectly.");
int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
if (DRegStart >= 0) { if (DRegStart >= 0) {
VFPRegsFree.reset(DRegStart, DRegStart + 2); VFPRegsFree.reset(DRegStart, DRegStart + 2);
...@@ -905,9 +947,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { ...@@ -905,9 +947,6 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
return true; return true;
} }
} else { } else {
static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
"ARM32 S registers are possibly declared incorrectly.");
assert(Ty == IceType_f32); assert(Ty == IceType_f32);
int32_t SReg = VFPRegsFree.find_first(); int32_t SReg = VFPRegsFree.find_first();
assert(SReg >= 0); assert(SReg >= 0);
...@@ -1096,44 +1135,78 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -1096,44 +1135,78 @@ void TargetARM32::addProlog(CfgNode *Node) {
// Add push instructions for preserved registers. On ARM, "push" can push a // Add push instructions for preserved registers. On ARM, "push" can push a
// whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
// callee-saved float/vector registers. The "vpush" instruction can handle a // callee-saved float/vector registers.
// whole list of float/vector registers, but it only handles contiguous //
// sequences of registers by specifying the start and the length. // The "vpush" instruction can handle a whole list of float/vector registers,
VarList GPRsToPreserve; // but it only handles contiguous sequences of registers by specifying the
GPRsToPreserve.reserve(CalleeSaves.size()); // start and the length.
uint32_t NumCallee = 0; PreservedGPRs.reserve(CalleeSaves.size());
size_t PreservedRegsSizeBytes = 0; PreservedSRegs.reserve(CalleeSaves.size());
// Consider FP and LR as callee-save / used as needed. // Consider FP and LR as callee-save / used as needed.
if (UsesFramePointer) { if (UsesFramePointer) {
if (RegsUsed[RegARM32::Reg_fp]) {
llvm::report_fatal_error("Frame pointer has been used.");
}
CalleeSaves[RegARM32::Reg_fp] = true; CalleeSaves[RegARM32::Reg_fp] = true;
assert(RegsUsed[RegARM32::Reg_fp] == false);
RegsUsed[RegARM32::Reg_fp] = true; RegsUsed[RegARM32::Reg_fp] = true;
} }
if (!MaybeLeafFunc) { if (!MaybeLeafFunc) {
CalleeSaves[RegARM32::Reg_lr] = true; CalleeSaves[RegARM32::Reg_lr] = true;
RegsUsed[RegARM32::Reg_lr] = true; RegsUsed[RegARM32::Reg_lr] = true;
} }
// Make two passes over the used registers. The first pass records all the
// used registers -- and their aliases. Then, we figure out which GPRs and
// VFP S registers should be saved. We don't bother saving D/Q registers
// because their uses are recorded as S regs uses.
llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);
for (SizeT i = 0; i < CalleeSaves.size(); ++i) { for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (RegARM32::isI64RegisterPair(i)) { if (NeedSandboxing && i == RegARM32::Reg_r9) {
// We don't save register pairs explicitly. Instead, we rely on the code // r9 is never updated in sandboxed code.
// fake-defing/fake-using each register in the pair.
continue; continue;
} }
if (CalleeSaves[i] && RegsUsed[i]) { if (CalleeSaves[i] && RegsUsed[i]) {
if (NeedSandboxing && i == RegARM32::Reg_r9) { ToPreserve |= RegisterAliases[i];
// r9 is never updated in sandboxed code. }
}
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
// RegClasses is a tuple of
//
// <First Register in Class, Last Register in Class, Vector of Save Registers>
//
// We use this tuple to figure out which register we should push/pop during
// prolog/epilog.
using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
const RegClassType RegClasses[] = {
RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
&PreservedGPRs),
RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
&PreservedSRegs)};
for (const auto &RegClass : RegClasses) {
const uint32_t FirstRegInClass = std::get<0>(RegClass);
const uint32_t LastRegInClass = std::get<1>(RegClass);
VarList *const PreservedRegsInClass = std::get<2>(RegClass);
for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
if (!ToPreserve[Reg]) {
continue; continue;
} }
++NumCallee; ++NumCallee;
Variable *PhysicalRegister = getPhysicalRegister(i); Variable *PhysicalRegister = getPhysicalRegister(Reg);
PreservedRegsSizeBytes += PreservedRegsSizeBytes +=
typeWidthInBytesOnStack(PhysicalRegister->getType()); typeWidthInBytesOnStack(PhysicalRegister->getType());
GPRsToPreserve.push_back(getPhysicalRegister(i)); PreservedRegsInClass->push_back(PhysicalRegister);
} }
} }
Ctx->statsUpdateRegistersSaved(NumCallee); Ctx->statsUpdateRegistersSaved(NumCallee);
if (!GPRsToPreserve.empty()) if (!PreservedSRegs.empty())
_push(GPRsToPreserve); _push(PreservedSRegs);
if (!PreservedGPRs.empty())
_push(PreservedGPRs);
// Generate "mov FP, SP" if needed. // Generate "mov FP, SP" if needed.
if (UsesFramePointer) { if (UsesFramePointer) {
...@@ -1160,13 +1233,13 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -1160,13 +1233,13 @@ void TargetARM32::addProlog(CfgNode *Node) {
GlobalsSize + LocalsSlotsPaddingBytes; GlobalsSize + LocalsSlotsPaddingBytes;
// Adds the out args space to the stack, and align SP if necessary. // Adds the out args space to the stack, and align SP if necessary.
if (NeedsStackAlignment) { if (!NeedsStackAlignment) {
SpillAreaSizeBytes += MaxOutArgsSizeBytes;
} else {
uint32_t StackOffset = PreservedRegsSizeBytes; uint32_t StackOffset = PreservedRegsSizeBytes;
uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset; SpillAreaSizeBytes = StackSize - StackOffset;
} else {
SpillAreaSizeBytes += MaxOutArgsSizeBytes;
} }
// Combine fixed alloca with SpillAreaSize. // Combine fixed alloca with SpillAreaSize.
...@@ -1285,43 +1358,21 @@ void TargetARM32::addEpilog(CfgNode *Node) { ...@@ -1285,43 +1358,21 @@ void TargetARM32::addEpilog(CfgNode *Node) {
} }
} }
// Add pop instructions for preserved registers. if (!PreservedGPRs.empty())
llvm::SmallBitVector CalleeSaves = _pop(PreservedGPRs);
getRegisterSet(RegSet_CalleeSave, RegSet_None); if (!PreservedSRegs.empty())
VarList GPRsToRestore; _pop(PreservedSRegs);
GPRsToRestore.reserve(CalleeSaves.size());
// Consider FP and LR as callee-save / used as needed.
if (UsesFramePointer) {
CalleeSaves[RegARM32::Reg_fp] = true;
}
if (!MaybeLeafFunc) {
CalleeSaves[RegARM32::Reg_lr] = true;
}
// Pop registers in ascending order just like push (instead of in reverse
// order).
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (RegARM32::isI64RegisterPair(i)) {
continue;
}
if (CalleeSaves[i] && RegsUsed[i]) {
if (NeedSandboxing && i == RegARM32::Reg_r9) {
continue;
}
GPRsToRestore.push_back(getPhysicalRegister(i));
}
}
if (!GPRsToRestore.empty())
_pop(GPRsToRestore);
if (!Ctx->getFlags().getUseSandboxing()) if (!Ctx->getFlags().getUseSandboxing())
return; return;
// Change the original ret instruction into a sandboxed return sequence. // Change the original ret instruction into a sandboxed return sequence.
//
// bundle_lock // bundle_lock
// bic lr, #0xc000000f // bic lr, #0xc000000f
// bx lr // bx lr
// bundle_unlock // bundle_unlock
//
// This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
// restrict to the lower 1GB as well. // restrict to the lower 1GB as well.
Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
...@@ -2641,8 +2692,8 @@ bool tryToOptimize(uint32_t Src, SizeT *NumOperations, ...@@ -2641,8 +2692,8 @@ bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
} // end of namespace StrengthReduction } // end of namespace StrengthReduction
} // end of anonymous namespace } // end of anonymous namespace
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
Variable *Dest = Inst->getDest(); Variable *Dest = Instr->getDest();
if (Dest->isRematerializable()) { if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest)); Context.insert(InstFakeDef::create(Func, Dest));
...@@ -2651,14 +2702,14 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2651,14 +2702,14 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Type DestTy = Dest->getType(); Type DestTy = Dest->getType();
if (DestTy == IceType_i1) { if (DestTy == IceType_i1) {
lowerInt1Arithmetic(Inst); lowerInt1Arithmetic(Instr);
return; return;
} }
Operand *Src0 = legalizeUndef(Inst->getSrc(0)); Operand *Src0 = legalizeUndef(Instr->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1)); Operand *Src1 = legalizeUndef(Instr->getSrc(1));
if (DestTy == IceType_i64) { if (DestTy == IceType_i64) {
lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
return; return;
} }
...@@ -2679,7 +2730,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2679,7 +2730,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// difficult to determine (constant may be moved to a register). // difficult to determine (constant may be moved to a register).
// * Handle floating point arithmetic separately: they require Src1 to be // * Handle floating point arithmetic separately: they require Src1 to be
// legalized to a register. // legalized to a register.
switch (Inst->getOp()) { switch (Instr->getOp()) {
default: default:
break; break;
case InstArithmetic::Udiv: { case InstArithmetic::Udiv: {
...@@ -2718,6 +2769,14 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2718,6 +2769,14 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
case InstArithmetic::Fadd: { case InstArithmetic::Fadd: {
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_vmla(Src0R, Src1R, Src2R);
_mov(Dest, Src0R);
return;
}
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vadd(T, Src0R, Src1R); _vadd(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
...@@ -2725,6 +2784,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2725,6 +2784,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
case InstArithmetic::Fsub: { case InstArithmetic::Fsub: {
Variable *Src0R = legalizeToReg(Src0); Variable *Src0R = legalizeToReg(Src0);
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_vmls(Src0R, Src1R, Src2R);
_mov(Dest, Src0R);
return;
}
Variable *Src1R = legalizeToReg(Src1); Variable *Src1R = legalizeToReg(Src1);
_vsub(T, Src0R, Src1R); _vsub(T, Src0R, Src1R);
_mov(Dest, T); _mov(Dest, T);
...@@ -2748,11 +2814,20 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2748,11 +2814,20 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// Handle everything else here. // Handle everything else here.
Int32Operands Srcs(Src0, Src1); Int32Operands Srcs(Src0, Src1);
switch (Inst->getOp()) { switch (Instr->getOp()) {
case InstArithmetic::_num: case InstArithmetic::_num:
llvm::report_fatal_error("Unknown arithmetic operator"); llvm::report_fatal_error("Unknown arithmetic operator");
return; return;
case InstArithmetic::Add: { case InstArithmetic::Add: {
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_mla(T, Src1R, Src2R, Src0R);
_mov(Dest, T);
return;
}
if (Srcs.hasConstOperand()) { if (Srcs.hasConstOperand()) {
if (!Srcs.immediateIsFlexEncodable() && if (!Srcs.immediateIsFlexEncodable() &&
Srcs.negatedImmediateIsFlexEncodable()) { Srcs.negatedImmediateIsFlexEncodable()) {
...@@ -2805,6 +2880,15 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2805,6 +2880,15 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return; return;
} }
case InstArithmetic::Sub: { case InstArithmetic::Sub: {
if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
_mls(T, Src1R, Src2R, Src0R);
_mov(Dest, T);
return;
}
if (Srcs.hasConstOperand()) { if (Srcs.hasConstOperand()) {
if (Srcs.immediateIsFlexEncodable()) { if (Srcs.immediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this); Variable *Src0R = Srcs.src0R(this);
...@@ -3013,7 +3097,7 @@ TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( ...@@ -3013,7 +3097,7 @@ TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
InstARM32Label *NewShortCircuitLabel = nullptr; InstARM32Label *NewShortCircuitLabel = nullptr;
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
const Inst *Producer = BoolComputations.getProducerOf(Boolean); const Inst *Producer = Computations.getProducerOf(Boolean);
if (Producer == nullptr) { if (Producer == nullptr) {
// No producer, no problem: just do emit code to perform (Boolean & 1) and // No producer, no problem: just do emit code to perform (Boolean & 1) and
...@@ -3234,7 +3318,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -3234,7 +3318,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
case IceType_void: case IceType_void:
break; break;
case IceType_i1: case IceType_i1:
assert(BoolComputations.getProducerOf(Dest) == nullptr); assert(Computations.getProducerOf(Dest) == nullptr);
// Fall-through intended. // Fall-through intended.
case IceType_i8: case IceType_i8:
case IceType_i16: case IceType_i16:
...@@ -5309,6 +5393,7 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { ...@@ -5309,6 +5393,7 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
return Reg; return Reg;
} }
// TODO(jpp): remove unneeded else clauses in legalize.
Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) { int32_t RegNum) {
Type Ty = From->getType(); Type Ty = From->getType();
...@@ -5412,24 +5497,27 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -5412,24 +5497,27 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
} }
// There should be no constants of vector type (other than undef). // There should be no constants of vector type (other than undef).
assert(!isVectorType(Ty)); assert(!isVectorType(Ty));
bool CanBeFlex = Allowed & Legal_Flex;
if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
uint32_t RotateAmt; uint32_t RotateAmt;
uint32_t Immed_8; uint32_t Immed_8;
uint32_t Value = static_cast<uint32_t>(C32->getValue()); uint32_t Value = static_cast<uint32_t>(C32->getValue());
// Check if the immediate will fit in a Flexible second operand, if a if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
// Flexible second operand is allowed. We need to know the exact value, // The immediate can be encoded as a Flex immediate. We may return the
// so that rules out relocatable constants. Also try the inverse and use // Flex operand if the caller has Allow'ed it.
// MVN if possible. auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
if (CanBeFlex && const bool CanBeFlex = Allowed & Legal_Flex;
OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { if (CanBeFlex)
return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); return OpF;
} else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( return copyToReg(OpF, RegNum);
~Value, &RotateAmt, &Immed_8)) { } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
auto InvertedFlex = &Immed_8)) {
// Even though the immediate can't be encoded as a Flex operand, its
// inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
// constant with a single instruction.
auto *InvOpF =
OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
Variable *Reg = makeReg(Ty, RegNum); Variable *Reg = makeReg(Ty, RegNum);
_mvn(Reg, InvertedFlex); _mvn(Reg, InvOpF);
return Reg; return Reg;
} else { } else {
// Do a movw/movt to a register. // Do a movw/movt to a register.
...@@ -5486,8 +5574,6 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -5486,8 +5574,6 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
return From; return From;
} }
// TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
// for a Variable in a Mem operand.
Variable *T = makeReg(Var->getType(), RegNum); Variable *T = makeReg(Var->getType(), RegNum);
_mov(T, Var); _mov(T, Var);
return T; return T;
...@@ -5688,7 +5774,7 @@ void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean, ...@@ -5688,7 +5774,7 @@ void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
// FlagsWereSet is used to determine whether Boolean was folded or not. If not, // FlagsWereSet is used to determine whether Boolean was folded or not. If not,
// add an explicit _tst instruction below. // add an explicit _tst instruction below.
bool FlagsWereSet = false; bool FlagsWereSet = false;
if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
switch (Producer->getKind()) { switch (Producer->getKind()) {
default: default:
llvm::report_fatal_error("Unexpected producer."); llvm::report_fatal_error("Unexpected producer.");
...@@ -5772,7 +5858,7 @@ TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, ...@@ -5772,7 +5858,7 @@ TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
SafeBoolChain Safe = SBC_Yes; SafeBoolChain Safe = SBC_Yes;
if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
switch (Producer->getKind()) { switch (Producer->getKind()) {
default: default:
llvm::report_fatal_error("Unexpected producer."); llvm::report_fatal_error("Unexpected producer.");
...@@ -5884,9 +5970,75 @@ bool isValidConsumer(const Inst &Instr) { ...@@ -5884,9 +5970,75 @@ bool isValidConsumer(const Inst &Instr) {
} }
} }
} // end of namespace BoolFolding } // end of namespace BoolFolding
namespace FpFolding {
/// Returns true when Instr is a producer that should be tracked: an
/// arithmetic instruction whose operation is Fmul. Everything else is
/// rejected.
bool shouldTrackProducer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Fmul;
}

/// Returns true when Instr is an acceptable consumer of a tracked producer:
/// an arithmetic instruction whose operation is Fadd or Fsub. Everything else
/// is rejected.
bool isValidConsumer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Fadd || Op == InstArithmetic::Fsub;
}
} // end of namespace FpFolding
namespace IntFolding {
/// Returns true when Instr is a producer that should be tracked: an
/// arithmetic instruction whose operation is Mul. Everything else is
/// rejected.
bool shouldTrackProducer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Mul;
}

/// Returns true when Instr is an acceptable consumer of a tracked producer:
/// an arithmetic instruction whose operation is Add or Sub. Everything else
/// is rejected.
bool isValidConsumer(const Inst &Instr) {
  if (Instr.getKind() != Inst::Arithmetic)
    return false;
  const auto Op = llvm::cast<InstArithmetic>(&Instr)->getOp();
  return Op == InstArithmetic::Add || Op == InstArithmetic::Sub;
}
} // end of namespace IntFolding
} // end of anonymous namespace } // end of anonymous namespace
void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
for (Inst &Instr : Node->getInsts()) { for (Inst &Instr : Node->getInsts()) {
// Check whether Instr is a valid producer. // Check whether Instr is a valid producer.
Variable *Dest = Instr.getDest(); Variable *Dest = Instr.getDest();
...@@ -5894,7 +6046,22 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { ...@@ -5894,7 +6046,22 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
&& Dest // only instructions with an actual dest var; and && Dest // only instructions with an actual dest var; and
&& Dest->getType() == IceType_i1 // only bool-type dest vars; and && Dest->getType() == IceType_i1 // only bool-type dest vars; and
&& BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); KnownComputations.emplace(Dest->getIndex(),
ComputationEntry(&Instr, IceType_i1));
}
if (!Instr.isDeleted() // only consider non-deleted instructions; and
&& Dest // only instructions with an actual dest var; and
&& isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
&& FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
KnownComputations.emplace(Dest->getIndex(),
ComputationEntry(&Instr, Dest->getType()));
}
if (!Instr.isDeleted() // only consider non-deleted instructions; and
&& Dest // only instructions with an actual dest var; and
&& Dest->getType() == IceType_i32 // i32 only dest vars; and
&& IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
KnownComputations.emplace(Dest->getIndex(),
ComputationEntry(&Instr, IceType_i32));
} }
// Check each src variable against the map. // Check each src variable against the map.
FOREACH_VAR_IN_INST(Var, Instr) { FOREACH_VAR_IN_INST(Var, Instr) {
...@@ -5905,9 +6072,29 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { ...@@ -5905,9 +6072,29 @@ void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
} }
++ComputationIter->second.NumUses; ++ComputationIter->second.NumUses;
if (!BoolFolding::isValidConsumer(Instr)) { switch (ComputationIter->second.ComputationType) {
default:
KnownComputations.erase(VarNum); KnownComputations.erase(VarNum);
continue; continue;
case IceType_i1:
if (!BoolFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
break;
case IceType_i32:
if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
break;
case IceType_f32:
case IceType_f64:
if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
KnownComputations.erase(VarNum);
continue;
}
break;
} }
if (Instr.isLastUse(Var)) { if (Instr.isLastUse(Var)) {
......
...@@ -60,9 +60,9 @@ public: ...@@ -60,9 +60,9 @@ public:
static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); } static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); }
void initNodeForLowering(CfgNode *Node) override { void initNodeForLowering(CfgNode *Node) override {
BoolComputations.forgetProducers(); Computations.forgetProducers();
BoolComputations.recordProducers(Node); Computations.recordProducers(Node);
BoolComputations.dump(Func); Computations.dump(Func);
} }
void translateOm1() override; void translateOm1() override;
...@@ -798,6 +798,12 @@ protected: ...@@ -798,6 +798,12 @@ protected:
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vmrs::create(Func, Pred)); Context.insert(InstARM32Vmrs::create(Func, Pred));
} }
/// Creates an InstARM32Vmla from (Dest, Src0, Src1) and inserts it into
/// Context.
// NOTE(review): presumably the instruction accumulates Src0 * Src1 into Dest
// (Dest is read as well as written) -- confirm against InstARM32FourAddrFP.
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmla::create(Func, Dest, Src0, Src1));
}
/// Creates an InstARM32Vmls from (Dest, Src0, Src1) and inserts it into
/// Context.
// NOTE(review): presumably the instruction subtracts Src0 * Src1 from Dest
// (Dest is read as well as written) -- confirm against InstARM32FourAddrFP.
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmls::create(Func, Dest, Src0, Src1));
}
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
} }
...@@ -1019,6 +1025,8 @@ protected: ...@@ -1019,6 +1025,8 @@ protected:
static llvm::SmallBitVector ScratchRegs; static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM]; VarList PhysicalRegisters[IceType_NUM];
VarList PreservedGPRs;
VarList PreservedSRegs;
/// Helper class that understands the Calling Convention and register /// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3, /// assignments. The first few integer type parameters can use r0-r3,
...@@ -1081,10 +1089,10 @@ private: ...@@ -1081,10 +1089,10 @@ private:
std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
ARM32HelpersPostamble; ARM32HelpersPostamble;
class BoolComputationTracker { class ComputationTracker {
public: public:
BoolComputationTracker() = default; ComputationTracker() = default;
~BoolComputationTracker() = default; ~ComputationTracker() = default;
void forgetProducers() { KnownComputations.clear(); } void forgetProducers() { KnownComputations.clear(); }
void recordProducers(CfgNode *Node); void recordProducers(CfgNode *Node);
...@@ -1118,9 +1126,9 @@ private: ...@@ -1118,9 +1126,9 @@ private:
} }
private: private:
class BoolComputationEntry { class ComputationEntry {
public: public:
explicit BoolComputationEntry(Inst *I) : Instr(I) {} ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
Inst *const Instr; Inst *const Instr;
// Boolean folding is disabled for variables whose live range is multi // Boolean folding is disabled for variables whose live range is multi
// block. We conservatively initialize IsLiveOut to true, and set it to // block. We conservatively initialize IsLiveOut to true, and set it to
...@@ -1130,13 +1138,16 @@ private: ...@@ -1130,13 +1138,16 @@ private:
// disabled. // disabled.
bool IsLiveOut = true; bool IsLiveOut = true;
int32_t NumUses = 0; int32_t NumUses = 0;
Type ComputationType;
}; };
using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; // ComputationMap maps a Variable number to a payload identifying which
BoolComputationMap KnownComputations; // instruction defined it.
using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
ComputationMap KnownComputations;
}; };
BoolComputationTracker BoolComputations; ComputationTracker Computations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound // without specifying a physical register. This is needed for creating unbound
......
...@@ -24,96 +24,92 @@ define internal i32 @foo(i32 %x) { ...@@ -24,96 +24,92 @@ define internal i32 @foo(i32 %x) {
entry: entry:
; ASM-LABEL: foo: ; ASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: sub sp, sp, ip
; ASM-NEXT: str r0, [sp, #4088]
; ASM-NEXT: # [sp, #4088] = def.pseudo
; DIS-LABEL: 00000000 <foo>: ; DIS-LABEL: 00000000 <foo>:
; DIS-NEXT: 0: e300cffc
; DIS-NEXT: 4: e04dd00c
; DIS-NEXT: 8: e58d0ff8
; IASM-LABEL: foo: ; IASM-LABEL: foo:
; ASM-NEXT: .Lfoo$entry:
; IASM-NEXT: .Lfoo$entry: ; IASM-NEXT: .Lfoo$entry:
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 0: e300cffc
; IASM-NEXT: .byte 0xfc ; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 4: e04dd00c
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d ; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4088]
; DIS-NEXT: 8: e58d0ff8
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4088] = def.pseudo
%mul = mul i32 %x, %x %mul = mul i32 %x, %x
; ASM-NEXT: ldr r0, [sp, #4088] ; ASM-NEXT: ldr r0, [sp, #4088]
; ASM-NEXT: ldr r1, [sp, #4088]
; ASM-NEXT: mul r0, r0, r1
; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: c: e59d0ff8 ; DIS-NEXT: c: e59d0ff8
; DIS-NEXT: 10: e59d1ff8
; DIS-NEXT: 14: e0000190
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r1, [sp, #4088]
; DIS-NEXT: 10: e59d1ff8
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mul r0, r0, r1
; DIS-NEXT: 14: e0000190
; IASM-NEXT: .byte 0x90 ; IASM-NEXT: .byte 0x90
; IASM-NEXT: .byte 0x1 ; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084]
; DIS-NEXT: 18: e58d0ff4
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: # [sp, #4084] = def.pseudo
ret i32 %mul ret i32 %mul
; ASM-NEXT: ldr r0, [sp, #4084] ; ASM-NEXT: ldr r0, [sp, #4084]
; ******* Movw case to check *******
; ASM-NEXT: movw ip, #4092
; ASM-NEXT: add sp, sp, ip
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e59d0ff4 ; DIS-NEXT: 1c: e59d0ff4
; DIS-NEXT: 20: e300cffc
; DIS-NEXT: 24: e08dd00c
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw ip, #4092
; DIS-NEXT: 20: e300cffc
; IASM-NEXT: .byte 0xfc ; IASM-NEXT: .byte 0xfc
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 24: e08dd00c
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr
; DIS-NEXT: 28: e12fff1e
; IASM-NEXT: .byte 0x1e ; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x2f
...@@ -121,84 +117,88 @@ entry: ...@@ -121,84 +117,88 @@ entry:
} }
define internal void @saveMinus1(i32 %loc) { define internal void @saveConstI32(i32 %loc) {
; ASM-LABEL:saveMinus1: ; ASM-LABEL:saveConstI32:
; DIS-LABEL:00000030 <saveMinus1>: ; DIS-LABEL:00000030 <saveConstI32>:
; IASM-LABEL:saveMinus1: ; IASM-LABEL:saveConstI32:
entry: entry:
; ASM-NEXT:.LsaveMinus1$entry: ; ASM-NEXT:.LsaveConstI32$entry:
; IASM-NEXT:.LsaveConstI32$entry:
; ASM-NEXT: movw ip, #4088 ; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 30: e300cff8 ; DIS-NEXT: 30: e300cff8
; IASM-NEXT:.LsaveMinus1$entry: ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe3
; ASM-NEXT: sub sp, sp, ip ; ASM-NEXT: sub sp, sp, ip
; DIS-NEXT: 34: e04dd00c ; DIS-NEXT: 34: e04dd00c
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: str r0, [sp, #4084] ; ASM-NEXT: str r0, [sp, #4084]
; ASM-NEXT: # [sp, #4084] = def.pseudo ; ASM-NEXT: # [sp, #4084] = def.pseudo
; DIS-NEXT: 38: e58d0ff4 ; DIS-NEXT: 38: e58d0ff4
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x4d ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe5
%loc.asptr = inttoptr i32 %loc to i32* %loc.asptr = inttoptr i32 %loc to i32*
store i32 -1, i32* %loc.asptr, align 1 store i32 524289, i32* %loc.asptr, align 1
; ASM-NEXT: ldr r0, [sp, #4084] ; ASM-NEXT: ldr r0, [sp, #4084]
; DIS-NEXT: 3c: e59d0ff4 ; DIS-NEXT: 3c: e59d0ff4
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0xf4
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0xf
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: movw r1, #65535 ; ASM-NEXT: movw r1, #1
; DIS-NEXT: 40: e30f1fff ; DIS-NEXT: 40: e3001001
; IASM-NEXT: .byte 0xf4 ; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: movt r1, #65535 ; ASM-NEXT: movt r1, #8
; DIS-NEXT: 44: e34f1fff ; DIS-NEXT: 44: e3401008
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xf ; IASM-NEXT: .byte 0x40
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: str r1, [r0] ; ASM-NEXT: str r1, [r0]
; DIS-NEXT: 48: e5801000 ; DIS-NEXT: 48: e5801000
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x4f ; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe5
ret void ret void
; ASM-NEXT: movw ip, #4088 ; ASM-NEXT: movw ip, #4088
; DIS-NEXT: 4c: e300cff8 ; DIS-NEXT: 4c: e300cff8
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0xf8
; IASM-NEXT: .byte 0x10 ; IASM-NEXT: .byte 0xcf
; IASM-NEXT: .byte 0x80 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe3
; ASM-NEXT: add sp, sp, ip ; ASM-NEXT: add sp, sp, ip
; DIS-NEXT: 50: e08dd00c ; DIS-NEXT: 50: e08dd00c
; IASM-NEXT: .byte 0xf8 ; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xcf ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe0
; ASM-NEXT: bx lr ; ASM-NEXT: bx lr
; DIS-NEXT: 54: e12fff1e ; DIS-NEXT: 54: e12fff1e
; IASM-NEXT: .byte 0xc ; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x8d ; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe1
} }
...@@ -9,7 +9,9 @@ ...@@ -9,7 +9,9 @@
; RUN: -ffunction-sections | FileCheck %s ; RUN: -ffunction-sections | FileCheck %s
declare void @call_target() declare void @call_target()
declare void @call_target1(i32 %arg) declare void @call_target1(i32 %arg0)
declare void @call_target2(i32 %arg0, i32 %arg1)
declare void @call_target3(i32 %arg0, i32 %arg1, i32 %arg2)
@global_short = internal global [2 x i8] zeroinitializer @global_short = internal global [2 x i8] zeroinitializer
; A direct call sequence uses the right mask and register-call sequence. ; A direct call sequence uses the right mask and register-call sequence.
...@@ -60,7 +62,7 @@ entry: ...@@ -60,7 +62,7 @@ entry:
; CHECK-LABEL: bundle_lock_without_padding ; CHECK-LABEL: bundle_lock_without_padding
; CHECK: 0: {{.*}} movw ; CHECK: 0: {{.*}} movw
; CHECK-NEXT: movt ; CHECK-NEXT: movt
; CHECK-NEXT: movw ; CHECK-NEXT: mov
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000 ; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]] ; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
...@@ -91,18 +93,16 @@ define internal void @bundle_lock_align_to_end_padding_0() { ...@@ -91,18 +93,16 @@ define internal void @bundle_lock_align_to_end_padding_0() {
entry: entry:
call void @call_target() call void @call_target()
; bundle boundary ; bundle boundary
store i16 0, i16* undef, align 1 call void @call_target3(i32 1, i32 2, i32 3)
call void @call_target()
; bundle boundary ; bundle boundary
ret void ret void
} }
; CHECK-LABEL: bundle_lock_align_to_end_padding_0 ; CHECK-LABEL: bundle_lock_align_to_end_padding_0
; CHECK: c: {{.*}} bl {{.*}} call_target ; CHECK: c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw ; CHECK-NEXT: mov
; CHECK-NEXT: movw ; CHECK-NEXT: mov
; CHECK-NEXT: bic [[REG:r[0-9]+]] ; CHECK-NEXT: mov
; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]] ; CHECK-NEXT: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target3
; CHECK: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: add sp ; CHECK-NEXT: add sp
; CHECK-NEXT: bic sp, {{.*}} 0xc0000000 ; CHECK-NEXT: bic sp, {{.*}} 0xc0000000
; CHECK-NEXT: pop ; CHECK-NEXT: pop
...@@ -114,41 +114,29 @@ define internal void @bundle_lock_align_to_end_padding_1() { ...@@ -114,41 +114,29 @@ define internal void @bundle_lock_align_to_end_padding_1() {
entry: entry:
call void @call_target() call void @call_target()
; bundle boundary ; bundle boundary
store i32 65536, i32* undef, align 1 call void @call_target2(i32 1, i32 2)
; bundle boundary
call void @call_target()
; bundle boundary ; bundle boundary
ret void ret void
} }
; CHECK-LABEL: bundle_lock_align_to_end_padding_1 ; CHECK-LABEL: bundle_lock_align_to_end_padding_1
; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target ; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target
; CHECK-NEXT: movw [[BASE:r[0-9]+]] ; CHECK-NEXT: mov
; CHECK-NEXT: movw [[REG:r[0-9]+]], #0 ; CHECK-NEXT: mov
; CHECK-NEXT: movt [[REG]], #1
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: bic [[BASE]], [[BASE]], {{.*}} 0xc0000000 ; CHECK-NEXT: bl {{.*}} call_target2
; CHECK-NEXT: str [[REG]], {{[[]}}[[BASE]]
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f ; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
; CHECK-NEXT: {{.*}} bx lr ; CHECK-NEXT: {{.*}} bx lr
; Bundle lock align_to_end with two bunches of padding. ; Bundle lock align_to_end with two bunches of padding.
define internal void @bundle_lock_align_to_end_padding_2(i32 %target) { define internal void @bundle_lock_align_to_end_padding_2() {
entry: entry:
call void @call_target1(i32 1) call void @call_target2(i32 1, i32 2)
; bundle boundary ; bundle boundary
%__1 = inttoptr i32 %target to void (i32, i32, i32)*
call void %__1(i32 2, i32 3, i32 4)
ret void ret void
} }
; CHECK-LABEL: bundle_lock_align_to_end_padding_2 ; CHECK-LABEL: bundle_lock_align_to_end_padding_2
; CHECK: {{[0-9]+}}0: ; CHECK: mov
; CHECK-NEXT: nop ; CHECK-NEXT: mov
; CHECK-NEXT: nop
; CHECK-NEXT: bl {{.*}} call_target
; CHECK: {{[0-9]+}}c: {{.*}} movw r2, #4
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: nop ; CHECK-NEXT: nop
; CHECK-NEXT: bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f ; CHECK-NEXT: bl {{.*}} call_target2
; CHECK-NEXT: {{.*}} blx [[REG]]
...@@ -89,17 +89,17 @@ entry: ...@@ -89,17 +89,17 @@ entry:
; ARM32-LABEL: pass64BitArg ; ARM32-LABEL: pass64BitArg
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
...@@ -142,7 +142,7 @@ entry: ...@@ -142,7 +142,7 @@ entry:
; ARM32: str [[REG2]], [sp] ; ARM32: str [[REG2]], [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: mov r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i32 @pass64BitUndefArg() { define internal i32 @pass64BitUndefArg() {
...@@ -162,9 +162,9 @@ entry: ...@@ -162,9 +162,9 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline ; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitUndefArg ; ARM32-LABEL: pass64BitUndefArg
; ARM32: sub sp ; ARM32: sub sp
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
; ARM32: str ; ARM32: str
; ARM32: movw {{.*}}, #123 ; ARM32: mov {{.*}}, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %padding, i64 %a) { define internal i64 @return64BitArg(i64 %padding, i64 %a) {
......
...@@ -117,7 +117,7 @@ entry: ...@@ -117,7 +117,7 @@ entry:
; CHECK-LABEL: MulImm ; CHECK-LABEL: MulImm
; CHECK: imul e{{.*}},e{{.*}},0x63 ; CHECK: imul e{{.*}},e{{.*}},0x63
; ARM32-LABEL: MulImm ; ARM32-LABEL: MulImm
; ARM32-OPTM1: movw {{.*}}, #99 ; ARM32-OPTM1: mov {{.*}}, #99
; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}} ; ARM32-OPTM1: mul r{{.*}}, r{{.*}}, r{{.*}}
; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2 ; ARM32-OPT2: rsb [[T:r[0-9]+]], [[S:r[0-9]+]], [[S]], lsl #2
; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7 ; ARM32-OPT2-DAG: add [[T]], [[T]], [[S]], lsl #7
...@@ -141,8 +141,8 @@ entry: ...@@ -141,8 +141,8 @@ entry:
; CHECK-NOT: mul {{[0-9]+}} ; CHECK-NOT: mul {{[0-9]+}}
; ;
; ARM32-LABEL: MulImm64 ; ARM32-LABEL: MulImm64
; ARM32: movw {{.*}}, #99 ; ARM32: mov {{.*}}, #99
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
; ARM32: mul r ; ARM32: mul r
; ARM32: mla r ; ARM32: mla r
; ARM32: umull r ; ARM32: umull r
......
...@@ -358,7 +358,7 @@ entry: ...@@ -358,7 +358,7 @@ entry:
; CHECK: cvtsi2sd {{.*[^1]}} ; CHECK: cvtsi2sd {{.*[^1]}}
; CHECK: fld ; CHECK: fld
; ARM32-LABEL: signed32ToDoubleConst ; ARM32-LABEL: signed32ToDoubleConst
; ARM32-DAG: movw [[CONST:r[0-9]+]], #123 ; ARM32-DAG: mov [[CONST:r[0-9]+]], #123
; ARM32-DAG: vmov [[SRC:s[0-9]+]], [[CONST]] ; ARM32-DAG: vmov [[SRC:s[0-9]+]], [[CONST]]
; ARM32-DAG: vcvt.f64.s32 {{d[0-9]+}}, [[SRC]] ; ARM32-DAG: vcvt.f64.s32 {{d[0-9]+}}, [[SRC]]
......
...@@ -1329,7 +1329,7 @@ entry: ...@@ -1329,7 +1329,7 @@ entry:
; CHECK-LABEL: test_atomic_is_lock_free ; CHECK-LABEL: test_atomic_is_lock_free
; CHECK: mov {{.*}},0x1 ; CHECK: mov {{.*}},0x1
; ARM32-LABEL: test_atomic_is_lock_free ; ARM32-LABEL: test_atomic_is_lock_free
; ARM32: movw {{.*}}, #1 ; ARM32: mov {{.*}}, #1
define internal i32 @test_not_lock_free(i32 %iptr) { define internal i32 @test_not_lock_free(i32 %iptr) {
entry: entry:
......
...@@ -303,8 +303,8 @@ define internal i64 @ret_64bits_shift_left0() { ...@@ -303,8 +303,8 @@ define internal i64 @ret_64bits_shift_left0() {
; CHECK-NEXT: mov eax,0xff ; CHECK-NEXT: mov eax,0xff
; CHECK-NEXT: mov edx,0xff ; CHECK-NEXT: mov edx,0xff
; ARM32-LABEL: ret_64bits_shift_left0 ; ARM32-LABEL: ret_64bits_shift_left0
; ARM32-NEXT: movw r0, #255 ; ARM32-NEXT: mov r0, #255
; ARM32-NEXT: movw r1, #255 ; ARM32-NEXT: mov r1, #255
; MIPS32-LABEL: ret_64bits_shift_left0 ; MIPS32-LABEL: ret_64bits_shift_left0
; MIPS32-NEXT: li v0,255 ; MIPS32-NEXT: li v0,255
; MIPS32-NEXT: li v1,255 ; MIPS32-NEXT: li v1,255
......
...@@ -130,5 +130,5 @@ sw.default: ...@@ -130,5 +130,5 @@ sw.default:
ret i32 20 ret i32 20
} }
; ARM32-LABEL: testSwitchUndef64 ; ARM32-LABEL: testSwitchUndef64
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
; ARM32: movw {{.*}}, #0 ; ARM32: mov {{.*}}, #0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment