Commit afc92af5 by John Porto

Subzero. Misc ARM32 bugfixes.

With this CL, Spec2k built by the Sz ARM32 backend runs and verifies successfully.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1407063002 .
parent 745ad1d8
......@@ -88,6 +88,9 @@ def AddOptionalArgs(argparser):
argparser.add_argument('--enable-block-profile',
dest='enable_block_profile', action='store_true',
help='Enable basic block profiling.')
argparser.add_argument('--target', default='x8632', dest='target',
choices=['arm32', 'x8632'],
help='Generate code for specified target.')
argparser.add_argument('--verbose', '-v', dest='verbose',
action='store_true',
help='Display some extra debugging output')
......@@ -185,13 +188,18 @@ def ProcessPexe(args, pexe, exe):
if hybrid and (args.force or
NewerThanOrNotThere(pexe, obj_llc) or
NewerThanOrNotThere(llcbin, obj_llc)):
arch = {
'arm32': 'armv7' if args.sandbox else 'arm-nonsfi',
'x8632': 'x86-32' if args.sandbox else 'x86-32-linux',
}[args.target]
# Only run pnacl-translate in hybrid mode.
shellcmd(['pnacl-translate',
'-split-module=1',
'-ffunction-sections',
'-fdata-sections',
'-c',
'-arch', 'x86-32' if args.sandbox else 'x86-32-linux',
'-arch', arch,
'-O' + opt_level_map[opt_level],
'--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
'-o', obj_llc] +
......@@ -216,7 +224,8 @@ def ProcessPexe(args, pexe, exe):
'-O' + opt_level,
'-bitcode-format=pnacl',
'-filetype=' + args.filetype,
'-o', obj_sz if args.filetype == 'obj' else asm_sz] +
'-o', obj_sz if args.filetype == 'obj' else asm_sz,
'-target=' + args.target] +
(['-externalize',
'-ffunction-sections',
'-fdata-sections'] if hybrid else []) +
......@@ -227,10 +236,14 @@ def ProcessPexe(args, pexe, exe):
[pexe],
echo=args.verbose)
if args.filetype != 'obj':
triple = {
'arm32': 'arm-nacl' if args.sandbox else 'arm',
'x8632': 'i686-nacl' if args.sandbox else 'i686',
}[args.target]
shellcmd((
'llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}'
).format(asm=asm_sz, obj=obj_sz,
triple='i686-nacl' if args.sandbox else 'i686'),
).format(asm=asm_sz, obj=obj_sz, triple=triple),
echo=args.verbose)
if not args.sandbox:
shellcmd((
......@@ -277,9 +290,18 @@ def ProcessPexe(args, pexe, exe):
).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak),
echo=args.verbose)
obj_partial = pexe_base + '.o'
ld = {
'arm32': 'arm-linux-gnueabihf-ld',
'x8632': 'ld',
}[args.target]
emulation = {
'arm32': 'armelf_linux_eabi',
'x8632': 'elf_i386',
}[args.target]
shellcmd((
'ld -r -m elf_i386 -o {partial} {sz} {llc}'
).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
'{ld} -r -m {emulation} -o {partial} {sz} {llc}'
).format(ld=ld, emulation=emulation, partial=obj_partial,
sz=obj_sz_weak, llc=obj_llc_weak),
echo=args.verbose)
shellcmd((
'{objcopy} -w --localize-symbol="*" {partial}'
......@@ -293,10 +315,9 @@ def ProcessPexe(args, pexe, exe):
echo=args.verbose)
# Run the linker regardless of hybrid mode.
linker = (
'{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
).format(root=nacl_root)
if args.sandbox:
assert args.target in ['x8632'], \
'-sandbox is not available for %s' % args.target
linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
'x86-32/lib').format(root=nacl_root)
shellcmd((
......@@ -304,27 +325,45 @@ def ProcessPexe(args, pexe, exe):
'--build-id --entry=__pnacl_start -static ' +
'{linklib}/crtbegin.o {partial} ' +
'{root}/toolchain_build/src/subzero/build/runtime/' +
'szrt_sb_x8632.o ' +
'szrt_sb_{target}.o ' +
'{linklib}/libpnacl_irt_shim_dummy.a --start-group ' +
'{linklib}/libgcc.a {linklib}/libcrt_platform.a ' +
'--end-group {linklib}/crtend.o --undefined=_start ' +
'--defsym=__Sz_AbsoluteZero=0 ' +
'-o {exe}'
).format(gold=gold, linklib=linklib, partial=obj_partial, exe=exe,
root=nacl_root),
root=nacl_root, target=args.target),
echo=args.verbose)
else:
linker = {
'arm32': '/usr/bin/arm-linux-gnueabihf-g++',
'x8632': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
).format(root=nacl_root)
}[args.target]
extra_linker_args = ' '.join({
'arm32': ['-mcpu=cortex-a9'],
'x8632': ['-m32']
}[args.target])
lib_dir = {
'arm32': 'arm-linux',
'x8632': 'x86-32-linux',
}[args.target]
shellcmd((
'{ld} -m32 {partial} -o {exe} ' +
'{ld} {ld_extra_args} {partial} -o {exe} ' +
# Keep the rest of this command line (except szrt_native_x8632.o) in
# sync with RunHostLD() in pnacl-translate.py.
'{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
'x86-32-linux/lib/' +
'{lib_dir}/lib/' +
'{{unsandboxed_irt,irt_random,irt_query_list}}.o ' +
'{root}/toolchain_build/src/subzero/build/runtime/' +
'szrt_native_x8632.o -lpthread -lrt ' +
'szrt_native_{target}.o -lpthread -lrt ' +
'-Wl,--defsym=__Sz_AbsoluteZero=0'
).format(ld=linker, partial=obj_partial, exe=exe, root=nacl_root),
).format(ld=linker, ld_extra_args=extra_linker_args,
partial=obj_partial, exe=exe, root=nacl_root,
target=args.target, lib_dir=lib_dir),
echo=args.verbose)
# Put the extra verbose printing at the end.
......
......@@ -33,7 +33,9 @@ def main():
print 'Unknown component{s}: '.format(s='s' if len(bad) > 1 else '') + \
' '.join(x for x in bad)
sys.exit(1)
suffix = 'pnacl.opt.x8632' if args.sandbox else 'gcc.opt.x8632'
suffix = (
'pnacl.opt.{target}' if args.sandbox else 'gcc.opt.{target}').format(
target=args.target);
for comp in args.comps:
name = os.path.splitext(comp)[1] or comp
if name[0] == '.':
......
......@@ -682,7 +682,7 @@ void Cfg::emitTextHeader(const IceString &MangledName, GlobalContext *Ctx,
Ostream &Str = Ctx->getStrEmit();
Str << "\t.text\n";
if (Ctx->getFlags().getFunctionSections())
Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n";
Str << "\t.section\t.text." << MangledName << ",\"ax\",%progbits\n";
if (!Asm->getInternal() || Ctx->getFlags().getDisableInternal()) {
Str << "\t.globl\t" << MangledName << "\n";
Str << "\t.type\t" << MangledName << ",%function\n";
......
......@@ -889,6 +889,7 @@ template <> void InstARM32Movt::emit(const Cfg *Func) const {
}
void InstARM32Pop::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
return;
SizeT IntegerCount = 0;
......@@ -898,23 +899,31 @@ void InstARM32Pop::emit(const Cfg *Func) const {
}
}
Ostream &Str = Func->getContext()->getStrEmit();
if (IntegerCount == 0) {
Str << "\t@ empty pop";
return;
if (IntegerCount != 0) {
Str << "\t"
<< "pop"
<< "\t{";
bool PrintComma = false;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
}
Str << "}\n";
}
Str << "\t"
<< "pop"
<< "\t{";
bool PrintComma = false;
for (const Operand *Op : Dests) {
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
if (isScalarIntegerType(Op->getType()))
continue;
Str << "\t"
<< "vpop"
<< "\t{";
Op->emit(Func);
Str << "}\n";
}
Str << "}";
}
void InstARM32Pop::emitIAS(const Cfg *Func) const {
......@@ -969,6 +978,7 @@ void InstARM32AdjustStack::dump(const Cfg *Func) const {
}
void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
return;
SizeT IntegerCount = 0;
......@@ -978,25 +988,32 @@ void InstARM32Push::emit(const Cfg *Func) const {
}
}
Ostream &Str = Func->getContext()->getStrEmit();
if (IntegerCount == 0) {
for (SizeT i = getSrcSize(); i > 0; --i) {
Operand *Op = getSrc(i - 1);
if (isScalarIntegerType(Op->getType()))
continue;
Str << "\t"
<< "@empty push";
return;
<< "vpush"
<< "\t{";
Op->emit(Func);
Str << "}\n";
}
Str << "\t"
<< "push"
<< "\t{";
bool PrintComma = false;
for (SizeT i = 0; i < getSrcSize(); ++i) {
Operand *Op = getSrc(i);
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
if (IntegerCount != 0) {
Str << "\t"
<< "push"
<< "\t{";
bool PrintComma = false;
for (SizeT i = 0; i < getSrcSize(); ++i) {
Operand *Op = getSrc(i);
if (isScalarIntegerType(Op->getType())) {
if (PrintComma)
Str << ", ";
Op->emit(Func);
PrintComma = true;
}
}
Str << "}\n";
}
Str << "}";
}
void InstARM32Push::emitIAS(const Cfg *Func) const {
......
......@@ -742,7 +742,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
// TODO(jvoung): do separate vpush for each floating point register
// segment and += 4, or 8 depending on type.
++NumCallee;
PreservedRegsSizeBytes += 4;
Variable *PhysicalRegister = getPhysicalRegister(i);
PreservedRegsSizeBytes +=
typeWidthInBytesOnStack(PhysicalRegister->getType());
GPRsToPreserve.push_back(getPhysicalRegister(i));
}
}
......@@ -1628,15 +1630,15 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
default:
break;
case InstArithmetic::Udiv: {
constexpr bool IsRemainder = false;
constexpr bool NotRemainder = false;
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
H_udiv_i32, IsRemainder);
H_udiv_i32, NotRemainder);
return;
}
case InstArithmetic::Sdiv: {
constexpr bool IsRemainder = false;
constexpr bool NotRemainder = false;
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
H_sdiv_i32, IsRemainder);
H_sdiv_i32, NotRemainder);
return;
}
case InstArithmetic::Urem: {
......@@ -1730,10 +1732,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
_mov(Dest, T);
return;
case InstArithmetic::Lshr:
if (Dest->getType() != IceType_i32) {
_uxt(Src0R, Src0R);
}
_lsr(T, Src0R, Src1RF);
_mov(Dest, T);
return;
case InstArithmetic::Ashr:
if (Dest->getType() != IceType_i32) {
_sxt(Src0R, Src0R);
}
_asr(T, Src0R, Src1RF);
_mov(Dest, T);
return;
......@@ -1803,7 +1811,11 @@ void TargetARM32::lowerBr(const InstBr *Inst) {
Operand *Cond = Inst->getCondition();
// TODO(jvoung): Handle folding opportunities.
Type Ty = Cond->getType();
Variable *Src0R = legalizeToReg(Cond);
assert(Ty == IceType_i1);
if (Ty != IceType_i32)
_uxt(Src0R, Src0R);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(Src0R, Zero);
_br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
......@@ -2298,6 +2310,8 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
configureBitcastTemporary(T);
Variable *Src0R = legalizeToReg(Src0);
_mov(T, Src0R);
Context.insert(InstFakeUse::create(Func, T->getHi()));
Context.insert(InstFakeUse::create(Func, T->getLo()));
lowerAssign(InstAssign::create(Func, Dest, T));
break;
}
......@@ -3248,9 +3262,7 @@ void TargetARM32::lowerLoad(const InstLoad *Load) {
lowerAssign(Assign);
}
void TargetARM32::doAddressOptLoad() {
UnimplementedError(Func->getContext()->getFlags());
}
void TargetARM32::doAddressOptLoad() {}
void TargetARM32::randomlyInsertNop(float Probability,
RandomNumberGenerator &RNG) {
......@@ -3320,7 +3332,11 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) {
// TODO(jvoung): handle folding opportunities.
// cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
Variable *CmpOpnd0 = legalizeToReg(Condition);
Type CmpOpnd0Ty = CmpOpnd0->getType();
Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
assert(CmpOpnd0Ty == IceType_i1);
if (CmpOpnd0Ty != IceType_i32)
_uxt(CmpOpnd0, CmpOpnd0);
_cmp(CmpOpnd0, CmpOpnd1);
static constexpr CondARM32::Cond Cond = CondARM32::NE;
if (DestTy == IceType_i64) {
......@@ -3384,9 +3400,7 @@ void TargetARM32::lowerStore(const InstStore *Inst) {
}
}
void TargetARM32::doAddressOptStore() {
UnimplementedError(Func->getContext()->getFlags());
}
void TargetARM32::doAddressOptStore() {}
void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
// This implements the most naive possible lowering.
......@@ -3410,10 +3424,21 @@ void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
return;
}
// 32 bit integer
Variable *Src0Var = legalizeToReg(Src0);
// If Src0 is not an i32, we left shift it -- see the icmp lowering for the
// reason.
assert(Src0Var->mustHaveReg());
const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
assert(ShiftAmt < 32);
if (ShiftAmt > 0) {
Operand *ShiftConst = Ctx->getConstantInt32(ShiftAmt);
Variable *T = makeReg(IceType_i32);
_lsl(T, Src0Var, ShiftConst);
Src0Var = T;
}
for (SizeT I = 0; I < NumCases; ++I) {
Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
Operand *Value = Ctx->getConstantInt32(Inst->getValue(I) << ShiftAmt);
Value = legalize(Value, Legal_Reg | Legal_Flex);
_cmp(Src0Var, Value);
_br(Inst->getLabel(I), CondARM32::EQ);
......
......@@ -226,19 +226,20 @@ entry:
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32-DAG: mov [[T0:r[0-9]+]], #0
; ARM32-DAG: cmp r0, r1
; ARM32: mov [[T0:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[T0]], #1
; ARM32-DAG: mov [[T1:r[0-9]+]], r1
; ARM32-DAG: cmp [[T0]], #0
; ARM32: [[T1]], r0
; ARM32-DAG: mov [[T2:r[0-9]+]], r0
; ARM32-DAG: cmp [[T0]], #0
; ARM32: [[T2]], r1
; ARM32: cmp [[T0]], #0
; ARM32: movne
; ARM32: add
; ARM32: add
; ARM32: uxtb [[T1:r[0-9]+]], [[T1]]
; ARM32-NEXT: cmp [[T1]], #0
; ARM32: movne [[T2:r[0-9]+]], r0
; ARM32: uxtb [[T3:r[0-9]+]], [[T3]]
; ARM32-NEXT: cmp [[T3]], #0
; ARM32: movne [[T4:r[0-9]+]], r1
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32-NEXT: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: add [[T7:r[0-9]+]], [[T2]], [[T4]]
; ARM32: add {{r[0-9]+}}, [[T7]], [[T6]]
; ARM32: bx lr
......@@ -268,19 +269,21 @@ next:
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32-LABEL: fold_cmp_select_multi
; ARM32-DAG: mov [[T0:r[0-9]+]], #0
; ARM32-DAG: cmp r0, r1
; ARM32: mov [[T0:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[T0]], #1
; ARM32-DAG: mov [[T1:r[0-9]+]], r1
; ARM32-DAG: cmp [[T0]], #0
; ARM32: [[T1]], r0
; ARM32-DAG: mov [[T2:r[0-9]+]], r0
; ARM32-DAG: cmp [[T0]], #0
; ARM32: [[T2]], r1
; ARM32: cmp [[T0]], #0
; ARM32: movne
; ARM32: add
; ARM32: add
; ARM32: uxtb [[T2:r[0-9]+]], [[T2]]
; ARM32-NEXT: cmp [[T2]], #0
; ARM32: movne [[T1]], r0
; ARM32: uxtb [[T4:r[0-9]+]], [[T4]]
; ARM32-NEXT: cmp [[T4]], #0
; ARM32: movne [[T3]], r1
; ARM32-LABEL: .Lno_fold_cmp_select_multi_liveout$next:
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: add [[T7:r[0-9]+]], [[T2]], [[T4]]
; ARM32: add {{r[0-9]+}}, [[T7]], [[T6]]
; ARM32: bx lr
; Cmp/multi-select non-folding because of extra non-whitelisted uses.
......@@ -311,19 +314,19 @@ entry:
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32-DAG: mov [[T0:r[0-9]+]], #0
; ARM32-DAG: cmp r0, r1
; ARM32: mov [[T0:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[T0]], #1
; ARM32-DAG: mov [[T1:r[0-9]+]], r1
; ARM32-DAG: cmp [[T0]], #0
; ARM32: [[T1]], r0
; ARM32-DAG: mov [[T2:r[0-9]+]], r0
; ARM32-DAG: cmp [[T0]], #0
; ARM32: [[T2]], r1
; ARM32: cmp [[T0]], #0
; ARM32: movne
; ARM32: and {{.*}}, [[T0]], #1
; ARM32: add
; ARM32: add
; ARM32: add
; ARM32: uxtb [[T1:r[0-9]+]], [[T1]]
; ARM32-NEXT: cmp [[T1]], #0
; ARM32: movne [[T2:r[0-9]+]], r0
; ARM32: uxtb [[T3:r[0-9]+]], [[T3]]
; ARM32-NEXT: cmp [[T3]], #0
; ARM32: movne [[T4:r[0-9]+]], r1
; ARM32: uxtb [[T5:r[0-9]+]], [[T5]]
; ARM32-NEXT: cmp [[T5]], #0
; ARM32: movne [[T6:r[0-9]+]], #123
; ARM32: and [[T7:r[0-9]+]], [[T0]], #1
; ARM32: add [[T8:r[0-9]+]], [[T2]], [[T4]]
; ARM32: add {{r[0-9]+}}, [[T8]], [[T7]]
; ARM32: bx lr
......@@ -96,6 +96,7 @@ target:
; ARM32O2-LABEL: testCondFallthroughToNextBlock
; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: movge {{.*}}, #1
; ARM32O2-NEXT: uxtb
; ARM32O2-NEXT: cmp {{.*}}, #0
; ARM32O2-NEXT: bne
; ARM32O2-NEXT: bl
......@@ -154,6 +155,7 @@ target:
; ARM32O2-LABEL: testCondTargetNextBlock
; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: movge {{.*}}, #1
; ARM32O2-NEXT: uxtb
; ARM32O2-NEXT: cmp {{.*}}, #0
; ARM32O2-NEXT: beq
; ARM32O2-NEXT: bl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment