Commit 4b6e4b44 by John Porto

Subzero. ARM32. Refactors atomic intrinsics lowering.

parent 816fd68f
...@@ -47,7 +47,8 @@ ...@@ -47,7 +47,8 @@
if (fetch) { \ if (fetch) { \
return __sync_fetch_and_##inst(ptr, 42); \ return __sync_fetch_and_##inst(ptr, 42); \
} else { \ } else { \
return __sync_##inst##_and_fetch(ptr, 99); \ const type value = static_cast<type>(0xaaaaaaaaaaaaaaaaull); \
return __sync_##inst##_and_fetch(ptr, value); \
} \ } \
} }
......
...@@ -65,14 +65,27 @@ def MakeRuntimesForTarget(target_info, ll_files, ...@@ -65,14 +65,27 @@ def MakeRuntimesForTarget(target_info, ll_files,
'-target=' + target_info.triple, '-target=' + target_info.triple,
'-c', '-c',
'{srcdir}/szrt_profiler.c'.format(srcdir=srcdir), '{srcdir}/szrt_profiler.c'.format(srcdir=srcdir),
'-o', TmpFile('{dir}/szrt_profiler_native_{target}.o') '-o', TmpFile('{dir}/szrt_native_profiler_{target}.o')
], echo=verbose)
# Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_native_asm_{target}.o.
shellcmd(['llvm-mc',
'-triple=' + target_info.triple, '--defsym NATIVE=1',
'-filetype=obj',
'-o', TmpFile('{dir}/szrt_native_asm_{target}.o'),
'{srcdir}/szrt_asm_{target}.s'.format(
srcdir=srcdir, target=target_info.target)
], echo=verbose) ], echo=verbose)
# Write full szrt_native_{target}.o. # Write full szrt_native_{target}.o.
PartialLink([TmpFile('{dir}/szrt_native_{target}.tmp.o'), PartialLink([TmpFile('{dir}/szrt_native_{target}.tmp.o'),
TmpFile('{dir}/szrt_profiler_native_{target}.o')], TmpFile('{dir}/szrt_native_asm_{target}.o'),
TmpFile('{dir}/szrt_native_profiler_{target}.o')],
['-m {ld_emu}'.format(ld_emu=target_info.ld_emu)], ['-m {ld_emu}'.format(ld_emu=target_info.ld_emu)],
OutFile('{rtdir}/szrt_native_{target}.o'), OutFile('{rtdir}/szrt_native_{target}.o'),
verbose) verbose)
shellcmd(['le32-nacl-objcopy',
'--strip-symbol=NATIVE',
OutFile('{rtdir}/szrt_native_{target}.o')])
# Helper function for building the sandboxed runtime. # Helper function for building the sandboxed runtime.
def MakeSandboxedRuntime(): def MakeSandboxedRuntime():
"""Builds just the sandboxed runtime.""" """Builds just the sandboxed runtime."""
...@@ -82,8 +95,26 @@ def MakeRuntimesForTarget(target_info, ll_files, ...@@ -82,8 +95,26 @@ def MakeRuntimesForTarget(target_info, ll_files,
Translate(ll_files, Translate(ll_files,
['-mtriple=' + targets.ConvertTripleToNaCl(target_info.triple)] + ['-mtriple=' + targets.ConvertTripleToNaCl(target_info.triple)] +
target_info.llc_flags, target_info.llc_flags,
TmpFile('{dir}/szrt_sb_{target}.tmp.o'),
verbose)
# Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_sb_asm_{target}.o.
shellcmd(['llvm-mc',
'-triple=' + targets.ConvertTripleToNaCl(target_info.triple),
'--defsym NACL=1',
'-filetype=obj',
'-o', TmpFile('{dir}/szrt_sb_asm_{target}.o'),
'{srcdir}/szrt_asm_{target}.s'.format(
srcdir=srcdir, target=target_info.target)
], echo=verbose)
PartialLink([TmpFile('{dir}/szrt_sb_{target}.tmp.o'),
TmpFile('{dir}/szrt_sb_asm_{target}.o')],
['-m {ld_emu}'.format(ld_emu=target_info.sb_emu)],
OutFile('{rtdir}/szrt_sb_{target}.o'), OutFile('{rtdir}/szrt_sb_{target}.o'),
verbose) verbose)
shellcmd(['le32-nacl-objcopy',
'--strip-symbol=NACL',
OutFile('{rtdir}/szrt_sb_{target}.o')])
# Helper function for building the Non-SFI runtime. # Helper function for building the Non-SFI runtime.
def MakeNonsfiRuntime(): def MakeNonsfiRuntime():
"""Builds just the nonsfi runtime.""" """Builds just the nonsfi runtime."""
...@@ -96,18 +127,22 @@ def MakeRuntimesForTarget(target_info, ll_files, ...@@ -96,18 +127,22 @@ def MakeRuntimesForTarget(target_info, ll_files,
verbose) verbose)
# Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_asm_{target}.o. # Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_asm_{target}.o.
shellcmd(['llvm-mc', shellcmd(['llvm-mc',
'-triple=' + target_info.triple, '-triple=' + target_info.triple, '--defsym NONSFI=1',
'-filetype=obj', '-filetype=obj',
'-o', TmpFile('{dir}/szrt_asm_{target}.o'), '-o', TmpFile('{dir}/szrt_nonsfi_asm_{target}.o'),
'{srcdir}/szrt_asm_{target}.s'.format( '{srcdir}/szrt_asm_{target}.s'.format(
srcdir=srcdir, target=target_info.target) srcdir=srcdir, target=target_info.target)
], echo=verbose) ], echo=verbose)
# Write full szrt_nonsfi_{target}.o. # Write full szrt_nonsfi_{target}.o.
PartialLink([TmpFile('{dir}/szrt_nonsfi_{target}.tmp.o'), PartialLink([TmpFile('{dir}/szrt_nonsfi_{target}.tmp.o'),
TmpFile('{dir}/szrt_asm_{target}.o')], TmpFile('{dir}/szrt_nonsfi_asm_{target}.o')],
['-m {ld_emu}'.format(ld_emu=target_info.ld_emu)], ['-m {ld_emu}'.format(ld_emu=target_info.ld_emu)],
OutFile('{rtdir}/szrt_nonsfi_{target}.o'), OutFile('{rtdir}/szrt_nonsfi_{target}.o'),
verbose) verbose)
shellcmd(['le32-nacl-objcopy',
'--strip-symbol=NONSFI',
OutFile('{rtdir}/szrt_nonsfi_{target}.o')])
# Run the helper functions. # Run the helper functions.
MakeNativeRuntime() MakeNativeRuntime()
......
...@@ -18,13 +18,14 @@ def FindARMCrossInclude(): ...@@ -18,13 +18,14 @@ def FindARMCrossInclude():
TargetInfo = namedtuple('TargetInfo', TargetInfo = namedtuple('TargetInfo',
['target', 'compiler_arch', 'triple', 'llc_flags', ['target', 'compiler_arch', 'triple', 'llc_flags',
'ld_emu', 'cross_headers']) 'ld_emu', 'sb_emu', 'cross_headers'])
X8632Target = TargetInfo(target='x8632', X8632Target = TargetInfo(target='x8632',
compiler_arch='x8632', compiler_arch='x8632',
triple='i686-none-linux', triple='i686-none-linux',
llc_flags=['-mcpu=pentium4m'], llc_flags=['-mcpu=pentium4m'],
ld_emu='elf_i386_nacl', ld_emu='elf_i386_nacl',
sb_emu='elf_i386_nacl',
cross_headers=[]) cross_headers=[])
X8664Target = TargetInfo(target='x8664', X8664Target = TargetInfo(target='x8664',
...@@ -32,6 +33,7 @@ X8664Target = TargetInfo(target='x8664', ...@@ -32,6 +33,7 @@ X8664Target = TargetInfo(target='x8664',
triple='x86_64-none-linux-gnux32', triple='x86_64-none-linux-gnux32',
llc_flags=['-mcpu=x86-64'], llc_flags=['-mcpu=x86-64'],
ld_emu='elf32_x86_64_nacl', ld_emu='elf32_x86_64_nacl',
sb_emu='elf_x86_64_nacl',
cross_headers=[]) cross_headers=[])
ARM32Target = TargetInfo(target='arm32', ARM32Target = TargetInfo(target='arm32',
...@@ -41,6 +43,7 @@ ARM32Target = TargetInfo(target='arm32', ...@@ -41,6 +43,7 @@ ARM32Target = TargetInfo(target='arm32',
'-float-abi=hard', '-float-abi=hard',
'-mattr=+neon'], '-mattr=+neon'],
ld_emu='armelf_nacl', ld_emu='armelf_nacl',
sb_emu='armelf_nacl',
cross_headers=['-isystem', FindARMCrossInclude()]) cross_headers=['-isystem', FindARMCrossInclude()])
def ConvertTripleToNaCl(nonsfi_triple): def ConvertTripleToNaCl(nonsfi_triple):
......
...@@ -14,6 +14,3 @@ ...@@ -14,6 +14,3 @@
.text .text
.p2alignl 4,0xE7FEDEF0 .p2alignl 4,0xE7FEDEF0
.globl __nacl_read_tp
__nacl_read_tp:
b __aeabi_read_tp
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
.text .text
.p2align 5,0xf4 .p2align 5,0xf4
.ifdef NONSFI
.globl __Sz_getIP_eax .globl __Sz_getIP_eax
__Sz_getIP_eax: __Sz_getIP_eax:
movl (%esp), %eax movl (%esp), %eax
...@@ -49,3 +50,4 @@ __Sz_getIP_esi: ...@@ -49,3 +50,4 @@ __Sz_getIP_esi:
__Sz_getIP_edi: __Sz_getIP_edi:
movl (%esp), %edi movl (%esp), %edi
ret ret
.endif # NONSFI
...@@ -642,7 +642,7 @@ void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) { ...@@ -642,7 +642,7 @@ void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
*RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin(); *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
Iter.Cur->setRegNumTmp(RegNum); Iter.Cur->setRegNumTmp(RegNum);
if (Filtered) if (Filtered)
dumpLiveRangeTrace("Allocating ", Iter.Cur); dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
else else
dumpLiveRangeTrace("Allocating X ", Iter.Cur); dumpLiveRangeTrace("Allocating X ", Iter.Cur);
const llvm::SmallBitVector &Aliases = *RegAliases[RegNum]; const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
...@@ -768,7 +768,7 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) { ...@@ -768,7 +768,7 @@ void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
++RegUses[RegAlias]; ++RegUses[RegAlias];
} }
Active.push_back(Iter.Cur); Active.push_back(Iter.Cur);
dumpLiveRangeTrace("Allocating ", Iter.Cur); dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
} }
void LinearScan::assignFinalRegisters( void LinearScan::assignFinalRegisters(
......
...@@ -385,7 +385,8 @@ void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { ...@@ -385,7 +385,8 @@ void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
// This is not the variable we are looking for. // This is not the variable we are looking for.
continue; continue;
} }
assert(Var64->hasReg() || !Var64->mustHaveReg()); // only allow infinite-weight i64 temporaries to be register allocated.
assert(!Var64->hasReg() || Var64->mustHaveReg());
if (!Var64->hasReg()) { if (!Var64->hasReg()) {
continue; continue;
} }
...@@ -4401,10 +4402,16 @@ TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, ...@@ -4401,10 +4402,16 @@ TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
} }
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
Operand *Src0 = legalizeUndef(Instr->getSrc(0)); return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
Operand *Src1 = legalizeUndef(Instr->getSrc(1)); Instr->getSrc(1));
}
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
Operand *Src0,
Operand *Src1) {
Src0 = legalizeUndef(Src0);
Src1 = legalizeUndef(Src1);
const InstIcmp::ICond Condition = Instr->getCondition();
// a=icmp cond b, c ==> // a=icmp cond b, c ==>
// GCC does: // GCC does:
// <u/s>xtb tb, b // <u/s>xtb tb, b
...@@ -4504,162 +4511,156 @@ inline uint64_t getConstantMemoryOrder(Operand *Opnd) { ...@@ -4504,162 +4511,156 @@ inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
} }
} // end of anonymous namespace } // end of anonymous namespace
void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, void TargetARM32::lowerLoadLinkedStoreExclusive(
Operand *Ptr, Operand *Val) { Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
// retry: CondARM32::Cond Cond) {
// ldrex contents, [addr]
// op tmp, contents, operand auto *Retry = Context.insert<InstARM32Label>(this);
// strex success, tmp, [addr] { // scoping for loop highlighting.
// jne retry Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
// fake-use(addr, operand) @ prevents undesirable clobbering. auto *Success = makeReg(IceType_i32);
// mov dest, contents auto *_0 = Ctx->getConstantZero(IceType_i32);
assert(Dest != nullptr);
Type DestTy = Dest->getType(); Context.insert<InstFakeDef>(Tmp);
(void)Ptr; Context.insert<InstFakeUse>(Tmp);
(void)Val; Variable *AddrR = legalizeToReg(Addr);
_ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
OperandARM32Mem *Mem; auto *StoreValue = Operation(Tmp);
Variable *PtrContentsReg; assert(StoreValue->mustHaveReg());
Variable *PtrContentsHiReg; _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond);
Variable *PtrContentsLoReg; _cmp(Success, _0, Cond);
Variable *Value = Func->makeVariable(DestTy);
Variable *ValueReg;
Variable *ValueHiReg;
Variable *ValueLoReg;
Variable *Success = makeReg(IceType_i32);
Variable *TmpReg;
Variable *TmpHiReg;
Variable *TmpLoReg;
Operand *_0 = Ctx->getConstantZero(IceType_i32);
auto *Retry = InstARM32Label::create(Func, this);
if (DestTy == IceType_i64) {
Variable64On32 *PtrContentsReg64 = makeI64RegPair();
PtrContentsHiReg = PtrContentsReg64->getHi();
PtrContentsLoReg = PtrContentsReg64->getLo();
PtrContentsReg = PtrContentsReg64;
llvm::cast<Variable64On32>(Value)->initHiLo(Func);
Variable64On32 *ValueReg64 = makeI64RegPair();
ValueHiReg = ValueReg64->getHi();
ValueLoReg = ValueReg64->getLo();
ValueReg = ValueReg64;
Variable64On32 *TmpReg64 = makeI64RegPair();
TmpHiReg = TmpReg64->getHi();
TmpLoReg = TmpReg64->getLo();
TmpReg = TmpReg64;
} else {
PtrContentsReg = makeReg(DestTy);
PtrContentsHiReg = nullptr;
PtrContentsLoReg = PtrContentsReg;
ValueReg = makeReg(DestTy);
ValueHiReg = nullptr;
ValueLoReg = ValueReg;
TmpReg = makeReg(DestTy);
TmpHiReg = nullptr;
TmpLoReg = TmpReg;
} }
_br(Retry, CondARM32::NE);
}
if (DestTy == IceType_i64) { namespace {
Context.insert<InstFakeDef>(Value); InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
} Variable *Src0, Operand *Src1) {
lowerAssign(InstAssign::create(Func, Value, Val)); InstArithmetic::OpKind Oper;
Variable *PtrVar = Func->makeVariable(IceType_i32);
lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
_dmb();
Context.insert(Retry);
Mem = formMemoryOperand(PtrVar, DestTy);
if (DestTy == IceType_i64) {
Context.insert<InstFakeDef>(ValueReg, Value);
}
lowerAssign(InstAssign::create(Func, ValueReg, Value));
if (DestTy == IceType_i8 || DestTy == IceType_i16) {
_uxt(ValueReg, ValueReg);
}
_ldrex(PtrContentsReg, Mem);
if (DestTy == IceType_i64) {
Context.insert<InstFakeDef>(TmpReg, ValueReg);
}
switch (Operation) { switch (Operation) {
default: default:
Func->setError("Unknown AtomicRMW operation"); llvm::report_fatal_error("Unknown AtomicRMW operation");
return; case Intrinsics::AtomicExchange:
llvm::report_fatal_error("Can't handle Atomic xchg operation");
case Intrinsics::AtomicAdd: case Intrinsics::AtomicAdd:
if (DestTy == IceType_i64) { Oper = InstArithmetic::Add;
_adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); break;
_adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); case Intrinsics::AtomicAnd:
} else { Oper = InstArithmetic::And;
_add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
}
break; break;
case Intrinsics::AtomicSub: case Intrinsics::AtomicSub:
if (DestTy == IceType_i64) { Oper = InstArithmetic::Sub;
_subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
_sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
} else {
_sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
}
break; break;
case Intrinsics::AtomicOr: case Intrinsics::AtomicOr:
_orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); Oper = InstArithmetic::Or;
if (DestTy == IceType_i64) {
_orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
}
break;
case Intrinsics::AtomicAnd:
_and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
if (DestTy == IceType_i64) {
_and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
}
break; break;
case Intrinsics::AtomicXor: case Intrinsics::AtomicXor:
_eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); Oper = InstArithmetic::Xor;
if (DestTy == IceType_i64) {
_eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
}
break; break;
case Intrinsics::AtomicExchange: }
_mov(TmpLoReg, ValueLoReg); return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
}
} // end of anonymous namespace
void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Operand *Addr, Operand *Val) {
// retry:
// ldrex tmp, [addr]
// mov contents, tmp
// op result, contents, Val
// strex success, result, [addr]
// cmp success, 0
// jne retry
// fake-use(addr, operand) @ prevents undesirable clobbering.
// mov dest, contents
auto DestTy = Dest->getType();
if (DestTy == IceType_i64) { if (DestTy == IceType_i64) {
_mov(TmpHiReg, ValueHiReg); lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
return;
} }
break;
Operand *ValRF = nullptr;
if (llvm::isa<ConstantInteger32>(Val)) {
ValRF = Val;
} else {
ValRF = legalizeToReg(Val);
} }
_strex(Success, TmpReg, Mem); auto *ContentsR = makeReg(DestTy);
_cmp(Success, _0); auto *ResultR = makeReg(DestTy);
_br(Retry, CondARM32::NE);
// The following fake-uses ensure that Subzero will not clobber them in the _dmb();
// load-linked/store-conditional loop above. We might have to spill them, but lowerLoadLinkedStoreExclusive(
// spilling is preferable over incorrect behavior. DestTy, Addr,
Context.insert<InstFakeUse>(PtrVar); [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
Context.insert<InstFakeUse>(Value64->getHi()); if (Operation == Intrinsics::AtomicExchange) {
Context.insert<InstFakeUse>(Value64->getLo()); lowerAssign(InstAssign::create(Func, ResultR, ValRF));
} else { } else {
Context.insert<InstFakeUse>(Value); lowerArithmetic(
createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
} }
return ResultR;
});
_dmb(); _dmb();
if (DestTy == IceType_i8 || DestTy == IceType_i16) { if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
_uxt(PtrContentsReg, PtrContentsReg); Context.insert<InstFakeUse>(ValR);
} }
// Can't dce ContentsR.
Context.insert<InstFakeUse>(ContentsR);
lowerAssign(InstAssign::create(Func, Dest, ContentsR));
}
if (DestTy == IceType_i64) { void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
Context.insert<InstFakeUse>(PtrContentsReg); Operand *Addr, Operand *Val) {
assert(Dest->getType() == IceType_i64);
auto *ResultR = makeI64RegPair();
Context.insert<InstFakeDef>(ResultR);
Operand *ValRF = nullptr;
if (llvm::dyn_cast<ConstantInteger64>(Val)) {
ValRF = Val;
} else {
auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
ValR64->initHiLo(Func);
ValR64->setMustNotHaveReg();
ValR64->getLo()->setMustHaveReg();
ValR64->getHi()->setMustHaveReg();
lowerAssign(InstAssign::create(Func, ValR64, Val));
ValRF = ValR64;
} }
lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
Context.insert<InstFakeUse>(Dest64->getLo()); ContentsR->initHiLo(Func);
Context.insert<InstFakeUse>(Dest64->getHi()); ContentsR->setMustNotHaveReg();
ContentsR->getLo()->setMustHaveReg();
ContentsR->getHi()->setMustHaveReg();
_dmb();
lowerLoadLinkedStoreExclusive(
IceType_i64, Addr,
[this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
Context.insert<InstFakeUse>(Tmp);
if (Operation == Intrinsics::AtomicExchange) {
lowerAssign(InstAssign::create(Func, ResultR, ValRF));
} else { } else {
Context.insert<InstFakeUse>(Dest); lowerArithmetic(
createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
}
Context.insert<InstFakeUse>(ResultR->getHi());
Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
->setDestRedefined();
return ResultR;
});
_dmb();
if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
Context.insert<InstFakeUse>(ValR64->getLo());
Context.insert<InstFakeUse>(ValR64->getHi());
} }
lowerAssign(InstAssign::create(Func, Dest, ContentsR));
} }
void TargetARM32::postambleCtpop64(const InstCall *Instr) { void TargetARM32::postambleCtpop64(const InstCall *Instr) {
...@@ -4733,10 +4734,9 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -4733,10 +4734,9 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
} }
_dmb(); _dmb();
lowerAssign(InstAssign::create(Func, Dest, T)); lowerAssign(InstAssign::create(Func, Dest, T));
// Make sure the atomic load isn't elided when unused, by adding a FakeUse. // Adding a fake-use T to ensure the atomic load is not removed if Dest is
// Since lowerLoad may fuse the load w/ an arithmetic instruction, insert // unused.
// the FakeUse on the last-inserted instruction's dest. Context.insert<InstFakeUse>(T);
Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
return; return;
} }
case Intrinsics::AtomicStore: { case Intrinsics::AtomicStore: {
...@@ -4747,105 +4747,48 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -4747,105 +4747,48 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Func->setError("Unexpected memory ordering for AtomicStore"); Func->setError("Unexpected memory ordering for AtomicStore");
return; return;
} }
Operand *Value = Instr->getArg(0);
Type ValueTy = Value->getType();
assert(isScalarIntegerType(ValueTy));
Operand *Addr = Instr->getArg(1);
if (ValueTy == IceType_i64) {
// Atomic 64-bit stores require a load-locked/store-conditional loop using
// ldrexd, and strexd. The lowered code is:
//
// retry:
// ldrexd t.lo, t.hi, [addr]
// strexd success, value.lo, value.hi, [addr]
// cmp success, #0
// bne retry
// fake-use(addr, value.lo, value.hi)
//
// The fake-use is needed to prevent those variables from being clobbered
// in the loop (which will happen under register pressure.)
Variable64On32 *Tmp = makeI64RegPair();
Variable64On32 *ValueVar =
llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
Variable *AddrVar = makeReg(IceType_i32);
Variable *Success = makeReg(IceType_i32);
OperandARM32Mem *Mem;
Operand *_0 = Ctx->getConstantZero(IceType_i32);
auto *Retry = InstARM32Label::create(Func, this);
Variable64On32 *NewReg = makeI64RegPair();
ValueVar->initHiLo(Func);
ValueVar->mustNotHaveReg();
auto *Value = Instr->getArg(0);
if (Value->getType() == IceType_i64) {
auto *ValueR = makeI64RegPair();
Context.insert<InstFakeDef>(ValueR);
lowerAssign(InstAssign::create(Func, ValueR, Value));
_dmb(); _dmb();
lowerAssign(InstAssign::create(Func, ValueVar, Value)); lowerLoadLinkedStoreExclusive(
lowerAssign(InstAssign::create(Func, AddrVar, Addr)); IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
// The following fake-use prevents the ldrex instruction from being
Context.insert(Retry); // dead code eliminated.
Context.insert<InstFakeDef>(NewReg); Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
Mem = formMemoryOperand(AddrVar, IceType_i64); Context.insert<InstFakeUse>(Tmp);
_ldrex(Tmp, Mem); return ValueR;
// This fake-use both prevents the ldrex from being dead-code eliminated, });
// while also keeping liveness happy about all defs being used. Context.insert<InstFakeUse>(ValueR);
Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
_strex(Success, NewReg, Mem);
_cmp(Success, _0);
_br(Retry, CondARM32::NE);
Context.insert<InstFakeUse>(ValueVar->getLo());
Context.insert<InstFakeUse>(ValueVar->getHi());
Context.insert<InstFakeUse>(AddrVar);
_dmb(); _dmb();
return; return;
} }
auto *ValueR = legalizeToReg(Instr->getArg(0));
const auto ValueTy = ValueR->getType();
assert(isScalarIntegerType(ValueTy));
auto *Addr = legalizeToReg(Instr->getArg(1));
// non-64-bit stores are atomic as long as the address is aligned. This // non-64-bit stores are atomic as long as the address is aligned. This
// is PNaCl, so addresses are aligned. // is PNaCl, so addresses are aligned.
Variable *T = makeReg(ValueTy);
_dmb(); _dmb();
lowerAssign(InstAssign::create(Func, T, Value)); _str(ValueR, formMemoryOperand(Addr, ValueTy));
_str(T, formMemoryOperand(Addr, ValueTy));
_dmb(); _dmb();
return; return;
} }
case Intrinsics::AtomicCmpxchg: { case Intrinsics::AtomicCmpxchg: {
// The initial lowering for cmpxchg was:
//
// retry: // retry:
// ldrex tmp, [addr] // ldrex tmp, [addr]
// cmp tmp, expected // cmp tmp, expected
// mov expected, tmp // mov expected, tmp
// jne retry
// strex success, new, [addr]
// cmp success, #0
// bne retry
// mov dest, expected
//
// Besides requiring two branches, that lowering could also potentially
// write to memory (in mov expected, tmp) unless we were OK with increasing
// the register pressure and requiring expected to be an infinite-weight
// variable (spoiler alert: that was a problem for i64 cmpxchg.) Through
// careful rewriting, and thanks to predication, we now implement the
// lowering as:
//
// retry:
// ldrex tmp, [addr]
// cmp tmp, expected
// strexeq success, new, [addr] // strexeq success, new, [addr]
// movne expected, tmp
// cmpeq success, #0 // cmpeq success, #0
// bne retry // bne retry
// mov dest, expected // mov dest, expected
//
// Predication lets us move the strex ahead of the mov expected, tmp, which
// allows tmp to be a non-infinite weight temporary. We wanted to avoid
// writing to memory between ldrex and strex because, even though most times
// that would cause no issues, if any interleaving memory write aliased
// [addr] then we would have undefined behavior. Undefined behavior isn't
// cool, so we try to avoid it. See the "Synchronization and semaphores"
// section of the "ARM Architecture Reference Manual."
assert(isScalarIntegerType(DestTy)); assert(isScalarIntegerType(DestTy));
// We require the memory address to be naturally aligned. Given that is the // We require the memory address to be naturally aligned. Given that is the
// case, then normal loads are atomic. // case, then normal loads are atomic.
...@@ -4856,98 +4799,63 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -4856,98 +4799,63 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return; return;
} }
OperandARM32Mem *Mem;
Variable *TmpReg;
Variable *Expected, *ExpectedReg;
Variable *New, *NewReg;
Variable *Success = makeReg(IceType_i32);
Operand *_0 = Ctx->getConstantZero(IceType_i32);
auto *Retry = InstARM32Label::create(Func, this);
if (DestTy == IceType_i64) { if (DestTy == IceType_i64) {
Variable64On32 *TmpReg64 = makeI64RegPair(); auto *New = makeI64RegPair();
Variable64On32 *New64 = Context.insert<InstFakeDef>(New);
llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
Variable64On32 *NewReg64 = makeI64RegPair();
Variable64On32 *Expected64 =
llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
Variable64On32 *ExpectedReg64 = makeI64RegPair();
New64->initHiLo(Func);
New64->mustNotHaveReg();
Expected64->initHiLo(Func);
Expected64->mustNotHaveReg();
TmpReg = TmpReg64;
New = New64;
NewReg = NewReg64;
Expected = Expected64;
ExpectedReg = ExpectedReg64;
} else {
TmpReg = makeReg(DestTy);
New = Func->makeVariable(DestTy);
NewReg = makeReg(DestTy);
Expected = Func->makeVariable(DestTy);
ExpectedReg = makeReg(DestTy);
}
Mem = formMemoryOperand(Instr->getArg(0), DestTy); auto *Expected = makeI64RegPair();
if (DestTy == IceType_i64) {
Context.insert<InstFakeDef>(Expected); Context.insert<InstFakeDef>(Expected);
}
lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
if (DestTy == IceType_i64) {
Context.insert<InstFakeDef>(New); _dmb();
} lowerLoadLinkedStoreExclusive(
lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); DestTy, Instr->getArg(0),
[this, Expected, New, Instr, DestTy](Variable *Tmp) {
auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
_cmp(TmpLoR, ExpectedLoR);
_cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
// Adding an explicit use of Tmp here, or its live range will not
// reach here (only those of Tmp.Lo and Tmp.Hi will.)
Context.insert<InstFakeUse>(Tmp);
_mov_redefined(ExpectedLoR, TmpLoR);
_mov_redefined(ExpectedHiR, TmpHiR);
// Same as above.
Context.insert<InstFakeUse>(Tmp);
return New;
},
CondARM32::EQ);
_dmb(); _dmb();
Context.insert(Retry); lowerAssign(InstAssign::create(Func, Dest, Expected));
if (DestTy == IceType_i64) { // The fake-use Expected prevents the assignments to Expected (above)
Context.insert<InstFakeDef>(ExpectedReg, Expected); // from being removed if Dest is not used.
} Context.insert<InstFakeUse>(Expected);
lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); // New needs to be alive here, or its live range will end in the
if (DestTy == IceType_i64) { // strex instruction.
Context.insert<InstFakeDef>(NewReg, New); Context.insert<InstFakeUse>(New);
return;
} }
lowerAssign(InstAssign::create(Func, NewReg, New));
_ldrex(TmpReg, Mem); auto *New = legalizeToReg(Instr->getArg(2));
Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); auto *Expected = legalizeToReg(Instr->getArg(1));
if (DestTy == IceType_i64) {
auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
// lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
// keep liveness happy, shall we?
Context.insert<InstFakeUse>(TmpReg);
Context.insert<InstFakeUse>(ExpectedReg);
_cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
_cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
} else {
_cmp(TmpReg, ExpectedReg);
}
_strex(Success, NewReg, Mem, CondARM32::EQ);
if (DestTy == IceType_i64) {
auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
auto *Expected64 = llvm::cast<Variable64On32>(Expected);
_mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
_mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
Context.insert<InstFakeDef>(Expected, TmpReg);
_set_dest_redefined();
} else {
_mov_redefined(Expected, TmpReg, CondARM32::NE);
}
_cmp(Success, _0, CondARM32::EQ);
_br(Retry, CondARM32::NE);
_dmb(); _dmb();
lowerLoadLinkedStoreExclusive(
DestTy,
Instr->getArg(0), [this, Expected, New, Instr, DestTy](Variable *Tmp) {
lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
_mov_redefined(Expected, Tmp);
return New;
}, CondARM32::EQ);
_dmb();
lowerAssign(InstAssign::create(Func, Dest, Expected)); lowerAssign(InstAssign::create(Func, Dest, Expected));
Context.insert<InstFakeUse>(Expected); Context.insert<InstFakeUse>(Expected);
if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
Context.insert<InstFakeUse>(New64->getLo());
Context.insert<InstFakeUse>(New64->getHi());
} else {
Context.insert<InstFakeUse>(New); Context.insert<InstFakeUse>(New);
}
return; return;
} }
case Intrinsics::AtomicRMW: { case Intrinsics::AtomicRMW: {
......
...@@ -246,8 +246,29 @@ protected: ...@@ -246,8 +246,29 @@ protected:
Operand *Src1); Operand *Src1);
CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1); Operand *Src1);
CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr); CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override; void lowerIcmp(const InstIcmp *Instr) override;
/// Emits the basic sequence for load-linked/store-exclusive loops:
///
/// retry:
/// ldrex tmp, [Addr]
/// StoreValue = Operation(tmp)
/// strexCond success, StoreValue, [Addr]
/// cmpCond success, #0
/// bne retry
///
/// Operation must return the value to strex to Addr. It must not change the
/// flags if Cond is not AL, and must not emit any instructions that could end
/// up writing to memory. Operation is also responsible for any fake-defs
/// needed for i64 handling.
void
lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
std::function<Variable *(Variable *)> Operation,
CondARM32::Cond Cond = CondARM32::AL);
void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val); Operand *Val);
void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override; void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
...@@ -360,13 +381,14 @@ protected: ...@@ -360,13 +381,14 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Ldr>(Dest, Addr, Pred); Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
} }
void _ldrex(Variable *Dest, OperandARM32Mem *Addr, InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Ldrex>(Dest, Addr, Pred); auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
Context.insert<InstFakeDef>(Dest64->getLo(), Dest); Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
Context.insert<InstFakeDef>(Dest64->getHi(), Dest); Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
} }
return Ldrex;
} }
void _lsl(Variable *Dest, Variable *Src0, Operand *Src1, void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
......
...@@ -28,140 +28,38 @@ declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) #0 ...@@ -28,140 +28,38 @@ declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) #0
define internal i32 @testI8Form(i32 %ptr, i32 %a) { define internal i32 @testI8Form(i32 %ptr, i32 %a) {
; ASM-LABEL:testI8Form: ; ASM-LABEL:testI8Form:
; DIS-LABEL:00000000 <testI8Form>: ; DIS-LABEL:<testI8Form>:
; IASM-LABEL:testI8Form: ; IASM-LABEL:testI8Form:
entry: entry:
; ASM-NEXT:.LtestI8Form$entry:
; IASM-NEXT:.LtestI8Form$entry:
; ASM-NEXT: sub sp, sp, #28
; DIS-NEXT: 0: e24dd01c
; IASM-NEXT: .byte 0x1c
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: str r0, [sp, #24]
; ASM-NEXT: # [sp, #24] = def.pseudo
; DIS-NEXT: 4: e58d0018
; IASM-NEXT: .byte 0x18
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r1, [sp, #20]
; ASM-NEXT: # [sp, #20] = def.pseudo
; DIS-NEXT: 8: e58d1014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
%ptr.asptr = inttoptr i32 %ptr to i8* %ptr.asptr = inttoptr i32 %ptr to i8*
%a.arg_trunc = trunc i32 %a to i8 %a.arg_trunc = trunc i32 %a to i8
; ASM-NEXT: ldr r0, [sp, #20]
; DIS-NEXT: c: e59d0014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: strb r0, [sp, #16]
; DIS-NEXT: 10: e5cd0010
; ASM-NEXT: # [sp, #16] = def.pseudo
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe5
%v = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr.asptr, %v = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr.asptr,
i8 %a.arg_trunc, i32 6) i8 %a.arg_trunc, i32 6)
; ASM-NEXT: ldrb r0, [sp, #16] ; ****** Example of dmb *******
; DIS-NEXT: 14: e5dd0010 ; ASM: dmb sy
; IASM-NEXT: .byte 0x10 ; DIS: 1c: f57ff05f
; IASM-NEXT: .byte 0x0 ; IASM: .byte 0x5f
; IASM-NEXT: .byte 0xdd
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: strb r0, [sp, #4]
; ASM-NEXT: # [sp, #4] = def.pseudo
; DIS-NEXT: 18: e5cd0004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r0, [sp, #24]
; DIS-NEXT: 1c: e59d0018
; IASM-NEXT: .byte 0x18
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp]
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: 20: e58d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: dmb sy
; DIS-NEXT: 24: f57ff05f
; IASM-NEXT: .byte 0x5f
; IASM-NEXT: .byte 0xf0 ; IASM-NEXT: .byte 0xf0
; IASM-NEXT: .byte 0x7f ; IASM-NEXT: .byte 0x7f
; IASM-NEXT: .byte 0xf5 ; IASM-NEXT: .byte 0xf5
; ASM-NEXT:.LtestI8Form$local$__0:
; IASM-NEXT:.LtestI8Form$local$__0:
; ASM-NEXT: ldr r0, [sp]
; DIS-NEXT: 28: e59d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldrb r1, [sp, #4]
; DIS-NEXT: 2c: e5dd1004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xdd
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: uxtb r1, r1
; DIS-NEXT: 30: e6ef1071
; IASM-NEXT: .byte 0x71
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xef
; IASM-NEXT: .byte 0xe6
; ***** Example of ldrexb ***** ; ***** Example of ldrexb *****
; ASM-NEXT: ldrexb r2, [r0] ; ASM: ldrexb r1, [r2]
; DIS-NEXT: 34: e1d02f9f ; DIS: 24: e1d21f9f
; IASM-NEXT: .byte 0x9f ; IASM: .byte 0x9f
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd2
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; ASM-NEXT: add r1, r2, r1
; ASM-NEXT: # r3 = def.pseudo
; DIS-NEXT: 38: e0821001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x82
; IASM-NEXT: .byte 0xe0
; ***** Example of strexb ***** ; ***** Example of strexb *****
; ASM-NEXT: strexb r3, r1, [r0] ; ASM: strexb r4, r3, [r2]
; DIS-NEXT: 3c: e1c03f91 ; DIS: 2c: e1c24f93
; IASM-NEXT: .byte 0x91 ; IASM: .byte 0x93
; IASM-NEXT: .byte 0x3f ; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0xc0 ; IASM-NEXT: .byte 0xc2
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
%retval = zext i8 %v to i32 %retval = zext i8 %v to i32
...@@ -170,140 +68,29 @@ entry: ...@@ -170,140 +68,29 @@ entry:
define internal i32 @testI16Form(i32 %ptr, i32 %a) { define internal i32 @testI16Form(i32 %ptr, i32 %a) {
; ASM-LABEL:testI16Form: ; ASM-LABEL:testI16Form:
; DIS-LABEL:00000070 <testI16Form>: ; DIS-LABEL:<testI16Form>:
; IASM-LABEL:testI16Form: ; IASM-LABEL:testI16Form:
entry: entry:
; ASM-NEXT:.LtestI16Form$entry:
; IASM-NEXT:.LtestI16Form$entry:
; ASM-NEXT: sub sp, sp, #28
; DIS-NEXT: 70: e24dd01c
; IASM-NEXT: .byte 0x1c
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: str r0, [sp, #24]
; ASM-NEXT: # [sp, #24] = def.pseudo
; DIS-NEXT: 74: e58d0018
; IASM-NEXT: .byte 0x18
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r1, [sp, #20]
; ASM-NEXT: # [sp, #20] = def.pseudo
; DIS-NEXT: 78: e58d1014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
%ptr.asptr = inttoptr i32 %ptr to i16* %ptr.asptr = inttoptr i32 %ptr to i16*
%a.arg_trunc = trunc i32 %a to i16 %a.arg_trunc = trunc i32 %a to i16
; ASM-NEXT: ldr r0, [sp, #20]
; DIS-NEXT: 7c: e59d0014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: strh r0, [sp, #16]
; ASM-NEXT: # [sp, #16] = def.pseudo
; DIS-NEXT: 80: e1cd01b0
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe1
%v = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr.asptr, %v = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr.asptr,
i16 %a.arg_trunc, i32 6) i16 %a.arg_trunc, i32 6)
; ASM-NEXT: ldrh r0, [sp, #16]
; DIS-NEXT: 84: e1dd01b0
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xdd
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: strh r0, [sp, #4]
; ASM-NEXT: # [sp, #4] = def.pseudo
; DIS-NEXT: 88: e1cd00b4
; IASM-NEXT: .byte 0xb4
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: ldr r0, [sp, #24]
; DIS-NEXT: 8c: e59d0018
; IASM-NEXT: .byte 0x18
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp]
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: 90: e58d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: dmb sy
; DIS-NEXT: 94: f57ff05f
; IASM-NEXT: .byte 0x5f
; IASM-NEXT: .byte 0xf0
; IASM-NEXT: .byte 0x7f
; IASM-NEXT: .byte 0xf5
; ASM-NEXT:.LtestI16Form$local$__0:
; IASM-NEXT:.LtestI16Form$local$__0:
; ASM-NEXT: ldr r0, [sp]
; DIS-NEXT: 98: e59d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldrh r1, [sp, #4]
; DIS-NEXT: 9c: e1dd10b4
; IASM-NEXT: .byte 0xb4
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xdd
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: uxth r1, r1
; DIS-NEXT: a0: e6ff1071
; IASM-NEXT: .byte 0x71
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0xe6
; ***** Example of ldrexh ***** ; ***** Example of ldrexh *****
; ASM-NEXT: ldrexh r2, [r0] ; ASM: ldrexh r1, [r2]
; DIS-NEXT: a4: e1f02f9f ; DIS: 84: e1f21f9f
; IASM-NEXT: .byte 0x9f ; IASM: .byte 0x9f
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0xf0 ; IASM-NEXT: .byte 0xf2
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; ASM-NEXT: add r1, r2, r1
; ASM-NEXT: # r3 = def.pseudo
; DIS-NEXT: a8: e0821001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x82
; IASM-NEXT: .byte 0xe0
; ***** Example of strexh ***** ; ***** Example of strexh *****
; ASM-NEXT: strexh r3, r1, [r0] ; ASM: strexh r4, r3, [r2]
; DIS-NEXT: ac: e1e03f91 ; DIS: 8c: e1e24f93
; IASM-NEXT: .byte 0x91 ; IASM: .byte 0x93
; IASM-NEXT: .byte 0x3f ; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0xe0 ; IASM-NEXT: .byte 0xe2
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
%retval = zext i16 %v to i32 %retval = zext i16 %v to i32
...@@ -312,116 +99,28 @@ entry: ...@@ -312,116 +99,28 @@ entry:
define internal i32 @testI32Form(i32 %ptr, i32 %a) { define internal i32 @testI32Form(i32 %ptr, i32 %a) {
; ASM-LABEL:testI32Form: ; ASM-LABEL:testI32Form:
; DIS-LABEL:000000e0 <testI32Form>: ; DIS-LABEL:<testI32Form>:
; IASM-LABEL:testI32Form: ; IASM-LABEL:testI32Form:
entry: entry:
; ASM-NEXT:.LtestI32Form$entry:
; IASM-NEXT:.LtestI32Form$entry:
; ASM-NEXT: sub sp, sp, #20
; DIS-NEXT: e0: e24dd014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: str r0, [sp, #16]
; ASM-NEXT: # [sp, #16] = def.pseudo
; DIS-NEXT: e4: e58d0010
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r1, [sp, #12]
; ASM-NEXT: # [sp, #12] = def.pseudo
; DIS-NEXT: e8: e58d100c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
%ptr.asptr = inttoptr i32 %ptr to i32* %ptr.asptr = inttoptr i32 %ptr to i32*
%v = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr.asptr, %v = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr.asptr,
i32 %a, i32 6) i32 %a, i32 6)
; ASM-NEXT: ldr r0, [sp, #12]
; DIS-NEXT: ec: e59d000c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp, #4]
; ASM-NEXT: # [sp, #4] = def.pseudo
; DIS-NEXT: f0: e58d0004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r0, [sp, #16]
; DIS-NEXT: f4: e59d0010
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp]
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: f8: e58d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: dmb sy
; DIS-NEXT: fc: f57ff05f
; IASM-NEXT: .byte 0x5f
; IASM-NEXT: .byte 0xf0
; IASM-NEXT: .byte 0x7f
; IASM-NEXT: .byte 0xf5
; ASM-NEXT:.LtestI32Form$local$__0:
; IASM-NEXT:.LtestI32Form$local$__0:
; ASM-NEXT: ldr r0, [sp]
; DIS-NEXT: 100: e59d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r1, [sp, #4]
; DIS-NEXT: 104: e59d1004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ***** Example of ldrex ***** ; ***** Example of ldrex *****
; ASM-NEXT: ldrex r2, [r0] ; ASM: ldrex r1, [r2]
; DIS-NEXT: 108: e1902f9f ; DIS: dc: e1921f9f
; IASM-NEXT: .byte 0x9f ; IASM: .byte 0x9f
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x1f
; IASM-NEXT: .byte 0x90 ; IASM-NEXT: .byte 0x92
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; ASM-NEXT: add r1, r2, r1
; ASM-NEXT: # r3 = def.pseudo
; DIS-NEXT: 10c: e0821001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x82
; IASM-NEXT: .byte 0xe0
; ***** Example of strex ***** ; ***** Example of strex *****
; ASM-NEXT: strex r3, r1, [r0] ; ASM: strex r4, r3, [r2]
; DIS-NEXT: 110: e1803f91 ; DIS: e4: e1824f93
; IASM-NEXT: .byte 0x91 ; IASM: .byte 0x93
; IASM-NEXT: .byte 0x3f ; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0x80 ; IASM-NEXT: .byte 0x82
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
ret i32 %v ret i32 %v
...@@ -429,193 +128,28 @@ entry: ...@@ -429,193 +128,28 @@ entry:
define internal i64 @testI64Form(i32 %ptr, i64 %a) { define internal i64 @testI64Form(i32 %ptr, i64 %a) {
; ASM-LABEL:testI64Form: ; ASM-LABEL:testI64Form:
; DIS-LABEL:00000130 <testI64Form>: ; DIS-LABEL:<testI64Form>:
; IASM-LABEL:testI64Form: ; IASM-LABEL:testI64Form:
entry: entry:
; ASM-NEXT:.LtestI64Form$entry:
; IASM-NEXT:.LtestI64Form$entry:
; ASM-NEXT: push {r4, r5}
; DIS-NEXT: 130: e92d0030
; IASM-NEXT: .byte 0x30
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe9
; ASM-NEXT: sub sp, sp, #32
; DIS-NEXT: 134: e24dd020
; IASM-NEXT: .byte 0x20
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: str r0, [sp, #28]
; ASM-NEXT: # [sp, #28] = def.pseudo
; DIS-NEXT: 138: e58d001c
; IASM-NEXT: .byte 0x1c
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov r0, r2
; DIS-NEXT: 13c: e1a00002
; IASM-NEXT: .byte 0x2
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: str r0, [sp, #24]
; ASM-NEXT: # [sp, #24] = def.pseudo
; DIS-NEXT: 140: e58d0018
; IASM-NEXT: .byte 0x18
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov r0, r3
; DIS-NEXT: 144: e1a00003
; IASM-NEXT: .byte 0x3
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: str r0, [sp, #20]
; ASM-NEXT: # [sp, #20] = def.pseudo
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: 148: e58d0014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
%ptr.asptr = inttoptr i32 %ptr to i64* %ptr.asptr = inttoptr i32 %ptr to i64*
%v = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr.asptr, %v = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr.asptr,
i64 %a, i32 6) i64 %a, i32 6)
; ASM-NEXT: ldr r0, [sp, #24]
; DIS-NEXT: 14c: e59d0018
; IASM-NEXT: .byte 0x18
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp, #8]
; ASM-NEXT: # [sp, #8] = def.pseudo
; DIS-NEXT: 150: e58d0008
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r0, [sp, #20]
; DIS-NEXT: 154: e59d0014
; IASM-NEXT: .byte 0x14
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp, #4]
; ASM-NEXT: # [sp, #4] = def.pseudo
; DIS-NEXT: 158: e58d0004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r0, [sp, #28]
; DIS-NEXT: 15c: e59d001c
; IASM-NEXT: .byte 0x1c
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: str r0, [sp]
; ASM-NEXT: # [sp] = def.pseudo
; DIS-NEXT: 160: e58d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: dmb sy
; DIS-NEXT: 164: f57ff05f
; IASM-NEXT: .byte 0x5f
; IASM-NEXT: .byte 0xf0
; IASM-NEXT: .byte 0x7f
; IASM-NEXT: .byte 0xf5
; ASM-NEXT:.LtestI64Form$local$__0:
; IASM-NEXT:.LtestI64Form$local$__0:
; ASM-NEXT: ldr r0, [sp]
; ASM-NEXT: # r2, r3 = def.pseudo [sp]
; DIS-NEXT: 168: e59d0000
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: ldr r1, [sp, #8]
; DIS-NEXT: 16c: e59d1008
; IASM-NEXT: .byte 0x8
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov r2, r1
; DIS-NEXT: 170: e1a02001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x20
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: ldr r1, [sp, #4]
; DIS-NEXT: 174: e59d1004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov r3, r1
; DIS-NEXT: 178: e1a03001
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0x30
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; ***** Example of ldrexd ***** ; ***** Example of ldrexd *****
; ASM-NEXT: ldrexd r4, r5, [r0] ; ASM: ldrexd r4, r5, [r6]
; ASM-NEXT: # r4 = def.pseudo r4, r5 ; DIS: 13c: e1b64f9f
; ASM-NEXT: # r5 = def.pseudo r4, r5 ; IASM: .byte 0x9f
; ASM-NEXT: # r2, r3 = def.pseudo r2, r3
; DIS-NEXT: 17c: e1b04f9f
; IASM-NEXT: .byte 0x9f
; IASM-NEXT: .byte 0x4f ; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb6
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; ASM-NEXT: adds r2, r4, r2
; DIS-NEXT: 180: e0942002
; IASM-NEXT: .byte 0x2
; IASM-NEXT: .byte 0x20
; IASM-NEXT: .byte 0x94
; IASM-NEXT: .byte 0xe0
; ASM-NEXT: adc r3, r5, r3
; ASM-NEXT: # r1 = def.pseudo
; DIS-NEXT: 184: e0a53003
; IASM-NEXT: .byte 0x3
; IASM-NEXT: .byte 0x30
; IASM-NEXT: .byte 0xa5
; IASM-NEXT: .byte 0xe0
; ***** Example of strexd ***** ; ***** Example of strexd *****
; ASM-NEXT: strexd r1, r2, r3, [r0] ; ASM: strexd r4, r0, r1, [r6]
; DIS-NEXT: 188: e1a01f92 ; DIS: 158: e1a64f90
; IASM-NEXT: .byte 0x92 ; IASM: .byte 0x90
; IASM-NEXT: .byte 0x1f ; IASM-NEXT: .byte 0x4f
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa6
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
ret i64 %v ret i64 %v
......
...@@ -246,10 +246,10 @@ entry: ...@@ -246,10 +246,10 @@ entry:
; CHECK: movq QWORD {{.*}},x{{.*}} ; CHECK: movq QWORD {{.*}},x{{.*}}
; CHECK: mfence ; CHECK: mfence
; ARM32-LABEL: test_atomic_store_64_const ; ARM32-LABEL: test_atomic_store_64_const
; ARM32: dmb
; ARM32: movw [[T0:r[0-9]+]], #12274 ; ARM32: movw [[T0:r[0-9]+]], #12274
; ARM32: movt [[T0]], #29646 ; ARM32: movt [[T0]], #29646
; ARM32: movw r{{[0-9]+}}, #2874 ; ARM32: movw r{{[0-9]+}}, #2874
; ARM32: dmb
; ARM32: .L[[RETRY:.*]]: ; ARM32: .L[[RETRY:.*]]:
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]]
; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]] ; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]]
...@@ -342,7 +342,7 @@ entry: ...@@ -342,7 +342,7 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds ; ARM32: adds
; ARM32-NEXT: adc ; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -359,7 +359,7 @@ entry: ...@@ -359,7 +359,7 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds ; ARM32: adds
; ARM32-NEXT: adc ; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -400,7 +400,7 @@ eblock: ...@@ -400,7 +400,7 @@ eblock:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds ; ARM32: adds
; ARM32-NEXT: adc ; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -457,7 +457,7 @@ err: ...@@ -457,7 +457,7 @@ err:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds ; ARM32: adds
; ARM32-NEXT: adc ; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -543,7 +543,7 @@ entry: ...@@ -543,7 +543,7 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: subs ; ARM32: subs
; ARM32-NEXT: sbc ; ARM32: sbc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -602,9 +602,9 @@ entry: ...@@ -602,9 +602,9 @@ entry:
} }
; CHECK-LABEL: test_atomic_rmw_or_8_global ; CHECK-LABEL: test_atomic_rmw_or_8_global
; ARM32-LABEL: test_atomic_rmw_or_8_global ; ARM32-LABEL: test_atomic_rmw_or_8_global
; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal8 ; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal8
; ARM32: movt [[PTR]], #:upper16:SzGlobal8 ; ARM32: movt [[PTR]], #:upper16:SzGlobal8
; ARM32: dmb
; ARM32: ldrexb r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: ldrexb r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr ; ARM32: orr
; ARM32: strexb ; ARM32: strexb
...@@ -643,9 +643,9 @@ entry: ...@@ -643,9 +643,9 @@ entry:
} }
; CHECK-LABEL: test_atomic_rmw_or_16_global ; CHECK-LABEL: test_atomic_rmw_or_16_global
; ARM32-LABEL: test_atomic_rmw_or_16_global ; ARM32-LABEL: test_atomic_rmw_or_16_global
; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal16 ; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal16
; ARM32: movt [[PTR]], #:upper16:SzGlobal16 ; ARM32: movt [[PTR]], #:upper16:SzGlobal16
; ARM32: dmb
; ARM32: ldrexh r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: ldrexh r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr ; ARM32: orr
; ARM32: strexh ; ARM32: strexh
...@@ -680,9 +680,9 @@ entry: ...@@ -680,9 +680,9 @@ entry:
} }
; CHECK-LABEL: test_atomic_rmw_or_32_global ; CHECK-LABEL: test_atomic_rmw_or_32_global
; ARM32-LABEL: test_atomic_rmw_or_32_global ; ARM32-LABEL: test_atomic_rmw_or_32_global
; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal32 ; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal32
; ARM32: movt [[PTR]], #:upper16:SzGlobal32 ; ARM32: movt [[PTR]], #:upper16:SzGlobal32
; ARM32: dmb
; ARM32: ldrex r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: ldrex r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr ; ARM32: orr
; ARM32: strex ; ARM32: strex
...@@ -709,7 +709,7 @@ entry: ...@@ -709,7 +709,7 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: orr ; ARM32: orr
; ARM32-NEXT: orr ; ARM32: orr
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -819,7 +819,7 @@ entry: ...@@ -819,7 +819,7 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: and ; ARM32: and
; ARM32-NEXT: and ; ARM32: and
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -927,7 +927,7 @@ entry: ...@@ -927,7 +927,7 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: eor ; ARM32: eor
; ARM32-NEXT: eor ; ARM32: eor
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}] ; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1067,8 +1067,8 @@ entry: ...@@ -1067,8 +1067,8 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexb ; ARM32: ldrexb
; ARM32: cmp ; ARM32: cmp
; ARM32: {{strb|mov}}
; ARM32: strexbeq ; ARM32: strexbeq
; ARM32: {{strb|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1091,8 +1091,8 @@ entry: ...@@ -1091,8 +1091,8 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexh ; ARM32: ldrexh
; ARM32: cmp ; ARM32: cmp
; ARM32: {{strh|mov}}
; ARM32: strexheq ; ARM32: strexheq
; ARM32: {{strh|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1112,8 +1112,8 @@ entry: ...@@ -1112,8 +1112,8 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrex ; ARM32: ldrex
; ARM32: cmp ; ARM32: cmp
; ARM32: {{str|mov}}
; ARM32: strexeq ; ARM32: strexeq
; ARM32: {{str|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1140,10 +1140,10 @@ entry: ...@@ -1140,10 +1140,10 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}} ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp ; ARM32: cmp
; ARM32-NEXT: cmpeq ; ARM32: cmpeq
; ARM32: mov
; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: {{str|mov}}ne
; ARM32: {{str|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1163,10 +1163,10 @@ entry: ...@@ -1163,10 +1163,10 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}} ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp ; ARM32: cmp
; ARM32-NEXT: cmpeq ; ARM32: cmpeq
; ARM32: mov
; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: {{str|mov}}ne
; ARM32: {{str|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1195,10 +1195,10 @@ entry: ...@@ -1195,10 +1195,10 @@ entry:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}} ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp ; ARM32: cmp
; ARM32-NEXT: cmpeq ; ARM32: cmpeq
; ARM32: mov
; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: {{str|mov}}ne
; ARM32: {{str|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1241,10 +1241,10 @@ eblock: ...@@ -1241,10 +1241,10 @@ eblock:
; ARM32: dmb ; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}} ; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp ; ARM32: cmp
; ARM32-NEXT: cmpeq ; ARM32: cmpeq
; ARM32: mov
; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}} ; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: {{str|mov}}ne
; ARM32: {{str|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
...@@ -1265,7 +1265,6 @@ entry: ...@@ -1265,7 +1265,6 @@ entry:
; ARM32: ldrex ; ARM32: ldrex
; ARM32: cmp ; ARM32: cmp
; ARM32: strexeq ; ARM32: strexeq
; ARM32: {{str|mov}}ne
; ARM32: cmpeq ; ARM32: cmpeq
; ARM32: bne ; ARM32: bne
; ARM32: dmb ; ARM32: dmb
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment