Commit a3a01a2f by Jan Voung

Subzero: lower the rest of the atomic operations.

64-bit ops are expanded via a cmpxchg8b loop. 64/32-bit and/or/xor are also
expanded into a cmpxchg / cmpxchg8b loop.

Add a cross test for atomic RMW operations and compare and swap.

Misc: Test that atomic.is.lock.free can be optimized out if result is ignored.

TODO:
* optimize compare and swap with compare+branch further down instruction
  stream.
* optimize atomic RMW when the return value is ignored (adds a locked field
  to binary ops though).
* We may want to do some actual target-dependent basic block splitting +
  expansion (the instructions inserted by the expansion must reference the
  pre-colored registers, etc.). Otherwise, we are currently getting by with
  modeling the extended liveness of the variables used in the loops using
  fake uses.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=jfb@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/362463002
parent 8d1072e7
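The core technique throughout this change is the compare-and-swap retry loop.
As a minimal C++ sketch of the shape being emitted, using the same __sync
builtins the new cross test exercises (the function below is illustrative
only, not code from this commit):

#include <stdint.h>

// Sketch: a 64-bit atomic add built from compare-and-swap. On x86-32 the
// CAS maps to "lock cmpxchg8b", which on failure reloads the current
// memory value into edx:eax -- exactly what the next retry needs.
uint64_t atomic_add_u64(volatile uint64_t *Ptr, uint64_t Adj) {
  uint64_t Old = *Ptr;                // mov eax, [ptr]; mov edx, [ptr+4]
  for (;;) {                          // .LABEL:
    uint64_t Seen = __sync_val_compare_and_swap(Ptr, Old, Old + Adj);
    if (Seen == Old)                  // ZF set: swap succeeded
      return Old;                     // the old value is the RMW result
    Old = Seen;                       // retry with the freshly loaded value
  }                                   // jne .LABEL
}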
@@ -57,6 +57,11 @@ if __name__ == '__main__':
metavar='PATH',
help='Path to LLVM executables like llc ' +
'(defaults to $LLVM_BIN_PATH)')
argparser.add_argument('--crosstest-bitcode', required=False,
default=1, type=int,
help='Compile non-subzero crosstest object file ' +
'from the same bitcode as the subzero object. ' +
'If 0, then compile it straight from source.')
args = argparser.parse_args()
objs = []
@@ -113,7 +118,9 @@ if __name__ == '__main__':
# failures. This behavior can be inspected by switching
# use_llc between True and False.
use_llc = False
if use_llc:
if not args.crosstest_bitcode:
objs.append(arg)
elif use_llc:
shellcmd([os.path.join(llvm_bin_path, 'llc'),
'-filetype=obj',
'-o=' + obj_llc,
@@ -125,4 +132,4 @@ if __name__ == '__main__':
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
objs +
['-lm', '-o', os.path.join(args.dir, args.output)])
['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
@@ -64,6 +64,17 @@ for optlevel in ${OPTLEVELS} ; do
--driver=test_icmp_main.cpp \
--output=test_icmp_O${optlevel}
# Compile the non-subzero object files straight from source
# since the native LLVM backend does not understand how to
# lower NaCl-specific intrinsics.
./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_sync_atomic.cpp \
--crosstest-bitcode=0 \
--driver=test_sync_atomic_main.cpp \
--output=test_sync_atomic_O${optlevel}
done
for optlevel in ${OPTLEVELS} ; do
@@ -74,4 +85,5 @@ for optlevel in ${OPTLEVELS} ; do
"${OUTDIR}"/test_fcmp_O${optlevel}
"${OUTDIR}"/test_global_O${optlevel}
"${OUTDIR}"/test_icmp_O${optlevel}
"${OUTDIR}"/test_sync_atomic_O${optlevel}
done
//===- subzero/crosstest/test_sync_atomic.cpp - Implementation for tests --===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This aims to test that all the atomic RMW instructions and compare and swap
// work across the allowed atomic types. This uses the __sync_* builtins
// to test the atomic operations.
//
//===----------------------------------------------------------------------===//
#include <stdint.h>
#include <cstdlib>
#include "test_sync_atomic.h"
#define X(inst, type) \
type test_##inst(bool fetch_first, volatile type *ptr, type a) { \
if (fetch_first) { \
return __sync_fetch_and_##inst(ptr, a); \
} else { \
return __sync_##inst##_and_fetch(ptr, a); \
} \
} \
type test_alloca_##inst(bool fetch, volatile type *ptr, type a) { \
const size_t buf_size = 8; \
type buf[buf_size]; \
for (size_t i = 0; i < buf_size; ++i) { \
if (fetch) { \
buf[i] = __sync_fetch_and_##inst(ptr, a); \
} else { \
buf[i] = __sync_##inst##_and_fetch(ptr, a); \
} \
} \
type sum = 0; \
for (size_t i = 0; i < buf_size; ++i) { \
sum += buf[i]; \
} \
return sum; \
} \
type test_const_##inst(bool fetch, volatile type *ptr, type ign) { \
if (fetch) { \
return __sync_fetch_and_##inst(ptr, 42); \
} else { \
return __sync_##inst##_and_fetch(ptr, 99); \
} \
}
FOR_ALL_RMWOP_TYPES(X)
#undef X
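// For reference, each X(inst, type) instantiation above expands to three
// functions; e.g. X(add, uint8_t) produces (roughly, after preprocessing):
//
//   uint8_t test_add(bool fetch_first, volatile uint8_t *ptr, uint8_t a) {
//     if (fetch_first)
//       return __sync_fetch_and_add(ptr, a);
//     else
//       return __sync_add_and_fetch(ptr, a);
//   }
//
// plus the analogous test_alloca_add and test_const_add variants.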
#define X(type) \
type test_val_cmp_swap(volatile type *ptr, type oldval, type newval) { \
return __sync_val_compare_and_swap(ptr, oldval, newval); \
}
ATOMIC_TYPE_TABLE
#undef X
//===- subzero/crosstest/test_sync_atomic.def - macros for tests -*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines macros for testing atomic intrinsics (via sync builtins).
//
//===----------------------------------------------------------------------===//
#ifndef TEST_SYNC_ATOMIC_DEF
#define TEST_SYNC_ATOMIC_DEF
#define STR(s) #s
#define RMWOP_TABLE \
/* inst */ \
X(add) \
X(sub) \
X(or) \
X(and) \
X(xor)
//#define X(inst)
#define ATOMIC_TYPE_TABLE \
/* type */ \
X(uint8_t) \
X(uint16_t) \
X(uint32_t) \
X(uint64_t)
//#define X(type)
#define FOR_ALL_RMWTYPES_INST(F, inst) \
F(inst, uint8_t) \
F(inst, uint16_t) \
F(inst, uint32_t) \
F(inst, uint64_t)
#define FOR_ALL_RMWOP_TYPES(X) \
FOR_ALL_RMWTYPES_INST(X, add) \
FOR_ALL_RMWTYPES_INST(X, sub) \
FOR_ALL_RMWTYPES_INST(X, or) \
FOR_ALL_RMWTYPES_INST(X, and) \
FOR_ALL_RMWTYPES_INST(X, xor)
//#define X(inst, type)
#endif // TEST_SYNC_ATOMIC_DEF
//===- subzero/crosstest/test_sync_atomic.h - Test prototypes ---*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the function prototypes for cross testing atomic
// intrinsics.
//
//===----------------------------------------------------------------------===//
#include "test_sync_atomic.def"
#define X(inst, type) \
type test_##inst(bool fetch_first, volatile type *ptr, type a); \
type test_alloca_##inst(bool fetch, volatile type *ptr, type a); \
type test_const_##inst(bool fetch, volatile type *ptr, type ignored);
FOR_ALL_RMWOP_TYPES(X)
#undef X
#define X(type) \
type test_val_cmp_swap(volatile type *ptr, type oldval, type newval);
ATOMIC_TYPE_TABLE
#undef X
//===- subzero/crosstest/test_sync_atomic_main.cpp - Driver for tests -----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Driver for cross testing atomic intrinsics, via the sync builtins.
//
//===----------------------------------------------------------------------===//
/* crosstest.py --test=test_sync_atomic.cpp --crosstest-bitcode=0 \
--driver=test_sync_atomic_main.cpp --prefix=Subzero_ \
--output=test_sync_atomic */
#include <pthread.h>
#include <stdint.h>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
// Include test_sync_atomic.h twice - once normally, and once within the
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_sync_atomic.h"
namespace Subzero_ {
#include "test_sync_atomic.h"
}
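// Each test function now exists in two copies, e.g. ::test_add (the
// llc-translated version) and Subzero_::test_add (the Subzero-translated
// version); the harness below runs both on identical inputs and compares
// the results.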
volatile uint64_t Values[] = {
0, 1, 0x7e,
0x7f, 0x80, 0x81,
0xfe, 0xff, 0x7ffe,
0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff,
0x007fffff /*Max subnormal + */,
0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */,
0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/,
0x7fa00000 /*SNaN*/, 0x7fc00000 /*QNaN*/,
0x7ffffffe, 0x7fffffff, 0x80000000,
0x80000001, 0xfffffffe, 0xffffffff,
0x100000000ll, 0x100000001ll,
0x000fffffffffffffll /*Max subnormal + */,
0x0010000000000000ll /*Min+ */,
0x7fefffffffffffffll /*Max+ */,
0x7ff0000000000000ll /*+Inf*/,
0xfff0000000000000ll /*-Inf*/,
0x7ff0000000000001ll /*SNaN*/,
0x7ff8000000000000ll /*QNaN*/,
0x7ffffffffffffffell, 0x7fffffffffffffffll, 0x8000000000000000ll,
0x8000000000000001ll, 0xfffffffffffffffell, 0xffffffffffffffffll };
const static size_t NumValues = sizeof(Values) / sizeof(*Values);
struct {
volatile uint8_t l8;
volatile uint16_t l16;
volatile uint32_t l32;
volatile uint64_t l64;
} AtomicLocs;
template <typename Type>
void testAtomicRMW(volatile Type *AtomicLoc,
size_t &TotalTests, size_t &Passes, size_t &Failures) {
typedef Type (*FuncType)(bool, volatile Type*, Type);
static struct {
const char *Name;
FuncType FuncLlc;
FuncType FuncSz;
} Funcs[] = {
#define X(inst) \
{ \
STR(inst), test_##inst, Subzero_::test_##inst \
}, \
{ \
STR(inst) "_alloca", test_alloca_##inst, Subzero_::test_alloca_##inst \
}, \
{ \
STR(inst) "_const", test_const_##inst, Subzero_::test_const_##inst \
},
RMWOP_TABLE
#undef X
};
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
for (size_t f = 0; f < NumFuncs; ++f) {
for (size_t i = 0; i < NumValues; ++i) {
Type Value1 = static_cast<Type>(Values[i]);
for (size_t j = 0; j < NumValues; ++j) {
Type Value2 = static_cast<Type>(Values[j]);
for (size_t k = 0; k < 2; ++k) {
bool fetch_first = k;
++TotalTests;
*AtomicLoc = Value1;
Type ResultSz1 = Funcs[f].FuncSz(
fetch_first, AtomicLoc, Value2);
Type ResultSz2 = *AtomicLoc;
*AtomicLoc = Value1;
Type ResultLlc1 = Funcs[f].FuncLlc(
fetch_first, AtomicLoc, Value2);
Type ResultLlc2 = *AtomicLoc;
if (ResultSz1 == ResultLlc1 && ResultSz2 == ResultLlc2) {
++Passes;
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name
<< (CHAR_BIT * sizeof(Type)) << "("
<< static_cast<uint64_t>(Value1) << ", "
<< static_cast<uint64_t>(Value2)
<< "): sz1=" << static_cast<uint64_t>(ResultSz1)
<< " llc1=" << static_cast<uint64_t>(ResultLlc1)
<< " sz2=" << static_cast<uint64_t>(ResultSz2)
<< " llc2=" << static_cast<uint64_t>(ResultLlc2)
<< "\n";
}
}
}
}
}
}
template <typename Type>
void testValCompareAndSwap(volatile Type *AtomicLoc, size_t &TotalTests,
size_t &Passes, size_t &Failures) {
for (size_t i = 0; i < NumValues; ++i) {
Type Value1 = static_cast<Type>(Values[i]);
for (size_t j = 0; j < NumValues; ++j) {
Type Value2 = static_cast<Type>(Values[j]);
for (size_t f = 0; f < 2; ++f) {
bool flip = f;
++TotalTests;
*AtomicLoc = Value1;
Type ResultSz1 = Subzero_::test_val_cmp_swap(
AtomicLoc, flip ? Value2 : Value1, Value2);
Type ResultSz2 = *AtomicLoc;
*AtomicLoc = Value1;
Type ResultLlc1 = test_val_cmp_swap(
AtomicLoc, flip ? Value2 : Value1, Value2);
Type ResultLlc2 = *AtomicLoc;
if (ResultSz1 == ResultLlc1 && ResultSz2 == ResultLlc2) {
++Passes;
} else {
++Failures;
std::cout << "test_val_cmp_swap" << (CHAR_BIT * sizeof(Type)) << "("
<< static_cast<uint64_t>(Value1) << ", "
<< static_cast<uint64_t>(Value2)
<< "): sz1=" << static_cast<uint64_t>(ResultSz1)
<< " llc1=" << static_cast<uint64_t>(ResultLlc1)
<< " sz2=" << static_cast<uint64_t>(ResultSz2)
<< " llc2=" << static_cast<uint64_t>(ResultLlc2)
<< "\n";
}
}
}
}
}
template <typename Type>
struct ThreadData {
Type (*FuncPtr)(bool, volatile Type*, Type);
bool Fetch;
volatile Type *Ptr;
Type Adjustment;
};
template <typename Type>
void *threadWrapper(void *Data) {
const size_t NumReps = 8000;
ThreadData<Type> *TData = reinterpret_cast<ThreadData<Type>*>(Data);
for (size_t i = 0; i < NumReps; ++i) {
(void)TData->FuncPtr(TData->Fetch, TData->Ptr, TData->Adjustment);
}
return NULL;
}
template <typename Type>
void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
size_t &Passes, size_t &Failures) {
typedef Type (*FuncType)(bool, volatile Type*, Type);
static struct {
const char *Name;
FuncType FuncLlc;
FuncType FuncSz;
} Funcs[] = {
#define X(inst) \
{ \
STR(inst), test_##inst, Subzero_::test_##inst \
}, \
{ \
STR(inst) "_alloca", test_alloca_##inst, Subzero_::test_alloca_##inst \
},
RMWOP_TABLE
#undef X
};
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
// Just test a few values, otherwise it takes a *really* long time.
volatile uint64_t ValuesSubset[] = { 1, 0x7e, 0x000fffffffffffffll };
const size_t NumValuesSubset = sizeof(ValuesSubset) / sizeof(*ValuesSubset);
for (size_t f = 0; f < NumFuncs; ++f) {
for (size_t i = 0; i < NumValuesSubset; ++i) {
Type Value1 = static_cast<Type>(ValuesSubset[i]);
for (size_t j = 0; j < NumValuesSubset; ++j) {
Type Value2 = static_cast<Type>(ValuesSubset[j]);
bool fetch_first = true;
ThreadData<Type> TDataSz = {
Funcs[f].FuncSz, fetch_first, AtomicLoc, Value2 };
ThreadData<Type> TDataLlc = {
Funcs[f].FuncLlc, fetch_first, AtomicLoc, Value2 };
++TotalTests;
const size_t NumThreads = 4;
pthread_t t[NumThreads];
// Try N threads w/ just Llc.
*AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) {
pthread_create(&t[m], NULL, &threadWrapper<Type>,
reinterpret_cast<void *>(&TDataLlc));
}
for (size_t m = 0; m < NumThreads; ++m) {
pthread_join(t[m], NULL);
}
Type ResultLlc = *AtomicLoc;
// Try N threads w/ both Sz and Llc.
*AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) {
if (pthread_create(&t[m], NULL, &threadWrapper<Type>,
m % 2 == 0
? reinterpret_cast<void *>(&TDataLlc)
: reinterpret_cast<void *>(&TDataSz)) != 0) {
++Failures;
std::cout << "pthread_create failed w/ " << strerror(errno) << "\n";
abort();
}
}
for (size_t m = 0; m < NumThreads; ++m) {
if (pthread_join(t[m], NULL) != 0) {
++Failures;
std::cout << "pthread_join failed w/ " << strerror(errno) << "\n";
abort();
}
}
Type ResultMixed = *AtomicLoc;
if (ResultLlc == ResultMixed) {
++Passes;
} else {
++Failures;
std::cout << "test_with_threads_" << Funcs[f].Name
<< (8 * sizeof(Type)) << "("
<< static_cast<uint64_t>(Value1) << ", "
<< static_cast<uint64_t>(Value2)
<< "): llc=" << static_cast<uint64_t>(ResultLlc)
<< " mixed=" << static_cast<uint64_t>(ResultMixed)
<< "\n";
}
}
}
}
}
int main(int argc, char **argv) {
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
testAtomicRMW<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMW<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMW<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
testAtomicRMW<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testValCompareAndSwap<uint8_t>(
&AtomicLocs.l8, TotalTests, Passes, Failures);
testValCompareAndSwap<uint16_t>(
&AtomicLocs.l16, TotalTests, Passes, Failures);
testValCompareAndSwap<uint32_t>(
&AtomicLocs.l32, TotalTests, Passes, Failures);
testValCompareAndSwap<uint64_t>(
&AtomicLocs.l64, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint8_t>(
&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint16_t>(
&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint32_t>(
&AtomicLocs.l32, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint64_t>(
&AtomicLocs.l64, TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
return Failures;
}
@@ -51,9 +51,8 @@ const size_t TypeX8632AttributesSize =
llvm::array_lengthof(TypeX8632Attributes);
const char *InstX8632SegmentRegNames[] = {
#define X(val, name) \
name,
SEG_REGX8632_TABLE
#define X(val, name) name,
SEG_REGX8632_TABLE
#undef X
};
const size_t InstX8632SegmentRegNamesSize =
@@ -140,6 +139,33 @@ InstX8632Cdq::InstX8632Cdq(Cfg *Func, Variable *Dest, Operand *Source)
addSource(Source);
}
InstX8632Cmpxchg::InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr,
Variable *Eax, Variable *Desired,
bool Locked)
: InstX8632Lockable(Func, InstX8632::Cmpxchg, 3,
llvm::dyn_cast<Variable>(DestOrAddr), Locked) {
assert(Eax->getRegNum() == TargetX8632::Reg_eax);
addSource(DestOrAddr);
addSource(Eax);
addSource(Desired);
}
InstX8632Cmpxchg8b::InstX8632Cmpxchg8b(Cfg *Func, OperandX8632 *Addr,
Variable *Edx, Variable *Eax,
Variable *Ecx, Variable *Ebx,
bool Locked)
: InstX8632Lockable(Func, InstX8632::Cmpxchg8b, 5, NULL, Locked) {
assert(Edx->getRegNum() == TargetX8632::Reg_edx);
assert(Eax->getRegNum() == TargetX8632::Reg_eax);
assert(Ecx->getRegNum() == TargetX8632::Reg_ecx);
assert(Ebx->getRegNum() == TargetX8632::Reg_ebx);
addSource(Addr);
addSource(Edx);
addSource(Eax);
addSource(Ecx);
addSource(Ebx);
}
InstX8632Cvt::InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source)
: InstX8632(Func, InstX8632::Cvt, 1, Dest) {
addSource(Source);
@@ -284,9 +310,14 @@ InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked)
: InstX8632(Func, InstX8632::Xadd, 2, llvm::dyn_cast<Variable>(Dest)),
Locked(Locked) {
HasSideEffects = Locked;
: InstX8632Lockable(Func, InstX8632::Xadd, 2,
llvm::dyn_cast<Variable>(Dest), Locked) {
addSource(Dest);
addSource(Source);
}
InstX8632Xchg::InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source)
: InstX8632(Func, InstX8632::Xchg, 2, llvm::dyn_cast<Variable>(Dest)) {
addSource(Dest);
addSource(Source);
}
@@ -398,6 +429,7 @@ void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
Str << "\n";
}
template <> const char *InstX8632Neg::Opcode = "neg";
template <> const char *InstX8632Add::Opcode = "add";
template <> const char *InstX8632Addps::Opcode = "addps";
template <> const char *InstX8632Adc::Opcode = "adc";
@@ -554,6 +586,48 @@ void InstX8632Cdq::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstX8632Cmpxchg::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 3);
if (Locked) {
Str << "\tlock";
}
Str << "\tcmpxchg\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(2)->emit(Func);
Str << "\n";
}
void InstX8632Cmpxchg::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
if (Locked) {
Str << "lock ";
}
Str << "cmpxchg." << getSrc(0)->getType() << " ";
dumpSources(Func);
}
void InstX8632Cmpxchg8b::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 5);
if (Locked) {
Str << "\tlock";
}
Str << "\tcmpxchg8b\t";
getSrc(0)->emit(Func);
Str << "\n";
}
void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
if (Locked) {
Str << "lock ";
}
Str << "cmpxchg8b ";
dumpSources(Func);
}
void InstX8632Cvt::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
@@ -955,10 +1029,9 @@ void InstX8632Sqrtss::dump(const Cfg *Func) const {
void InstX8632Xadd::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
if (Locked) {
Str << "\tlock xadd ";
} else {
Str << "\txadd\t";
Str << "\tlock";
}
Str << "\txadd\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
@@ -975,6 +1048,22 @@ void InstX8632Xadd::dump(const Cfg *Func) const {
dumpSources(Func);
}
void InstX8632Xchg::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\txchg\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Str << "\n";
}
void InstX8632Xchg::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Type Ty = getSrc(0)->getType();
Str << "xchg." << Ty << " ";
dumpSources(Func);
}
void OperandX8632::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "<OperandX8632>";
......
@@ -54,9 +54,8 @@ class OperandX8632Mem : public OperandX8632 {
public:
enum SegmentRegisters {
DefaultSegment = -1,
#define X(val, name) \
val,
SEG_REGX8632_TABLE
#define X(val, name) val,
SEG_REGX8632_TABLE
#undef X
SegReg_NUM
};
@@ -142,6 +141,8 @@ public:
Br,
Call,
Cdq,
Cmpxchg,
Cmpxchg8b,
Cvt,
Div,
Divps,
@@ -162,6 +163,7 @@ public:
Mul,
Mulps,
Mulss,
Neg,
Or,
Pop,
Push,
@@ -183,6 +185,7 @@ public:
Ucomiss,
UD2,
Xadd,
Xchg,
Xor
};
static const char *getWidthString(Type Ty);
@@ -328,6 +331,41 @@ private:
virtual ~InstX8632Call() {}
};
template <InstX8632::InstKindX8632 K>
class InstX8632Unaryop : public InstX8632 {
public:
// Create a unary-op instruction like neg.
// The source and dest are the same variable.
static InstX8632Unaryop *create(Cfg *Func, Operand *SrcDest) {
return new (Func->allocate<InstX8632Unaryop>())
InstX8632Unaryop(Func, SrcDest);
}
virtual void emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Str << "\t" << Opcode << "\t";
getSrc(0)->emit(Func);
Str << "\n";
}
virtual void dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = " << Opcode << "." << getDest()->getType() << " ";
dumpSources(Func);
}
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
InstX8632Unaryop(Cfg *Func, Operand *SrcDest)
: InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
addSource(SrcDest);
}
InstX8632Unaryop(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
InstX8632Unaryop &operator=(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Unaryop() {}
static const char *Opcode;
};
// See the definition of emitTwoAddress() for a description of
// ShiftHack.
void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
@@ -400,6 +438,7 @@ private:
static const char *Opcode;
};
typedef InstX8632Unaryop<InstX8632::Neg> InstX8632Neg;
typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
@@ -423,6 +462,28 @@ typedef InstX8632Binop<InstX8632::Sar, true> InstX8632Sar;
typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
// Base class for a lockable x86-32 instruction (emits a locked prefix).
class InstX8632Lockable : public InstX8632 {
public:
virtual void emit(const Cfg *Func) const = 0;
virtual void dump(const Cfg *Func) const;
protected:
bool Locked;
InstX8632Lockable(Cfg *Func, InstKindX8632 Kind, SizeT Maxsrcs,
Variable *Dest, bool Locked)
: InstX8632(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
// Assume that such instructions are used for atomics, so be careful
// with optimizations.
HasSideEffects = Locked;
}
private:
InstX8632Lockable(const InstX8632Lockable &) LLVM_DELETED_FUNCTION;
InstX8632Lockable &operator=(const InstX8632Lockable &) LLVM_DELETED_FUNCTION;
};
// Mul instruction - unsigned multiply.
class InstX8632Mul : public InstX8632 {
public:
@@ -502,6 +563,57 @@ private:
virtual ~InstX8632Cdq() {}
};
// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
// If not, ZF is cleared and <dest> is copied to eax (or subregister).
// <dest> can be a register or memory, while <desired> must be a register.
// It is the user's responsibility to mark eax with a FakeDef.
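// In pseudocode (illustrative only):
//   if (*dest == eax) { ZF = 1; *dest = desired; }
//   else              { ZF = 0; eax = *dest; }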
class InstX8632Cmpxchg : public InstX8632Lockable {
public:
static InstX8632Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
Variable *Desired, bool Locked) {
return new (Func->allocate<InstX8632Cmpxchg>())
InstX8632Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg); }
private:
InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
Variable *Desired, bool Locked);
InstX8632Cmpxchg(const InstX8632Cmpxchg &) LLVM_DELETED_FUNCTION;
InstX8632Cmpxchg &operator=(const InstX8632Cmpxchg &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Cmpxchg() {}
};
// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
// If not, ZF is cleared and <m64> is copied to edx:eax.
// The caller is responsible for inserting FakeDefs to mark edx
// and eax as modified.
// <m64> must be a memory operand.
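// In pseudocode (illustrative only):
//   if (*m64 == edx:eax) { ZF = 1; *m64 = ecx:ebx; }
//   else                 { ZF = 0; edx:eax = *m64; }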
class InstX8632Cmpxchg8b : public InstX8632Lockable {
public:
static InstX8632Cmpxchg8b *create(Cfg *Func, OperandX8632 *Dest,
Variable *Edx, Variable *Eax, Variable *Ecx,
Variable *Ebx, bool Locked) {
return new (Func->allocate<InstX8632Cmpxchg8b>())
InstX8632Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg8b); }
private:
InstX8632Cmpxchg8b(Cfg *Func, OperandX8632 *Dest, Variable *Edx,
Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked);
InstX8632Cmpxchg8b(const InstX8632Cmpxchg8b &) LLVM_DELETED_FUNCTION;
InstX8632Cmpxchg8b &
operator=(const InstX8632Cmpxchg8b &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Cmpxchg8b() {}
};
// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
// as appropriate. s=float, d=double, i=int. X and Y are determined
// from dest/src types. Sign and zero extension on the integer
@@ -861,7 +973,7 @@ private:
//
// Both the dest and source are updated. The caller should then insert a
// FakeDef to reflect the second update.
class InstX8632Xadd : public InstX8632 {
class InstX8632Xadd : public InstX8632Lockable {
public:
static InstX8632Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked) {
@@ -873,14 +985,35 @@ public:
static bool classof(const Inst *Inst) { return isClassof(Inst, Xadd); }
private:
bool Locked;
InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
InstX8632Xadd(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
InstX8632Xadd &operator=(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Xadd() {}
};
// Exchange instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand). At least one of
// the operands must be a register (and the other can be reg or mem).
// Both the Dest and Source are updated. If there is a memory operand,
// then the instruction is automatically "locked" without the need for
// a lock prefix.
class InstX8632Xchg : public InstX8632 {
public:
static InstX8632Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
return new (Func->allocate<InstX8632Xchg>())
InstX8632Xchg(Func, Dest, Source);
}
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Xchg); }
private:
InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source);
InstX8632Xchg(const InstX8632Xchg &) LLVM_DELETED_FUNCTION;
InstX8632Xchg &operator=(const InstX8632Xchg &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Xchg() {}
};
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEINSTX8632_H
@@ -46,7 +46,7 @@ const struct IceIntrinsicsEntry_ {
"nacl.atomic.fence" },
{ { { Intrinsics::AtomicFenceAll, true }, { IceType_void }, 1 },
"nacl.atomic.fence.all" },
{ { { Intrinsics::AtomicIsLockFree, true },
{ { { Intrinsics::AtomicIsLockFree, false },
{ IceType_i1, IceType_i32, IceType_i32 }, 3 },
"nacl.atomic.is.lock.free" },
......
@@ -1968,7 +1968,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
switch (Instr->getIntrinsicInfo().ID) {
case Intrinsics::AtomicCmpxchg:
case Intrinsics::AtomicCmpxchg: {
if (!Intrinsics::VerifyMemoryOrder(
llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
@@ -1979,9 +1979,18 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
return;
}
// TODO(jvoung): fill it in.
Func->setError("Unhandled intrinsic");
Variable *DestPrev = Instr->getDest();
Operand *PtrToMem = Instr->getArg(0);
Operand *Expected = Instr->getArg(1);
Operand *Desired = Instr->getArg(2);
lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
// TODO(jvoung): If we peek ahead a few instructions and see how
// DestPrev is used (typically via another compare and branch),
// we may be able to optimize. If the result truly is used by a
// compare + branch, and the comparison is for equality, then we can
// optimize out the later compare, and fuse with the later branch.
return;
}
case Intrinsics::AtomicFence:
if (!Intrinsics::VerifyMemoryOrder(
llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
@@ -2183,18 +2192,54 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
Operand *Expected, Operand *Desired) {
if (Expected->getType() == IceType_i64) {
// Reserve the pre-colored registers first, before adding any more
// infinite-weight variables from FormMemoryOperand's legalization.
Variable *T_edx = makeReg(IceType_i32, Reg_edx);
Variable *T_eax = makeReg(IceType_i32, Reg_eax);
Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
_mov(T_eax, loOperand(Expected));
_mov(T_edx, hiOperand(Expected));
_mov(T_ebx, loOperand(Desired));
_mov(T_ecx, hiOperand(Desired));
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
const bool Locked = true;
_cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
_mov(DestLo, T_eax);
_mov(DestHi, T_edx);
return;
}
Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
_mov(T_eax, Expected);
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
Variable *DesiredReg = legalizeToVar(Desired);
const bool Locked = true;
_cmpxchg(Addr, T_eax, DesiredReg, Locked);
_mov(DestPrev, T_eax);
}
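// For reference, the 32-bit path above yields roughly:
//   mov eax, <expected>
//   lock cmpxchg dword ptr [addr], <desired_reg>
//   mov <dest_prev>, eax
// and the 64-bit path the analogous cmpxchg8b sequence on edx:eax / ecx:ebx.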
void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Operand *Ptr, Operand *Val) {
bool NeedsCmpxchg = false;
LowerBinOp Op_Lo = NULL;
LowerBinOp Op_Hi = NULL;
switch (Operation) {
default:
Func->setError("Unknown AtomicRMW operation");
return;
case Intrinsics::AtomicAdd: {
if (Dest->getType() == IceType_i64) {
// Do a nasty cmpxchg8b loop. Factor this into a function.
// TODO(jvoung): fill it in.
Func->setError("Unhandled AtomicRMW operation");
return;
// All the fall-through paths must set this to true, but use this
// for asserting.
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_add;
Op_Hi = &TargetX8632::_adc;
break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
const bool Locked = true;
@@ -2206,26 +2251,160 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
}
case Intrinsics::AtomicSub: {
if (Dest->getType() == IceType_i64) {
// Do a nasty cmpxchg8b loop.
// TODO(jvoung): fill it in.
Func->setError("Unhandled AtomicRMW operation");
return;
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_sub;
Op_Hi = &TargetX8632::_sbb;
break;
}
// Generate a memory operand from Ptr.
// neg...
// Then do the same as AtomicAdd.
// TODO(jvoung): fill it in.
Func->setError("Unhandled AtomicRMW operation");
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
const bool Locked = true;
Variable *T = NULL;
_mov(T, Val);
_neg(T);
_xadd(Addr, T, Locked);
_mov(Dest, T);
return;
}
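// The AtomicSub case above relies on fetch_and_sub(ptr, val) being
// equivalent to fetch_and_add(ptr, -val): negate the adjustment, and a
// single lock xadd still returns the old value.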
case Intrinsics::AtomicOr:
// TODO(jvoung): If Dest is null or dead, then some of these
// operations do not need an "exchange", but just a locked op.
// That appears to be "worth" it for sub, or, and, and xor.
// xadd is probably fine vs lock add for add, and xchg is fine
// vs an atomic store.
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_or;
Op_Hi = &TargetX8632::_or;
break;
case Intrinsics::AtomicAnd:
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_and;
Op_Hi = &TargetX8632::_and;
break;
case Intrinsics::AtomicXor:
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_xor;
Op_Hi = &TargetX8632::_xor;
break;
case Intrinsics::AtomicExchange:
// TODO(jvoung): fill it in.
Func->setError("Unhandled AtomicRMW operation");
if (Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
// NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
// just need to be moved to the ecx and ebx registers.
Op_Lo = NULL;
Op_Hi = NULL;
break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
Variable *T = NULL;
_mov(T, Val);
_xchg(Addr, T);
_mov(Dest, T);
return;
}
// Otherwise, we need a cmpxchg loop.
assert(NeedsCmpxchg);
expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
Variable *Dest, Operand *Ptr,
Operand *Val) {
// Expand a more complex RMW operation as a cmpxchg loop:
// For 64-bit:
// mov eax, [ptr]
// mov edx, [ptr + 4]
// .LABEL:
// mov ebx, eax
// <Op_Lo> ebx, <desired_adj_lo>
// mov ecx, edx
// <Op_Hi> ecx, <desired_adj_hi>
// lock cmpxchg8b [ptr]
// jne .LABEL
// mov <dest_lo>, eax
// mov <dest_hi>, edx
//
// For 32-bit:
// mov eax, [ptr]
// .LABEL:
// mov <reg>, eax
// op <reg>, [desired_adj]
// lock cmpxchg [ptr], <reg>
// jne .LABEL
// mov <dest>, eax
//
// If Op_{Lo,Hi} are NULL, then just copy the value.
Val = legalize(Val);
Type Ty = Val->getType();
if (Ty == IceType_i64) {
Variable *T_edx = makeReg(IceType_i32, Reg_edx);
Variable *T_eax = makeReg(IceType_i32, Reg_eax);
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
_mov(T_eax, loOperand(Addr));
_mov(T_edx, hiOperand(Addr));
Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
InstX8632Label *Label = InstX8632Label::create(Func, this);
const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
if (!IsXchg8b) {
Context.insert(Label);
_mov(T_ebx, T_eax);
(this->*Op_Lo)(T_ebx, loOperand(Val));
_mov(T_ecx, T_edx);
(this->*Op_Hi)(T_ecx, hiOperand(Val));
} else {
// This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
// It just needs the Val loaded into ebx and ecx.
// That can also be done before the loop.
_mov(T_ebx, loOperand(Val));
_mov(T_ecx, hiOperand(Val));
Context.insert(Label);
}
const bool Locked = true;
_cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
_br(InstX8632Br::Br_ne, Label);
if (!IsXchg8b) {
// If Val is a variable, model the extended live range of Val through
// the end of the loop, since it will be re-used by the loop.
if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
Context.insert(InstFakeUse::create(Func, ValLo));
Context.insert(InstFakeUse::create(Func, ValHi));
}
} else {
// For xchg, the loop is slightly smaller and ebx/ecx are used.
Context.insert(InstFakeUse::create(Func, T_ebx));
Context.insert(InstFakeUse::create(Func, T_ecx));
}
// The address base is also reused in the loop.
Context.insert(InstFakeUse::create(Func, Addr->getBase()));
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
_mov(DestLo, T_eax);
_mov(DestHi, T_edx);
return;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
Variable *T_eax = makeReg(Ty, Reg_eax);
_mov(T_eax, Addr);
InstX8632Label *Label = InstX8632Label::create(Func, this);
Context.insert(Label);
// We want to pick a different register for T than Eax, so don't use
// _mov(T == NULL, T_eax).
Variable *T = makeReg(Ty);
_mov(T, T_eax);
(this->*Op_Lo)(T, Val);
const bool Locked = true;
_cmpxchg(Addr, T_eax, T, Locked);
_br(InstX8632Br::Br_ne, Label);
// If Val is a variable, model the extended live range of Val through
// the end of the loop, since it will be re-used by the loop.
if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
Context.insert(InstFakeUse::create(Func, ValVar));
}
// The address base is also reused in the loop.
Context.insert(InstFakeUse::create(Func, Addr->getBase()));
_mov(Dest, T_eax);
}
namespace {
......
@@ -95,9 +95,15 @@ protected:
virtual void doAddressOptLoad();
virtual void doAddressOptStore();
void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
Operand *Desired);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
typedef void (TargetX8632::*LowerBinOp)(Variable *, Operand *);
void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
Variable *Dest, Operand *Ptr, Operand *Val);
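// For example, the 64-bit atomic add expansion in lowerAtomicRMW
// dispatches through these pointers-to-member as:
//   expandAtomicRMWAsCmpxchg(&TargetX8632::_add, &TargetX8632::_adc,
//                            Dest, Ptr, Val);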
// Operand legalization helpers. To deal with address mode
// constraints, the helpers will create a new Operand and emit
// instructions that guarantee that the Operand kind is one of those
@@ -177,6 +183,22 @@ protected:
void _cmp(Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Icmp::create(Func, Src0, Src1));
}
void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
bool Locked) {
Context.insert(
InstX8632Cmpxchg::create(Func, DestOrAddr, Eax, Desired, Locked));
// Mark eax as possibly modified by cmpxchg.
Context.insert(
InstFakeDef::create(Func, Eax, llvm::dyn_cast<Variable>(DestOrAddr)));
}
void _cmpxchg8b(OperandX8632 *Addr, Variable *Edx, Variable *Eax,
Variable *Ecx, Variable *Ebx, bool Locked) {
Context.insert(
InstX8632Cmpxchg8b::create(Func, Addr, Edx, Eax, Ecx, Ebx, Locked));
// Mark edx and eax as possibly modified by cmpxchg8b.
Context.insert(InstFakeDef::create(Func, Edx));
Context.insert(InstFakeDef::create(Func, Eax));
}
void _cvt(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Cvt::create(Func, Dest, Src0));
}
@@ -232,6 +254,9 @@ protected:
void _mulss(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Mulss::create(Func, Dest, Src0));
}
void _neg(Variable *SrcDest) {
Context.insert(InstX8632Neg::create(Func, SrcDest));
}
void _or(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Or::create(Func, Dest, Src0));
}
@@ -294,7 +319,14 @@ protected:
Context.insert(InstX8632Xadd::create(Func, Dest, Src, Locked));
// The xadd exchanges Dest and Src (modifying Src).
// Model that update with a FakeDef.
Context.insert(InstFakeDef::create(Func, Src));
Context.insert(
InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
}
void _xchg(Operand *Dest, Variable *Src) {
Context.insert(InstX8632Xchg::create(Func, Dest, Src));
// The xchg modifies Dest and Src -- model that update with a FakeDef.
Context.insert(
InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
}
void _xor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Xor::create(Func, Dest, Src0));
......
@@ -2,6 +2,7 @@
; size allowed.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s --check-prefix=CHECKO2REM
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
@@ -28,6 +29,11 @@ declare void @llvm.nacl.atomic.fence(i32)
declare void @llvm.nacl.atomic.fence.all()
declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*)
; NOTE: The LLC equivalents for 16-bit atomic operations are expanded
; as 32-bit operations. For Subzero, assume that real 16-bit operations
; will be usable (the validator will be fixed):
; https://code.google.com/p/nativeclient/issues/detail?id=2981
;;; Load
; x86 guarantees load/store to be atomic if naturally aligned.
@@ -107,7 +113,6 @@ entry:
; CHECK: movq x{{.*}}, qword
; CHECK: movq qword {{.*}}, x{{.*}}
;;; Store
define void @test_atomic_store_8(i32 %iptr, i32 %v) {
@@ -169,6 +174,8 @@ entry:
;;; RMW
;; add
define i32 @test_atomic_rmw_add_8(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i8
@@ -180,7 +187,7 @@ entry:
}
; CHECK-LABEL: test_atomic_rmw_add_8
; CHECK: lock xadd byte {{.*}}, [[REG:.*]]
; CHECK: mov {{.*}}, {{.*}}[[REG]]
; CHECK: mov {{.*}}, [[REG]]
define i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
entry:
@@ -192,7 +199,7 @@ entry:
}
; CHECK-LABEL: test_atomic_rmw_add_16
; CHECK: lock xadd word {{.*}}, [[REG:.*]]
; CHECK: mov {{.*}}, {{.*}}[[REG]]
; CHECK: mov {{.*}}, [[REG]]
define i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
entry:
@@ -202,16 +209,61 @@ entry:
}
; CHECK-LABEL: test_atomic_rmw_add_32
; CHECK: lock xadd dword {{.*}}, [[REG:.*]]
; CHECK: mov {{.*}}, {{.*}}[[REG]]
; CHECK: mov {{.*}}, [[REG]]
;define i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
;entry:
; %ptr = inttoptr i32 %iptr to i64*
; %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
; ret i64 %a
;}
; CHECKLATER-LABEL: test_atomic_rmw_add_64
; CHECKLATER: uh need a... cmpxchg8b loop.
define i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_add_64
; CHECK: push ebx
; CHECK: mov eax, dword ptr [{{.*}}]
; CHECK: mov edx, dword ptr [{{.*}}+4]
; CHECK: .L[[LABEL:.*]]:
; CHECK: mov ebx, eax
; RHS of add cannot be any of the e[abcd]x regs because they are
; clobbered in the loop, and the RHS needs to remain live.
; CHECK: add ebx, {{.*e.[^x]}}
; CHECK: mov ecx, edx
; CHECK: adc ecx, {{.*e.[^x]}}
; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
; It can be esi, edi, or ebp though, for example (so we need to be careful
; about rejecting eb* and ed*.)
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: jne .L[[LABEL]]
; Test with some more register pressure. When we have an alloca, ebp is
; used to manage the stack frame, so it cannot be used as a register either.
declare void @use_ptr(i32 %iptr)
define i64 @test_atomic_rmw_add_64_alloca(i32 %iptr, i64 %v) {
entry:
%alloca_ptr = alloca i8, i32 16, align 16
%ptr = inttoptr i32 %iptr to i64*
%old = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
store i8 0, i8* %alloca_ptr, align 1
store i8 1, i8* %alloca_ptr, align 1
store i8 2, i8* %alloca_ptr, align 1
store i8 3, i8* %alloca_ptr, align 1
%__5 = ptrtoint i8* %alloca_ptr to i32
call void @use_ptr(i32 %__5)
ret i64 %old
}
; CHECK-LABEL: test_atomic_rmw_add_64_alloca
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
; It also cannot be ebp since we use that for alloca. Also make sure it's
; not esp, since that's the stack pointer and mucking with it will break
; the later use_ptr function call.
; That pretty much leaves esi, or edi as the only viable registers.
; CHECK: lock cmpxchg8b qword ptr [e{{[ds]}}i]
; CHECK: call use_ptr
define i32 @test_atomic_rmw_add_32_ignored(i32 %iptr, i32 %v) {
entry:
@@ -219,129 +271,562 @@ entry:
%ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
ret i32 %v
}
; Technically this could use "lock add" instead of "lock xadd", if liveness
; tells us that the destination variable is dead.
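; For illustration, a liveness-aware lowering could then emit simply:
;   lock add dword ptr [<ptr>], <reg>
; with no xadd and no mov of the (dead) old value.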
; CHECK-LABEL: test_atomic_rmw_add_32_ignored
; CHECK: lock xadd dword {{.*}}, [[REG:.*]]
;define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
;entry:
; %ptr = inttoptr i32 %iptr to i32*
; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
; ret i32 %a
;}
; CHECKLATER-LABEL: test_atomic_rmw_sub_32
; CHECKLATER: neg
; CHECKLATER: lock
; CHECKLATER: xadd
;define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
;entry:
; %ptr = inttoptr i32 %iptr to i32*
; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
; ret i32 %a
;}
; CHECKLATER-LABEL: test_atomic_rmw_or_32
; Need a cmpxchg loop.
;define i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
;entry:
; %ptr = inttoptr i32 %iptr to i32*
; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
; ret i32 %a
;}
; CHECKLATER-LABEL: test_atomic_rmw_and_32
; Also a cmpxchg loop.
;define i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
;entry:
; %ptr = inttoptr i32 %iptr to i32*
; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
; ret i32 %a
;}
; CHECKLATER-LABEL: test_atomic_rmw_xor_32
; Also a cmpxchg loop.
;define i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
;entry:
; %ptr = inttoptr i32 %iptr to i32*
; %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
; ret i32 %a
;}
; CHECKLATER-LABEL: test_atomic_rmw_xchg_32
; Atomic RMW 64 needs to be expanded into its own loop.
; Make sure that works w/ non-trivial function bodies.
define i64 @test_atomic_rmw_add_64_loop(i32 %iptr, i64 %v) {
entry:
%x = icmp ult i64 %v, 100
br i1 %x, label %err, label %loop
loop:
%v_next = phi i64 [ %v, %entry ], [ %next, %loop ]
%ptr = inttoptr i32 %iptr to i64*
%next = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v_next, i32 6)
%success = icmp eq i64 %next, 100
br i1 %success, label %done, label %loop
done:
ret i64 %next
err:
ret i64 0
}
; CHECK-LABEL: test_atomic_rmw_add_64_loop
; CHECK: push ebx
; CHECK-LABEL: .Ltest_atomic_rmw_add_64_loop{{.*}}loop
; CHECK: mov eax, dword ptr [{{.*}}]
; CHECK: mov edx, dword ptr [{{.*}}+4]
; CHECK: .L[[LABEL:.*]]:
; CHECK: mov ebx, eax
; CHECK: add ebx, {{.*e.[^x]}}
; CHECK: mov ecx, edx
; CHECK: adc ecx, {{.*e.[^x]}}
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: jne .L[[LABEL]]
; CHECK-LABEL: .Ltest_atomic_rmw_add_64_loop{{.*}}done
;; sub
define i32 @test_atomic_rmw_sub_8(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i8
%ptr = inttoptr i32 %iptr to i8*
%a = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %trunc, i32 6)
%a_ext = zext i8 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_sub_8
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd byte {{.*}}, [[REG]]
; CHECK: mov {{.*}}, [[REG]]
define i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i16
%ptr = inttoptr i32 %iptr to i16*
%a = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %trunc, i32 6)
%a_ext = zext i16 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_sub_16
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd word {{.*}}, [[REG]]
; CHECK: mov {{.*}}, [[REG]]
define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_sub_32
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd dword {{.*}}, [[REG]]
; CHECK: mov {{.*}}, [[REG]]
define i64 @test_atomic_rmw_sub_64(i32 %iptr, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%a = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %v, i32 6)
ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_sub_64
; CHECK: push ebx
; CHECK: mov eax, dword ptr [{{.*}}]
; CHECK: mov edx, dword ptr [{{.*}}+4]
; CHECK: .L[[LABEL:.*]]:
; CHECK: mov ebx, eax
; CHECK: sub ebx, {{.*e.[^x]}}
; CHECK: mov ecx, edx
; CHECK: sbb ecx, {{.*e.[^x]}}
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_sub_32_ignored(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
ret i32 %v
}
; Could use "lock sub" instead of "neg; lock xadd"
; CHECK-LABEL: test_atomic_rmw_sub_32_ignored
; CHECK: neg [[REG:.*]]
; CHECK: lock xadd dword {{.*}}, [[REG]]
;; or
define i32 @test_atomic_rmw_or_8(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i8
%ptr = inttoptr i32 %iptr to i8*
%a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
%a_ext = zext i8 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_or_8
; CHECK: mov al, byte ptr
; CHECK: .L[[LABEL:.*]]:
; Dest cannot be eax here, because eax is used for the old value. Also want
; to make sure that cmpxchg's source is the same register.
; CHECK: or [[REG:[^a].]]
; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], [[REG]]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i16
%ptr = inttoptr i32 %iptr to i16*
%a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
%a_ext = zext i16 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_or_16
; CHECK: mov ax, word ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: or [[REG:[^a].]]
; CHECK: lock cmpxchg word ptr [e{{[^a].}}], [[REG]]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_or_32
; CHECK: mov eax, dword ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: or [[REG:e[^a].]]
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}], [[REG]]
; CHECK: jne .L[[LABEL]]
define i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6)
ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_or_64
; CHECK: push ebx
; CHECK: mov eax, dword ptr [{{.*}}]
; CHECK: mov edx, dword ptr [{{.*}}+4]
; CHECK: .L[[LABEL:.*]]:
; CHECK: mov ebx, eax
; CHECK: or ebx, {{.*e.[^x]}}
; CHECK: mov ecx, edx
; CHECK: or ecx, {{.*e.[^x]}}
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_or_32_ignored(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
ret i32 %v
}
; CHECK-LABEL: test_atomic_rmw_or_32_ignored
; Could just "lock or", if we inspect the liveness information first.
; Would also need a way to introduce "lock"'edness to binary
; operators without introducing overhead on the more common binary ops.
; CHECK: mov eax, dword ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: or [[REG:e[^a].]]
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}], [[REG]]
; CHECK: jne .L[[LABEL]]
;; and
define i32 @test_atomic_rmw_and_8(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i8
%ptr = inttoptr i32 %iptr to i8*
%a = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %trunc, i32 6)
%a_ext = zext i8 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_and_8
; CHECK: mov al, byte ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: and [[REG:[^a].]]
; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], [[REG]]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_and_16(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i16
%ptr = inttoptr i32 %iptr to i16*
%a = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %trunc, i32 6)
%a_ext = zext i16 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_and_16
; CHECK: mov ax, word ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: and
; CHECK: lock cmpxchg word ptr [e{{[^a].}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_and_32
; CHECK: mov eax, dword ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: and
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
; CHECK: jne .L[[LABEL]]
define i64 @test_atomic_rmw_and_64(i32 %iptr, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%a = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %v, i32 6)
ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_and_64
; CHECK: push ebx
; CHECK: mov eax, dword ptr [{{.*}}]
; CHECK: mov edx, dword ptr [{{.*}}+4]
; CHECK: .L[[LABEL:.*]]:
; CHECK: mov ebx, eax
; CHECK: and ebx, {{.*e.[^x]}}
; CHECK: mov ecx, edx
; CHECK: and ecx, {{.*e.[^x]}}
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_and_32_ignored(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
ret i32 %v
}
; CHECK-LABEL: test_atomic_rmw_and_32_ignored
; Could just "lock and"
; CHECK: mov eax, dword ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: and
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
; CHECK: jne .L[[LABEL]]
;; xor
define i32 @test_atomic_rmw_xor_8(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i8
%ptr = inttoptr i32 %iptr to i8*
%a = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8* %ptr, i8 %trunc, i32 6)
%a_ext = zext i8 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_xor_8
; CHECK: mov al, byte ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: xor [[REG:[^a].]]
; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], [[REG]]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_xor_16(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i16
%ptr = inttoptr i32 %iptr to i16*
%a = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %trunc, i32 6)
%a_ext = zext i16 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_xor_16
; CHECK: mov ax, word ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: xor
; CHECK: lock cmpxchg word ptr [e{{[^a].}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_xor_32
; CHECK: mov eax, dword ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: xor
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
; CHECK: jne .L[[LABEL]]
define i64 @test_atomic_rmw_xor_64(i32 %iptr, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%a = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %v, i32 6)
ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_xor_64
; CHECK: push ebx
; CHECK: mov eax, dword ptr [{{.*}}]
; CHECK: mov edx, dword ptr [{{.*}}+4]
; CHECK: .L[[LABEL:.*]]:
; CHECK: mov ebx, eax
; CHECK: xor ebx, {{.*e.[^x]}}
; CHECK: mov ecx, edx
; CHECK: xor ecx, {{.*e.[^x]}}
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_xor_32_ignored(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
ret i32 %v
}
; CHECK-LABEL: test_atomic_rmw_xor_32_ignored
; CHECK: mov eax, dword ptr
; CHECK: .L[[LABEL:.*]]:
; CHECK: xor
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
; CHECK: jne .L[[LABEL]]
;; exchange
define i32 @test_atomic_rmw_xchg_8(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i8
%ptr = inttoptr i32 %iptr to i8*
%a = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %trunc, i32 6)
%a_ext = zext i8 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_xchg_8
; CHECK: xchg byte ptr {{.*}}, [[REG:.*]]
define i32 @test_atomic_rmw_xchg_16(i32 %iptr, i32 %v) {
entry:
%trunc = trunc i32 %v to i16
%ptr = inttoptr i32 %iptr to i16*
%a = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %trunc, i32 6)
%a_ext = zext i16 %a to i32
ret i32 %a_ext
}
; CHECK-LABEL: test_atomic_rmw_xchg_16
; CHECK: xchg word ptr {{.*}}, [[REG:.*]]
define i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
ret i32 %a
}
; CHECK-LABEL: test_atomic_rmw_xchg_32
; CHECK: xchg dword ptr {{.*}}, [[REG:.*]]
define i64 @test_atomic_rmw_xchg_64(i32 %iptr, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%a = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %v, i32 6)
ret i64 %a
}
; CHECK-LABEL: test_atomic_rmw_xchg_64
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; CHECK: .L[[LABEL:.*]]:
; CHECK: lock cmpxchg8b qword ptr [{{e.[^x]}}]
; CHECK: jne .L[[LABEL]]
define i32 @test_atomic_rmw_xchg_32_ignored(i32 %iptr, i32 %v) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
ret i32 %v
}
; In this case, ignoring the return value doesn't help. The xchg is
; used to do an atomic store.
; CHECK-LABEL: test_atomic_rmw_xchg_32_ignored
; CHECK: xchg dword ptr {{.*}}, [[REG:.*]]
;;;; Cmpxchg
;define i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, i32 %desired) {
;entry:
; %ptr = inttoptr i32 %iptr to i8*
; %trunc_exp = trunc i32 %expected to i8
; %trunc_des = trunc i32 %desired to i8
; %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp,
; i8 %trunc_des, i32 6, i32 6)
; %old_ext = zext i8 %old to i32
; ret i32 %old_ext
;}
; CHECKLATER-LABEL: test_atomic_cmpxchg_8
; CHECKLATER: lock cmpxchg byte
;define i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, i32 %desired) {
;entry:
; %ptr = inttoptr i32 %iptr to i16*
; %trunc_exp = trunc i32 %expected to i16
; %trunc_des = trunc i32 %desired to i16
; %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp,
; i16 %trunc_des, i32 6, i32 6)
; %old_ext = zext i16 %old to i32
; ret i32 %old_ext
;}
; CHECKLATER-LABEL: test_atomic_cmpxchg_16
; This one is a bit gross for NaCl right now.
; https://code.google.com/p/nativeclient/issues/detail?id=2981
; But we'll assume that NaCl will have it fixed...
; CHECKLATER: lock cmpxchg word
;define i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, i32 %desired) {
;entry:
; %ptr = inttoptr i32 %iptr to i32*
; %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
; i32 %desired, i32 6, i32 6)
; ret i32 %old
;}
; CHECKLATER-LABEL: test_atomic_cmpxchg_32
; CHECKLATER: mov eax
; CHECKLATER: mov ecx
; CHECKLATER: lock cmpxchg dword
;define i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, i64 %desired) {
;entry:
; %ptr = inttoptr i32 %iptr to i64*
; %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
; i64 %desired, i32 6, i32 6)
; ret i64 %old
;}
; CHECKLATER-LABEL: test_atomic_cmpxchg_64
; CHECKLATER: mov eax
; CHECKLATER: mov edx
; CHECKLATER: mov ebx
; CHECKLATER: mov ecx
; CHECKLATER: lock cmpxchg8b qword
;define i32 @test_atomic_cmpxchg_32_loop(i32 %iptr,
; i32 %expected, i32 %desired) {
;entry:
; br label %loop
;
;loop:
; %cmp = phi i32 [ %expected, %entry], [%old, %loop]
; %ptr = inttoptr i32 %iptr to i32*
; %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %cmp,
; i32 %desired, i32 6, i32 6)
; %success = icmp eq i32 %cmp, %old
; br i1 %success, label %done, label %loop
;
;done:
; ret i32 %old
;}
; CHECKLATER-LABEL: test_atomic_cmpxchg_32_loop
define i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, i32 %desired) {
entry:
%trunc_exp = trunc i32 %expected to i8
%trunc_des = trunc i32 %desired to i8
%ptr = inttoptr i32 %iptr to i8*
%old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp,
i8 %trunc_des, i32 6, i32 6)
%old_ext = zext i8 %old to i32
ret i32 %old_ext
}
; CHECK-LABEL: test_atomic_cmpxchg_8
; CHECK: mov al, {{.*}}
; Need to check that eax isn't used as the address register or the desired,
; since it is already used as the *expected* register.
; CHECK: lock cmpxchg byte ptr [e{{[^a].}}], {{[^a]}}
define i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, i32 %desired) {
entry:
%trunc_exp = trunc i32 %expected to i16
%trunc_des = trunc i32 %desired to i16
%ptr = inttoptr i32 %iptr to i16*
%old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp,
i16 %trunc_des, i32 6, i32 6)
%old_ext = zext i16 %old to i32
ret i32 %old_ext
}
; CHECK-LABEL: test_atomic_cmpxchg_16
; CHECK: mov ax, {{.*}}
; CHECK: lock cmpxchg word ptr [e{{[^a].}}], {{[^a]}}
define i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, i32 %desired) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
i32 %desired, i32 6, i32 6)
ret i32 %old
}
; CHECK-LABEL: test_atomic_cmpxchg_32
; CHECK: mov eax, {{.*}}
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}], e{{[^a]}}
define i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, i64 %desired) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
i64 %desired, i32 6, i32 6)
ret i64 %old
}
; CHECK-LABEL: test_atomic_cmpxchg_64
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; edx and eax are already the return registers, so they don't actually
; need to be reshuffled via movs. The next test stores the result
; somewhere, so in that case they do need to be mov'ed.
; Test a case where %old really does need to be copied out of edx:eax.
define void @test_atomic_cmpxchg_64_store(i32 %ret_iptr, i32 %iptr, i64 %expected, i64 %desired) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
i64 %desired, i32 6, i32 6)
%__6 = inttoptr i32 %ret_iptr to i64*
store i64 %old, i64* %__6, align 1
ret void
}
; CHECK-LABEL: test_atomic_cmpxchg_64_store
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
; CHECK: mov {{.*}}, edx
; CHECK: mov {{.*}}, eax
; Test with some more register pressure. When we have an alloca, ebp is
; used to manage the stack frame, so it cannot be used as a register either.
define i64 @test_atomic_cmpxchg_64_alloca(i32 %iptr, i64 %expected, i64 %desired) {
entry:
%alloca_ptr = alloca i8, i32 16, align 16
%ptr = inttoptr i32 %iptr to i64*
%old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
i64 %desired, i32 6, i32 6)
store i8 0, i8* %alloca_ptr, align 1
store i8 1, i8* %alloca_ptr, align 1
store i8 2, i8* %alloca_ptr, align 1
store i8 3, i8* %alloca_ptr, align 1
%__6 = ptrtoint i8* %alloca_ptr to i32
call void @use_ptr(i32 %__6)
ret i64 %old
}
; CHECK-LABEL: test_atomic_cmpxchg_64_alloca
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
; It also cannot be ebp since we use that for alloca. Also make sure it's
; not esp, since that's the stack pointer and mucking with it will break
; the later use_ptr function call.
; That pretty much leaves esi, or edi as the only viable registers.
; CHECK: lock cmpxchg8b qword ptr [e{{[ds]}}i]
; CHECK: call use_ptr
define i32 @test_atomic_cmpxchg_32_ignored(i32 %iptr, i32 %expected, i32 %desired) {
entry:
%ptr = inttoptr i32 %iptr to i32*
%ignored = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
i32 %desired, i32 6, i32 6)
ret i32 0
}
; CHECK-LABEL: test_atomic_cmpxchg_32_ignored
; CHECK: mov eax, {{.*}}
; CHECK: lock cmpxchg dword ptr [e{{[^a].}}]
define i64 @test_atomic_cmpxchg_64_ignored(i32 %iptr, i64 %expected, i64 %desired) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%ignored = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
i64 %desired, i32 6, i32 6)
ret i64 0
}
; CHECK-LABEL: test_atomic_cmpxchg_64_ignored
; CHECK: push ebx
; CHECK-DAG: mov edx
; CHECK-DAG: mov eax
; CHECK-DAG: mov ecx
; CHECK-DAG: mov ebx
; CHECK: lock cmpxchg8b qword ptr [e{{.[^x]}}]
define i32 @test_atomic_cmpxchg_32_loop(i32 %iptr, i32 %expected, i32 %desired) {
entry:
br label %loop
loop:
%cmp = phi i32 [ %expected, %entry ], [ %old, %loop ]
%ptr = inttoptr i32 %iptr to i32*
%old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %cmp,
i32 %desired, i32 6, i32 6)
%success = icmp eq i32 %cmp, %old
br i1 %success, label %done, label %loop
done:
ret i32 %old
}
; CHECK-LABEL: test_atomic_cmpxchg_32_loop
;;;; Fence and is-lock-free.
@@ -381,6 +866,19 @@ entry:
; CHECK-LABEL: test_not_lock_free
; CHECK: mov {{.*}}, 0
define i32 @test_atomic_is_lock_free_ignored(i32 %iptr) {
entry:
%ptr = inttoptr i32 %iptr to i8*
%ignored = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
ret i32 0
}
; CHECK-LABEL: test_atomic_is_lock_free_ignored
; CHECK: mov {{.*}}, 0
; This can get optimized out, because it's side-effect-free.
; CHECKO2REM-LABEL: test_atomic_is_lock_free_ignored
; CHECKO2REM-NOT: mov {{.*}}, 1
; CHECKO2REM: mov {{.*}}, 0
; TODO(jvoung): at some point we can take advantage of the
; fact that nacl.atomic.is.lock.free will resolve to a constant
; (which adds DCE opportunities). Once we optimize, the test expectations
......