Commit a3a01a2f by Jan Voung

Subzero: lower the rest of the atomic operations.

64-bit ops are expanded via a cmpxchg8b loop. 64/32-bit and/or/xor are also
expanded into a cmpxchg / cmpxchg8b loop. Add a cross test for atomic RMW
operations and compare and swap.

Misc: Test that atomic.is.lock.free can be optimized out if its result is
ignored.

TODO:
* Optimize compare and swap with a compare+branch further down the
  instruction stream.
* Optimize atomic RMW when the return value is ignored (adds a locked field
  to binary ops though).
* We may want to do some actual target-dependent basic-block splitting +
  expansion (the instructions inserted by the expansion must reference the
  pre-colored registers, etc.). Otherwise, we are currently getting by with
  modeling the extended liveness of the variables used in the loops using
  fake uses.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=jfb@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/362463002
parent 8d1072e7
...@@ -57,6 +57,11 @@ if __name__ == '__main__': ...@@ -57,6 +57,11 @@ if __name__ == '__main__':
metavar='PATH', metavar='PATH',
help='Path to LLVM executables like llc ' + help='Path to LLVM executables like llc ' +
'(defaults to $LLVM_BIN_PATH)') '(defaults to $LLVM_BIN_PATH)')
argparser.add_argument('--crosstest-bitcode', required=False,
default=1, type=int,
help='Compile non-subzero crosstest object file ' +
'from the same bitcode as the subzero object. ' +
'If 0, then compile it straight from source.')
args = argparser.parse_args() args = argparser.parse_args()
objs = [] objs = []
...@@ -113,7 +118,9 @@ if __name__ == '__main__': ...@@ -113,7 +118,9 @@ if __name__ == '__main__':
# failures. This behavior can be inspected by switching # failures. This behavior can be inspected by switching
# use_llc between True and False. # use_llc between True and False.
use_llc = False use_llc = False
if use_llc: if not args.crosstest_bitcode:
objs.append(arg)
elif use_llc:
shellcmd([os.path.join(llvm_bin_path, 'llc'), shellcmd([os.path.join(llvm_bin_path, 'llc'),
'-filetype=obj', '-filetype=obj',
'-o=' + obj_llc, '-o=' + obj_llc,
...@@ -125,4 +132,4 @@ if __name__ == '__main__': ...@@ -125,4 +132,4 @@ if __name__ == '__main__':
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++' linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] + shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
objs + objs +
['-lm', '-o', os.path.join(args.dir, args.output)]) ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
...@@ -64,6 +64,17 @@ for optlevel in ${OPTLEVELS} ; do ...@@ -64,6 +64,17 @@ for optlevel in ${OPTLEVELS} ; do
--driver=test_icmp_main.cpp \ --driver=test_icmp_main.cpp \
--output=test_icmp_O${optlevel} --output=test_icmp_O${optlevel}
# Compile the non-subzero object files straight from source
# since the native LLVM backend does not understand how to
# lower NaCl-specific intrinsics.
./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_sync_atomic.cpp \
--crosstest-bitcode=0 \
--driver=test_sync_atomic_main.cpp \
--output=test_sync_atomic_O${optlevel}
done done
for optlevel in ${OPTLEVELS} ; do for optlevel in ${OPTLEVELS} ; do
...@@ -74,4 +85,5 @@ for optlevel in ${OPTLEVELS} ; do ...@@ -74,4 +85,5 @@ for optlevel in ${OPTLEVELS} ; do
"${OUTDIR}"/test_fcmp_O${optlevel} "${OUTDIR}"/test_fcmp_O${optlevel}
"${OUTDIR}"/test_global_O${optlevel} "${OUTDIR}"/test_global_O${optlevel}
"${OUTDIR}"/test_icmp_O${optlevel} "${OUTDIR}"/test_icmp_O${optlevel}
"${OUTDIR}"/test_sync_atomic_O${optlevel}
done done
//===- subzero/crosstest/test_sync_atomic.cpp - Implementation for tests --===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This aims to test that all the atomic RMW instructions and compare and swap
// work across the allowed atomic types. This uses the __sync_* builtins
// to test the atomic operations.
//
//===----------------------------------------------------------------------===//
#include <stdint.h>
#include <cstdlib>
#include "test_sync_atomic.h"
// X(inst, type): define three test functions for one (operation, type) pair.
//  - test_<inst>: single RMW op; fetch_first selects __sync_fetch_and_<inst>
//    (returns the old value) vs __sync_<inst>_and_fetch (returns the new
//    value).
//  - test_alloca_<inst>: repeats the op, storing each result into a local
//    stack buffer and then summing the buffer; exercises the op in the
//    presence of stack allocation and a loop.
//  - test_const_<inst>: same ops but with immediate operands (42 / 99); the
//    third parameter is ignored so all variants share a signature.
#define X(inst, type)                                                          \
  type test_##inst(bool fetch_first, volatile type *ptr, type a) {             \
    if (fetch_first) {                                                         \
      return __sync_fetch_and_##inst(ptr, a);                                  \
    } else {                                                                   \
      return __sync_##inst##_and_fetch(ptr, a);                                \
    }                                                                          \
  }                                                                            \
  type test_alloca_##inst(bool fetch, volatile type *ptr, type a) {            \
    const size_t buf_size = 8;                                                 \
    type buf[buf_size];                                                        \
    for (size_t i = 0; i < buf_size; ++i) {                                    \
      if (fetch) {                                                             \
        buf[i] = __sync_fetch_and_##inst(ptr, a);                              \
      } else {                                                                 \
        buf[i] = __sync_##inst##_and_fetch(ptr, a);                            \
      }                                                                        \
    }                                                                          \
    type sum = 0;                                                              \
    for (size_t i = 0; i < buf_size; ++i) {                                    \
      sum += buf[i];                                                           \
    }                                                                          \
    return sum;                                                                \
  }                                                                            \
  type test_const_##inst(bool fetch, volatile type *ptr, type ign) {           \
    if (fetch) {                                                               \
      return __sync_fetch_and_##inst(ptr, 42);                                 \
    } else {                                                                   \
      return __sync_##inst##_and_fetch(ptr, 99);                               \
    }                                                                          \
  }
// Instantiate the three test functions for every (operation, type) pair.
FOR_ALL_RMWOP_TYPES(X)
#undef X

// X(type): define a compare-and-swap test for one type. Returns the value
// previously stored at *ptr (equal to oldval iff the swap happened). The
// same function name is reused, so these are C++ overloads per type.
#define X(type)                                                                \
  type test_val_cmp_swap(volatile type *ptr, type oldval, type newval) {       \
    return __sync_val_compare_and_swap(ptr, oldval, newval);                   \
  }
ATOMIC_TYPE_TABLE
#undef X
//===- subzero/crosstest/test_sync_atomic.def - macros for tests -*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines macros for testing atomic intrinsics (via sync builtins).
//
//===----------------------------------------------------------------------===//
#ifndef TEST_SYNC_ATOMIC_DEF
#define TEST_SYNC_ATOMIC_DEF

// Stringification helper (not used in this file; presumably used by the
// test driver to print operation names — TODO confirm).
#define STR(s) #s

// X-macro table of the RMW operations under test.
// Users define X(inst) before expanding RMWOP_TABLE.
#define RMWOP_TABLE                                                            \
  /* inst */                                                                   \
  X(add)                                                                       \
  X(sub)                                                                       \
  X(or)                                                                        \
  X(and)                                                                       \
  X(xor)
//#define X(inst)

// X-macro table of the integer types supported by the atomic operations.
// Users define X(type) before expanding ATOMIC_TYPE_TABLE.
#define ATOMIC_TYPE_TABLE                                                      \
  /* type */                                                                   \
  X(uint8_t)                                                                   \
  X(uint16_t)                                                                  \
  X(uint32_t)                                                                  \
  X(uint64_t)
//#define X(type)

// Invoke F(inst, type) for one operation across all supported types.
#define FOR_ALL_RMWTYPES_INST(F, inst)                                         \
  F(inst, uint8_t)                                                             \
  F(inst, uint16_t)                                                            \
  F(inst, uint32_t)                                                            \
  F(inst, uint64_t)

// Invoke X(inst, type) over the full (operation x type) cross product.
#define FOR_ALL_RMWOP_TYPES(X)                                                 \
  FOR_ALL_RMWTYPES_INST(X, add)                                                \
  FOR_ALL_RMWTYPES_INST(X, sub)                                                \
  FOR_ALL_RMWTYPES_INST(X, or)                                                 \
  FOR_ALL_RMWTYPES_INST(X, and)                                                \
  FOR_ALL_RMWTYPES_INST(X, xor)
//#define X(inst, type)

#endif // TEST_SYNC_ATOMIC_DEF
//===- subzero/crosstest/test_sync_atomic.h - Test prototypes ---*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the function prototypes for cross testing atomic
// intrinsics.
//
//===----------------------------------------------------------------------===//
#include "test_sync_atomic.def"
// Declare the three RMW test entry points for every (operation, type) pair,
// matching the definitions in test_sync_atomic.cpp.
#define X(inst, type)                                                          \
  type test_##inst(bool fetch_first, volatile type *ptr, type a);              \
  type test_alloca_##inst(bool fetch, volatile type *ptr, type a);             \
  type test_const_##inst(bool fetch, volatile type *ptr, type ignored);
FOR_ALL_RMWOP_TYPES(X)
#undef X

// Declare the compare-and-swap test, overloaded for each atomic type.
#define X(type)                                                                \
  type test_val_cmp_swap(volatile type *ptr, type oldval, type newval);
ATOMIC_TYPE_TABLE
#undef X
...@@ -51,8 +51,7 @@ const size_t TypeX8632AttributesSize = ...@@ -51,8 +51,7 @@ const size_t TypeX8632AttributesSize =
llvm::array_lengthof(TypeX8632Attributes); llvm::array_lengthof(TypeX8632Attributes);
const char *InstX8632SegmentRegNames[] = { const char *InstX8632SegmentRegNames[] = {
#define X(val, name) \ #define X(val, name) name,
name,
SEG_REGX8632_TABLE SEG_REGX8632_TABLE
#undef X #undef X
}; };
...@@ -140,6 +139,33 @@ InstX8632Cdq::InstX8632Cdq(Cfg *Func, Variable *Dest, Operand *Source) ...@@ -140,6 +139,33 @@ InstX8632Cdq::InstX8632Cdq(Cfg *Func, Variable *Dest, Operand *Source)
addSource(Source); addSource(Source);
} }
// Construct a cmpxchg instruction. DestOrAddr may be a register Variable or
// a memory operand; when it is a memory operand, dyn_cast yields NULL and
// the instruction has no SSA-style Dest. Eax must be pre-colored to eax,
// since the hardware instruction implicitly compares against (and may
// write) eax.
InstX8632Cmpxchg::InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr,
                                   Variable *Eax, Variable *Desired,
                                   bool Locked)
    : InstX8632Lockable(Func, InstX8632::Cmpxchg, 3,
                        llvm::dyn_cast<Variable>(DestOrAddr), Locked) {
  assert(Eax->getRegNum() == TargetX8632::Reg_eax);
  // Source order matters: emit() prints getSrc(0) (dest/addr) and
  // getSrc(2) (desired); eax at getSrc(1) is implicit in the encoding.
  addSource(DestOrAddr);
  addSource(Eax);
  addSource(Desired);
}
// Construct a cmpxchg8b instruction. There is no SSA-style Dest: the
// instruction implicitly reads and writes edx:eax and reads ecx:ebx, so all
// four register operands must be pre-colored, as asserted below. Addr must
// be a 64-bit memory operand.
InstX8632Cmpxchg8b::InstX8632Cmpxchg8b(Cfg *Func, OperandX8632 *Addr,
                                       Variable *Edx, Variable *Eax,
                                       Variable *Ecx, Variable *Ebx,
                                       bool Locked)
    // Fix: the instruction kind must be Cmpxchg8b, not Cmpxchg. Passing
    // Cmpxchg here made classof()/llvm::isa<> misidentify this instruction
    // as an InstX8632Cmpxchg, and InstX8632Cmpxchg8b::classof (which tests
    // the Cmpxchg8b kind) could never match.
    : InstX8632Lockable(Func, InstX8632::Cmpxchg8b, 5, NULL, Locked) {
  assert(Edx->getRegNum() == TargetX8632::Reg_edx);
  assert(Eax->getRegNum() == TargetX8632::Reg_eax);
  assert(Ecx->getRegNum() == TargetX8632::Reg_ecx);
  assert(Ebx->getRegNum() == TargetX8632::Reg_ebx);
  // Source order matters: emit() prints only getSrc(0) (the memory
  // operand); the register operands are implicit in the encoding but are
  // recorded as sources.
  addSource(Addr);
  addSource(Edx);
  addSource(Eax);
  addSource(Ecx);
  addSource(Ebx);
}
InstX8632Cvt::InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source) InstX8632Cvt::InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source)
: InstX8632(Func, InstX8632::Cvt, 1, Dest) { : InstX8632(Func, InstX8632::Cvt, 1, Dest) {
addSource(Source); addSource(Source);
...@@ -284,9 +310,14 @@ InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source) ...@@ -284,9 +310,14 @@ InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked) bool Locked)
: InstX8632(Func, InstX8632::Xadd, 2, llvm::dyn_cast<Variable>(Dest)), : InstX8632Lockable(Func, InstX8632::Xadd, 2,
Locked(Locked) { llvm::dyn_cast<Variable>(Dest), Locked) {
HasSideEffects = Locked; addSource(Dest);
addSource(Source);
}
InstX8632Xchg::InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source)
: InstX8632(Func, InstX8632::Xchg, 2, llvm::dyn_cast<Variable>(Dest)) {
addSource(Dest); addSource(Dest);
addSource(Source); addSource(Source);
} }
...@@ -398,6 +429,7 @@ void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func, ...@@ -398,6 +429,7 @@ void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
Str << "\n"; Str << "\n";
} }
template <> const char *InstX8632Neg::Opcode = "neg";
template <> const char *InstX8632Add::Opcode = "add"; template <> const char *InstX8632Add::Opcode = "add";
template <> const char *InstX8632Addps::Opcode = "addps"; template <> const char *InstX8632Addps::Opcode = "addps";
template <> const char *InstX8632Adc::Opcode = "adc"; template <> const char *InstX8632Adc::Opcode = "adc";
...@@ -554,6 +586,48 @@ void InstX8632Cdq::dump(const Cfg *Func) const { ...@@ -554,6 +586,48 @@ void InstX8632Cdq::dump(const Cfg *Func) const {
dumpSources(Func); dumpSources(Func);
} }
// Emit "[lock] cmpxchg <dest/addr>, <desired>". The eax operand
// (getSrc(1)) is implicit in the hardware encoding and is not printed.
void InstX8632Cmpxchg::emit(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 3);
  if (Locked) {
    Str << "\tlock";
  }
  Str << "\tcmpxchg\t";
  getSrc(0)->emit(Func);
  Str << ", ";
  getSrc(2)->emit(Func);
  Str << "\n";
}
// Dump a human-readable form, typed by the first (dest/addr) operand, e.g.
// "lock cmpxchg.i32 <srcs>".
void InstX8632Cmpxchg::dump(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrDump();
  if (Locked) {
    Str << "lock ";
  }
  Str << "cmpxchg." << getSrc(0)->getType() << " ";
  dumpSources(Func);
}
// Emit "[lock] cmpxchg8b <m64>". Only the memory operand (getSrc(0)) is
// printed; edx:eax and ecx:ebx are implicit in the encoding.
void InstX8632Cmpxchg8b::emit(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 5);
  if (Locked) {
    Str << "\tlock";
  }
  Str << "\tcmpxchg8b\t";
  getSrc(0)->emit(Func);
  Str << "\n";
}
// Dump a human-readable form; all five sources (addr + four registers) are
// shown via dumpSources().
void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrDump();
  if (Locked) {
    Str << "lock ";
  }
  Str << "cmpxchg8b ";
  dumpSources(Func);
}
void InstX8632Cvt::emit(const Cfg *Func) const { void InstX8632Cvt::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
...@@ -955,10 +1029,9 @@ void InstX8632Sqrtss::dump(const Cfg *Func) const { ...@@ -955,10 +1029,9 @@ void InstX8632Sqrtss::dump(const Cfg *Func) const {
void InstX8632Xadd::emit(const Cfg *Func) const { void InstX8632Xadd::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
if (Locked) { if (Locked) {
Str << "\tlock xadd "; Str << "\tlock";
} else {
Str << "\txadd\t";
} }
Str << "\txadd\t";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
Str << ", "; Str << ", ";
getSrc(1)->emit(Func); getSrc(1)->emit(Func);
...@@ -975,6 +1048,22 @@ void InstX8632Xadd::dump(const Cfg *Func) const { ...@@ -975,6 +1048,22 @@ void InstX8632Xadd::dump(const Cfg *Func) const {
dumpSources(Func); dumpSources(Func);
} }
// Emit "xchg <dest>, <src>". No lock prefix is printed: with a memory
// operand the hardware locks xchg implicitly (see the class comment).
void InstX8632Xchg::emit(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrEmit();
  Str << "\txchg\t";
  getSrc(0)->emit(Func);
  Str << ", ";
  getSrc(1)->emit(Func);
  Str << "\n";
}
// Dump a human-readable form, typed by the first operand, e.g.
// "xchg.i32 <srcs>".
void InstX8632Xchg::dump(const Cfg *Func) const {
  Ostream &Str = Func->getContext()->getStrDump();
  Type Ty = getSrc(0)->getType();
  Str << "xchg." << Ty << " ";
  dumpSources(Func);
}
void OperandX8632::dump(const Cfg *Func) const { void OperandX8632::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
Str << "<OperandX8632>"; Str << "<OperandX8632>";
......
...@@ -54,8 +54,7 @@ class OperandX8632Mem : public OperandX8632 { ...@@ -54,8 +54,7 @@ class OperandX8632Mem : public OperandX8632 {
public: public:
enum SegmentRegisters { enum SegmentRegisters {
DefaultSegment = -1, DefaultSegment = -1,
#define X(val, name) \ #define X(val, name) val,
val,
SEG_REGX8632_TABLE SEG_REGX8632_TABLE
#undef X #undef X
SegReg_NUM SegReg_NUM
...@@ -142,6 +141,8 @@ public: ...@@ -142,6 +141,8 @@ public:
Br, Br,
Call, Call,
Cdq, Cdq,
Cmpxchg,
Cmpxchg8b,
Cvt, Cvt,
Div, Div,
Divps, Divps,
...@@ -162,6 +163,7 @@ public: ...@@ -162,6 +163,7 @@ public:
Mul, Mul,
Mulps, Mulps,
Mulss, Mulss,
Neg,
Or, Or,
Pop, Pop,
Push, Push,
...@@ -183,6 +185,7 @@ public: ...@@ -183,6 +185,7 @@ public:
Ucomiss, Ucomiss,
UD2, UD2,
Xadd, Xadd,
Xchg,
Xor Xor
}; };
static const char *getWidthString(Type Ty); static const char *getWidthString(Type Ty);
...@@ -328,6 +331,41 @@ private: ...@@ -328,6 +331,41 @@ private:
virtual ~InstX8632Call() {} virtual ~InstX8632Call() {}
}; };
// Template for a single-operand read-modify-write instruction (e.g. neg),
// parameterized by the instruction kind K. The opcode mnemonic comes from
// the per-instantiation static member Opcode, defined in the .cpp file.
template <InstX8632::InstKindX8632 K>
class InstX8632Unaryop : public InstX8632 {
public:
  // Create an unary-op instruction like neg.
  // The source and dest are the same variable.
  static InstX8632Unaryop *create(Cfg *Func, Operand *SrcDest) {
    return new (Func->allocate<InstX8632Unaryop>())
        InstX8632Unaryop(Func, SrcDest);
  }
  // Emit "<opcode>\t<operand>".
  virtual void emit(const Cfg *Func) const {
    Ostream &Str = Func->getContext()->getStrEmit();
    assert(getSrcSize() == 1);
    Str << "\t" << Opcode << "\t";
    getSrc(0)->emit(Func);
    Str << "\n";
  }
  // Dump "<dest> = <opcode>.<type> <srcs>".
  // NOTE(review): uses getDest()->getType(); if SrcDest is not a Variable,
  // the dyn_cast in the constructor makes Dest NULL and this would crash —
  // presumably callers always pass a Variable; confirm at call sites.
  virtual void dump(const Cfg *Func) const {
    Ostream &Str = Func->getContext()->getStrDump();
    dumpDest(Func);
    Str << " = " << Opcode << "." << getDest()->getType() << " ";
    dumpSources(Func);
  }
  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }

private:
  InstX8632Unaryop(Cfg *Func, Operand *SrcDest)
      : InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
    addSource(SrcDest);
  }
  InstX8632Unaryop(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
  InstX8632Unaryop &operator=(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
  virtual ~InstX8632Unaryop() {}

  // Per-instantiation mnemonic (e.g. "neg"), defined in IceInstX8632.cpp.
  static const char *Opcode;
};
// See the definition of emitTwoAddress() for a description of // See the definition of emitTwoAddress() for a description of
// ShiftHack. // ShiftHack.
void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func, void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
...@@ -400,6 +438,7 @@ private: ...@@ -400,6 +438,7 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
typedef InstX8632Unaryop<InstX8632::Neg> InstX8632Neg;
typedef InstX8632Binop<InstX8632::Add> InstX8632Add; typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps; typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc; typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
...@@ -423,6 +462,28 @@ typedef InstX8632Binop<InstX8632::Sar, true> InstX8632Sar; ...@@ -423,6 +462,28 @@ typedef InstX8632Binop<InstX8632::Sar, true> InstX8632Sar;
typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv; typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
typedef InstX8632Ternop<InstX8632::Div> InstX8632Div; typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
// Base class for a lockable x86-32 instruction (emits a locked prefix).
// Subclasses consult the Locked flag in their emit()/dump() methods to
// decide whether to print the "lock" prefix.
class InstX8632Lockable : public InstX8632 {
public:
  virtual void emit(const Cfg *Func) const = 0;
  virtual void dump(const Cfg *Func) const;

protected:
  // Whether to emit the instruction with a lock prefix.
  bool Locked;

  InstX8632Lockable(Cfg *Func, InstKindX8632 Kind, SizeT Maxsrcs,
                    Variable *Dest, bool Locked)
      : InstX8632(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
    // Assume that such instructions are used for Atomics and be careful
    // with optimizations: a locked instruction must not be dead-code
    // eliminated even if its result appears unused.
    HasSideEffects = Locked;
  }

private:
  InstX8632Lockable(const InstX8632Lockable &) LLVM_DELETED_FUNCTION;
  InstX8632Lockable &operator=(const InstX8632Lockable &) LLVM_DELETED_FUNCTION;
};
// Mul instruction - unsigned multiply. // Mul instruction - unsigned multiply.
class InstX8632Mul : public InstX8632 { class InstX8632Mul : public InstX8632 {
public: public:
...@@ -502,6 +563,57 @@ private: ...@@ -502,6 +563,57 @@ private:
virtual ~InstX8632Cdq() {} virtual ~InstX8632Cdq() {}
}; };
// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
// If not, ZF is cleared and <dest> is copied to eax (or subregister).
// <dest> can be a register or memory, while <desired> must be a register.
// It is the user's responsibility to mark eax with a FakeDef.
class InstX8632Cmpxchg : public InstX8632Lockable {
public:
  static InstX8632Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
                                  Variable *Desired, bool Locked) {
    return new (Func->allocate<InstX8632Cmpxchg>())
        InstX8632Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
  }
  virtual void emit(const Cfg *Func) const;
  virtual void dump(const Cfg *Func) const;
  static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg); }

private:
  InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
                   Variable *Desired, bool Locked);
  InstX8632Cmpxchg(const InstX8632Cmpxchg &) LLVM_DELETED_FUNCTION;
  InstX8632Cmpxchg &operator=(const InstX8632Cmpxchg &) LLVM_DELETED_FUNCTION;
  virtual ~InstX8632Cmpxchg() {}
};
// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
// If not, ZF is cleared and <m64> is copied to edx:eax.
// The caller is responsible for inserting FakeDefs to mark edx
// and eax as modified.
// <m64> must be a memory operand.
class InstX8632Cmpxchg8b : public InstX8632Lockable {
public:
  static InstX8632Cmpxchg8b *create(Cfg *Func, OperandX8632 *Dest,
                                    Variable *Edx, Variable *Eax, Variable *Ecx,
                                    Variable *Ebx, bool Locked) {
    return new (Func->allocate<InstX8632Cmpxchg8b>())
        InstX8632Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
  }
  virtual void emit(const Cfg *Func) const;
  virtual void dump(const Cfg *Func) const;
  static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg8b); }

private:
  InstX8632Cmpxchg8b(Cfg *Func, OperandX8632 *Dest, Variable *Edx,
                     Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked);
  InstX8632Cmpxchg8b(const InstX8632Cmpxchg8b &) LLVM_DELETED_FUNCTION;
  InstX8632Cmpxchg8b &
  operator=(const InstX8632Cmpxchg8b &) LLVM_DELETED_FUNCTION;
  virtual ~InstX8632Cmpxchg8b() {}
};
// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} // Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
// as appropriate. s=float, d=double, i=int. X and Y are determined // as appropriate. s=float, d=double, i=int. X and Y are determined
// from dest/src types. Sign and zero extension on the integer // from dest/src types. Sign and zero extension on the integer
...@@ -861,7 +973,7 @@ private: ...@@ -861,7 +973,7 @@ private:
// //
// Both the dest and source are updated. The caller should then insert a // Both the dest and source are updated. The caller should then insert a
// FakeDef to reflect the second udpate. // FakeDef to reflect the second udpate.
class InstX8632Xadd : public InstX8632 { class InstX8632Xadd : public InstX8632Lockable {
public: public:
static InstX8632Xadd *create(Cfg *Func, Operand *Dest, Variable *Source, static InstX8632Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked) { bool Locked) {
...@@ -873,14 +985,35 @@ public: ...@@ -873,14 +985,35 @@ public:
static bool classof(const Inst *Inst) { return isClassof(Inst, Xadd); } static bool classof(const Inst *Inst) { return isClassof(Inst, Xadd); }
private: private:
bool Locked;
InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked); InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
InstX8632Xadd(const InstX8632Xadd &) LLVM_DELETED_FUNCTION; InstX8632Xadd(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
InstX8632Xadd &operator=(const InstX8632Xadd &) LLVM_DELETED_FUNCTION; InstX8632Xadd &operator=(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Xadd() {} virtual ~InstX8632Xadd() {}
}; };
// Exchange instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand). At least one of
// the operands must be a register (and the other can be reg or mem).
// Both the Dest and Source are updated. If there is a memory operand,
// then the instruction is automatically "locked" without the need for
// a lock prefix.
class InstX8632Xchg : public InstX8632 {
public:
  static InstX8632Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
    return new (Func->allocate<InstX8632Xchg>())
        InstX8632Xchg(Func, Dest, Source);
  }
  virtual void emit(const Cfg *Func) const;
  virtual void dump(const Cfg *Func) const;
  static bool classof(const Inst *Inst) { return isClassof(Inst, Xchg); }

private:
  InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source);
  InstX8632Xchg(const InstX8632Xchg &) LLVM_DELETED_FUNCTION;
  InstX8632Xchg &operator=(const InstX8632Xchg &) LLVM_DELETED_FUNCTION;
  virtual ~InstX8632Xchg() {}
};
} // end of namespace Ice } // end of namespace Ice
#endif // SUBZERO_SRC_ICEINSTX8632_H #endif // SUBZERO_SRC_ICEINSTX8632_H
...@@ -46,7 +46,7 @@ const struct IceIntrinsicsEntry_ { ...@@ -46,7 +46,7 @@ const struct IceIntrinsicsEntry_ {
"nacl.atomic.fence" }, "nacl.atomic.fence" },
{ { { Intrinsics::AtomicFenceAll, true }, { IceType_void }, 1 }, { { { Intrinsics::AtomicFenceAll, true }, { IceType_void }, 1 },
"nacl.atomic.fence.all" }, "nacl.atomic.fence.all" },
{ { { Intrinsics::AtomicIsLockFree, true }, { { { Intrinsics::AtomicIsLockFree, false },
{ IceType_i1, IceType_i32, IceType_i32 }, 3 }, { IceType_i1, IceType_i32, IceType_i32 }, 3 },
"nacl.atomic.is.lock.free" }, "nacl.atomic.is.lock.free" },
......
...@@ -1968,7 +1968,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { ...@@ -1968,7 +1968,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
switch (Instr->getIntrinsicInfo().ID) { switch (Instr->getIntrinsicInfo().ID) {
case Intrinsics::AtomicCmpxchg: case Intrinsics::AtomicCmpxchg: {
if (!Intrinsics::VerifyMemoryOrder( if (!Intrinsics::VerifyMemoryOrder(
llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
...@@ -1979,9 +1979,18 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -1979,9 +1979,18 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
return; return;
} }
// TODO(jvoung): fill it in. Variable *DestPrev = Instr->getDest();
Func->setError("Unhandled intrinsic"); Operand *PtrToMem = Instr->getArg(0);
Operand *Expected = Instr->getArg(1);
Operand *Desired = Instr->getArg(2);
lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
// TODO(jvoung): If we peek ahead a few instructions and see how
// DestPrev is used (typically via another compare and branch),
// we may be able to optimize. If the result truly is used by a
// compare + branch, and the comparison is for equality, then we can
// optimize out the later compare, and fuse with the later branch.
return; return;
}
case Intrinsics::AtomicFence: case Intrinsics::AtomicFence:
if (!Intrinsics::VerifyMemoryOrder( if (!Intrinsics::VerifyMemoryOrder(
llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
...@@ -2183,18 +2192,54 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -2183,18 +2192,54 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return; return;
} }
// Lower an atomic compare-and-exchange. DestPrev receives the value that
// was in *Ptr before the operation; *Ptr is replaced by Desired only if it
// equaled Expected.
void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
                                     Operand *Expected, Operand *Desired) {
  if (Expected->getType() == IceType_i64) {
    // 64-bit case: cmpxchg8b compares *Ptr against edx:eax and, on match,
    // stores ecx:ebx; on mismatch it loads *Ptr into edx:eax.
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from FormMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // Either way, edx:eax now holds the previous memory value.
    Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 8/16/32-bit case: eax holds Expected going in and the previous memory
  // value coming out; Desired must be in a register.
  Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
  _mov(T_eax, Expected);
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
  Variable *DesiredReg = legalizeToVar(Desired);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  _mov(DestPrev, T_eax);
}
void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Operand *Ptr, Operand *Val) { Operand *Ptr, Operand *Val) {
bool NeedsCmpxchg = false;
LowerBinOp Op_Lo = NULL;
LowerBinOp Op_Hi = NULL;
switch (Operation) { switch (Operation) {
default: default:
Func->setError("Unknown AtomicRMW operation"); Func->setError("Unknown AtomicRMW operation");
return; return;
case Intrinsics::AtomicAdd: { case Intrinsics::AtomicAdd: {
if (Dest->getType() == IceType_i64) { if (Dest->getType() == IceType_i64) {
// Do a nasty cmpxchg8b loop. Factor this into a function. // All the fall-through paths must set this to true, but use this
// TODO(jvoung): fill it in. // for asserting.
Func->setError("Unhandled AtomicRMW operation"); NeedsCmpxchg = true;
return; Op_Lo = &TargetX8632::_add;
Op_Hi = &TargetX8632::_adc;
break;
} }
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
const bool Locked = true; const bool Locked = true;
...@@ -2206,26 +2251,160 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, ...@@ -2206,26 +2251,160 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
} }
case Intrinsics::AtomicSub: { case Intrinsics::AtomicSub: {
if (Dest->getType() == IceType_i64) { if (Dest->getType() == IceType_i64) {
// Do a nasty cmpxchg8b loop. NeedsCmpxchg = true;
// TODO(jvoung): fill it in. Op_Lo = &TargetX8632::_sub;
Func->setError("Unhandled AtomicRMW operation"); Op_Hi = &TargetX8632::_sbb;
return; break;
} }
// Generate a memory operand from Ptr. OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
// neg... const bool Locked = true;
// Then do the same as AtomicAdd. Variable *T = NULL;
// TODO(jvoung): fill it in. _mov(T, Val);
Func->setError("Unhandled AtomicRMW operation"); _neg(T);
_xadd(Addr, T, Locked);
_mov(Dest, T);
return; return;
} }
case Intrinsics::AtomicOr: case Intrinsics::AtomicOr:
// TODO(jvoung): If Dest is null or dead, then some of these
// operations do not need an "exchange", but just a locked op.
// That appears to be "worth" it for sub, or, and, and xor.
// xadd is probably fine vs lock add for add, and xchg is fine
// vs an atomic store.
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_or;
Op_Hi = &TargetX8632::_or;
break;
case Intrinsics::AtomicAnd: case Intrinsics::AtomicAnd:
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_and;
Op_Hi = &TargetX8632::_and;
break;
case Intrinsics::AtomicXor: case Intrinsics::AtomicXor:
NeedsCmpxchg = true;
Op_Lo = &TargetX8632::_xor;
Op_Hi = &TargetX8632::_xor;
break;
case Intrinsics::AtomicExchange: case Intrinsics::AtomicExchange:
// TODO(jvoung): fill it in. if (Dest->getType() == IceType_i64) {
Func->setError("Unhandled AtomicRMW operation"); NeedsCmpxchg = true;
// NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
// just need to be moved to the ecx and ebx registers.
Op_Lo = NULL;
Op_Hi = NULL;
break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
Variable *T = NULL;
_mov(T, Val);
_xchg(Addr, T);
_mov(Dest, T);
return; return;
} }
// Otherwise, we need a cmpxchg loop.
assert(NeedsCmpxchg);
expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
// Expand an atomic RMW operation that has no single locked instruction
// (or/and/xor, and all 64-bit ops) into a compare-and-swap retry loop.
// Op_Lo/Op_Hi are member-function pointers to the binary-op emitters for
// the low and high halves; both NULL means a pure exchange (the new value
// is independent of the old one).
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_hi>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are NULL, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    // Reserve the pre-colored registers before legalizing the address.
    Variable *T_edx = makeReg(IceType_i32, Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Reg_eax);
    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
    // Initial (unsynchronized) read of the old value into edx:eax.
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
    if (!IsXchg8b) {
      // Recompute the desired value (ecx:ebx) from the current old value
      // (edx:eax) on every iteration of the loop.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // On failure, cmpxchg8b refreshed edx:eax with the current memory
    // value; retry.
    _br(InstX8632Br::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base is also reused in the loop.
    Context.insert(InstFakeUse::create(Func, Addr->getBase()));
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 8/16/32-bit case with a single cmpxchg loop.
  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == NULL, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(InstX8632Br::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base is also reused in the loop.
  Context.insert(InstFakeUse::create(Func, Addr->getBase()));
  _mov(Dest, T_eax);
}
namespace { namespace {
......
...@@ -95,9 +95,15 @@ protected: ...@@ -95,9 +95,15 @@ protected:
virtual void doAddressOptLoad(); virtual void doAddressOptLoad();
virtual void doAddressOptStore(); virtual void doAddressOptStore();
void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
Operand *Desired);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val); Operand *Val);
typedef void (TargetX8632::*LowerBinOp)(Variable *, Operand *);
void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
Variable *Dest, Operand *Ptr, Operand *Val);
// Operand legalization helpers. To deal with address mode // Operand legalization helpers. To deal with address mode
// constraints, the helpers will create a new Operand and emit // constraints, the helpers will create a new Operand and emit
// instructions that guarantee that the Operand kind is one of those // instructions that guarantee that the Operand kind is one of those
...@@ -177,6 +183,22 @@ protected: ...@@ -177,6 +183,22 @@ protected:
void _cmp(Operand *Src0, Operand *Src1) { void _cmp(Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Icmp::create(Func, Src0, Src1)); Context.insert(InstX8632Icmp::create(Func, Src0, Src1));
} }
void _cmpxchg(Operand *DestOrAddr, Variable *Desired,
              bool Locked) = delete; // placeholder to keep doc tools honest
void _cmpxchg8b(OperandX8632 *Addr, Variable *Edx, Variable *Eax,
Variable *Ecx, Variable *Ebx, bool Locked) {
Context.insert(
InstX8632Cmpxchg8b::create(Func, Addr, Edx, Eax, Ecx, Ebx, Locked));
// Mark edx and eax as possibly modified by cmpxchg8b: on failure the
// instruction loads the 64-bit memory operand into edx:eax. Ecx and
// ebx are only read, so they get no FakeDef.
Context.insert(InstFakeDef::create(Func, Edx));
Context.insert(InstFakeDef::create(Func, Eax));
}
void _cvt(Variable *Dest, Operand *Src0) { void _cvt(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Cvt::create(Func, Dest, Src0)); Context.insert(InstX8632Cvt::create(Func, Dest, Src0));
} }
...@@ -232,6 +254,9 @@ protected: ...@@ -232,6 +254,9 @@ protected:
void _mulss(Variable *Dest, Operand *Src0) { void _mulss(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Mulss::create(Func, Dest, Src0)); Context.insert(InstX8632Mulss::create(Func, Dest, Src0));
} }
void _neg(Variable *SrcDest) {
// neg: in-place arithmetic (two's complement) negation of SrcDest.
Context.insert(InstX8632Neg::create(Func, SrcDest));
}
void _or(Variable *Dest, Operand *Src0) { void _or(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Or::create(Func, Dest, Src0)); Context.insert(InstX8632Or::create(Func, Dest, Src0));
} }
...@@ -294,7 +319,14 @@ protected: ...@@ -294,7 +319,14 @@ protected:
Context.insert(InstX8632Xadd::create(Func, Dest, Src, Locked)); Context.insert(InstX8632Xadd::create(Func, Dest, Src, Locked));
// The xadd exchanges Dest and Src (modifying Src). // The xadd exchanges Dest and Src (modifying Src).
// Model that update with a FakeDef. // Model that update with a FakeDef.
Context.insert(InstFakeDef::create(Func, Src)); Context.insert(
InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
}
void _xchg(Operand *Dest, Variable *Src) {
// xchg atomically (implicit lock when Dest is memory) swaps Dest and Src.
Context.insert(InstX8632Xchg::create(Func, Dest, Src));
// The xchg modifies Dest and Src -- model that update with a FakeDef.
Context.insert(
InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
} }
void _xor(Variable *Dest, Operand *Src0) { void _xor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Xor::create(Func, Dest, Src0)); Context.insert(InstX8632Xor::create(Func, Dest, Src0));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment