Commit 370cba59 by Antonio Maiorano

SubzeroReactor: implement missing atomic ops

* Most use Subzero intrinsics, except for Min/MaxAtomic, which are emulated. * Added unit tests for each implemented function; these only check that each function performs the expected operation, not that it actually behaves atomically. Bug: b/145754674 Change-Id: Ie3ec6e473ee8b448b28bf440da094ac03ac0005b Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39829 Reviewed-by: Ben Clayton <bclayton@google.com> Tested-by: Antonio Maiorano <amaiorano@google.com>
parent e37f2ef6
......@@ -14,8 +14,10 @@
#include "EmulatedReactor.hpp"
#include <algorithm>
#include <cmath>
#include <functional>
#include <mutex>
#include <utility>
namespace rr {
......@@ -96,6 +98,31 @@ void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RVal
}
}
// TODO(b/148276653): atomicMin and atomicMax serialize on one global mutex, which makes every
// emulated min/max mutually exclusive with every other one, rather than only the ones operating
// on the value pointed to by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic
// for Android.
// TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.

// Returns the single lock shared by all emulated min/max atomics.
// A shared lock (instead of one function-local mutex per template instantiation) is required so
// that an atomicMin and an atomicMax targeting the same address cannot interleave their
// read-modify-write sequences and lose an update.
static std::mutex &emulatedMinMaxMutex()
{
	static std::mutex m;
	return m;
}

// Stores min(*ptr, value) into *ptr while holding the shared lock.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMin(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(emulatedMinMaxMutex());
	T origValue = *ptr;
	*ptr = std::min(origValue, value);
	return origValue;
}

// Stores max(*ptr, value) into *ptr while holding the shared lock.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMax(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(emulatedMinMaxMutex());
	T origValue = *ptr;
	*ptr = std::max(origValue, value);
	return origValue;
}
} // anonymous namespace
namespace emulated {
......@@ -224,6 +251,26 @@ RValue<Float4> Log2(RValue<Float4> x)
return call4(log2f, x);
}
// Emulated signed MinAtomic: hands the pointer and operand to the host-side
// atomicMin<int32_t> helper through Call. memoryOrder is accepted for interface
// compatibility but is not forwarded.
RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
	int32_t (*hostMin)(int32_t *, int32_t) = atomicMin<int32_t>;
	return Call(hostMin, x, y);
}
// Emulated unsigned MinAtomic: hands the pointer and operand to the host-side
// atomicMin<uint32_t> helper through Call. memoryOrder is accepted for interface
// compatibility but is not forwarded.
RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
	uint32_t (*hostMin)(uint32_t *, uint32_t) = atomicMin<uint32_t>;
	return Call(hostMin, x, y);
}
// Emulated signed MaxAtomic: hands the pointer and operand to the host-side
// atomicMax<int32_t> helper through Call. memoryOrder is accepted for interface
// compatibility but is not forwarded.
RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
	int32_t (*hostMax)(int32_t *, int32_t) = atomicMax<int32_t>;
	return Call(hostMax, x, y);
}
// Emulated unsigned MaxAtomic: hands the pointer and operand to the host-side
// atomicMax<uint32_t> helper through Call. memoryOrder is accepted for interface
// compatibility but is not forwarded.
RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
	uint32_t (*hostMax)(uint32_t *, uint32_t) = atomicMax<uint32_t>;
	return Call(hostMax, x, y);
}
RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs)
{
return call4(fmodf, lhs, rhs);
......
......@@ -48,6 +48,10 @@ RValue<Float4> Exp(RValue<Float4> x);
RValue<Float4> Log(RValue<Float4> x);
RValue<Float4> Exp2(RValue<Float4> x);
RValue<Float4> Log2(RValue<Float4> x);
RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder);
RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder);
RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs);
} // namespace emulated
......
......@@ -4049,6 +4049,26 @@ RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
isZeroUndef ? ::llvm::ConstantInt::getTrue(jit->context) : ::llvm::ConstantInt::getFalse(jit->context)))));
}
// Signed atomic minimum: wraps the value produced by Nucleus::createAtomicMin
// for the given pointer, operand and memory ordering.
RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicMin(x.value, y.value, memoryOrder);
	return RValue<Int>(result);
}
// Unsigned atomic minimum: wraps the value produced by Nucleus::createAtomicUMin
// for the given pointer, operand and memory ordering.
RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicUMin(x.value, y.value, memoryOrder);
	return RValue<UInt>(result);
}
// Signed atomic maximum: wraps the value produced by Nucleus::createAtomicMax
// for the given pointer, operand and memory ordering.
RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicMax(x.value, y.value, memoryOrder);
	return RValue<Int>(result);
}
// Unsigned atomic maximum: wraps the value produced by Nucleus::createAtomicUMax
// for the given pointer, operand and memory ordering.
RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicUMax(x.value, y.value, memoryOrder);
	return RValue<UInt>(result);
}
Type *Float4::getType()
{
return T(llvm::VectorType::get(T(Float::getType()), 4));
......
......@@ -2623,26 +2623,6 @@ RValue<UInt> XorAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_orde
return RValue<UInt>(Nucleus::createAtomicXor(x.value, y.value, memoryOrder));
}
// Signed atomic minimum: wraps the value produced by Nucleus::createAtomicMin
// for the given pointer, operand and memory ordering.
RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicMin(x.value, y.value, memoryOrder);
	return RValue<Int>(result);
}
// Unsigned atomic minimum: wraps the value produced by Nucleus::createAtomicUMin
// for the given pointer, operand and memory ordering.
RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicUMin(x.value, y.value, memoryOrder);
	return RValue<UInt>(result);
}
// Signed atomic maximum: wraps the value produced by Nucleus::createAtomicMax
// for the given pointer, operand and memory ordering.
RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicMax(x.value, y.value, memoryOrder);
	return RValue<Int>(result);
}
// Unsigned atomic maximum: wraps the value produced by Nucleus::createAtomicUMax
// for the given pointer, operand and memory ordering.
RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
	auto *result = Nucleus::createAtomicUMax(x.value, y.value, memoryOrder);
	return RValue<UInt>(result);
}
RValue<UInt> ExchangeAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
return RValue<UInt>(Nucleus::createAtomicExchange(x.value, y.value, memoryOrder));
......
......@@ -2252,6 +2252,215 @@ TEST(ReactorUnitTests, ExtractFromRValue)
EXPECT_EQ(result[3], 678);
}
// Checks that AddAtomic returns the value that was in memory and leaves the sum behind.
// Only the arithmetic result is verified, not atomicity.
TEST(ReactorUnitTests, AddAtomic)
{
	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt operand = function.Arg<1>();
		UInt previous = rr::AddAtomic(target, operand, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	uint32_t memory = 123;
	const uint32_t addend = 456;
	const uint32_t returned = routine(&memory, addend);

	EXPECT_EQ(returned, 123u);  // value before the add
	EXPECT_EQ(memory, 579u);    // 123 + 456
}
// Checks that SubAtomic returns the value that was in memory and leaves the difference behind.
// Only the arithmetic result is verified, not atomicity.
TEST(ReactorUnitTests, SubAtomic)
{
	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt operand = function.Arg<1>();
		UInt previous = rr::SubAtomic(target, operand, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	uint32_t memory = 456;
	const uint32_t subtrahend = 123;
	const uint32_t returned = routine(&memory, subtrahend);

	EXPECT_EQ(returned, 456u);  // value before the subtract
	EXPECT_EQ(memory, 333u);    // 456 - 123
}
// Checks that AndAtomic returns the value that was in memory and leaves the bitwise AND behind.
// Only the bitwise result is verified, not atomicity.
TEST(ReactorUnitTests, AndAtomic)
{
	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt operand = function.Arg<1>();
		UInt previous = rr::AndAtomic(target, operand, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	uint32_t memory = 0b1111'0000;
	const uint32_t mask = 0b1010'1100;
	const uint32_t returned = routine(&memory, mask);

	EXPECT_EQ(returned, 0b1111'0000u);  // value before the AND
	EXPECT_EQ(memory, 0b1010'0000u);    // 1111'0000 & 1010'1100
}
// Checks that OrAtomic returns the value that was in memory and leaves the bitwise OR behind.
// Only the bitwise result is verified, not atomicity.
TEST(ReactorUnitTests, OrAtomic)
{
	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt operand = function.Arg<1>();
		UInt previous = rr::OrAtomic(target, operand, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	uint32_t memory = 0b1111'0000;
	const uint32_t bits = 0b1010'1100;
	const uint32_t returned = routine(&memory, bits);

	EXPECT_EQ(returned, 0b1111'0000u);  // value before the OR
	EXPECT_EQ(memory, 0b1111'1100u);    // 1111'0000 | 1010'1100
}
// Checks that XorAtomic returns the value that was in memory and leaves the bitwise XOR behind.
// Only the bitwise result is verified, not atomicity.
TEST(ReactorUnitTests, XorAtomic)
{
	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt operand = function.Arg<1>();
		UInt previous = rr::XorAtomic(target, operand, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	uint32_t memory = 0b1111'0000;
	const uint32_t bits = 0b1010'1100;
	const uint32_t returned = routine(&memory, bits);

	EXPECT_EQ(returned, 0b1111'0000u);  // value before the XOR
	EXPECT_EQ(memory, 0b0101'1100u);    // 1111'0000 ^ 1010'1100
}
// Checks both MinAtomic overloads: the routine must return the value that was in memory and
// leave min(memory, operand) behind. Covers the path where the operand replaces memory, the
// path where memory is left untouched, and (for UInt) a value with the high bit set so that a
// signed comparison would be detected. Only the result is verified, not atomicity.
TEST(ReactorUnitTests, MinAtomic)
{
	// Unsigned overload.
	{
		FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
		{
			Pointer<UInt> p = function.Arg<0>();
			UInt a = function.Arg<1>();
			UInt r = rr::MinAtomic(p, a, std::memory_order_relaxed);
			Return(r);
		}

		auto routine = function("one");

		// Operand is smaller: memory is replaced.
		uint32_t x = 123;
		uint32_t y = 100;
		uint32_t prevX = routine(&x, y);
		EXPECT_EQ(prevX, 123u);
		EXPECT_EQ(x, 100u);

		// Operand is larger: memory is unchanged.
		x = 100;
		y = 123;
		prevX = routine(&x, y);
		EXPECT_EQ(prevX, 100u);
		EXPECT_EQ(x, 100u);

		// High bit set in memory: must compare as unsigned, so the small operand wins.
		x = 0xFFFFFFF0u;
		y = 100;
		prevX = routine(&x, y);
		EXPECT_EQ(prevX, 0xFFFFFFF0u);
		EXPECT_EQ(x, 100u);
	}

	// Signed overload.
	{
		FunctionT<int32_t(int32_t * p, int32_t a)> function;
		{
			Pointer<Int> p = function.Arg<0>();
			Int a = function.Arg<1>();
			Int r = rr::MinAtomic(p, a, std::memory_order_relaxed);
			Return(r);
		}

		auto routine = function("one");

		// Operand is smaller: memory is replaced.
		int32_t x = -123;
		int32_t y = -200;
		int32_t prevX = routine(&x, y);
		EXPECT_EQ(prevX, -123);
		EXPECT_EQ(x, -200);

		// Operand is larger: memory is unchanged.
		x = -200;
		y = -123;
		prevX = routine(&x, y);
		EXPECT_EQ(prevX, -200);
		EXPECT_EQ(x, -200);
	}
}
// Checks both MaxAtomic overloads: the routine must return the value that was in memory and
// leave max(memory, operand) behind. Covers the path where memory is left untouched, the path
// where the operand replaces memory (without it, an implementation that never stores would
// pass), and (for UInt) an operand with the high bit set so that a signed comparison would be
// detected. Only the result is verified, not atomicity.
TEST(ReactorUnitTests, MaxAtomic)
{
	// Unsigned overload.
	{
		FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
		{
			Pointer<UInt> p = function.Arg<0>();
			UInt a = function.Arg<1>();
			UInt r = rr::MaxAtomic(p, a, std::memory_order_relaxed);
			Return(r);
		}

		auto routine = function("one");

		// Operand is smaller: memory is unchanged.
		uint32_t x = 123;
		uint32_t y = 100;
		uint32_t prevX = routine(&x, y);
		EXPECT_EQ(prevX, 123u);
		EXPECT_EQ(x, 123u);

		// Operand is larger: memory is replaced.
		x = 100;
		y = 123;
		prevX = routine(&x, y);
		EXPECT_EQ(prevX, 100u);
		EXPECT_EQ(x, 123u);

		// High bit set in operand: must compare as unsigned, so the operand wins.
		x = 100;
		y = 0xFFFFFFF0u;
		prevX = routine(&x, y);
		EXPECT_EQ(prevX, 100u);
		EXPECT_EQ(x, 0xFFFFFFF0u);
	}

	// Signed overload.
	{
		FunctionT<int32_t(int32_t * p, int32_t a)> function;
		{
			Pointer<Int> p = function.Arg<0>();
			Int a = function.Arg<1>();
			Int r = rr::MaxAtomic(p, a, std::memory_order_relaxed);
			Return(r);
		}

		auto routine = function("one");

		// Operand is smaller: memory is unchanged.
		int32_t x = -123;
		int32_t y = -200;
		int32_t prevX = routine(&x, y);
		EXPECT_EQ(prevX, -123);
		EXPECT_EQ(x, -123);

		// Operand is larger: memory is replaced.
		x = -200;
		y = -123;
		prevX = routine(&x, y);
		EXPECT_EQ(prevX, -200);
		EXPECT_EQ(x, -123);
	}
}
// Checks that ExchangeAtomic returns the value that was in memory and stores the operand.
// Only the result is verified, not atomicity.
TEST(ReactorUnitTests, ExchangeAtomic)
{
	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt operand = function.Arg<1>();
		UInt previous = rr::ExchangeAtomic(target, operand, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	uint32_t memory = 123;
	const uint32_t replacement = 456;
	const uint32_t returned = routine(&memory, replacement);

	EXPECT_EQ(returned, 123u);      // value before the exchange
	EXPECT_EQ(memory, replacement); // operand is now stored
}
// Checks CompareExchangeAtomic in both outcomes: when the comparand matches memory the new
// value is stored, and when it does not match memory is left alone. In both cases the routine
// returns the value that was in memory. Only the result is verified, not atomicity.
TEST(ReactorUnitTests, CompareExchangeAtomic)
{
	FunctionT<uint32_t(uint32_t * x, uint32_t y, uint32_t compare)> function;
	{
		Pointer<UInt> target = function.Arg<0>();
		UInt desired = function.Arg<1>();
		UInt expected = function.Arg<2>();
		UInt previous = rr::CompareExchangeAtomic(target, desired, expected, std::memory_order_relaxed, std::memory_order_relaxed);
		Return(previous);
	}

	auto routine = function("one");

	// Comparand equals memory: the exchange happens.
	uint32_t memory = 123;
	uint32_t desired = 456;
	uint32_t comparand = 123;
	uint32_t returned = routine(&memory, desired, comparand);
	EXPECT_EQ(returned, 123u);
	EXPECT_EQ(memory, desired);

	// Comparand differs from memory: memory keeps its value.
	memory = 123;
	desired = 456;
	comparand = 456;
	returned = routine(&memory, desired, comparand);
	EXPECT_EQ(returned, 123u);
	EXPECT_EQ(memory, 123u);
}
TEST(ReactorUnitTests, SRem)
{
FunctionT<void(int4 *, int4 *)> function;
......
......@@ -89,7 +89,7 @@ namespace {
# define __x86_64__ 1
#endif
static Ice::OptLevel toIce(rr::Optimization::Level level)
Ice::OptLevel toIce(rr::Optimization::Level level)
{
switch(level)
{
......@@ -103,6 +103,20 @@ static Ice::OptLevel toIce(rr::Optimization::Level level)
return Ice::Opt_2;
}
// Translates a std::memory_order into the matching Ice::Intrinsics memory order constant.
// Any value not covered by the switch yields Ice::Intrinsics::MemoryOrderInvalid.
Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
{
	Ice::Intrinsics::MemoryOrder iceOrder = Ice::Intrinsics::MemoryOrderInvalid;

	switch(memoryOrder)
	{
		case std::memory_order_relaxed:
			iceOrder = Ice::Intrinsics::MemoryOrderRelaxed;
			break;
		case std::memory_order_consume:
			iceOrder = Ice::Intrinsics::MemoryOrderConsume;
			break;
		case std::memory_order_acquire:
			iceOrder = Ice::Intrinsics::MemoryOrderAcquire;
			break;
		case std::memory_order_release:
			iceOrder = Ice::Intrinsics::MemoryOrderRelease;
			break;
		case std::memory_order_acq_rel:
			iceOrder = Ice::Intrinsics::MemoryOrderAcquireRelease;
			break;
		case std::memory_order_seq_cst:
			iceOrder = Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
			break;
	}

	return iceOrder;
}
class CPUID
{
public:
......@@ -1132,70 +1146,71 @@ Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedInd
return createAdd(ptr, index);
}
Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicAdd");
return nullptr;
}
Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicSub");
return nullptr;
}
Ice::Variable *result = ::function->makeVariable(value->getType());
Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicAnd");
return nullptr;
}
const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
auto target = ::context->getConstantUndef(Ice::IceType_i32);
auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
auto op = ::context->getConstantInt32(rmwOp);
auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
inst->addArg(op);
inst->addArg(ptr);
inst->addArg(value);
inst->addArg(order);
::basicBlock->appendInst(inst);
Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicOr");
return nullptr;
return V(result);
}
Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicXor");
return nullptr;
return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
}
Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicMin");
return nullptr;
return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
}
Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicMax");
return nullptr;
return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
}
Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicUMin");
return nullptr;
return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
}
Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicUMax");
return nullptr;
return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
}
Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
{
UNIMPLEMENTED("createAtomicExchange");
return nullptr;
return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
}
// Emits a Subzero AtomicCmpxchg intrinsic call and returns the variable that receives its result.
//   ptr                - address operated on
//   value              - value to store
//   compare            - value the memory contents are compared against
//   memoryOrderEqual   - ordering passed as the first order argument
//   memoryOrderUnequal - ordering passed as the second order argument
// The stale UNIMPLEMENTED()/`return nullptr` stub that preceded this code has been removed; it
// made the intrinsic emission below unreachable.
Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
{
	// The result has the same type as the value being stored.
	Ice::Variable *result = ::function->makeVariable(value->getType());

	const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
	auto target = ::context->getConstantUndef(Ice::IceType_i32);
	auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
	auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
	auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));

	// Argument order: pointer, comparand, new value, then the two memory orderings.
	inst->addArg(ptr);
	inst->addArg(compare);
	inst->addArg(value);
	inst->addArg(orderEq);
	inst->addArg(orderNeq);
	::basicBlock->appendInst(inst);

	return V(result);
}
static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
......@@ -3603,8 +3618,14 @@ void Breakpoint()
// Emits a Subzero AtomicFence intrinsic call with the requested memory ordering.
// The intrinsic has no destination variable, so nullptr is passed as its result.
// The stale UNIMPLEMENTED("Subzero createFence()") marker has been removed: the fence is
// implemented by the code below, so reporting it as unimplemented was wrong.
void Nucleus::createFence(std::memory_order memoryOrder)
{
	const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
	auto target = ::context->getConstantUndef(Ice::IceType_i32);
	auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
	auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
	inst->addArg(order);
	::basicBlock->appendInst(inst);
}
Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
{
UNIMPLEMENTED("Subzero createMaskedLoad()");
......@@ -3813,6 +3834,26 @@ RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
}
}
// Signed MinAtomic for this backend: forwards all three arguments unchanged to
// emulated::MinAtomic and returns its result.
RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
return emulated::MinAtomic(x, y, memoryOrder);
}
// Unsigned MinAtomic for this backend: forwards all three arguments unchanged to
// emulated::MinAtomic and returns its result.
RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
return emulated::MinAtomic(x, y, memoryOrder);
}
// Signed MaxAtomic for this backend: forwards all three arguments unchanged to
// emulated::MaxAtomic and returns its result.
RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
{
return emulated::MaxAtomic(x, y, memoryOrder);
}
// Unsigned MaxAtomic for this backend: forwards all three arguments unchanged to
// emulated::MaxAtomic and returns its result.
RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
{
return emulated::MaxAtomic(x, y, memoryOrder);
}
void EmitDebugLocation() {}
void EmitDebugVariable(Value *value) {}
void FlushDebug() {}
......
......@@ -259,6 +259,9 @@ namespace {
// Returns whether PNaCl allows the given memory ordering in general.
bool isMemoryOrderValidPNaCl(uint64_t Order) {
if (::Ice::getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl)
return true;
switch (Order) {
case Intrinsics::MemoryOrderAcquire:
case Intrinsics::MemoryOrderRelease:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment