Commit e6ab4707 by Antonio Maiorano

SubzeroReactor: implement most missing intrinsics

* Implement intrinsics for the Subzero backend required for ANGLE / SwiftShader for GLES 2 and 3.
* Note that most intrinsics are implemented as "emulated". I've added an rr::emulated namespace in EmulatedReactor.hpp/cpp that contains the set of Reactor functions that are presently being emulated. These are invoked from SubzeroReactor until we decide to implement proper intrinsics for these in Subzero.
Bug: b/130459196
Change-Id: I01171cfa7cc45b078c3b98be6b61328eee4f35e5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38874
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 9c0617c3
...@@ -1540,6 +1540,7 @@ if(${REACTOR_BACKEND} STREQUAL "Subzero") ...@@ -1540,6 +1540,7 @@ if(${REACTOR_BACKEND} STREQUAL "Subzero")
set(SUBZERO_REACTOR_LIST set(SUBZERO_REACTOR_LIST
${SOURCE_DIR}/Reactor/Debug.cpp ${SOURCE_DIR}/Reactor/Debug.cpp
${SOURCE_DIR}/Reactor/Debug.hpp ${SOURCE_DIR}/Reactor/Debug.hpp
${SOURCE_DIR}/Reactor/EmulatedReactor.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.cpp ${SOURCE_DIR}/Reactor/ExecutableMemory.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.hpp ${SOURCE_DIR}/Reactor/ExecutableMemory.hpp
${SOURCE_DIR}/Reactor/Nucleus.hpp ${SOURCE_DIR}/Reactor/Nucleus.hpp
...@@ -1675,6 +1676,7 @@ set(REACTOR_LLVM_LIST ...@@ -1675,6 +1676,7 @@ set(REACTOR_LLVM_LIST
${SOURCE_DIR}/Reactor/CPUID.hpp ${SOURCE_DIR}/Reactor/CPUID.hpp
${SOURCE_DIR}/Reactor/Debug.cpp ${SOURCE_DIR}/Reactor/Debug.cpp
${SOURCE_DIR}/Reactor/Debug.hpp ${SOURCE_DIR}/Reactor/Debug.hpp
${SOURCE_DIR}/Reactor/EmulatedReactor.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.cpp ${SOURCE_DIR}/Reactor/ExecutableMemory.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.hpp ${SOURCE_DIR}/Reactor/ExecutableMemory.hpp
${SOURCE_DIR}/Reactor/LLVMReactor.cpp ${SOURCE_DIR}/Reactor/LLVMReactor.cpp
......
...@@ -47,6 +47,7 @@ config("swiftshader_reactor_private_config") { ...@@ -47,6 +47,7 @@ config("swiftshader_reactor_private_config") {
swiftshader_source_set("swiftshader_reactor_base") { swiftshader_source_set("swiftshader_reactor_base") {
sources = [ sources = [
"Debug.cpp", "Debug.cpp",
"EmulatedReactor.cpp",
"ExecutableMemory.cpp", "ExecutableMemory.cpp",
"Reactor.cpp", "Reactor.cpp",
] ]
......
#include "EmulatedReactor.hpp"
#include <cmath>
#include <functional>
#include <utility>
namespace rr
{
namespace
{
template <typename T>
struct UnderlyingType
{
using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
};
template <typename T>
using UnderlyingTypeT = typename UnderlyingType<T>::Type;
// Applies a one-argument scalar function to each of the four lanes of x.
// For every lane the element is extracted, func is invoked on it via Call(),
// and the scalar that comes back is inserted into the same lane of the result.
template <typename Func, typename T>
RValue<T> call4(Func func, const RValue<T>& x)
{
	T lanes;  // Not initialized up front: all four lanes are written below.
	for (int lane = 0; lane < 4; lane++)
	{
		lanes = Insert(lanes, Call(func, Extract(x, lane)), lane);
	}
	return lanes;
}
// Applies a two-argument scalar function lane by lane: lane i of the result
// is func(x[i], y[i]), with the call made via Call().
template <typename Func, typename T>
RValue<T> call4(Func func, const RValue<T>& x, const RValue<T>& y)
{
	T lanes;  // Not initialized up front: all four lanes are written below.
	for (int lane = 0; lane < 4; lane++)
	{
		lanes = Insert(lanes, Call(func, Extract(x, lane), Extract(y, lane)), lane);
	}
	return lanes;
}
template <typename T, typename EL = UnderlyingTypeT<T>>
void gather(T& out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
{
constexpr bool atomic = false;
constexpr std::memory_order order = std::memory_order_relaxed;
Pointer<Byte> baseBytePtr = base;
out = T(0);
for (int i = 0; i < 4; i++)
{
If(Extract(mask, i) != 0)
{
auto offset = Extract(offsets, i);
auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
out = Insert(out, el, i);
}
Else If(zeroMaskedLanes)
{
out = Insert(out, EL(0), i);
}
}
}
// Emulated scatter: writes the enabled lanes of `val` to memory.
//
//   base      - base pointer; it is reinterpreted as a byte pointer, so the
//               offsets are applied in bytes.
//   val       - vector whose lanes are stored.
//   offsets   - per-lane byte offsets from base.
//   mask      - per-lane enable; a lane is stored only when its mask element is non-zero.
//   alignment - alignment passed to every element Store.
//
// Stores are non-atomic and use relaxed memory ordering.
template <typename T, typename EL = UnderlyingTypeT<T>>
void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
{
	constexpr bool kAtomic = false;
	constexpr std::memory_order kOrder = std::memory_order_relaxed;

	Pointer<Byte> byteBase = base;

	for (int lane = 0; lane < 4; lane++)
	{
		If(Extract(mask, lane) != 0)
		{
			auto byteOffset = Extract(offsets, lane);
			Store(Extract(val, lane), Pointer<EL>(&byteBase[byteOffset]), alignment, kAtomic, kOrder);
		}
	}
}
}
namespace emulated
{
	// ---- Gather / Scatter ----
	// Implemented with the file-local gather() / scatter() helpers.

	RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
	{
		Float4 gathered{};
		gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
		return gathered;
	}

	RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
	{
		Int4 gathered{};
		gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
		return gathered;
	}

	void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
	{
		scatter<Float4>(base, val, offsets, mask, alignment);
	}

	void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
	{
		scatter<Int4>(base, val, offsets, mask, alignment);
	}

	// ---- Scalar math ----
	// Each function is a single Call() to the C math function of the same name.

	RValue<Float> Exp2(RValue<Float> x) { return Call(exp2f, x); }
	RValue<Float> Log2(RValue<Float> x) { return Call(log2f, x); }

	// ---- Vector math ----
	// Each function runs the named C math function once per lane via call4().

	// Trigonometric functions and their inverses.
	RValue<Float4> Sin(RValue<Float4> x) { return call4(sinf, x); }
	RValue<Float4> Cos(RValue<Float4> x) { return call4(cosf, x); }
	RValue<Float4> Tan(RValue<Float4> x) { return call4(tanf, x); }
	RValue<Float4> Asin(RValue<Float4> x) { return call4(asinf, x); }
	RValue<Float4> Acos(RValue<Float4> x) { return call4(acosf, x); }
	RValue<Float4> Atan(RValue<Float4> x) { return call4(atanf, x); }

	// Hyperbolic functions and their inverses.
	RValue<Float4> Sinh(RValue<Float4> x) { return call4(sinhf, x); }
	RValue<Float4> Cosh(RValue<Float4> x) { return call4(coshf, x); }
	RValue<Float4> Tanh(RValue<Float4> x) { return call4(tanhf, x); }
	RValue<Float4> Asinh(RValue<Float4> x) { return call4(asinhf, x); }
	RValue<Float4> Acosh(RValue<Float4> x) { return call4(acoshf, x); }
	RValue<Float4> Atanh(RValue<Float4> x) { return call4(atanhf, x); }

	// Two-argument functions. The arguments are forwarded in declaration order:
	// x becomes the first argument of the C function and y the second.
	RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y) { return call4(atan2f, x, y); }
	RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y) { return call4(powf, x, y); }

	// Exponentials and logarithms.
	RValue<Float4> Exp(RValue<Float4> x) { return call4(expf, x); }
	RValue<Float4> Log(RValue<Float4> x) { return call4(logf, x); }
	RValue<Float4> Exp2(RValue<Float4> x) { return call4(exp2f, x); }
	RValue<Float4> Log2(RValue<Float4> x) { return call4(log2f, x); }
}
}
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Reactor.hpp"
// Implementation of Reactor functions that are "emulated" - that is,
// implemented either in terms of other Reactor code, or by using
// rr::Call to invoke C functions. These are typically slower than
// implementations that call directly into the JIT backend; however, they
// provide a good starting point for implementing a new backend, or for when
// adding functionality to an existing backend is non-trivial.
namespace rr
{
// Declarations of the emulated Reactor functions.
namespace emulated
{
// Gather: returns a four-lane vector read from `base` at the per-lane
// `offsets`, for the lanes selected by `mask`. `alignment` describes the
// alignment of each element access. `zeroMaskedLanes` defaults to false.
RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
// Scatter: writes the lanes of `val` selected by `mask` to `base` at the
// per-lane `offsets`.
void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment);
void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment);
// Scalar base-2 exponential and logarithm.
RValue<Float> Exp2(RValue<Float> x);
RValue<Float> Log2(RValue<Float> x);
// Four-lane trigonometric functions and their inverses.
RValue<Float4> Sin(RValue<Float4> x);
RValue<Float4> Cos(RValue<Float4> x);
RValue<Float4> Tan(RValue<Float4> x);
RValue<Float4> Asin(RValue<Float4> x);
RValue<Float4> Acos(RValue<Float4> x);
RValue<Float4> Atan(RValue<Float4> x);
// Four-lane hyperbolic functions and their inverses.
RValue<Float4> Sinh(RValue<Float4> x);
RValue<Float4> Cosh(RValue<Float4> x);
RValue<Float4> Tanh(RValue<Float4> x);
RValue<Float4> Asinh(RValue<Float4> x);
RValue<Float4> Acosh(RValue<Float4> x);
RValue<Float4> Atanh(RValue<Float4> x);
// Four-lane two-argument functions.
RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y);
// Four-lane exponentials and logarithms (natural and base-2).
RValue<Float4> Exp(RValue<Float4> x);
RValue<Float4> Log(RValue<Float4> x);
RValue<Float4> Exp2(RValue<Float4> x);
RValue<Float4> Log2(RValue<Float4> x);
}
}
...@@ -873,6 +873,55 @@ namespace ...@@ -873,6 +873,55 @@ namespace
llvm::Value *mulh = jit->builder->CreateAShr(mult, intTy->getBitWidth()); llvm::Value *mulh = jit->builder->CreateAShr(mult, intTy->getBitWidth());
return jit->builder->CreateTrunc(mulh, ty); return jit->builder->CreateTrunc(mulh, ty);
} }
// Emits a call to LLVM's masked_gather intrinsic and returns its result.
//
//   base            - pointer value; cast to i8* so that `offsets` index it in bytes.
//   elTy            - element type of the gathered vector.
//   offsets         - vector of per-lane offsets fed to the GEP on the i8* base.
//   mask            - integer vector; converted to a vector of i1 to form the intrinsic's mask.
//   alignment       - passed to the intrinsic as an i32 constant.
//   zeroMaskedLanes - selects the intrinsic's passthrough operand: a null (all-zero)
//                     vector when true, an undef vector when false.
//
// The number of lanes is taken from the type of `mask`.
llvm::Value *createGather(llvm::Value *base, llvm::Type *elTy, llvm::Value *offsets, llvm::Value *mask, unsigned int alignment, bool zeroMaskedLanes)
{
	ASSERT(base->getType()->isPointerTy());
	ASSERT(offsets->getType()->isVectorTy());
	ASSERT(mask->getType()->isVectorTy());

	auto &builder = *jit->builder;
	const auto laneCount = mask->getType()->getVectorNumElements();

	// Types used below.
	auto *int32Type = ::llvm::Type::getInt32Ty(jit->context);
	auto *bytePtrType = ::llvm::Type::getInt8Ty(jit->context)->getPointerTo();
	auto *boolVecType = ::llvm::VectorType::get(::llvm::Type::getInt1Ty(jit->context), laneCount);
	auto *resultVecType = ::llvm::VectorType::get(elTy, laneCount);
	auto *elementPtrVecType = ::llvm::VectorType::get(elTy->getPointerTo(), laneCount);

	// base (as i8*) + offsets -> vector of i8* -> vector of element pointers.
	auto *byteBase = builder.CreatePointerCast(base, bytePtrType);
	auto *bytePtrs = builder.CreateGEP(byteBase, offsets);
	auto *elementPtrs = builder.CreatePointerCast(bytePtrs, elementPtrVecType);

	// vec<int, int, ...> -> vec<bool, bool, ...>
	auto *laneEnable = builder.CreateIntCast(mask, boolVecType, false);

	::llvm::Constant *passthrough = nullptr;
	if (zeroMaskedLanes)
	{
		passthrough = ::llvm::Constant::getNullValue(resultVecType);
	}
	else
	{
		passthrough = llvm::UndefValue::get(resultVecType);
	}

	auto *alignmentConstant = ::llvm::ConstantInt::get(int32Type, alignment);
	auto *gatherIntrinsic = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { resultVecType, elementPtrVecType } );
	return builder.CreateCall(gatherIntrinsic, { elementPtrs, alignmentConstant, laneEnable, passthrough });
}
// Emits a call to LLVM's masked_scatter intrinsic.
//
//   base      - pointer value; cast to i8* so that `offsets` index it in bytes.
//   val       - vector of values to store; its type supplies the element type.
//   offsets   - vector of per-lane offsets fed to the GEP on the i8* base.
//   mask      - integer vector; converted to a vector of i1 to form the intrinsic's mask.
//   alignment - passed to the intrinsic as an i32 constant.
//
// The number of lanes is taken from the type of `mask`.
void createScatter(llvm::Value *base, llvm::Value *val, llvm::Value *offsets, llvm::Value *mask, unsigned int alignment)
{
	ASSERT(base->getType()->isPointerTy());
	ASSERT(val->getType()->isVectorTy());
	ASSERT(offsets->getType()->isVectorTy());
	ASSERT(mask->getType()->isVectorTy());

	auto &builder = *jit->builder;
	const auto laneCount = mask->getType()->getVectorNumElements();

	// Types used below.
	auto *int32Type = ::llvm::Type::getInt32Ty(jit->context);
	auto *bytePtrType = ::llvm::Type::getInt8Ty(jit->context)->getPointerTo();
	auto *boolVecType = ::llvm::VectorType::get(::llvm::Type::getInt1Ty(jit->context), laneCount);
	auto *valueVecType = val->getType();
	auto *elementPtrType = valueVecType->getVectorElementType()->getPointerTo();
	auto *elementPtrVecType = ::llvm::VectorType::get(elementPtrType, laneCount);

	// base (as i8*) + offsets -> vector of i8* -> vector of element pointers.
	auto *byteBase = builder.CreatePointerCast(base, bytePtrType);
	auto *bytePtrs = builder.CreateGEP(byteBase, offsets);
	auto *elementPtrs = builder.CreatePointerCast(bytePtrs, elementPtrVecType);

	// vec<int, int, ...> -> vec<bool, bool, ...>
	auto *laneEnable = builder.CreateIntCast(mask, boolVecType, false);

	auto *alignmentConstant = ::llvm::ConstantInt::get(int32Type, alignment);
	auto *scatterIntrinsic = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_scatter, { valueVecType, elementPtrVecType } );
	builder.CreateCall(scatterIntrinsic, { val, elementPtrs, alignmentConstant, laneEnable });
}
} }
namespace rr namespace rr
...@@ -1751,53 +1800,24 @@ namespace rr ...@@ -1751,53 +1800,24 @@ namespace rr
jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask }); jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask });
} }
Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes) RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{ {
ASSERT(V(base)->getType()->isPointerTy()); return As<Float4>(V(createGather(V(base.value), T(Float::getType()), V(offsets.value), V(mask.value), alignment, zeroMaskedLanes)));
ASSERT(V(offsets)->getType()->isVectorTy()); }
ASSERT(V(mask)->getType()->isVectorTy());
auto numEls = V(mask)->getType()->getVectorNumElements(); RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
auto i1Ty = ::llvm::Type::getInt1Ty(jit->context); {
auto i32Ty = ::llvm::Type::getInt32Ty(jit->context); return As<Int4>(V(createGather(V(base.value), T(Float::getType()), V(offsets.value), V(mask.value), alignment, zeroMaskedLanes)));
auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
auto i8PtrTy = i8Ty->getPointerTo();
auto elPtrTy = T(elTy)->getPointerTo();
auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
return V(jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
} }
void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
{ {
ASSERT(V(base)->getType()->isPointerTy()); return createScatter(V(base.value), V(val.value), V(offsets.value), V(mask.value), alignment);
ASSERT(V(val)->getType()->isVectorTy()); }
ASSERT(V(offsets)->getType()->isVectorTy());
ASSERT(V(mask)->getType()->isVectorTy());
auto numEls = V(mask)->getType()->getVectorNumElements(); void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
auto i1Ty = ::llvm::Type::getInt1Ty(jit->context); {
auto i32Ty = ::llvm::Type::getInt32Ty(jit->context); return createScatter(V(base.value), V(val.value), V(offsets.value), V(mask.value), alignment);
auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
auto i8PtrTy = i8Ty->getPointerTo();
auto elVecTy = V(val)->getType();
auto elTy = elVecTy->getVectorElementType();
auto elPtrTy = elTy->getPointerTo();
auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
jit->builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
} }
void Nucleus::createFence(std::memory_order memoryOrder) void Nucleus::createFence(std::memory_order memoryOrder)
......
...@@ -199,10 +199,6 @@ namespace rr ...@@ -199,10 +199,6 @@ namespace rr
static Value *createMaskedLoad(Value *base, Type *elementType, Value *mask, unsigned int alignment, bool zeroMaskedLanes); static Value *createMaskedLoad(Value *base, Type *elementType, Value *mask, unsigned int alignment, bool zeroMaskedLanes);
static void createMaskedStore(Value *base, Value *value, Value *mask, unsigned int alignment); static void createMaskedStore(Value *base, Value *value, Value *mask, unsigned int alignment);
// Scatter / Gather instructions
static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes);
static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment);
// Barrier instructions // Barrier instructions
static void createFence(std::memory_order memoryOrder); static void createFence(std::memory_order memoryOrder);
......
...@@ -4327,26 +4327,6 @@ namespace rr ...@@ -4327,26 +4327,6 @@ namespace rr
Nucleus::createMaskedStore(base.value, val.value, mask.value, alignment); Nucleus::createMaskedStore(base.value, val.value, mask.value, alignment);
} }
// Float gather: forwards to Nucleus::createGather with Float as the element
// type and wraps the value it returns as an RValue<Float4>.
RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{
	Value *gathered = Nucleus::createGather(base.value, Float::getType(), offsets.value, mask.value, alignment, zeroMaskedLanes);
	return RValue<Float4>(gathered);
}
// Int gather: forwards to Nucleus::createGather with Int as the element type
// and wraps the value it returns as an RValue<Int4>.
RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{
	Value *gathered = Nucleus::createGather(base.value, Int::getType(), offsets.value, mask.value, alignment, zeroMaskedLanes);
	return RValue<Int4>(gathered);
}
// Float scatter: hands the underlying values of all operands to
// Nucleus::createScatter.
void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
{
	Value *const destination = base.value;
	Value *const lanes = val.value;
	Nucleus::createScatter(destination, lanes, offsets.value, mask.value, alignment);
}
// Int scatter: hands the underlying values of all operands to
// Nucleus::createScatter.
void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
{
	Value *const destination = base.value;
	Value *const lanes = val.value;
	Nucleus::createScatter(destination, lanes, offsets.value, mask.value, alignment);
}
void Fence(std::memory_order memoryOrder) void Fence(std::memory_order memoryOrder)
{ {
ASSERT_MSG(memoryOrder == std::memory_order_acquire || ASSERT_MSG(memoryOrder == std::memory_order_acquire ||
......
...@@ -2316,14 +2316,14 @@ namespace rr ...@@ -2316,14 +2316,14 @@ namespace rr
// TODO: Currently unimplemented for Subzero. // TODO: Currently unimplemented for Subzero.
// Count leading zeros. // Count leading zeros.
// Returns 32 when: isZeroUndef && x == 0. // Returns 32 when: !isZeroUndef && x == 0.
// Returns an undefined value when: !isZeroUndef && x == 0. // Returns an undefined value when: isZeroUndef && x == 0.
RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef); RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef);
RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef); RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef);
// Count trailing zeros. // Count trailing zeros.
// Returns 32 when: isZeroUndef && x == 0. // Returns 32 when: !isZeroUndef && x == 0.
// Returns an undefined value when: !isZeroUndef && x == 0. // Returns an undefined value when: isZeroUndef && x == 0.
RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef); RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef);
RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef); RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "Reactor.hpp" #include "Reactor.hpp"
#include "Debug.hpp" #include "Debug.hpp"
#include "EmulatedReactor.hpp"
#include "Optimizer.hpp" #include "Optimizer.hpp"
#include "ExecutableMemory.hpp" #include "ExecutableMemory.hpp"
...@@ -3560,7 +3561,6 @@ namespace rr ...@@ -3560,7 +3561,6 @@ namespace rr
Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys) Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
{ {
// FIXME: This does not currently work on Windows.
Ice::Variable *ret = nullptr; Ice::Variable *ret = nullptr;
if (retTy != nullptr) if (retTy != nullptr)
{ {
...@@ -3583,37 +3583,203 @@ namespace rr ...@@ -3583,37 +3583,203 @@ namespace rr
::basicBlock->appendInst(trap); ::basicBlock->appendInst(trap);
} }
// Below are functions currently unimplemented for the Subzero backend.
// They are stubbed to satisfy the linker.
void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); } void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; } Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; }
void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); } void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); }
Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); } RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
RValue<Float> Exp2(RValue<Float> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float(0); } {
RValue<Float> Log2(RValue<Float> x) { UNIMPLEMENTED("Subzero Log2()"); return Float(0); } return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); } }
RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4(0); }
RValue<Float4> Tan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tan()"); return Float4(0); } RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
RValue<Float4> Asin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asin()"); return Float4(0); } {
RValue<Float4> Acos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acos()"); return Float4(0); } return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
RValue<Float4> Atan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atan()"); return Float4(0); } }
RValue<Float4> Sinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sinh()"); return Float4(0); }
RValue<Float4> Cosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cosh()"); return Float4(0); } void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
RValue<Float4> Tanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tanh()"); return Float4(0); } {
RValue<Float4> Asinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asinh()"); return Float4(0); } return emulated::Scatter(base, val, offsets, mask, alignment);
RValue<Float4> Acosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acosh()"); return Float4(0); } }
RValue<Float4> Atanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atanh()"); return Float4(0); }
RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Atan2()"); return Float4(0); } void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Pow()"); return Float4(0); } {
RValue<Float4> Exp(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp()"); return Float4(0); } return emulated::Scatter(base, val, offsets, mask, alignment);
RValue<Float4> Log(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log()"); return Float4(0); } }
RValue<Float4> Exp2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float4(0); }
RValue<Float4> Log2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log2()"); return Float4(0); } RValue<Float> Exp2(RValue<Float> x)
RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt(0); } {
RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0); } return emulated::Exp2(x);
RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt(0); } }
RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0); }
// The functions below implement the Reactor math API for this backend by
// forwarding to the function of the same name in the rr::emulated namespace.

// Scalar base-2 logarithm.
RValue<Float> Log2(RValue<Float> x)
{
return emulated::Log2(x);
}
// Four-lane sine.
RValue<Float4> Sin(RValue<Float4> x)
{
return emulated::Sin(x);
}
// Four-lane cosine.
RValue<Float4> Cos(RValue<Float4> x)
{
return emulated::Cos(x);
}
// Four-lane tangent.
RValue<Float4> Tan(RValue<Float4> x)
{
return emulated::Tan(x);
}
// Four-lane arc sine.
RValue<Float4> Asin(RValue<Float4> x)
{
return emulated::Asin(x);
}
// Four-lane arc cosine.
RValue<Float4> Acos(RValue<Float4> x)
{
return emulated::Acos(x);
}
// Four-lane arc tangent.
RValue<Float4> Atan(RValue<Float4> x)
{
return emulated::Atan(x);
}
// Four-lane hyperbolic sine.
RValue<Float4> Sinh(RValue<Float4> x)
{
return emulated::Sinh(x);
}
// Four-lane hyperbolic cosine.
RValue<Float4> Cosh(RValue<Float4> x)
{
return emulated::Cosh(x);
}
// Four-lane hyperbolic tangent.
RValue<Float4> Tanh(RValue<Float4> x)
{
return emulated::Tanh(x);
}
// Four-lane inverse hyperbolic sine.
RValue<Float4> Asinh(RValue<Float4> x)
{
return emulated::Asinh(x);
}
// Four-lane inverse hyperbolic cosine.
RValue<Float4> Acosh(RValue<Float4> x)
{
return emulated::Acosh(x);
}
// Four-lane inverse hyperbolic tangent.
RValue<Float4> Atanh(RValue<Float4> x)
{
return emulated::Atanh(x);
}
// Four-lane two-argument arc tangent; x and y are forwarded in this order.
RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
{
return emulated::Atan2(x, y);
}
// Four-lane power; x and y are forwarded in this order.
RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
{
return emulated::Pow(x, y);
}
// Four-lane natural exponential.
RValue<Float4> Exp(RValue<Float4> x)
{
return emulated::Exp(x);
}
// Four-lane natural logarithm.
RValue<Float4> Log(RValue<Float4> x)
{
return emulated::Log(x);
}
// Four-lane base-2 exponential.
RValue<Float4> Exp2(RValue<Float4> x)
{
return emulated::Exp2(x);
}
// Four-lane base-2 logarithm.
RValue<Float4> Log2(RValue<Float4> x)
{
return emulated::Log2(x);
}
// Count leading zeros of a scalar UInt.
// When emulateIntrinsics is set this is reported as unimplemented and a zero
// value is returned. Otherwise a Subzero Ctlz intrinsic call producing an i32
// is appended to the current basic block.
// isZeroUndef is not consulted by this implementation.
RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
{
	if (emulateIntrinsics)
	{
		UNIMPLEMENTED("Subzero Ctlz()"); return UInt(0);
	}

	Ice::Variable* leadingZeros = ::function->makeVariable(Ice::IceType_i32);
	const Ice::Intrinsics::IntrinsicInfo ctlzInfo = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
	auto undefTarget = ::context->getConstantUndef(Ice::IceType_i32);

	auto ctlzCall = Ice::InstIntrinsicCall::create(::function, 1, leadingZeros, undefTarget, ctlzInfo);
	ctlzCall->addArg(x.value);
	::basicBlock->appendInst(ctlzCall);

	return RValue<UInt>(V(leadingZeros));
}
// Count leading zeros of each lane of a UInt4.
// When emulateIntrinsics is set this is reported as unimplemented and a zero
// vector is returned. Otherwise the scalar Ctlz is applied to each lane in turn.
RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
{
	if (emulateIntrinsics)
	{
		UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0);
	}

	// TODO: implement vectorized version in Subzero
	UInt4 counts;
	for (int lane = 0; lane < 4; lane++)
	{
		counts = Insert(counts, Ctlz(Extract(x, lane), isZeroUndef), lane);
	}
	return counts;
}
// Count trailing zeros of a scalar UInt.
// When emulateIntrinsics is set this is reported as unimplemented and a zero
// value is returned. Otherwise a Subzero Cttz intrinsic call producing an i32
// is appended to the current basic block.
// isZeroUndef is not consulted by this implementation.
RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
{
	if (emulateIntrinsics)
	{
		UNIMPLEMENTED("Subzero Cttz()"); return UInt(0);
	}

	Ice::Variable* trailingZeros = ::function->makeVariable(Ice::IceType_i32);
	const Ice::Intrinsics::IntrinsicInfo cttzInfo = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
	auto undefTarget = ::context->getConstantUndef(Ice::IceType_i32);

	auto cttzCall = Ice::InstIntrinsicCall::create(::function, 1, trailingZeros, undefTarget, cttzInfo);
	cttzCall->addArg(x.value);
	::basicBlock->appendInst(cttzCall);

	return RValue<UInt>(V(trailingZeros));
}
// Count trailing zeros of each lane of a UInt4.
// When emulateIntrinsics is set this is reported as unimplemented and a zero
// vector is returned. Otherwise the scalar Cttz is applied to each lane in turn.
RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
{
	if (emulateIntrinsics)
	{
		UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0);
	}

	// TODO: implement vectorized version in Subzero
	UInt4 counts;
	for (int lane = 0; lane < 4; lane++)
	{
		counts = Insert(counts, Cttz(Extract(x, lane), isZeroUndef), lane);
	}
	return counts;
}
void EmitDebugLocation() {} void EmitDebugLocation() {}
void EmitDebugVariable(Value* value) {} void EmitDebugVariable(Value* value) {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment