Commit ee1af664 by Preston Jackson

Support LLVM 8+ with Reactor

LLVM 8 removes several x86 SSE intrinsics for saturated arithmetic, but provides target-independent replacements.

Note there are now three code paths for these vector operations:
- With LLVM 8+, use the new target-independent intrinsics.
- With LLVM 7 on x86, use the SSE intrinsics.
- On other architectures (with LLVM 7), lower to a generic sequence of vector operations (lowerPSAT).

Bug: b/139412871
Change-Id: I849f8b5c004849c628507d6377779df170a11d93
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36788
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 6fff5122
...@@ -309,8 +309,14 @@ namespace ...@@ -309,8 +309,14 @@ namespace
// settings and no Reactor routine directly links against another. // settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine class JITRoutine : public rr::Routine
{ {
#if LLVM_VERSION_MAJOR >= 8
using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#else
using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer; using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>; using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#endif
public: public:
JITRoutine( JITRoutine(
std::unique_ptr<llvm::Module> module, std::unique_ptr<llvm::Module> module,
...@@ -611,7 +617,7 @@ namespace ...@@ -611,7 +617,7 @@ namespace
return jit->builder->CreateCall(trunc, ARGS(x)); return jit->builder->CreateCall(trunc, ARGS(x));
} }
// Packed add/sub saturatation // Packed add/sub with saturation
llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned) llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
{ {
llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
...@@ -646,26 +652,6 @@ namespace ...@@ -646,26 +652,6 @@ namespace
return jit->builder->CreateTrunc(res, ty); return jit->builder->CreateTrunc(res, ty);
} }
// Packed unsigned add with saturation.
// Thin wrapper that forwards to lowerPSAT with isAdd = true, isSigned = false.
llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
{
return lowerPSAT(x, y, true, false);
}
// Packed signed add with saturation.
// Thin wrapper that forwards to lowerPSAT with isAdd = true, isSigned = true.
llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
{
return lowerPSAT(x, y, true, true);
}
// Packed unsigned subtract with saturation.
// Thin wrapper that forwards to lowerPSAT with isAdd = false, isSigned = false.
llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
{
return lowerPSAT(x, y, false, false);
}
// Packed signed subtract with saturation.
// Thin wrapper that forwards to lowerPSAT with isAdd = false, isSigned = true.
llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
{
return lowerPSAT(x, y, false, true);
}
llvm::Value *lowerSQRT(llvm::Value *x) llvm::Value *lowerSQRT(llvm::Value *x)
{ {
llvm::Function *sqrt = llvm::Intrinsic::getDeclaration( llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
...@@ -817,6 +803,44 @@ namespace ...@@ -817,6 +803,44 @@ namespace
} }
#endif // !defined(__i386__) && !defined(__x86_64__) #endif // !defined(__i386__) && !defined(__x86_64__)
#if (LLVM_VERSION_MAJOR >= 8) || (!defined(__i386__) && !defined(__x86_64__))
// Packed unsigned add with saturation of x and y.
// - LLVM 8+: emits the target-independent llvm::Intrinsic::uadd_sat via the
//   IR builder's CreateBinaryIntrinsic.
// - Older LLVM: falls back to the generic lowering, lowerPSAT with
//   isAdd = true, isSigned = false.
llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
{
#if LLVM_VERSION_MAJOR >= 8
return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::uadd_sat, x, y);
#else
return lowerPSAT(x, y, true, false);
#endif
}
// Packed signed add with saturation of x and y.
// - LLVM 8+: emits the target-independent llvm::Intrinsic::sadd_sat via the
//   IR builder's CreateBinaryIntrinsic.
// - Older LLVM: falls back to the generic lowering, lowerPSAT with
//   isAdd = true, isSigned = true.
llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
{
#if LLVM_VERSION_MAJOR >= 8
return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::sadd_sat, x, y);
#else
return lowerPSAT(x, y, true, true);
#endif
}
// Packed unsigned subtract with saturation (x - y).
// - LLVM 8+: emits the target-independent llvm::Intrinsic::usub_sat via the
//   IR builder's CreateBinaryIntrinsic.
// - Older LLVM: falls back to the generic lowering, lowerPSAT with
//   isAdd = false, isSigned = false.
llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
{
#if LLVM_VERSION_MAJOR >= 8
return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::usub_sat, x, y);
#else
return lowerPSAT(x, y, false, false);
#endif
}
// Packed signed subtract with saturation (x - y).
// - LLVM 8+: emits the target-independent llvm::Intrinsic::ssub_sat via the
//   IR builder's CreateBinaryIntrinsic.
// - Older LLVM: falls back to the generic lowering, lowerPSAT with
//   isAdd = false, isSigned = true.
llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
{
#if LLVM_VERSION_MAJOR >= 8
return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::ssub_sat, x, y);
#else
return lowerPSAT(x, y, false, true);
#endif
}
#endif // (LLVM_VERSION_MAJOR >= 8) || (!defined(__i386__) && !defined(__x86_64__))
llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext) llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
{ {
llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
...@@ -4095,58 +4119,90 @@ namespace rr ...@@ -4095,58 +4119,90 @@ namespace rr
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_w); #if LLVM_VERSION_MAJOR >= 8
return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
#else
llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_w);
return As<Short4>(V(jit->builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value))))); return As<Short4>(V(jit->builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y) RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
{ {
llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_w); #if LLVM_VERSION_MAJOR >= 8
return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
#else
llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_w);
return As<Short4>(V(jit->builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value))))); return As<Short4>(V(jit->builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_w); #if LLVM_VERSION_MAJOR >= 8
return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
#else
llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_w);
return As<UShort4>(V(jit->builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value))))); return As<UShort4>(V(jit->builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
{ {
llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_w); #if LLVM_VERSION_MAJOR >= 8
return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
#else
llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_w);
return As<UShort4>(V(jit->builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value))))); return As<UShort4>(V(jit->builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y) RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
{ {
llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_b); #if LLVM_VERSION_MAJOR >= 8
return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
#else
llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_b);
return As<SByte8>(V(jit->builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value))))); return As<SByte8>(V(jit->builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y) RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
{ {
llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_b); #if LLVM_VERSION_MAJOR >= 8
return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
#else
llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_b);
return As<SByte8>(V(jit->builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value))))); return As<SByte8>(V(jit->builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y) RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
{ {
llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_b); #if LLVM_VERSION_MAJOR >= 8
return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
#else
llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_b);
return As<Byte8>(V(jit->builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value))))); return As<Byte8>(V(jit->builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y) RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
{ {
llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_b); #if LLVM_VERSION_MAJOR >= 8
return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
#else
llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_b);
return As<Byte8>(V(jit->builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value))))); return As<Byte8>(V(jit->builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
#endif
} }
RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y) RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment