Commit 204a410c by Ben Clayton

SpirvShader: Add a single-entry, last-used sampler cache

This complements the const-cache in 34348.

Timings for the Glass demo running on an i7-4930K:
  this change: 24.25 FPS
  this change without 34528: 23.02 FPS
  parent change (inc 34528): 22.46 FPS

Bug: b/137649247
Change-Id: I206cdaabfaf63da7f67e3cd5f6823f3343b823c8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34528
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 27e28737
...@@ -2057,6 +2057,25 @@ namespace sw ...@@ -2057,6 +2057,25 @@ namespace sw
routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents)); routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
break; break;
} }
case spv::OpImageDrefGather:
case spv::OpImageFetch:
case spv::OpImageGather:
case spv::OpImageQueryLod:
case spv::OpImageSampleDrefExplicitLod:
case spv::OpImageSampleDrefImplicitLod:
case spv::OpImageSampleExplicitLod:
case spv::OpImageSampleImplicitLod:
case spv::OpImageSampleProjDrefExplicitLod:
case spv::OpImageSampleProjDrefImplicitLod:
case spv::OpImageSampleProjExplicitLod:
case spv::OpImageSampleProjImplicitLod:
{
Object::ID resultId = insn.word(2);
routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
break;
}
default: default:
// Nothing else produces interface variables, so can all be safely ignored. // Nothing else produces interface variables, so can all be safely ignored.
break; break;
...@@ -5013,10 +5032,20 @@ namespace sw ...@@ -5013,10 +5032,20 @@ namespace sw
in[i] = sampleValue.Float(0); in[i] = sampleValue.Float(0);
} }
auto samplerFunc = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler); auto cacheIt = state->routine->samplerCache.find(resultId);
ASSERT(cacheIt != state->routine->samplerCache.end());
auto &cache = cacheIt->second;
auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
If(!cacheHit)
{
cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
cache.imageDescriptor = imageDescriptor;
cache.sampler = sampler;
}
Array<SIMD::Float> out(4); Array<SIMD::Float> out(4);
Call<ImageSampler>(samplerFunc, texture, sampler, &in[0], &out[0], state->routine->constants); Call<ImageSampler>(cache.function, texture, sampler, &in[0], &out[0], state->routine->constants);
for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); } for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
......
...@@ -1251,10 +1251,17 @@ namespace sw ...@@ -1251,10 +1251,17 @@ namespace sw
using Variable = Array<SIMD::Float>; using Variable = Array<SIMD::Float>;
// Single-entry, last-used cache for one image sampling instruction.
//
// One entry is default-constructed per sampling instruction and stored in
// the routine's samplerCache map under that instruction's result ID. The
// generated code compares the stored (imageDescriptor, sampler) pair with
// the current one; only on a mismatch does it look the sampling function up
// again and overwrite all three fields.
struct SamplerCache
{
	// Image descriptor the cached function was looked up for.
	// Starts as null because the generated code reads this field before the
	// first store to it; without a defined initial value the very first
	// comparison could spuriously report a hit and call through an
	// uninitialized 'function' pointer.
	// NOTE(review): assumes a live image descriptor is never null, and that
	// Pointer<T> is constructible from nullptr - confirm.
	Pointer<Byte> imageDescriptor = nullptr;

	// Sampler the cached function was looked up for. Only meaningful once
	// imageDescriptor has been set.
	Pointer<Byte> sampler;

	// Sampling function returned by the last lookup.
	Pointer<Byte> function;
};
vk::PipelineLayout const * const pipelineLayout; vk::PipelineLayout const * const pipelineLayout;
std::unordered_map<SpirvShader::Object::ID, Variable> variables; std::unordered_map<SpirvShader::Object::ID, Variable> variables;
std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
Variable inputs = Variable{MAX_INTERFACE_COMPONENTS}; Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
Variable outputs = Variable{MAX_INTERFACE_COMPONENTS}; Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
......
...@@ -1953,6 +1953,12 @@ namespace rr ...@@ -1953,6 +1953,12 @@ namespace rr
return V(jit->builder->CreateBitCast(V(v), T(destType))); return V(jit->builder->CreateBitCast(V(v), T(destType)));
} }
// Emits an equality comparison between the two pointer values 'lhs' and
// 'rhs' and returns the value holding the comparison result.
Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
{
	RR_DEBUG_INFO_UPDATE_LOC();

	// Pointer operands go through the same integer-equality builder call
	// that createICmpEQ uses.
	auto *isEqual = jit->builder->CreateICmpEQ(V(lhs), V(rhs));
	return V(isEqual);
}
Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
{ {
RR_DEBUG_INFO_UPDATE_LOC(); RR_DEBUG_INFO_UPDATE_LOC();
......
...@@ -230,6 +230,7 @@ namespace rr ...@@ -230,6 +230,7 @@ namespace rr
static Value *createBitCast(Value *V, Type *destType); static Value *createBitCast(Value *V, Type *destType);
// Compare instructions // Compare instructions
// Equality comparison of two pointer values; called by the Pointer<T> operator==.
static Value *createPtrEQ(Value *lhs, Value *rhs);
static Value *createICmpEQ(Value *lhs, Value *rhs); static Value *createICmpEQ(Value *lhs, Value *rhs);
static Value *createICmpNE(Value *lhs, Value *rhs); static Value *createICmpNE(Value *lhs, Value *rhs);
static Value *createICmpUGT(Value *lhs, Value *rhs); static Value *createICmpUGT(Value *lhs, Value *rhs);
......
...@@ -2356,6 +2356,12 @@ namespace rr ...@@ -2356,6 +2356,12 @@ namespace rr
RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset); RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset);
RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset); RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset);
// Compares two Reactor pointers of the same pointee type for equality.
// Both operands are loaded and the comparison is emitted through
// Nucleus::createPtrEQ; the result is a Reactor boolean.
template <typename T>
RValue<Bool> operator==(const Pointer<T> &lhs, const Pointer<T> &rhs)
{
	Value *const left = lhs.loadValue();
	Value *const right = rhs.loadValue();
	return RValue<Bool>(Nucleus::createPtrEQ(left, right));
}
template<typename T> template<typename T>
RValue<T> Load(RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder) RValue<T> Load(RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
{ {
......
...@@ -1072,6 +1072,39 @@ TEST(ReactorUnitTests, MulAdd) ...@@ -1072,6 +1072,39 @@ TEST(ReactorUnitTests, MulAdd)
} }
// Checks that operator== on Pointer<Byte> generates a correct pointer
// equality comparison, including for addresses that differ only in their
// upper bits.
TEST(ReactorUnitTests, PointersEqual)
{
	// Generated routine: returns 1 when both pointer arguments are equal,
	// 0 otherwise.
	Function<Int(Pointer<Byte>, Pointer<Byte>)> function;
	{
		Pointer<Byte> ptrA = function.Arg<0>();
		Pointer<Byte> ptrB = function.Arg<1>();
		If (ptrA == ptrB)
		{
			Return(1);
		}
		Else
		{
			Return(0);
		}
	}

	auto routine = function("one");
	auto equal = (int(*)(void*, void*))routine->getEntry();

	// Bit patterns to compare: null, a value with only low-half bits set,
	// and a value with only the top bits set. They are never dereferenced.
	void* const pointers[] = {
		reinterpret_cast<void*>(uintptr_t(0x0000000000000000)),
		reinterpret_cast<void*>(uintptr_t(0x00000000F0000000)),
		reinterpret_cast<void*>(uintptr_t(0xF000000000000000)),
	};

	// Pass the pointer values themselves (not the addresses of the local
	// variables holding them), so the patterns above are what the generated
	// code actually compares. The host comparison is the expected result,
	// which keeps the test valid where uintptr_t is narrower than 64 bits
	// and some of the patterns collapse to the same value.
	for (void* lhs : pointers)
	{
		for (void* rhs : pointers)
		{
			EXPECT_EQ(equal(lhs, rhs), (lhs == rhs) ? 1 : 0);
		}
	}
}
TEST(ReactorUnitTests, Call) TEST(ReactorUnitTests, Call)
{ {
if (!rr::Caps.CallSupported) if (!rr::Caps.CallSupported)
......
...@@ -1222,6 +1222,11 @@ namespace rr ...@@ -1222,6 +1222,11 @@ namespace rr
return V(result); return V(result);
} }
// Emits an equality comparison between the two pointer values 'lhs' and
// 'rhs' and returns the value holding the comparison result.
Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
{
	// Pointers take the same integer-compare path, with the Eq condition,
	// that createICmpEQ uses.
	Value *const isEqual = createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
	return isEqual;
}
Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
{ {
return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs); return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment