Commit 3575550a by Alexis Hétu, committed by Alexis Hétu

Faster image sampler fetch from key

This CL makes a tradeoff of a one-time conversion of the LRUCache to a constant unordered map to save time on a costly per-pixel imageSampler fetch operation. When the renderer is idle, the device copies the LRUCache to an unordered map, which has faster fetch times. This cache is always constant throughout any rendering operation, so it can be fetched without a mutex. This copy operation happens only if the LRUCache was modified since the last copy occurred, so, if all sampling variations happened on the first frame, all subsequent frames can render much faster. On macOS, the Glass demo goes from 2.6 FPS to 20 FPS. Bug b/129523279 b/137649247 Change-Id: I195ca8b2ead59eb5cc9e75e8b0dc5119c794d717 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34348 Presubmit-Ready: Alexis Hétu <sugoi@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Ben Clayton <bclayton@google.com>
parent ec303925
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <cstring> #include <cstring>
#include <type_traits> #include <type_traits>
#include <unordered_map>
namespace sw namespace sw
{ {
...@@ -28,15 +29,15 @@ namespace sw ...@@ -28,15 +29,15 @@ namespace sw
public: public:
LRUCache(int n); LRUCache(int n);
~LRUCache(); virtual ~LRUCache();
Data *query(const Key &key) const; Data *query(const Key &key) const;
Data *add(const Key &key, Data *data); virtual Data *add(const Key &key, Data *data);
int getSize() {return size;} int getSize() {return size;}
Key &getKey(int i) {return key[i];} Key &getKey(int i) {return key[i];}
private: protected:
int size; int size;
int mask; int mask;
int top; int top;
...@@ -47,6 +48,29 @@ namespace sw ...@@ -47,6 +48,29 @@ namespace sw
Data **data; Data **data;
}; };
// LRUConstCache extends LRUCache with a secondary, constant lookup table:
// an unordered_map snapshot of the LRU contents that offers faster fetches
// than the base cache's query(). Per the change description, the snapshot
// is only rebuilt (by updateConstCache()) while no rendering is in flight,
// so queryConstCache() can be called without taking a mutex.
// NOTE(review): that lock-free guarantee is the caller's responsibility —
// this class itself performs no synchronization; confirm call sites.
template<class Key, class Data>
class LRUConstCache : public LRUCache<Key, Data>
{
	using LRUBase = LRUCache<Key, Data>;

public:
	LRUConstCache(int n) : LRUBase(n) {}
	~LRUConstCache() { clearConstCache(); }  // Release the snapshot's references (see clearConstCache()).

	// Adds to the underlying LRU cache and marks the snapshot stale so the
	// next updateConstCache() call rebuilds it.
	Data *add(const Key &key, Data *data) override
	{
		constCacheNeedsUpdate = true;
		return LRUBase::add(key, data);
	}

	// Rebuilds the snapshot from the LRU contents, if it is stale.
	void updateConstCache();
	// Mutex-free lookup in the snapshot; returns nullptr on a miss.
	Data *queryConstCache(const Key &key) const;

private:
	void clearConstCache();

	bool constCacheNeedsUpdate = false;  // Set by add(), cleared by updateConstCache().
	std::unordered_map<Key, Data*> constCache;
};
// Helper class for clearing the memory of objects at construction. // Helper class for clearing the memory of objects at construction.
// Useful as the first base class of cache keys which may contain padding bytes or bits otherwise left uninitialized. // Useful as the first base class of cache keys which may contain padding bytes or bits otherwise left uninitialized.
template<class T> template<class T>
...@@ -183,6 +207,45 @@ namespace sw ...@@ -183,6 +207,45 @@ namespace sw
return data; return data;
} }
// Empties the snapshot, calling unbind() on each cached Data object to
// balance the bind() that updateConstCache() performed when inserting it.
template<class Key, class Data>
void LRUConstCache<Key, Data>::clearConstCache()
{
	for(auto &entry : constCache)
	{
		entry.second->unbind();
	}

	constCache.clear();
}
// Rebuilds the unordered_map snapshot from the current LRU contents.
// No-op unless add() was called since the last rebuild. Each Data object
// placed in the snapshot is bind()'d; the matching unbind() happens in
// clearConstCache().
template<class Key, class Data>
void LRUConstCache<Key, Data>::updateConstCache()
{
	if(!constCacheNeedsUpdate)
	{
		return;
	}

	clearConstCache();

	for(int index = 0; index < LRUBase::size; index++)
	{
		Data *entry = LRUBase::data[index];
		if(entry)
		{
			entry->bind();
			constCache[*LRUBase::ref[index]] = entry;
		}
	}

	constCacheNeedsUpdate = false;
}
// Looks up `key` in the snapshot without touching the LRU structures.
// Returns the cached Data pointer, or nullptr if the key is absent.
template<class Key, class Data>
Data *LRUConstCache<Key, Data>::queryConstCache(const Key &key) const
{
	auto match = constCache.find(key);
	if(match == constCache.end())
	{
		return nullptr;
	}

	return match->second;
}
} }
#endif // sw_LRUCache_hpp #endif // sw_LRUCache_hpp
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "System/Timer.hpp" #include "System/Timer.hpp"
#include "Vulkan/VkConfig.h" #include "Vulkan/VkConfig.h"
#include "Vulkan/VkDebug.hpp" #include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkDevice.hpp"
#include "Vulkan/VkFence.hpp" #include "Vulkan/VkFence.hpp"
#include "Vulkan/VkImageView.hpp" #include "Vulkan/VkImageView.hpp"
#include "Vulkan/VkQueryPool.hpp" #include "Vulkan/VkQueryPool.hpp"
...@@ -162,7 +163,7 @@ namespace sw ...@@ -162,7 +163,7 @@ namespace sw
deallocate(data); deallocate(data);
} }
Renderer::Renderer() Renderer::Renderer(vk::Device* device) : device(device)
{ {
for(int i = 0; i < 16; i++) for(int i = 0; i < 16; i++)
{ {
...@@ -733,6 +734,7 @@ namespace sw ...@@ -733,6 +734,7 @@ namespace sw
void Renderer::synchronize() void Renderer::synchronize()
{ {
sync.wait(); sync.wait();
device->updateSamplingRoutineConstCache();
} }
void Renderer::finishRendering(Task &pixelTask) void Renderer::finishRendering(Task &pixelTask)
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
namespace vk namespace vk
{ {
class DescriptorSet; class DescriptorSet;
class Device;
class Query; class Query;
} }
...@@ -156,7 +157,7 @@ namespace sw ...@@ -156,7 +157,7 @@ namespace sw
}; };
public: public:
Renderer(); Renderer(vk::Device* device);
virtual ~Renderer(); virtual ~Renderer();
...@@ -254,6 +255,8 @@ namespace sw ...@@ -254,6 +255,8 @@ namespace sw
Routine *vertexRoutine; Routine *vertexRoutine;
Routine *setupRoutine; Routine *setupRoutine;
Routine *pixelRoutine; Routine *pixelRoutine;
vk::Device* device;
}; };
struct DrawCall struct DrawCall
......
...@@ -40,10 +40,16 @@ SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::Sampl ...@@ -40,10 +40,16 @@ SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::Sampl
ASSERT(imageDescriptor->device); ASSERT(imageDescriptor->device);
rr::Routine* routine = imageDescriptor->device->findInConstCache(key);
if(routine)
{
return (ImageSampler*)(routine->getEntry());
}
std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex()); std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex());
vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache(); vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache();
rr::Routine* routine = cache->query(key); routine = cache->query(key);
if(routine) if(routine)
{ {
return (ImageSampler*)(routine->getEntry()); return (ImageSampler*)(routine->getEntry());
......
...@@ -47,7 +47,17 @@ void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::K ...@@ -47,7 +47,17 @@ void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::K
cache.add(hash(key), routine); cache.add(hash(key), routine);
} }
std::size_t Device::SamplingRoutineCache::hash(const vk::Device::SamplingRoutineCache::Key &key) const rr::Routine* Device::SamplingRoutineCache::queryConst(const vk::Device::SamplingRoutineCache::Key& key) const
{
return cache.queryConstCache(hash(key));
}
// Rebuilds the constant (mutex-free lookup) snapshot of the sampling
// routine cache; a no-op if no routine was added since the last rebuild.
// Callers must hold the sampling routine cache mutex (see
// Device::updateSamplingRoutineConstCache()).
void Device::SamplingRoutineCache::updateConstCache()
{
	cache.updateConstCache();
}
std::size_t Device::SamplingRoutineCache::hash(const vk::Device::SamplingRoutineCache::Key &key)
{ {
return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView; return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
} }
...@@ -71,7 +81,7 @@ Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice ...@@ -71,7 +81,7 @@ Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice
for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++) for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
{ {
new (&queues[queueID]) Queue(); new (&queues[queueID]) Queue(this);
} }
} }
...@@ -89,6 +99,7 @@ Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice ...@@ -89,6 +99,7 @@ Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice
// FIXME (b/119409619): use an allocator here so we can control all memory allocations // FIXME (b/119409619): use an allocator here so we can control all memory allocations
blitter.reset(new sw::Blitter()); blitter.reset(new sw::Blitter());
samplingRoutineCache.reset(new SamplingRoutineCache());
} }
void Device::destroy(const VkAllocationCallbacks* pAllocator) void Device::destroy(const VkAllocationCallbacks* pAllocator)
...@@ -235,15 +246,22 @@ void Device::updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDe ...@@ -235,15 +246,22 @@ void Device::updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDe
} }
} }
Device::SamplingRoutineCache* Device::getSamplingRoutineCache() Device::SamplingRoutineCache* Device::getSamplingRoutineCache() const
{ {
if(!samplingRoutineCache.get())
{
samplingRoutineCache.reset(new SamplingRoutineCache());
}
return samplingRoutineCache.get(); return samplingRoutineCache.get();
} }
// Fast-path lookup of a sampling routine in the constant cache snapshot.
// Deliberately does NOT take samplingRoutineCacheMutex: per this change,
// the snapshot is only rebuilt while the renderer is idle (via
// updateSamplingRoutineConstCache()), so reads are safe without locking.
// Returns nullptr on a miss; the caller then falls back to the locked
// LRU-cache path.
rr::Routine* Device::findInConstCache(const SamplingRoutineCache::Key& key) const
{
	return samplingRoutineCache->queryConst(key);
}
// Refreshes the mutex-free snapshot of the sampling routine cache.
// The mutex serializes the rebuild against concurrent cache additions
// performed under getSamplingRoutineCacheMutex().
void Device::updateSamplingRoutineConstCache()
{
	std::lock_guard<std::mutex> lock(samplingRoutineCacheMutex);

	samplingRoutineCache->updateConstCache();
}
std::mutex& Device::getSamplingRoutineCacheMutex() std::mutex& Device::getSamplingRoutineCacheMutex()
{ {
return samplingRoutineCacheMutex; return samplingRoutineCacheMutex;
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
namespace sw namespace sw
{ {
class Blitter; class Blitter;
class SamplingRoutineCache;
} }
namespace vk namespace vk
...@@ -71,13 +70,19 @@ public: ...@@ -71,13 +70,19 @@ public:
rr::Routine* query(const Key& key) const; rr::Routine* query(const Key& key) const;
void add(const Key& key, rr::Routine* routine); void add(const Key& key, rr::Routine* routine);
rr::Routine* queryConst(const Key& key) const;
void updateConstCache();
static std::size_t hash(const Key &key);
private: private:
std::size_t hash(const Key &key) const; sw::LRUConstCache<std::size_t, rr::Routine> cache;
sw::LRUCache<std::size_t, rr::Routine> cache;
}; };
SamplingRoutineCache* getSamplingRoutineCache(); SamplingRoutineCache* getSamplingRoutineCache() const;
std::mutex& getSamplingRoutineCacheMutex(); std::mutex& getSamplingRoutineCacheMutex();
rr::Routine* findInConstCache(const SamplingRoutineCache::Key& key) const;
void updateSamplingRoutineConstCache();
private: private:
PhysicalDevice *const physicalDevice = nullptr; PhysicalDevice *const physicalDevice = nullptr;
......
...@@ -74,7 +74,7 @@ VkSubmitInfo* DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo* pSubm ...@@ -74,7 +74,7 @@ VkSubmitInfo* DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo* pSubm
namespace vk namespace vk
{ {
Queue::Queue() : renderer() Queue::Queue(Device* device) : renderer(device)
{ {
queueThread = std::thread(TaskLoop, this); queueThread = std::thread(TaskLoop, this);
} }
......
...@@ -31,6 +31,7 @@ namespace sw ...@@ -31,6 +31,7 @@ namespace sw
namespace vk namespace vk
{ {
class Device;
class Fence; class Fence;
class Queue class Queue
...@@ -38,7 +39,7 @@ class Queue ...@@ -38,7 +39,7 @@ class Queue
VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC }; VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC };
public: public:
Queue(); Queue(Device* device);
~Queue(); ~Queue();
operator VkQueue() operator VkQueue()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment