Commit 6448bd66 by Alexis Hetu Committed by Alexis Hétu

Memory leak fix for SamplingRoutine cache

- The SamplingRoutine cache is now owned by the Device and has the same lifetime as the device, which means it gets released when the device gets released. - Added a utility class, SamplingRoutineCache, to contain the cache and release all routine entries upon destruction. - Added a pointer to the current device in the sampler object in order to retrieve it during sampling, in order to access the routine cache - The cache is now internally an LRUCache, so it can't cache a large quantity of routines and take up a lot of memory. Bug b/129523279 b/137649247 b/137524292 chromium:971325 Change-Id: If4ff1fbc87962355d0a281b9d0acace4316a02b8 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32688Tested-by: 's avatarAlexis Hétu <sugoi@google.com> Presubmit-Ready: Alexis Hétu <sugoi@google.com> Reviewed-by: 's avatarChris Forbes <chrisforbes@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent 52ce1e9f
......@@ -1196,7 +1196,7 @@ namespace sw
std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
static ImageSampler *emitSamplerFunction(ImageInstruction instruction, const Sampler &samplerState);
static rr::Routine *emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);
// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
static sw::TextureType convertTextureType(VkImageViewType imageViewType);
......
......@@ -16,13 +16,11 @@
#include "SamplerCore.hpp" // TODO: Figure out what's needed.
#include "System/Math.hpp"
#include "Vulkan/VkBuffer.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkDescriptorSet.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkDevice.hpp"
#include "Vulkan/VkImageView.hpp"
#include "Vulkan/VkSampler.hpp"
#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Device/Config.hpp"
#include <spirv/unified1/spirv.hpp>
......@@ -31,31 +29,6 @@
#include <climits>
#include <mutex>
namespace
{
struct SamplingRoutineKey
{
uint32_t instruction;
uint32_t sampler;
uint32_t imageView;
bool operator==(const SamplingRoutineKey &rhs) const
{
return instruction == rhs.instruction && sampler == rhs.sampler && imageView == rhs.imageView;
}
struct Hash
{
std::size_t operator()(const SamplingRoutineKey &key) const noexcept
{
return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
}
};
};
}
namespace sw {
SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler)
......@@ -63,15 +36,18 @@ SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::Sampl
ImageInstruction instruction(inst);
ASSERT(imageDescriptor->imageViewId != 0 && (sampler->id != 0 || instruction.samplerMethod == Fetch));
// TODO(b/129523279): Move somewhere sensible.
static std::unordered_map<SamplingRoutineKey, ImageSampler*, SamplingRoutineKey::Hash> cache;
static std::mutex mutex;
vk::Device::SamplingRoutineCache::Key key = {inst, imageDescriptor->imageViewId, sampler->id};
ASSERT(imageDescriptor->device);
SamplingRoutineKey key = {inst, imageDescriptor->imageViewId, sampler->id};
std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex());
vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache();
std::unique_lock<std::mutex> lock(mutex);
auto it = cache.find(key);
if (it != cache.end()) { return it->second; }
rr::Routine* routine = cache->query(key);
if(routine)
{
return (ImageSampler*)(routine->getEntry());
}
auto type = imageDescriptor->type;
......@@ -108,13 +84,13 @@ SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::Sampl
UNSUPPORTED("anisotropyEnable");
}
auto fptr = emitSamplerFunction(instruction, samplerState);
routine = emitSamplerRoutine(instruction, samplerState);
cache.emplace(key, fptr);
return fptr;
cache->add(key, routine);
return (ImageSampler*)(routine->getEntry());
}
SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction instruction, const Sampler &samplerState)
rr::Routine *SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
{
// TODO(b/129523279): Hold a separate mutex lock for the sampler being built.
rr::Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<SIMD::Float>, Pointer<SIMD::Float>, Pointer<Byte>)> function;
......@@ -231,7 +207,7 @@ SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction ins
}
}
return (ImageSampler*)function("sampler")->getEntry();
return function("sampler");
}
sw::TextureType SpirvShader::convertTextureType(VkImageViewType imageViewType)
......
......@@ -269,7 +269,7 @@ void SampledImageDescriptor::updateSampler(const vk::Sampler *newSampler)
}
}
void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
void DescriptorSetLayout::WriteDescriptorSet(Device* device, DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
{
DescriptorSetLayout* dstLayout = dstSet->header.layout;
auto &binding = dstLayout->bindings[dstLayout->getBindingIndex(entry.dstBinding)];
......@@ -294,6 +294,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
{
imageSampler[i].updateSampler(vk::Cast(update->sampler));
}
imageSampler[i].device = device;
}
}
else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
......@@ -319,6 +320,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
imageSampler[i].texture.width = sw::replicate(static_cast<float>(numElements));
imageSampler[i].texture.height = sw::replicate(1);
imageSampler[i].texture.depth = sw::replicate(1);
imageSampler[i].device = device;
sw::Mipmap &mipmap = imageSampler[i].texture.mipmap[0];
mipmap.buffer = bufferView->getPointer();
......@@ -360,6 +362,7 @@ void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptor
imageSampler[i].type = imageView->getType();
imageSampler[i].swizzle = imageView->getComponentMapping();
imageSampler[i].format = format;
imageSampler[i].device = device;
auto &subresourceRange = imageView->getSubresourceRange();
......@@ -572,7 +575,7 @@ void DescriptorSetLayout::WriteTextureLevelInfo(sw::Texture *texture, int level,
mipmap.sliceP[3] = sliceP;
}
void DescriptorSetLayout::WriteDescriptorSet(const VkWriteDescriptorSet& writeDescriptorSet)
void DescriptorSetLayout::WriteDescriptorSet(Device* device, const VkWriteDescriptorSet& writeDescriptorSet)
{
DescriptorSet* dstSet = vk::Cast(writeDescriptorSet.dstSet);
VkDescriptorUpdateTemplateEntry e;
......@@ -611,7 +614,7 @@ void DescriptorSetLayout::WriteDescriptorSet(const VkWriteDescriptorSet& writeDe
UNIMPLEMENTED("descriptor type %u", writeDescriptorSet.descriptorType);
}
WriteDescriptorSet(dstSet, e, reinterpret_cast<char const *>(ptr));
WriteDescriptorSet(device, dstSet, e, reinterpret_cast<char const *>(ptr));
}
void DescriptorSetLayout::CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies)
......
......@@ -25,6 +25,7 @@ namespace vk
{
class DescriptorSet;
class Device;
// TODO(b/129523279): Move to the Device or Pipeline layer.
struct alignas(16) SampledImageDescriptor
......@@ -35,6 +36,7 @@ struct alignas(16) SampledImageDescriptor
// TODO(b/129523279): Minimize to the data actually needed.
vk::Sampler sampler;
vk::Device* device;
uint32_t imageViewId;
VkImageViewType type;
......@@ -84,10 +86,10 @@ public:
static size_t ComputeRequiredAllocationSize(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
static size_t GetDescriptorSize(VkDescriptorType type);
static void WriteDescriptorSet(const VkWriteDescriptorSet& descriptorWrites);
static void WriteDescriptorSet(Device* device, const VkWriteDescriptorSet& descriptorWrites);
static void CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies);
static void WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src);
static void WriteDescriptorSet(Device* device, DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src);
static void WriteTextureLevelInfo(sw::Texture *texture, int level, int width, int height, int depth, int pitchP, int sliceP);
void initialize(DescriptorSet* descriptorSet);
......
......@@ -35,14 +35,14 @@ namespace vk
return info->descriptorUpdateEntryCount * sizeof(VkDescriptorUpdateTemplateEntry);
}
void DescriptorUpdateTemplate::updateDescriptorSet(VkDescriptorSet vkDescriptorSet, const void* pData)
void DescriptorUpdateTemplate::updateDescriptorSet(Device* device, VkDescriptorSet vkDescriptorSet, const void* pData)
{
DescriptorSet* descriptorSet = vk::Cast(vkDescriptorSet);
for(uint32_t i = 0; i < descriptorUpdateEntryCount; i++)
{
DescriptorSetLayout::WriteDescriptorSet(descriptorSet, descriptorUpdateEntries[i],
DescriptorSetLayout::WriteDescriptorSet(device, descriptorSet, descriptorUpdateEntries[i],
reinterpret_cast<char const *>(pData));
}
}
......
......@@ -20,6 +20,7 @@
namespace vk
{
class DescriptorSetLayout;
class Device;
class DescriptorUpdateTemplate : public Object<DescriptorUpdateTemplate, VkDescriptorUpdateTemplate>
{
......@@ -28,7 +29,7 @@ namespace vk
static size_t ComputeRequiredAllocationSize(const VkDescriptorUpdateTemplateCreateInfo* info);
void updateDescriptorSet(VkDescriptorSet descriptorSet, const void* pData);
void updateDescriptorSet(Device* device, VkDescriptorSet descriptorSet, const void* pData);
private:
uint32_t descriptorUpdateEntryCount = 0;
......
......@@ -36,6 +36,22 @@ namespace
namespace vk
{
rr::Routine* Device::SamplingRoutineCache::query(const vk::Device::SamplingRoutineCache::Key& key) const
{
return cache.query(hash(key));
}
void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::Key& key, rr::Routine* routine)
{
ASSERT(routine);
cache.add(hash(key), routine);
}
std::size_t Device::SamplingRoutineCache::hash(const vk::Device::SamplingRoutineCache::Key &key) const
{
return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
}
Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures)
: physicalDevice(physicalDevice),
queues(reinterpret_cast<Queue*>(mem)),
......@@ -72,7 +88,7 @@ Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice
}
// FIXME (b/119409619): use an allocator here so we can control all memory allocations
blitter = new sw::Blitter();
blitter.reset(new sw::Blitter());
}
void Device::destroy(const VkAllocationCallbacks* pAllocator)
......@@ -83,8 +99,6 @@ void Device::destroy(const VkAllocationCallbacks* pAllocator)
}
vk::deallocate(queues, pAllocator);
delete blitter;
}
size_t Device::ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo)
......@@ -212,7 +226,7 @@ void Device::updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDe
{
for(uint32_t i = 0; i < descriptorWriteCount; i++)
{
DescriptorSetLayout::WriteDescriptorSet(pDescriptorWrites[i]);
DescriptorSetLayout::WriteDescriptorSet(this, pDescriptorWrites[i]);
}
for(uint32_t i = 0; i < descriptorCopyCount; i++)
......@@ -221,4 +235,18 @@ void Device::updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDe
}
}
Device::SamplingRoutineCache* Device::getSamplingRoutineCache()
{
if(!samplingRoutineCache.get())
{
samplingRoutineCache.reset(new SamplingRoutineCache());
}
return samplingRoutineCache.get();
}
std::mutex& Device::getSamplingRoutineCacheMutex()
{
return samplingRoutineCacheMutex;
}
} // namespace vk
......@@ -16,10 +16,15 @@
#define VK_DEVICE_HPP_
#include "VkObject.hpp"
#include "Device/LRUCache.hpp"
#include "Reactor/Routine.hpp"
#include <memory>
#include <mutex>
namespace sw
{
class Blitter;
class SamplingRoutineCache;
}
namespace vk
......@@ -48,19 +53,43 @@ public:
void updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites,
uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies);
const VkPhysicalDeviceFeatures &getEnabledFeatures() const { return enabledFeatures; }
sw::Blitter* getBlitter() const { return blitter; }
sw::Blitter* getBlitter() const { return blitter.get(); }
class SamplingRoutineCache
{
public:
SamplingRoutineCache() : cache(1024) {}
~SamplingRoutineCache() {}
struct Key
{
uint32_t instruction;
uint32_t sampler;
uint32_t imageView;
};
rr::Routine* query(const Key& key) const;
void add(const Key& key, rr::Routine* routine);
private:
std::size_t hash(const Key &key) const;
sw::LRUCache<std::size_t, rr::Routine> cache;
};
SamplingRoutineCache* getSamplingRoutineCache();
std::mutex& getSamplingRoutineCacheMutex();
private:
PhysicalDevice *const physicalDevice = nullptr;
Queue *const queues = nullptr;
uint32_t queueCount = 0;
const uint32_t enabledExtensionCount = 0;
std::unique_ptr<sw::Blitter> blitter;
std::unique_ptr<SamplingRoutineCache> samplingRoutineCache;
std::mutex samplingRoutineCacheMutex;
uint32_t enabledExtensionCount = 0;
typedef char ExtensionName[VK_MAX_EXTENSION_NAME_SIZE];
ExtensionName* extensions = nullptr;
const VkPhysicalDeviceFeatures enabledFeatures = {};
sw::Blitter* blitter = nullptr;
};
using DispatchableDevice = DispatchableObject<Device, VkDevice>;
......
......@@ -2558,7 +2558,7 @@ VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSetWithTemplate(VkDevice device, Vk
TRACE("(VkDevice device = %p, VkDescriptorSet descriptorSet = %p, VkDescriptorUpdateTemplate descriptorUpdateTemplate = %p, const void* pData = %p)",
device, static_cast<void*>(descriptorSet), static_cast<void*>(descriptorUpdateTemplate), pData);
vk::Cast(descriptorUpdateTemplate)->updateDescriptorSet(descriptorSet, pData);
vk::Cast(descriptorUpdateTemplate)->updateDescriptorSet(vk::Cast(device), descriptorSet, pData);
}
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment