Commit 46eaba7f by Mohan Maiya Committed by Commit Bot

Vulkan: Add support for internal cache hit and miss counts

Add a CacheStats class that provides cache hit and miss bookkeeping. All internal caches make use of this class to keep track of their stats. This provides a means to profile cache hit ratios, a.k.a. Vulkan object reuse, for any application. Bug: angleproject:5447 Test: Manual verification with angle_end2end_tests Change-Id: I44eeb0c2b9b291ec1cdd156fb2be4a5fe80d2848 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2580111 Commit-Queue: Mohan Maiya <m.maiya@samsung.com> Reviewed-by: Tim Van Patten <timvp@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent f32fbb51
......@@ -610,7 +610,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
uint32_t dynamicOffset;
vk::BindingPointer<vk::DescriptorSetLayout> descriptorSetLayout;
vk::RefCountedDescriptorPoolBinding descriptorPoolBinding;
angle::FastIntegerMap<VkDescriptorSet> descriptorSetCache;
DriverUniformsDescriptorSetCache descriptorSetCache;
DriverUniformsDescriptorSet();
~DriverUniformsDescriptorSet();
......
......@@ -347,22 +347,12 @@ FramebufferVk::FramebufferVk(RendererVk *renderer,
FramebufferVk::~FramebufferVk() = default;
void FramebufferVk::clearCache(ContextVk *contextVk)
{
for (auto &entry : mFramebufferCache)
{
vk::FramebufferHelper &tmpFB = entry.second;
tmpFB.release(contextVk);
}
mFramebufferCache.clear();
}
void FramebufferVk::destroy(const gl::Context *context)
{
ContextVk *contextVk = vk::GetImpl(context);
mReadPixelBuffer.release(contextVk->getRenderer());
clearCache(contextVk);
mFramebufferCache.clear(contextVk);
}
angle::Result FramebufferVk::discard(const gl::Context *context,
......@@ -1797,7 +1787,7 @@ angle::Result FramebufferVk::syncState(const gl::Context *context,
case gl::Framebuffer::DIRTY_BIT_DEFAULT_FIXED_SAMPLE_LOCATIONS:
// Invalidate the cache. If we have performance critical code hitting this path we
// can add related data (such as width/height) to the cache
clearCache(contextVk);
mFramebufferCache.clear(contextVk);
break;
case gl::Framebuffer::DIRTY_BIT_DEFAULT_LAYERS:
shouldUpdateLayerCount = true;
......@@ -1951,10 +1941,10 @@ angle::Result FramebufferVk::getFramebuffer(ContextVk *contextVk,
return angle::Result::Continue;
}
// No current FB, so now check for previously cached Framebuffer
auto iter = mFramebufferCache.find(mCurrentFramebufferDesc);
if (iter != mFramebufferCache.end())
vk::FramebufferHelper *framebufferHelper = nullptr;
if (mFramebufferCache.get(contextVk, mCurrentFramebufferDesc, &framebufferHelper))
{
*framebufferOut = &iter->second.getFramebuffer();
*framebufferOut = &framebufferHelper->getFramebuffer();
return angle::Result::Continue;
}
......@@ -2065,9 +2055,11 @@ angle::Result FramebufferVk::getFramebuffer(ContextVk *contextVk,
// Check that our description matches our attachments. Can catch implementation bugs.
ASSERT(static_cast<uint32_t>(attachments.size()) == mCurrentFramebufferDesc.attachmentCount());
mFramebufferCache[mCurrentFramebufferDesc] = std::move(newFramebuffer);
mFramebuffer = &mFramebufferCache[mCurrentFramebufferDesc];
*framebufferOut = &mFramebuffer->getFramebuffer();
mFramebufferCache.insert(mCurrentFramebufferDesc, std::move(newFramebuffer));
bool result = mFramebufferCache.get(contextVk, mCurrentFramebufferDesc, &mFramebuffer);
ASSERT(result);
*framebufferOut = &mFramebuffer->getFramebuffer();
return angle::Result::Continue;
}
......
......@@ -204,8 +204,6 @@ class FramebufferVk : public FramebufferImpl
const GLenum *attachments,
bool isSubInvalidate,
const gl::Rectangle &invalidateArea);
// Release all FramebufferVk objects in the cache and clear cache
void clearCache(ContextVk *contextVk);
RenderTargetVk *getReadPixelsRenderTarget(GLenum format) const;
VkImageAspectFlagBits getReadPixelsAspectFlags(GLenum format) const;
......@@ -238,7 +236,7 @@ class FramebufferVk : public FramebufferImpl
gl::DrawBufferMask mEmulatedAlphaAttachmentMask;
vk::FramebufferDesc mCurrentFramebufferDesc;
angle::HashMap<vk::FramebufferDesc, vk::FramebufferHelper> mFramebufferCache;
FramebufferCache mFramebufferCache;
vk::ClearValuesArray mDeferredClears;
......
......@@ -357,11 +357,11 @@ angle::Result ProgramExecutableVk::allocUniformAndXfbDescriptorSet(
mCurrentDefaultUniformBufferSerial = xfbBufferDesc.getDefaultUniformBufferSerial();
// Look up in the cache first
auto iter = mUniformsAndXfbDescriptorSetCache.find(xfbBufferDesc);
if (iter != mUniformsAndXfbDescriptorSetCache.end())
VkDescriptorSet descriptorSet = VK_NULL_HANDLE;
if (mUniformsAndXfbDescriptorSetCache.get(xfbBufferDesc, &descriptorSet))
{
*newDescriptorSetAllocated = false;
mDescriptorSets[ToUnderlying(DescriptorSetIndex::UniformsAndXfb)] = iter->second;
mDescriptorSets[ToUnderlying(DescriptorSetIndex::UniformsAndXfb)] = descriptorSet;
// The descriptor pool that this descriptor set was allocated from needs to be retained each
// time the descriptor set is used in a new command.
mDescriptorPoolBindings[ToUnderlying(DescriptorSetIndex::UniformsAndXfb)].get().retain(
......@@ -380,7 +380,7 @@ angle::Result ProgramExecutableVk::allocUniformAndXfbDescriptorSet(
}
// Add the descriptor set into cache
mUniformsAndXfbDescriptorSetCache.emplace(
mUniformsAndXfbDescriptorSetCache.insert(
xfbBufferDesc, mDescriptorSets[ToUnderlying(DescriptorSetIndex::UniformsAndXfb)]);
*newDescriptorSetAllocated = true;
......@@ -1427,11 +1427,10 @@ angle::Result ProgramExecutableVk::updateTexturesDescriptorSet(ContextVk *contex
}
const vk::TextureDescriptorDesc &texturesDesc = contextVk->getActiveTexturesDesc();
auto iter = mTextureDescriptorsCache.find(texturesDesc);
if (iter != mTextureDescriptorsCache.end())
VkDescriptorSet descriptorSet = VK_NULL_HANDLE;
if (mTextureDescriptorsCache.get(texturesDesc, &descriptorSet))
{
mDescriptorSets[ToUnderlying(DescriptorSetIndex::Texture)] = iter->second;
mDescriptorSets[ToUnderlying(DescriptorSetIndex::Texture)] = descriptorSet;
// The descriptor pool that this descriptor set was allocated from needs to be retained each
// time the descriptor set is used in a new command.
mDescriptorPoolBindings[ToUnderlying(DescriptorSetIndex::Texture)].get().retain(
......@@ -1439,10 +1438,7 @@ angle::Result ProgramExecutableVk::updateTexturesDescriptorSet(ContextVk *contex
return angle::Result::Continue;
}
VkDescriptorSet descriptorSet = VK_NULL_HANDLE;
const gl::ActiveTextureArray<vk::TextureUnit> &activeTextures = contextVk->getActiveTextures();
bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling();
bool useOldRewriteStructSamplers = contextVk->useOldRewriteStructSamplers();
......@@ -1483,7 +1479,7 @@ angle::Result ProgramExecutableVk::updateTexturesDescriptorSet(ContextVk *contex
}
descriptorSet = mDescriptorSets[ToUnderlying(DescriptorSetIndex::Texture)];
mTextureDescriptorsCache.emplace(texturesDesc, descriptorSet);
mTextureDescriptorsCache.insert(texturesDesc, descriptorSet);
}
ASSERT(descriptorSet != VK_NULL_HANDLE);
......
......@@ -245,8 +245,8 @@ class ProgramExecutableVk
size_t mNumDefaultUniformDescriptors;
vk::BufferSerial mCurrentDefaultUniformBufferSerial;
angle::HashMap<vk::UniformsAndXfbDesc, VkDescriptorSet> mUniformsAndXfbDescriptorSetCache;
angle::HashMap<vk::TextureDescriptorDesc, VkDescriptorSet> mTextureDescriptorsCache;
DescriptorSetCache<vk::UniformsAndXfbDesc> mUniformsAndXfbDescriptorSetCache;
DescriptorSetCache<vk::TextureDescriptorDesc> mTextureDescriptorsCache;
// We keep a reference to the pipeline and descriptor set layouts. This ensures they don't get
// deleted while this program is in use.
......
......@@ -3275,6 +3275,7 @@ angle::Result RenderPassCache::getRenderPassWithOpsImpl(ContextVk *contextVk,
// TODO(jmadill): Could possibly use an MRU cache here.
vk::GetRenderPassAndUpdateCounters(contextVk, updatePerfCounters, &innerIt->second,
renderPassOut);
mRenderPassWithOpsCacheStats.hit();
return angle::Result::Continue;
}
}
......@@ -3284,6 +3285,7 @@ angle::Result RenderPassCache::getRenderPassWithOpsImpl(ContextVk *contextVk,
outerIt = emplaceResult.first;
}
mRenderPassWithOpsCacheStats.miss();
vk::RenderPassHelper newRenderPass;
ANGLE_TRY(vk::InitializeRenderPassFromDesc(contextVk, desc, attachmentOps, &newRenderPass));
......@@ -3402,9 +3404,11 @@ angle::Result DescriptorSetLayoutCache::getDescriptorSetLayout(
{
vk::RefCountedDescriptorSetLayout &layout = iter->second;
descriptorSetLayoutOut->set(&layout);
mCacheStats.hit();
return angle::Result::Continue;
}
mCacheStats.miss();
// We must unpack the descriptor set layout description.
vk::DescriptorSetLayoutBindingVector bindingVector;
std::vector<VkSampler> immutableSamplers;
......@@ -3457,9 +3461,11 @@ angle::Result PipelineLayoutCache::getPipelineLayout(
{
vk::RefCountedPipelineLayout &layout = iter->second;
pipelineLayoutOut->set(&layout);
mCacheStats.hit();
return angle::Result::Continue;
}
mCacheStats.miss();
// Note this does not handle gaps in descriptor set layouts gracefully.
angle::FixedVector<VkDescriptorSetLayout, vk::kMaxDescriptorSetLayouts> setLayoutHandles;
for (const vk::BindingPointer<vk::DescriptorSetLayout> &layoutPtr : descriptorSetLayouts)
......@@ -3547,9 +3553,11 @@ angle::Result SamplerYcbcrConversionCache::getYuvConversion(
{
vk::RefCountedSamplerYcbcrConversion &yuvConversion = iter->second;
yuvConversionOut->set(&yuvConversion);
mCacheStats.hit();
return angle::Result::Continue;
}
mCacheStats.miss();
vk::SamplerYcbcrConversion wrappedYuvConversion;
ANGLE_VK_TRY(context, wrappedYuvConversion.init(context->getDevice(), yuvConversionCreateInfo));
......@@ -3612,9 +3620,11 @@ angle::Result SamplerCache::getSampler(ContextVk *contextVk,
{
vk::RefCountedSampler &sampler = iter->second;
samplerOut->set(&sampler);
mCacheStats.hit();
return angle::Result::Continue;
}
mCacheStats.miss();
vk::SamplerHelper samplerHelper(contextVk);
ANGLE_TRY(desc.init(contextVk, &samplerHelper.get()));
......@@ -3627,4 +3637,37 @@ angle::Result SamplerCache::getSampler(ContextVk *contextVk,
return angle::Result::Continue;
}
// FramebufferCache implementation.
// Looks up |desc| in the cache.  On a hit, stores the address of the cached helper in
// |framebufferHelperOut|, records a hit and returns true.  Otherwise records a miss and returns
// false without writing to |framebufferHelperOut|.  |contextVk| is not used by the lookup.
bool FramebufferCache::get(ContextVk *contextVk,
                           const vk::FramebufferDesc &desc,
                           vk::FramebufferHelper **framebufferHelperOut)
{
    auto cachedEntry = mPayload.find(desc);
    if (cachedEntry == mPayload.end())
    {
        mCacheStats.miss();
        return false;
    }

    mCacheStats.hit();
    *framebufferHelperOut = &cachedEntry->second;
    return true;
}
// Stores |framebufferHelper| in the cache under |desc|; the helper is handed over as an rvalue.
// NOTE(review): assuming angle::HashMap::emplace follows std::unordered_map semantics, an entry
// that already exists for |desc| is kept rather than replaced -- confirm.
void FramebufferCache::insert(const vk::FramebufferDesc &desc,
                              vk::FramebufferHelper &&framebufferHelper)
{
    mPayload.emplace(desc, std::move(framebufferHelper));
}
// Releases every cached framebuffer helper through |contextVk|, then empties the cache.  The
// FramebufferCache destructor asserts that the cache is empty, so this has to run before the
// cache is destroyed.
void FramebufferCache::clear(ContextVk *contextVk)
{
    for (auto &descAndHelper : mPayload)
    {
        descAndHelper.second.release(contextVk);
    }
    mPayload.clear();
}
} // namespace rx
......@@ -53,6 +53,7 @@ enum DescriptorSetIndex : uint32_t
namespace vk
{
class DynamicDescriptorPool;
class FramebufferHelper;
class ImageHelper;
enum class ImageLayout;
......@@ -1311,6 +1312,32 @@ ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)
namespace rx
{
// Base class for all caches. Provides cache hit and miss counters.
class CacheStats final : angle::NonCopyable
{
public:
CacheStats() : mHitCount(0), mMissCount(0) {}
~CacheStats() {}
ANGLE_INLINE void hit() { mHitCount++; }
ANGLE_INLINE void miss() { mMissCount++; }
ANGLE_INLINE double getHitRatio() const
{
if (mHitCount + mMissCount == 0)
{
return 0;
}
else
{
return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
}
}
private:
uint64_t mHitCount;
uint64_t mMissCount;
};
// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
......@@ -1332,9 +1359,11 @@ class RenderPassCache final : angle::NonCopyable
// Find the first element and return it.
*renderPassOut = &innerCache.begin()->second.getRenderPass();
mCompatibleRenderPassCacheStats.hit();
return angle::Result::Continue;
}
mCompatibleRenderPassCacheStats.miss();
return addRenderPass(contextVk, desc, renderPassOut);
}
......@@ -1360,6 +1389,8 @@ class RenderPassCache final : angle::NonCopyable
using OuterCache = angle::HashMap<vk::RenderPassDesc, InnerCache>;
OuterCache mPayload;
CacheStats mCompatibleRenderPassCacheStats;
CacheStats mRenderPassWithOpsCacheStats;
};
// TODO(jmadill): Add cache trimming/eviction.
......@@ -1393,9 +1424,11 @@ class GraphicsPipelineCache final : angle::NonCopyable
{
*descPtrOut = &item->first;
*pipelineOut = &item->second;
mCacheStats.hit();
return angle::Result::Continue;
}
mCacheStats.miss();
return insertPipeline(contextVk, pipelineCacheVk, compatibleRenderPass, pipelineLayout,
activeAttribLocationsMask, programAttribsTypeMask, vertexModule,
fragmentModule, geometryModule, specConsts, desc, descPtrOut,
......@@ -1418,6 +1451,7 @@ class GraphicsPipelineCache final : angle::NonCopyable
vk::PipelineHelper **pipelineOut);
std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload;
CacheStats mCacheStats;
};
class DescriptorSetLayoutCache final : angle::NonCopyable
......@@ -1435,6 +1469,7 @@ class DescriptorSetLayoutCache final : angle::NonCopyable
private:
std::unordered_map<vk::DescriptorSetLayoutDesc, vk::RefCountedDescriptorSetLayout> mPayload;
CacheStats mCacheStats;
};
class PipelineLayoutCache final : angle::NonCopyable
......@@ -1452,6 +1487,7 @@ class PipelineLayoutCache final : angle::NonCopyable
private:
std::unordered_map<vk::PipelineLayoutDesc, vk::RefCountedPipelineLayout> mPayload;
CacheStats mCacheStats;
};
class SamplerCache final : angle::NonCopyable
......@@ -1468,6 +1504,7 @@ class SamplerCache final : angle::NonCopyable
private:
std::unordered_map<vk::SamplerDesc, vk::RefCountedSampler> mPayload;
CacheStats mCacheStats;
};
// YuvConversion Cache
......@@ -1488,6 +1525,88 @@ class SamplerYcbcrConversionCache final : angle::NonCopyable
private:
std::unordered_map<uint64_t, vk::RefCountedSamplerYcbcrConversion> mPayload;
CacheStats mCacheStats;
};
// FramebufferVk Cache
class FramebufferCache final : angle::NonCopyable
{
public:
FramebufferCache() = default;
~FramebufferCache() { ASSERT(mPayload.empty()); }
bool get(ContextVk *contextVk,
const vk::FramebufferDesc &desc,
vk::FramebufferHelper **framebufferOut);
void insert(const vk::FramebufferDesc &desc, vk::FramebufferHelper &&framebufferHelper);
void clear(ContextVk *contextVk);
private:
angle::HashMap<vk::FramebufferDesc, vk::FramebufferHelper> mPayload;
CacheStats mCacheStats;
};
// DescriptorSet Cache
//
// Maps a uint32_t serial to a VkDescriptorSet and records lookup hits and misses.
class DriverUniformsDescriptorSetCache final : angle::NonCopyable
{
  public:
    DriverUniformsDescriptorSetCache() = default;
    // The cache must have been cleared before it is destroyed.
    ~DriverUniformsDescriptorSetCache() { ASSERT(mPayload.empty()); }

    // Forwards the lookup of |serial| to the underlying map, which fills |descriptorSet| on
    // success.  Returns whether the entry was found and counts the lookup as a hit or a miss.
    ANGLE_INLINE bool get(uint32_t serial, VkDescriptorSet *descriptorSet)
    {
        if (!mPayload.get(serial, descriptorSet))
        {
            mCacheStats.miss();
            return false;
        }

        mCacheStats.hit();
        return true;
    }

    // Stores |descriptorSet| under |serial|.
    ANGLE_INLINE void insert(uint32_t serial, VkDescriptorSet descriptorSet)
    {
        mPayload.insert(serial, descriptorSet);
    }

    // Drops every cached entry.
    ANGLE_INLINE void clear() { mPayload.clear(); }

  private:
    angle::FastIntegerMap<VkDescriptorSet> mPayload;
    CacheStats mCacheStats;
};
// Templated Descriptors Cache
//
// Maps a descriptor description of type |Key| to a VkDescriptorSet and records lookup hits and
// misses.
template <typename Key>
class DescriptorSetCache final : angle::NonCopyable
{
  public:
    DescriptorSetCache() = default;
    // The cache must have been cleared before it is destroyed.
    ~DescriptorSetCache() { ASSERT(mPayload.empty()); }

    // Looks up |desc|.  On a hit, copies the cached handle into |descriptorSet| and returns true;
    // on a miss, returns false without writing to |descriptorSet|.  Either way the lookup is
    // counted in the cache stats.
    ANGLE_INLINE bool get(const Key &desc, VkDescriptorSet *descriptorSet)
    {
        auto cachedEntry = mPayload.find(desc);
        if (cachedEntry == mPayload.end())
        {
            mCacheStats.miss();
            return false;
        }

        mCacheStats.hit();
        *descriptorSet = cachedEntry->second;
        return true;
    }

    // Stores |descriptorSet| under |desc|.
    ANGLE_INLINE void insert(const Key &desc, VkDescriptorSet descriptorSet)
    {
        mPayload.emplace(desc, descriptorSet);
    }

    // Drops every cached entry.
    ANGLE_INLINE void clear() { mPayload.clear(); }

  private:
    angle::HashMap<Key, VkDescriptorSet> mPayload;
    CacheStats mCacheStats;
};
// Only 1 driver uniform binding is used.
......
......@@ -9,6 +9,7 @@
#include "ANGLEPerfTest.h"
#include "libANGLE/renderer/vulkan/vk_cache_utils.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h"
#include "util/random_utils.h"
using namespace rx;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment