Commit 95935176 by Amy Liu Committed by Commit Bot

Handle the compression of big pipeline cache.

Big pipeline cache will cost much time to compress. Regarding the performance, handle the compression of big pipeline cache in this way: 1) Return when the pipeline cache data is larger than 10M. 2) Use worker thread to complete compression. Bug: angleproject:4722 Change-Id: I62eb69d8c46729261f0502af01450ec301c258f3 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2788169 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
parent 2f808349
...@@ -70,6 +70,12 @@ struct FrontendFeatures : angle::FeatureSetBase ...@@ -70,6 +70,12 @@ struct FrontendFeatures : angle::FeatureSetBase
angle::FeatureCategory::FrontendFeatures, angle::FeatureCategory::FrontendFeatures,
"Set the context limits like frame capturing was enabled", "Set the context limits like frame capturing was enabled",
&members, "http://anglebug.com/5750"}; &members, "http://anglebug.com/5750"};
// Whether we should compress pipeline cache in thread pool before it's stored in blob cache.
// http://anglebug.com/4722
angle::Feature enableCompressingPipelineCacheInThreadPool = {
"enableCompressingPipelineCacheInThreadPool", angle::FeatureCategory::FrontendWorkarounds,
"Enable compressing pipeline cache in thread pool.", &members, "http://anglebug.com/4722"};
}; };
inline FrontendFeatures::FrontendFeatures() = default; inline FrontendFeatures::FrontendFeatures() = default;
......
...@@ -34,9 +34,11 @@ enum CacheResult ...@@ -34,9 +34,11 @@ enum CacheResult
// In order to store more cache in blob cache, compress cacheData to compressedData // In order to store more cache in blob cache, compress cacheData to compressedData
// before being stored. // before being stored.
bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer *compressedData) bool CompressBlobCacheData(const size_t cacheSize,
const uint8_t *cacheData,
angle::MemoryBuffer *compressedData)
{ {
uLong uncompressedSize = static_cast<uLong>(cacheData->size()); uLong uncompressedSize = static_cast<uLong>(cacheSize);
uLong expectedCompressedSize = zlib_internal::GzipExpectedCompressedSize(uncompressedSize); uLong expectedCompressedSize = zlib_internal::GzipExpectedCompressedSize(uncompressedSize);
// Allocate memory. // Allocate memory.
...@@ -46,9 +48,8 @@ bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer * ...@@ -46,9 +48,8 @@ bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer *
return false; return false;
} }
int zResult = int zResult = zlib_internal::GzipCompressHelper(compressedData->data(), &expectedCompressedSize,
zlib_internal::GzipCompressHelper(compressedData->data(), &expectedCompressedSize, cacheData, uncompressedSize, nullptr, nullptr);
cacheData->data(), uncompressedSize, nullptr, nullptr);
if (zResult != Z_OK) if (zResult != Z_OK)
{ {
...@@ -120,6 +121,7 @@ void BlobCache::put(const BlobCache::Key &key, angle::MemoryBuffer &&value) ...@@ -120,6 +121,7 @@ void BlobCache::put(const BlobCache::Key &key, angle::MemoryBuffer &&value)
void BlobCache::putApplication(const BlobCache::Key &key, const angle::MemoryBuffer &value) void BlobCache::putApplication(const BlobCache::Key &key, const angle::MemoryBuffer &value)
{ {
std::lock_guard<std::mutex> lock(mBlobCacheMutex);
if (areBlobCacheFuncsSet()) if (areBlobCacheFuncsSet())
{ {
mSetBlobFunc(key.data(), key.size(), value.data(), value.size()); mSetBlobFunc(key.data(), key.size(), value.data(), value.size());
......
...@@ -48,7 +48,9 @@ struct hash<egl::BlobCacheKey> ...@@ -48,7 +48,9 @@ struct hash<egl::BlobCacheKey>
namespace egl namespace egl
{ {
bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer *compressedData); bool CompressBlobCacheData(const size_t cacheSize,
const uint8_t *cacheData,
angle::MemoryBuffer *compressedData);
bool DecompressBlobCacheData(const uint8_t *compressedData, bool DecompressBlobCacheData(const uint8_t *compressedData,
const size_t compressedSize, const size_t compressedSize,
angle::MemoryBuffer *uncompressedData); angle::MemoryBuffer *uncompressedData);
...@@ -148,6 +150,8 @@ class BlobCache final : angle::NonCopyable ...@@ -148,6 +150,8 @@ class BlobCache final : angle::NonCopyable
private: private:
// This internal cache is used only if the application is not providing caching callbacks // This internal cache is used only if the application is not providing caching callbacks
using CacheEntry = std::pair<angle::MemoryBuffer, CacheSource>; using CacheEntry = std::pair<angle::MemoryBuffer, CacheSource>;
std::mutex mBlobCacheMutex;
angle::SizedMRUCache<BlobCache::Key, CacheEntry> mBlobCache; angle::SizedMRUCache<BlobCache::Key, CacheEntry> mBlobCache;
EGLSetBlobFuncANDROID mSetBlobFunc; EGLSetBlobFuncANDROID mSetBlobFunc;
......
...@@ -3930,7 +3930,9 @@ void Context::updateCaps() ...@@ -3930,7 +3930,9 @@ void Context::updateCaps()
mValidBufferBindings.set(BufferBinding::Texture); mValidBufferBindings.set(BufferBinding::Texture);
} }
mThreadPool = angle::WorkerThreadPool::Create(mState.mExtensions.parallelShaderCompile); mThreadPool = angle::WorkerThreadPool::Create(
mState.mExtensions.parallelShaderCompile ||
getFrontendFeatures().enableCompressingPipelineCacheInThreadPool.enabled);
// Reinitialize some dirty bits that depend on extensions. // Reinitialize some dirty bits that depend on extensions.
if (mState.isRobustResourceInitEnabled()) if (mState.isRobustResourceInitEnabled())
......
...@@ -1856,6 +1856,10 @@ void Display::initializeFrontendFeatures() ...@@ -1856,6 +1856,10 @@ void Display::initializeFrontendFeatures()
mImplementation->initializeFrontendFeatures(&mFrontendFeatures); mImplementation->initializeFrontendFeatures(&mFrontendFeatures);
rx::ApplyFeatureOverrides(&mFrontendFeatures, mState); rx::ApplyFeatureOverrides(&mFrontendFeatures, mState);
// Disabled by default. To reduce the risk, create a feature to enable
// compressing pipeline cache in multi-thread pool.
ANGLE_FEATURE_CONDITION(&mFrontendFeatures, enableCompressingPipelineCacheInThreadPool, false);
} }
const DisplayExtensions &Display::getExtensions() const const DisplayExtensions &Display::getExtensions() const
......
...@@ -210,7 +210,8 @@ angle::Result MemoryProgramCache::putProgram(const egl::BlobCache::Key &programH ...@@ -210,7 +210,8 @@ angle::Result MemoryProgramCache::putProgram(const egl::BlobCache::Key &programH
ANGLE_TRY(program->serialize(context, &serializedProgram)); ANGLE_TRY(program->serialize(context, &serializedProgram));
angle::MemoryBuffer compressedData; angle::MemoryBuffer compressedData;
if (!egl::CompressBlobCacheData(&serializedProgram, &compressedData)) if (!egl::CompressBlobCacheData(serializedProgram.size(), serializedProgram.data(),
&compressedData))
{ {
ERR() << "Error compressing binary data."; ERR() << "Error compressing binary data.";
return angle::Result::Incomplete; return angle::Result::Incomplete;
......
...@@ -505,28 +505,30 @@ void ComputePipelineCacheVkChunkKey(VkPhysicalDeviceProperties physicalDevicePro ...@@ -505,28 +505,30 @@ void ComputePipelineCacheVkChunkKey(VkPhysicalDeviceProperties physicalDevicePro
hashString.length(), hashOut->data()); hashString.length(), hashOut->data());
} }
angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties, bool CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties,
DisplayVk *displayVk, DisplayVk *displayVk,
ContextVk *contextVk, ContextVk *contextVk,
angle::MemoryBuffer *pipelineCacheData, const std::vector<uint8_t> &cacheData,
bool *success) const size_t maxTotalSize)
{ {
// There is a limitation in android, we can only store cache data less than 64kb in blob cache. // Though the pipeline cache will be compressed and divided into several chunks to store in blob
// So there is no use to handle big pipeline cache when android will reject it finally. // cache, the largest total size of blob cache is only 2M in android now, so there is no use to
constexpr size_t kMaxTotalSize = 64 * 1024; // handle big pipeline cache when android will reject it finally.
if (cacheData.size() >= maxTotalSize)
if (pipelineCacheData->size() >= kMaxTotalSize)
{ {
// TODO: handle the big pipeline cache. http://anglebug.com/4722 // TODO: handle the big pipeline cache. http://anglebug.com/4722
ANGLE_PERF_WARNING(contextVk->getDebug(), GL_DEBUG_SEVERITY_LOW, ANGLE_PERF_WARNING(contextVk->getDebug(), GL_DEBUG_SEVERITY_LOW,
"Skip syncing pipeline cache data when it's larger than 64kb."); "Skip syncing pipeline cache data when it's larger than maxTotalSize.");
return angle::Result::Continue; return false;
} }
// To make it possible to store more pipeline cache data, compress the whole pipelineCache. // To make it possible to store more pipeline cache data, compress the whole pipelineCache.
angle::MemoryBuffer compressedData; angle::MemoryBuffer compressedData;
ANGLE_VK_CHECK(displayVk, egl::CompressBlobCacheData(pipelineCacheData, &compressedData),
VK_ERROR_INITIALIZATION_FAILED); if (!egl::CompressBlobCacheData(cacheData.size(), cacheData.data(), &compressedData))
{
return false;
}
// If the size of compressedData is larger than (kMaxBlobCacheSize - sizeof(numChunks)), // If the size of compressedData is larger than (kMaxBlobCacheSize - sizeof(numChunks)),
// the pipelineCache still can't be stored in blob cache. Divide the large compressed // the pipelineCache still can't be stored in blob cache. Divide the large compressed
...@@ -553,8 +555,10 @@ angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physica ...@@ -553,8 +555,10 @@ angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physica
} }
angle::MemoryBuffer keyData; angle::MemoryBuffer keyData;
ANGLE_VK_CHECK(displayVk, keyData.resize(kBlobHeaderSize + chunkSize), if (!keyData.resize(kBlobHeaderSize + chunkSize))
VK_ERROR_INITIALIZATION_FAILED); {
return false;
}
ASSERT(numChunks <= UINT8_MAX); ASSERT(numChunks <= UINT8_MAX);
keyData.data()[0] = static_cast<uint8_t>(numChunks); keyData.data()[0] = static_cast<uint8_t>(numChunks);
...@@ -565,12 +569,59 @@ angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physica ...@@ -565,12 +569,59 @@ angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physica
// Create unique hash key. // Create unique hash key.
egl::BlobCache::Key chunkCacheHash; egl::BlobCache::Key chunkCacheHash;
ComputePipelineCacheVkChunkKey(physicalDeviceProperties, chunkIndex, &chunkCacheHash); ComputePipelineCacheVkChunkKey(physicalDeviceProperties, chunkIndex, &chunkCacheHash);
displayVk->getBlobCache()->putApplication(chunkCacheHash, keyData); displayVk->getBlobCache()->putApplication(chunkCacheHash, keyData);
} }
*success = true;
return angle::Result::Continue; return true;
} }
class CompressAndStorePipelineCacheTask : public angle::Closure
{
public:
CompressAndStorePipelineCacheTask(DisplayVk *displayVk,
ContextVk *contextVk,
std::vector<uint8_t> &&cacheData,
size_t kMaxTotalSize)
: mDisplayVk(displayVk),
mContextVk(contextVk),
mCacheData(std::move(cacheData)),
mMaxTotalSize(kMaxTotalSize),
mResult(true)
{}
void operator()() override
{
ANGLE_TRACE_EVENT0("gpu.angle", "CompressAndStorePipelineCacheVk");
mResult = CompressAndStorePipelineCacheVk(
mContextVk->getRenderer()->getPhysicalDeviceProperties(), mDisplayVk, mContextVk,
mCacheData, mMaxTotalSize);
}
bool getResult() { return mResult; }
private:
DisplayVk *mDisplayVk;
ContextVk *mContextVk;
std::vector<uint8_t> mCacheData;
size_t mMaxTotalSize;
bool mResult;
};
class WaitableCompressEventImpl : public WaitableCompressEvent
{
public:
WaitableCompressEventImpl(std::shared_ptr<angle::WaitableEvent> waitableEvent,
std::shared_ptr<CompressAndStorePipelineCacheTask> compressTask)
: WaitableCompressEvent(waitableEvent), mCompressTask(compressTask)
{}
bool getResult() override { return mCompressTask->getResult(); }
private:
std::shared_ptr<CompressAndStorePipelineCacheTask> mCompressTask;
};
angle::Result GetAndDecompressPipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties, angle::Result GetAndDecompressPipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties,
DisplayVk *displayVk, DisplayVk *displayVk,
angle::MemoryBuffer *uncompressedData, angle::MemoryBuffer *uncompressedData,
...@@ -765,6 +816,12 @@ void RendererVk::onDestroy(vk::Context *context) ...@@ -765,6 +816,12 @@ void RendererVk::onDestroy(vk::Context *context)
mInstance = VK_NULL_HANDLE; mInstance = VK_NULL_HANDLE;
} }
if (mCompressEvent)
{
mCompressEvent->wait();
mCompressEvent.reset();
}
mMemoryProperties.destroy(); mMemoryProperties.destroy();
mPhysicalDevice = VK_NULL_HANDLE; mPhysicalDevice = VK_NULL_HANDLE;
} }
...@@ -2452,7 +2509,7 @@ angle::Result RendererVk::getPipelineCacheSize(DisplayVk *displayVk, size_t *pip ...@@ -2452,7 +2509,7 @@ angle::Result RendererVk::getPipelineCacheSize(DisplayVk *displayVk, size_t *pip
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *contextVk) angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, const gl::Context *context)
{ {
// TODO: Synchronize access to the pipeline/blob caches? // TODO: Synchronize access to the pipeline/blob caches?
ASSERT(mPipelineCache.valid()); ASSERT(mPipelineCache.valid());
...@@ -2480,13 +2537,23 @@ angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *c ...@@ -2480,13 +2537,23 @@ angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *c
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::MemoryBuffer *pipelineCacheData = nullptr; ContextVk *contextVk = vk::GetImpl(context);
ANGLE_VK_CHECK_ALLOC(displayVk,
displayVk->getScratchBuffer(pipelineCacheSize, &pipelineCacheData)); // Use worker thread pool to complete compression.
// If the last task hasn't been finished, skip the syncing.
if (mCompressEvent && (!mCompressEvent->isReady() || !mCompressEvent->getResult()))
{
ANGLE_PERF_WARNING(contextVk->getDebug(), GL_DEBUG_SEVERITY_LOW,
"Skip syncing pipeline cache data when the last task is not ready or "
"the compress task failed.");
return angle::Result::Continue;
}
std::vector<uint8_t> pipelineCacheData(pipelineCacheSize);
size_t oldPipelineCacheSize = pipelineCacheSize; size_t oldPipelineCacheSize = pipelineCacheSize;
VkResult result = VkResult result =
mPipelineCache.getCacheData(mDevice, &pipelineCacheSize, pipelineCacheData->data()); mPipelineCache.getCacheData(mDevice, &pipelineCacheSize, pipelineCacheData.data());
// We don't need all of the cache data, so just make sure we at least got the header // We don't need all of the cache data, so just make sure we at least got the header
// Vulkan Spec 9.6. Pipeline Cache // Vulkan Spec 9.6. Pipeline Cache
// https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/chap9.html#pipelines-cache // https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/chap9.html#pipelines-cache
...@@ -2511,21 +2578,42 @@ angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *c ...@@ -2511,21 +2578,42 @@ angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *c
// If vkGetPipelineCacheData ends up writing fewer bytes than requested, zero out the rest of // If vkGetPipelineCacheData ends up writing fewer bytes than requested, zero out the rest of
// the buffer to avoid leaking garbage memory. // the buffer to avoid leaking garbage memory.
ASSERT(pipelineCacheSize <= pipelineCacheData->size()); ASSERT(pipelineCacheSize <= pipelineCacheData.size());
if (pipelineCacheSize < pipelineCacheData->size()) if (pipelineCacheSize < pipelineCacheData.size())
{ {
memset(pipelineCacheData->data() + pipelineCacheSize, 0, memset(pipelineCacheData.data() + pipelineCacheSize, 0,
pipelineCacheData->size() - pipelineCacheSize); pipelineCacheData.size() - pipelineCacheSize);
} }
bool success = false; if (context->getFrontendFeatures().enableCompressingPipelineCacheInThreadPool.enabled)
ANGLE_TRY(CompressAndStorePipelineCacheVk(mPhysicalDeviceProperties, displayVk, contextVk,
pipelineCacheData, &success));
if (success)
{ {
// The function zlib_internal::GzipCompressHelper() can compress 10M pipeline cache data
// into about 2M, to save the time of compression, set kMaxTotalSize to 10M.
constexpr size_t kMaxTotalSize = 10 * 1024 * 1024;
// Create task to compress.
auto compressAndStorePipelineCacheTask =
std::make_shared<CompressAndStorePipelineCacheTask>(
displayVk, contextVk, std::move(pipelineCacheData), kMaxTotalSize);
mCompressEvent = std::make_shared<WaitableCompressEventImpl>(
angle::WorkerThreadPool::PostWorkerTask(context->getWorkerThreadPool(),
compressAndStorePipelineCacheTask),
compressAndStorePipelineCacheTask);
mPipelineCacheDirty = false; mPipelineCacheDirty = false;
} }
else
{
// If enableCompressingPipelineCacheInThreadPool is disabled, to avoid the risk, set
// kMaxTotalSize to 64k.
constexpr size_t kMaxTotalSize = 64 * 1024;
bool compressResult = CompressAndStorePipelineCacheVk(
mPhysicalDeviceProperties, displayVk, contextVk, pipelineCacheData, kMaxTotalSize);
if (compressResult)
{
mPipelineCacheDirty = false;
}
}
return angle::Result::Continue; return angle::Result::Continue;
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "common/vulkan/vulkan_icd.h" #include "common/vulkan/vulkan_icd.h"
#include "libANGLE/BlobCache.h" #include "libANGLE/BlobCache.h"
#include "libANGLE/Caps.h" #include "libANGLE/Caps.h"
#include "libANGLE/WorkerThread.h"
#include "libANGLE/renderer/vulkan/CommandProcessor.h" #include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/renderer/vulkan/DebugAnnotatorVk.h" #include "libANGLE/renderer/vulkan/DebugAnnotatorVk.h"
#include "libANGLE/renderer/vulkan/QueryVk.h" #include "libANGLE/renderer/vulkan/QueryVk.h"
...@@ -96,6 +97,25 @@ void CollectGarbage(std::vector<vk::GarbageObject> *garbageOut, ArgT object, Arg ...@@ -96,6 +97,25 @@ void CollectGarbage(std::vector<vk::GarbageObject> *garbageOut, ArgT object, Arg
CollectGarbage(garbageOut, objectsIn...); CollectGarbage(garbageOut, objectsIn...);
} }
class WaitableCompressEvent
{
public:
WaitableCompressEvent(std::shared_ptr<angle::WaitableEvent> waitableEvent)
: mWaitableEvent(waitableEvent)
{}
virtual ~WaitableCompressEvent() {}
void wait() { return mWaitableEvent->wait(); }
bool isReady() { return mWaitableEvent->isReady(); }
virtual bool getResult() = 0;
private:
std::shared_ptr<angle::WaitableEvent> mWaitableEvent;
};
class RendererVk : angle::NonCopyable class RendererVk : angle::NonCopyable
{ {
public: public:
...@@ -165,7 +185,7 @@ class RendererVk : angle::NonCopyable ...@@ -165,7 +185,7 @@ class RendererVk : angle::NonCopyable
const vk::Format &getFormat(angle::FormatID formatID) const { return mFormatTable[formatID]; } const vk::Format &getFormat(angle::FormatID formatID) const { return mFormatTable[formatID]; }
angle::Result getPipelineCacheSize(DisplayVk *displayVk, size_t *pipelineCacheSizeOut); angle::Result getPipelineCacheSize(DisplayVk *displayVk, size_t *pipelineCacheSizeOut);
angle::Result syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *contextVk); angle::Result syncPipelineCacheVk(DisplayVk *displayVk, const gl::Context *context);
// Issues a new serial for linked shader modules. Used in the pipeline cache. // Issues a new serial for linked shader modules. Used in the pipeline cache.
Serial issueShaderSerial(); Serial issueShaderSerial();
...@@ -509,6 +529,9 @@ class RendererVk : angle::NonCopyable ...@@ -509,6 +529,9 @@ class RendererVk : angle::NonCopyable
// Note that this mask can have bits set that don't correspond to valid stages, so it's strictly // Note that this mask can have bits set that don't correspond to valid stages, so it's strictly
// only useful for masking out unsupported stages in an otherwise valid set of stages. // only useful for masking out unsupported stages in an otherwise valid set of stages.
VkPipelineStageFlags mSupportedVulkanPipelineStageMask; VkPipelineStageFlags mSupportedVulkanPipelineStageMask;
// Use thread pool to compress cache data.
std::shared_ptr<rx::WaitableCompressEvent> mCompressEvent;
}; };
} // namespace rx } // namespace rx
......
...@@ -1537,7 +1537,7 @@ angle::Result WindowSurfaceVk::doDeferredAcquireNextImage(const gl::Context *con ...@@ -1537,7 +1537,7 @@ angle::Result WindowSurfaceVk::doDeferredAcquireNextImage(const gl::Context *con
} }
RendererVk *renderer = contextVk->getRenderer(); RendererVk *renderer = contextVk->getRenderer();
ANGLE_TRY(renderer->syncPipelineCacheVk(displayVk, contextVk)); ANGLE_TRY(renderer->syncPipelineCacheVk(displayVk, context));
return angle::Result::Continue; return angle::Result::Continue;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment