Commit 95935176 by Amy Liu Committed by Commit Bot

Handle the compression of big pipeline cache.

Big pipeline cache will cost much time to compress. Regarding the performance, handle the compression of big pipeline cache in this way: 1) Return when the pipeline cache data is larger than 10M. 2) Use a worker thread to complete compression. Bug: angleproject:4722 Change-Id: I62eb69d8c46729261f0502af01450ec301c258f3 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2788169 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org> Reviewed-by: 's avatarShahbaz Youssefi <syoussefi@chromium.org>
parent 2f808349
......@@ -70,6 +70,12 @@ struct FrontendFeatures : angle::FeatureSetBase
angle::FeatureCategory::FrontendFeatures,
"Set the context limits like frame capturing was enabled",
&members, "http://anglebug.com/5750"};
// Whether we should compress pipeline cache in thread pool before it's stored in blob cache.
// http://anglebug.com/4722
angle::Feature enableCompressingPipelineCacheInThreadPool = {
"enableCompressingPipelineCacheInThreadPool", angle::FeatureCategory::FrontendWorkarounds,
"Enable compressing pipeline cache in thread pool.", &members, "http://anglebug.com/4722"};
};
inline FrontendFeatures::FrontendFeatures() = default;
......
......@@ -34,9 +34,11 @@ enum CacheResult
// In order to store more cache in the blob cache, compress cacheData into compressedData
// before it is stored.
bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer *compressedData)
bool CompressBlobCacheData(const size_t cacheSize,
const uint8_t *cacheData,
angle::MemoryBuffer *compressedData)
{
uLong uncompressedSize = static_cast<uLong>(cacheData->size());
uLong uncompressedSize = static_cast<uLong>(cacheSize);
uLong expectedCompressedSize = zlib_internal::GzipExpectedCompressedSize(uncompressedSize);
// Allocate memory.
......@@ -46,9 +48,8 @@ bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer *
return false;
}
int zResult =
zlib_internal::GzipCompressHelper(compressedData->data(), &expectedCompressedSize,
cacheData->data(), uncompressedSize, nullptr, nullptr);
int zResult = zlib_internal::GzipCompressHelper(compressedData->data(), &expectedCompressedSize,
cacheData, uncompressedSize, nullptr, nullptr);
if (zResult != Z_OK)
{
......@@ -120,6 +121,7 @@ void BlobCache::put(const BlobCache::Key &key, angle::MemoryBuffer &&value)
void BlobCache::putApplication(const BlobCache::Key &key, const angle::MemoryBuffer &value)
{
std::lock_guard<std::mutex> lock(mBlobCacheMutex);
if (areBlobCacheFuncsSet())
{
mSetBlobFunc(key.data(), key.size(), value.data(), value.size());
......
......@@ -48,7 +48,9 @@ struct hash<egl::BlobCacheKey>
namespace egl
{
bool CompressBlobCacheData(angle::MemoryBuffer *cacheData, angle::MemoryBuffer *compressedData);
bool CompressBlobCacheData(const size_t cacheSize,
const uint8_t *cacheData,
angle::MemoryBuffer *compressedData);
bool DecompressBlobCacheData(const uint8_t *compressedData,
const size_t compressedSize,
angle::MemoryBuffer *uncompressedData);
......@@ -148,6 +150,8 @@ class BlobCache final : angle::NonCopyable
private:
// This internal cache is used only if the application is not providing caching callbacks
using CacheEntry = std::pair<angle::MemoryBuffer, CacheSource>;
std::mutex mBlobCacheMutex;
angle::SizedMRUCache<BlobCache::Key, CacheEntry> mBlobCache;
EGLSetBlobFuncANDROID mSetBlobFunc;
......
......@@ -3930,7 +3930,9 @@ void Context::updateCaps()
mValidBufferBindings.set(BufferBinding::Texture);
}
mThreadPool = angle::WorkerThreadPool::Create(mState.mExtensions.parallelShaderCompile);
mThreadPool = angle::WorkerThreadPool::Create(
mState.mExtensions.parallelShaderCompile ||
getFrontendFeatures().enableCompressingPipelineCacheInThreadPool.enabled);
// Reinitialize some dirty bits that depend on extensions.
if (mState.isRobustResourceInitEnabled())
......
......@@ -1856,6 +1856,10 @@ void Display::initializeFrontendFeatures()
mImplementation->initializeFrontendFeatures(&mFrontendFeatures);
rx::ApplyFeatureOverrides(&mFrontendFeatures, mState);
// Disabled by default. To reduce the risk, create a feature to enable
// compressing pipeline cache in multi-thread pool.
ANGLE_FEATURE_CONDITION(&mFrontendFeatures, enableCompressingPipelineCacheInThreadPool, false);
}
const DisplayExtensions &Display::getExtensions() const
......
......@@ -210,7 +210,8 @@ angle::Result MemoryProgramCache::putProgram(const egl::BlobCache::Key &programH
ANGLE_TRY(program->serialize(context, &serializedProgram));
angle::MemoryBuffer compressedData;
if (!egl::CompressBlobCacheData(&serializedProgram, &compressedData))
if (!egl::CompressBlobCacheData(serializedProgram.size(), serializedProgram.data(),
&compressedData))
{
ERR() << "Error compressing binary data.";
return angle::Result::Incomplete;
......
......@@ -505,28 +505,30 @@ void ComputePipelineCacheVkChunkKey(VkPhysicalDeviceProperties physicalDevicePro
hashString.length(), hashOut->data());
}
angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties,
DisplayVk *displayVk,
ContextVk *contextVk,
angle::MemoryBuffer *pipelineCacheData,
bool *success)
bool CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties,
DisplayVk *displayVk,
ContextVk *contextVk,
const std::vector<uint8_t> &cacheData,
const size_t maxTotalSize)
{
// There is a limitation in android, we can only store cache data less than 64kb in blob cache.
// So there is no use to handle big pipeline cache when android will reject it finally.
constexpr size_t kMaxTotalSize = 64 * 1024;
if (pipelineCacheData->size() >= kMaxTotalSize)
// Though the pipeline cache will be compressed and divided into several chunks to store in the
// blob cache, the largest total size of the blob cache is only 2M on Android now, so there is
// no use handling a big pipeline cache that Android will reject in the end.
if (cacheData.size() >= maxTotalSize)
{
// TODO: handle the big pipeline cache. http://anglebug.com/4722
ANGLE_PERF_WARNING(contextVk->getDebug(), GL_DEBUG_SEVERITY_LOW,
"Skip syncing pipeline cache data when it's larger than 64kb.");
return angle::Result::Continue;
"Skip syncing pipeline cache data when it's larger than maxTotalSize.");
return false;
}
// To make it possible to store more pipeline cache data, compress the whole pipelineCache.
angle::MemoryBuffer compressedData;
ANGLE_VK_CHECK(displayVk, egl::CompressBlobCacheData(pipelineCacheData, &compressedData),
VK_ERROR_INITIALIZATION_FAILED);
if (!egl::CompressBlobCacheData(cacheData.size(), cacheData.data(), &compressedData))
{
return false;
}
// If the size of compressedData is larger than (kMaxBlobCacheSize - sizeof(numChunks)),
// the pipelineCache still can't be stored in blob cache. Divide the large compressed
......@@ -553,8 +555,10 @@ angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physica
}
angle::MemoryBuffer keyData;
ANGLE_VK_CHECK(displayVk, keyData.resize(kBlobHeaderSize + chunkSize),
VK_ERROR_INITIALIZATION_FAILED);
if (!keyData.resize(kBlobHeaderSize + chunkSize))
{
return false;
}
ASSERT(numChunks <= UINT8_MAX);
keyData.data()[0] = static_cast<uint8_t>(numChunks);
......@@ -565,12 +569,59 @@ angle::Result CompressAndStorePipelineCacheVk(VkPhysicalDeviceProperties physica
// Create unique hash key.
egl::BlobCache::Key chunkCacheHash;
ComputePipelineCacheVkChunkKey(physicalDeviceProperties, chunkIndex, &chunkCacheHash);
displayVk->getBlobCache()->putApplication(chunkCacheHash, keyData);
}
*success = true;
return angle::Result::Continue;
return true;
}
class CompressAndStorePipelineCacheTask : public angle::Closure
{
public:
CompressAndStorePipelineCacheTask(DisplayVk *displayVk,
ContextVk *contextVk,
std::vector<uint8_t> &&cacheData,
size_t kMaxTotalSize)
: mDisplayVk(displayVk),
mContextVk(contextVk),
mCacheData(std::move(cacheData)),
mMaxTotalSize(kMaxTotalSize),
mResult(true)
{}
void operator()() override
{
ANGLE_TRACE_EVENT0("gpu.angle", "CompressAndStorePipelineCacheVk");
mResult = CompressAndStorePipelineCacheVk(
mContextVk->getRenderer()->getPhysicalDeviceProperties(), mDisplayVk, mContextVk,
mCacheData, mMaxTotalSize);
}
bool getResult() { return mResult; }
private:
DisplayVk *mDisplayVk;
ContextVk *mContextVk;
std::vector<uint8_t> mCacheData;
size_t mMaxTotalSize;
bool mResult;
};
// Binds a worker-pool WaitableEvent to the compression task it signals, so
// callers can both poll completion (isReady) and query whether the compression
// and blob-cache storage succeeded.
class WaitableCompressEventImpl : public WaitableCompressEvent
{
  public:
    WaitableCompressEventImpl(std::shared_ptr<angle::WaitableEvent> waitableEvent,
                              std::shared_ptr<CompressAndStorePipelineCacheTask> compressTask)
        // Move the shared_ptrs into place: the caller's copies are temporaries,
        // so copying them here would only add atomic refcount churn.
        : WaitableCompressEvent(std::move(waitableEvent)), mCompressTask(std::move(compressTask))
    {}

    // Valid only once isReady() is true; see the task's getResult().
    bool getResult() override { return mCompressTask->getResult(); }

  private:
    std::shared_ptr<CompressAndStorePipelineCacheTask> mCompressTask;
};
angle::Result GetAndDecompressPipelineCacheVk(VkPhysicalDeviceProperties physicalDeviceProperties,
DisplayVk *displayVk,
angle::MemoryBuffer *uncompressedData,
......@@ -765,6 +816,12 @@ void RendererVk::onDestroy(vk::Context *context)
mInstance = VK_NULL_HANDLE;
}
if (mCompressEvent)
{
mCompressEvent->wait();
mCompressEvent.reset();
}
mMemoryProperties.destroy();
mPhysicalDevice = VK_NULL_HANDLE;
}
......@@ -2452,7 +2509,7 @@ angle::Result RendererVk::getPipelineCacheSize(DisplayVk *displayVk, size_t *pip
return angle::Result::Continue;
}
angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *contextVk)
angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, const gl::Context *context)
{
// TODO: Synchronize access to the pipeline/blob caches?
ASSERT(mPipelineCache.valid());
......@@ -2480,13 +2537,23 @@ angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *c
return angle::Result::Continue;
}
angle::MemoryBuffer *pipelineCacheData = nullptr;
ANGLE_VK_CHECK_ALLOC(displayVk,
displayVk->getScratchBuffer(pipelineCacheSize, &pipelineCacheData));
ContextVk *contextVk = vk::GetImpl(context);
// Use worker thread pool to complete compression.
// If the last task hasn't been finished, skip the syncing.
if (mCompressEvent && (!mCompressEvent->isReady() || !mCompressEvent->getResult()))
{
ANGLE_PERF_WARNING(contextVk->getDebug(), GL_DEBUG_SEVERITY_LOW,
"Skip syncing pipeline cache data when the last task is not ready or "
"the compress task failed.");
return angle::Result::Continue;
}
std::vector<uint8_t> pipelineCacheData(pipelineCacheSize);
size_t oldPipelineCacheSize = pipelineCacheSize;
VkResult result =
mPipelineCache.getCacheData(mDevice, &pipelineCacheSize, pipelineCacheData->data());
mPipelineCache.getCacheData(mDevice, &pipelineCacheSize, pipelineCacheData.data());
// We don't need all of the cache data, so just make sure we at least got the header
// Vulkan Spec 9.6. Pipeline Cache
// https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/chap9.html#pipelines-cache
......@@ -2511,21 +2578,42 @@ angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *c
// If vkGetPipelineCacheData ends up writing fewer bytes than requested, zero out the rest of
// the buffer to avoid leaking garbage memory.
ASSERT(pipelineCacheSize <= pipelineCacheData->size());
if (pipelineCacheSize < pipelineCacheData->size())
ASSERT(pipelineCacheSize <= pipelineCacheData.size());
if (pipelineCacheSize < pipelineCacheData.size())
{
memset(pipelineCacheData->data() + pipelineCacheSize, 0,
pipelineCacheData->size() - pipelineCacheSize);
memset(pipelineCacheData.data() + pipelineCacheSize, 0,
pipelineCacheData.size() - pipelineCacheSize);
}
bool success = false;
ANGLE_TRY(CompressAndStorePipelineCacheVk(mPhysicalDeviceProperties, displayVk, contextVk,
pipelineCacheData, &success));
if (success)
if (context->getFrontendFeatures().enableCompressingPipelineCacheInThreadPool.enabled)
{
// The function zlib_internal::GzipCompressHelper() can compress 10M of pipeline cache data
// into about 2M; to save compression time, set kMaxTotalSize to 10M.
constexpr size_t kMaxTotalSize = 10 * 1024 * 1024;
// Create task to compress.
auto compressAndStorePipelineCacheTask =
std::make_shared<CompressAndStorePipelineCacheTask>(
displayVk, contextVk, std::move(pipelineCacheData), kMaxTotalSize);
mCompressEvent = std::make_shared<WaitableCompressEventImpl>(
angle::WorkerThreadPool::PostWorkerTask(context->getWorkerThreadPool(),
compressAndStorePipelineCacheTask),
compressAndStorePipelineCacheTask);
mPipelineCacheDirty = false;
}
else
{
// If enableCompressingPipelineCacheInThreadPool is disabled, to avoid the risk, set
// kMaxTotalSize to 64k.
constexpr size_t kMaxTotalSize = 64 * 1024;
bool compressResult = CompressAndStorePipelineCacheVk(
mPhysicalDeviceProperties, displayVk, contextVk, pipelineCacheData, kMaxTotalSize);
if (compressResult)
{
mPipelineCacheDirty = false;
}
}
return angle::Result::Continue;
}
......
......@@ -26,6 +26,7 @@
#include "common/vulkan/vulkan_icd.h"
#include "libANGLE/BlobCache.h"
#include "libANGLE/Caps.h"
#include "libANGLE/WorkerThread.h"
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/renderer/vulkan/DebugAnnotatorVk.h"
#include "libANGLE/renderer/vulkan/QueryVk.h"
......@@ -96,6 +97,25 @@ void CollectGarbage(std::vector<vk::GarbageObject> *garbageOut, ArgT object, Arg
CollectGarbage(garbageOut, objectsIn...);
}
// Base class wrapping the worker-thread WaitableEvent of a pipeline cache
// compression task.  Derived classes report whether the compression and
// blob-cache storage actually succeeded.
class WaitableCompressEvent
{
  public:
    // explicit: a WaitableEvent should never silently convert into this type.
    explicit WaitableCompressEvent(std::shared_ptr<angle::WaitableEvent> waitableEvent)
        : mWaitableEvent(std::move(waitableEvent))
    {}

    virtual ~WaitableCompressEvent() = default;

    // Blocks until the posted task has finished running.
    void wait() { mWaitableEvent->wait(); }

    // Non-blocking completion check.
    bool isReady() { return mWaitableEvent->isReady(); }

    // True if compression and storage succeeded; only meaningful once the
    // event is ready.
    virtual bool getResult() = 0;

  private:
    std::shared_ptr<angle::WaitableEvent> mWaitableEvent;
};
class RendererVk : angle::NonCopyable
{
public:
......@@ -165,7 +185,7 @@ class RendererVk : angle::NonCopyable
const vk::Format &getFormat(angle::FormatID formatID) const { return mFormatTable[formatID]; }
angle::Result getPipelineCacheSize(DisplayVk *displayVk, size_t *pipelineCacheSizeOut);
angle::Result syncPipelineCacheVk(DisplayVk *displayVk, ContextVk *contextVk);
angle::Result syncPipelineCacheVk(DisplayVk *displayVk, const gl::Context *context);
// Issues a new serial for linked shader modules. Used in the pipeline cache.
Serial issueShaderSerial();
......@@ -509,6 +529,9 @@ class RendererVk : angle::NonCopyable
// Note that this mask can have bits set that don't correspond to valid stages, so it's strictly
// only useful for masking out unsupported stages in an otherwise valid set of stages.
VkPipelineStageFlags mSupportedVulkanPipelineStageMask;
// Use thread pool to compress cache data.
std::shared_ptr<rx::WaitableCompressEvent> mCompressEvent;
};
} // namespace rx
......
......@@ -1537,7 +1537,7 @@ angle::Result WindowSurfaceVk::doDeferredAcquireNextImage(const gl::Context *con
}
RendererVk *renderer = contextVk->getRenderer();
ANGLE_TRY(renderer->syncPipelineCacheVk(displayVk, contextVk));
ANGLE_TRY(renderer->syncPipelineCacheVk(displayVk, context));
return angle::Result::Continue;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment