Commit 52edb176 authored by Alexis Hétu, committed by Alexis Hétu

PipelineCache implementation

The PipelineCache class now contains members for caching programs and shaders, which allows significant speedups by preventing recompilation of the same shaders over and over again. For now, each PipelineCache object contains its own cache, and the cache is not shared between PipelineCache objects. It remains to be seen whether SwiftShader would benefit from always caching shaders and programs in a global cache. Notes: - Merging 2 PipelineCache objects was also implemented - Added a few "const" where appropriate. IMPORTANT NOTE: This CL DOES NOT allow pipeline caches to be saved and loaded through the pipeline cache data. Bug: b/123588002 Change-Id: I95b183033c03e114d69d4432e5831e26be477033 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33428 Tested-by: Alexis Hétu <sugoi@google.com> Presubmit-Ready: Alexis Hétu <sugoi@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 83dd452c
......@@ -253,7 +253,7 @@ namespace sw
using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>;
std::queue<Coroutine> coroutines;
if (shader->getModes().ContainsControlBarriers)
if (modes.ContainsControlBarriers)
{
// Make a function call per subgroup so each subgroup
// can yield, bringing all subgroups to the barrier
......
......@@ -409,7 +409,7 @@ namespace sw
VkShaderStageFlagBits pipelineStage,
const char *entryPointName,
InsnStore const &insns,
vk::RenderPass *renderPass,
const vk::RenderPass *renderPass,
uint32_t subpassIndex)
: insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
outputs{MAX_INTERFACE_COMPONENTS},
......
......@@ -615,7 +615,7 @@ namespace sw
VkShaderStageFlagBits stage,
const char *entryPointName,
InsnStore const &insns,
vk::RenderPass *renderPass,
const vk::RenderPass *renderPass,
uint32_t subpassIndex);
struct Modes
......
......@@ -25,7 +25,7 @@ namespace rr
class StreamBase
{
protected:
StreamBase(std::shared_ptr<Routine> &routine, Nucleus::CoroutineHandle handle)
StreamBase(const std::shared_ptr<Routine> &routine, Nucleus::CoroutineHandle handle)
: routine(routine), handle(handle) {}
~StreamBase()
......@@ -52,7 +52,7 @@ private:
class Stream : public StreamBase
{
public:
inline Stream(std::shared_ptr<Routine> &routine, Nucleus::CoroutineHandle handle)
inline Stream(const std::shared_ptr<Routine> &routine, Nucleus::CoroutineHandle handle)
: StreamBase(routine, handle) {}
// await() retrieves the next yielded value from the coroutine.
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "VkPipeline.hpp"
#include "VkPipelineCache.hpp"
#include "VkPipelineLayout.hpp"
#include "VkShaderModule.hpp"
#include "VkRenderPass.hpp"
......@@ -219,6 +220,30 @@ std::vector<uint32_t> preprocessSpirv(
return optimized;
}
// Builds a SpirvShader from a cache key: preprocesses the module's SPIR-V
// (applying any specialization constants) and wraps it in a shared_ptr so it
// can live in the pipeline cache.
std::shared_ptr<sw::SpirvShader> createShader(const vk::PipelineCache::SpirvShaderKey& key, const vk::ShaderModule *module)
{
	auto spirv = preprocessSpirv(key.getInsns(), key.getSpecializationInfo());
	ASSERT(spirv.size() > 0);

	// If the pipeline has specialization constants, assume they're unique and
	// use a new serial ID so the shader gets recompiled.
	const uint32_t serialID = key.getSpecializationInfo() ? vk::ShaderModule::nextSerialID()
	                                                      : module->getSerialID();

	// TODO(b/119409619): use allocator.
	return std::make_shared<sw::SpirvShader>(serialID, key.getPipelineStage(),
	                                         key.getEntryPointName().c_str(), spirv,
	                                         key.getRenderPass(), key.getSubpassIndex());
}
// Builds and finalizes a ComputeProgram for the given (shader, layout) key.
std::shared_ptr<sw::ComputeProgram> createProgram(const vk::PipelineCache::ComputeProgramKey& key)
{
	// FIXME(b/129523279): Delay code generation until invoke time.
	vk::DescriptorSet::Bindings bindings;

	// TODO(b/119409619): use allocator.
	auto computeProgram = std::make_shared<sw::ComputeProgram>(key.getShader(), key.getLayout(), bindings);
	computeProgram->generate();
	computeProgram->finalize();

	return computeProgram;
}
} // anonymous namespace
namespace vk
......@@ -432,8 +457,8 @@ GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateIn
void GraphicsPipeline::destroyPipeline(const VkAllocationCallbacks* pAllocator)
{
delete vertexShader;
delete fragmentShader;
vertexShader.reset();
fragmentShader.reset();
}
size_t GraphicsPipeline::ComputeRequiredAllocationSize(const VkGraphicsPipelineCreateInfo* pCreateInfo)
......@@ -441,7 +466,43 @@ size_t GraphicsPipeline::ComputeRequiredAllocationSize(const VkGraphicsPipelineC
return 0;
}
void GraphicsPipeline::compileShaders(const VkAllocationCallbacks* pAllocator, const VkGraphicsPipelineCreateInfo* pCreateInfo)
// Records the compiled shader for the given stage and publishes the raw
// pointer into the draw context. Each stage may only be set once per pipeline.
void GraphicsPipeline::setShader(const VkShaderStageFlagBits& stage, const std::shared_ptr<sw::SpirvShader> spirvShader)
{
	if(stage == VK_SHADER_STAGE_VERTEX_BIT)
	{
		ASSERT(vertexShader.get() == nullptr);
		vertexShader = spirvShader;
		context.vertexShader = vertexShader.get();
	}
	else if(stage == VK_SHADER_STAGE_FRAGMENT_BIT)
	{
		ASSERT(fragmentShader.get() == nullptr);
		fragmentShader = spirvShader;
		context.pixelShader = fragmentShader.get();
	}
	else
	{
		// Only vertex and fragment stages are supported here.
		UNSUPPORTED("Unsupported stage");
	}
}
// Returns the shader previously stored for the given stage (may be null if
// none was set). Only vertex and fragment stages are supported.
const std::shared_ptr<sw::SpirvShader> GraphicsPipeline::getShader(const VkShaderStageFlagBits& stage) const
{
	if(stage == VK_SHADER_STAGE_VERTEX_BIT)
	{
		return vertexShader;
	}

	if(stage == VK_SHADER_STAGE_FRAGMENT_BIT)
	{
		return fragmentShader;
	}

	UNSUPPORTED("Unsupported stage");
	return fragmentShader;
}
void GraphicsPipeline::compileShaders(const VkAllocationCallbacks* pAllocator, const VkGraphicsPipelineCreateInfo* pCreateInfo, PipelineCache* pPipelineCache)
{
for (auto pStage = pCreateInfo->pStages; pStage != pCreateInfo->pStages + pCreateInfo->stageCount; pStage++)
{
......@@ -451,30 +512,29 @@ void GraphicsPipeline::compileShaders(const VkAllocationCallbacks* pAllocator, c
}
const ShaderModule *module = vk::Cast(pStage->module);
auto code = preprocessSpirv(module->getCode(), pStage->pSpecializationInfo);
// If the pipeline has specialization constants, assume they're unique and
// use a new serial ID so the shader gets recompiled.
uint32_t codeSerialID = (pStage->pSpecializationInfo ? ShaderModule::nextSerialID() : module->getSerialID());
// FIXME (b/119409619): use an allocator here so we can control all memory allocations
// TODO: also pass in any pipeline state which will affect shader compilation
auto spirvShader = new sw::SpirvShader(codeSerialID, pStage->stage, pStage->pName, code, vk::Cast(pCreateInfo->renderPass), pCreateInfo->subpass);
switch (pStage->stage)
const PipelineCache::SpirvShaderKey key(pStage->stage, pStage->pName, module->getCode(),
vk::Cast(pCreateInfo->renderPass), pCreateInfo->subpass,
pStage->pSpecializationInfo);
if(pPipelineCache)
{
case VK_SHADER_STAGE_VERTEX_BIT:
ASSERT(vertexShader == nullptr);
context.vertexShader = vertexShader = spirvShader;
break;
case VK_SHADER_STAGE_FRAGMENT_BIT:
ASSERT(fragmentShader == nullptr);
context.pixelShader = fragmentShader = spirvShader;
break;
default:
UNIMPLEMENTED("Unsupported stage");
PipelineCache& pipelineCache = *pPipelineCache;
{
std::unique_lock<std::mutex> lock(pipelineCache.getShaderMutex());
const std::shared_ptr<sw::SpirvShader>* spirvShader = pipelineCache[key];
if(!spirvShader)
{
setShader(key.getPipelineStage(), createShader(key, module));
pipelineCache.insert(key, getShader(key.getPipelineStage()));
}
else
{
setShader(key.getPipelineStage(), *spirvShader);
}
}
}
else
{
setShader(key.getPipelineStage(), createShader(key, module));
}
}
}
......@@ -534,8 +594,8 @@ ComputePipeline::ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo,
void ComputePipeline::destroyPipeline(const VkAllocationCallbacks* pAllocator)
{
delete shader;
delete program;
shader.reset();
program.reset();
}
size_t ComputePipeline::ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo* pCreateInfo)
......@@ -543,27 +603,54 @@ size_t ComputePipeline::ComputeRequiredAllocationSize(const VkComputePipelineCre
return 0;
}
void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo)
void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo, PipelineCache* pPipelineCache)
{
auto &stage = pCreateInfo->stage;
const ShaderModule *module = vk::Cast(stage.module);
auto code = preprocessSpirv(module->getCode(), stage.pSpecializationInfo);
ASSERT_OR_RETURN(code.size() > 0);
ASSERT(shader.get() == nullptr);
ASSERT(program.get() == nullptr);
ASSERT(shader == nullptr);
// If the pipeline has specialization constants, assume they're unique and
// use a new serial ID so the shader gets recompiled.
uint32_t codeSerialID = (stage.pSpecializationInfo ? ShaderModule::nextSerialID() : module->getSerialID());
const PipelineCache::SpirvShaderKey shaderKey(
stage.stage, stage.pName, module->getCode(), nullptr, 0, stage.pSpecializationInfo);
if(pPipelineCache)
{
PipelineCache& pipelineCache = *pPipelineCache;
{
std::unique_lock<std::mutex> lock(pipelineCache.getShaderMutex());
const std::shared_ptr<sw::SpirvShader>* spirvShader = pipelineCache[shaderKey];
if(!spirvShader)
{
shader = createShader(shaderKey, module);
pipelineCache.insert(shaderKey, shader);
}
else
{
shader = *spirvShader;
}
}
// TODO(b/119409619): use allocator.
shader = new sw::SpirvShader(codeSerialID, stage.stage, stage.pName, code, nullptr, 0);
vk::DescriptorSet::Bindings descriptorSets; // FIXME(b/129523279): Delay code generation until invoke time.
program = new sw::ComputeProgram(shader, layout, descriptorSets);
program->generate();
program->finalize();
{
const PipelineCache::ComputeProgramKey programKey(shader.get(), layout);
std::unique_lock<std::mutex> lock(pipelineCache.getProgramMutex());
const std::shared_ptr<sw::ComputeProgram>* computeProgram = pipelineCache[programKey];
if(!computeProgram)
{
program = createProgram(programKey);
pipelineCache.insert(programKey, program);
}
else
{
program = *computeProgram;
}
}
}
else
{
shader = createShader(shaderKey, module);
const PipelineCache::ComputeProgramKey programKey(shader.get(), layout);
program = createProgram(programKey);
}
}
void ComputePipeline::run(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
......
......@@ -17,7 +17,9 @@
#include "VkObject.hpp"
#include "Vulkan/VkDescriptorSet.hpp"
#include "Vulkan/VkPipelineCache.hpp"
#include "Device/Renderer.hpp"
#include <memory>
namespace sw
{
......@@ -28,7 +30,9 @@ namespace sw
namespace vk
{
class PipelineCache;
class PipelineLayout;
class ShaderModule;
class Pipeline
{
......@@ -66,6 +70,8 @@ class GraphicsPipeline : public Pipeline, public ObjectBase<GraphicsPipeline, Vk
{
public:
GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateInfo, void* mem);
virtual ~GraphicsPipeline() = default;
void destroyPipeline(const VkAllocationCallbacks* pAllocator) override;
#ifndef NDEBUG
......@@ -77,7 +83,7 @@ public:
static size_t ComputeRequiredAllocationSize(const VkGraphicsPipelineCreateInfo* pCreateInfo);
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkGraphicsPipelineCreateInfo* pCreateInfo);
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkGraphicsPipelineCreateInfo* pCreateInfo, PipelineCache* pipelineCache);
uint32_t computePrimitiveCount(uint32_t vertexCount) const;
const sw::Context& getContext() const;
......@@ -88,8 +94,10 @@ public:
bool hasPrimitiveRestartEnable() const { return primitiveRestartEnable; }
private:
sw::SpirvShader *vertexShader = nullptr;
sw::SpirvShader *fragmentShader = nullptr;
void setShader(const VkShaderStageFlagBits& stage, const std::shared_ptr<sw::SpirvShader> spirvShader);
const std::shared_ptr<sw::SpirvShader> getShader(const VkShaderStageFlagBits& stage) const;
std::shared_ptr<sw::SpirvShader> vertexShader;
std::shared_ptr<sw::SpirvShader> fragmentShader;
uint32_t dynamicStateFlags = 0;
bool primitiveRestartEnable = false;
......@@ -103,6 +111,8 @@ class ComputePipeline : public Pipeline, public ObjectBase<ComputePipeline, VkPi
{
public:
ComputePipeline(const VkComputePipelineCreateInfo* pCreateInfo, void* mem);
virtual ~ComputePipeline() = default;
void destroyPipeline(const VkAllocationCallbacks* pAllocator) override;
#ifndef NDEBUG
......@@ -114,7 +124,7 @@ public:
static size_t ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo* pCreateInfo);
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo);
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo, PipelineCache* pipelineCache);
void run(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
......@@ -123,8 +133,8 @@ public:
sw::PushConstantStorage const &pushConstants);
protected:
sw::SpirvShader *shader = nullptr;
sw::ComputeProgram *program = nullptr;
std::shared_ptr<sw::SpirvShader> shader;
std::shared_ptr<sw::ComputeProgram> program;
};
static inline Pipeline* Cast(VkPipeline object)
......
......@@ -18,6 +18,136 @@
namespace vk
{
// Takes a deep copy of 'specializationInfo' (the struct itself, its map
// entries and its data blob), so the key stays valid after the
// application-owned memory goes out of scope. A null input leaves 'info' null.
PipelineCache::SpirvShaderKey::SpecializationInfo::SpecializationInfo(const VkSpecializationInfo* specializationInfo)
{
	if(specializationInfo)
	{
		info = reinterpret_cast<VkSpecializationInfo*>(
			allocate(sizeof(VkSpecializationInfo), REQUIRED_MEMORY_ALIGNMENT, DEVICE_MEMORY));

		// allocate() is not guaranteed to zero the memory, and the destructor
		// unconditionally deallocates pMapEntries and pData. Null them here so
		// an empty map-entry list or empty data blob doesn't leave the
		// destructor freeing uninitialized pointers.
		info->pMapEntries = nullptr;
		info->pData = nullptr;

		info->mapEntryCount = specializationInfo->mapEntryCount;
		if(specializationInfo->mapEntryCount > 0)
		{
			size_t entriesSize = specializationInfo->mapEntryCount * sizeof(VkSpecializationMapEntry);
			VkSpecializationMapEntry* mapEntries = reinterpret_cast<VkSpecializationMapEntry*>(
				allocate(entriesSize, REQUIRED_MEMORY_ALIGNMENT, DEVICE_MEMORY));
			memcpy(mapEntries, specializationInfo->pMapEntries, entriesSize);
			info->pMapEntries = mapEntries;
		}

		info->dataSize = specializationInfo->dataSize;
		if(specializationInfo->dataSize > 0)
		{
			void* data = allocate(specializationInfo->dataSize, REQUIRED_MEMORY_ALIGNMENT, DEVICE_MEMORY);
			memcpy(data, specializationInfo->pData, specializationInfo->dataSize);
			info->pData = data;
		}
	}
}
// Releases the deep-copied specialization data, if any.
PipelineCache::SpirvShaderKey::SpecializationInfo::~SpecializationInfo()
{
	if(!info)
	{
		return;
	}

	deallocate(const_cast<VkSpecializationMapEntry*>(info->pMapEntries), DEVICE_MEMORY);
	deallocate(const_cast<void*>(info->pData), DEVICE_MEMORY);
	deallocate(info, DEVICE_MEMORY);
}
// Strict weak ordering over the specialization *content*, so that identical
// specialization data produces equivalent cache keys (and therefore cache
// hits) regardless of which VkSpecializationInfo pointer the application
// passed in. The previous fallback compared the 'info' pointers themselves,
// which made content-equal infos at different addresses compare unequal
// (spurious cache misses) and compared unrelated pointers with '<'.
bool PipelineCache::SpirvShaderKey::SpecializationInfo::operator<(const SpecializationInfo& specializationInfo) const
{
	// Presence first: a null info orders before a non-null one; two null
	// infos are equivalent.
	if(!info || !specializationInfo.info)
	{
		return !info && (specializationInfo.info != nullptr);
	}

	// Compare the cheap size fields before the variable-length payloads.
	if(info->mapEntryCount != specializationInfo.info->mapEntryCount)
	{
		return info->mapEntryCount < specializationInfo.info->mapEntryCount;
	}

	if(info->dataSize != specializationInfo.info->dataSize)
	{
		return info->dataSize < specializationInfo.info->dataSize;
	}

	if(info->mapEntryCount > 0)
	{
		int cmp = memcmp(info->pMapEntries, specializationInfo.info->pMapEntries, info->mapEntryCount * sizeof(VkSpecializationMapEntry));
		if(cmp != 0)
		{
			return cmp < 0;
		}
	}

	if(info->dataSize > 0)
	{
		int cmp = memcmp(info->pData, specializationInfo.info->pData, info->dataSize);
		if(cmp != 0)
		{
			return cmp < 0;
		}
	}

	// Contents are identical: the keys are equivalent.
	return false;
}
// Captures everything that affects shader compilation into a cache key.
// The entry point name, SPIR-V instructions and specialization info are
// copied (SpecializationInfo deep-copies), so the key does not reference
// application-owned memory; renderPass is stored as a raw pointer —
// presumably the render pass outlives the cache entry (TODO confirm).
PipelineCache::SpirvShaderKey::SpirvShaderKey(const VkShaderStageFlagBits pipelineStage,
const std::string& entryPointName,
const std::vector<uint32_t>& insns,
const vk::RenderPass *renderPass,
const uint32_t subpassIndex,
const VkSpecializationInfo* specializationInfo) :
pipelineStage(pipelineStage),
entryPointName(entryPointName),
insns(insns),
renderPass(renderPass),
subpassIndex(subpassIndex),
specializationInfo(specializationInfo)
{
}
// Strict weak ordering over all key fields, for use by std::map.
// Cheap scalar fields are compared first, then container sizes, then the
// actual byte contents, so unequal keys usually diverge before any memcmp.
// The resulting order is arbitrary but consistent — only equivalence
// (neither a < b nor b < a) matters for cache lookups.
bool PipelineCache::SpirvShaderKey::operator<(const SpirvShaderKey &other) const
{
if(pipelineStage != other.pipelineStage)
{
return pipelineStage < other.pipelineStage;
}
if(renderPass != other.renderPass)
{
// Render passes are distinguished by pointer identity.
return renderPass < other.renderPass;
}
if(subpassIndex != other.subpassIndex)
{
return subpassIndex < other.subpassIndex;
}
if(insns.size() != other.insns.size())
{
return insns.size() < other.insns.size();
}
if(entryPointName.size() != other.entryPointName.size())
{
return entryPointName.size() < other.entryPointName.size();
}
// Sizes are equal at this point, so memcmp over either size is safe.
int cmp = memcmp(entryPointName.c_str(), other.entryPointName.c_str(), entryPointName.size());
if(cmp != 0)
{
return cmp < 0;
}
cmp = memcmp(insns.data(), other.insns.data(), insns.size() * sizeof(uint32_t));
if(cmp != 0)
{
return cmp < 0;
}
// Everything else equal: fall back to the specialization info ordering.
return (specializationInfo < other.specializationInfo);
}
PipelineCache::PipelineCache(const VkPipelineCacheCreateInfo* pCreateInfo, void* mem) :
dataSize(ComputeRequiredAllocationSize(pCreateInfo)), data(reinterpret_cast<uint8_t*>(mem))
{
......@@ -34,6 +164,12 @@ PipelineCache::PipelineCache(const VkPipelineCacheCreateInfo* pCreateInfo, void*
}
}
// Releases all cached shaders and programs. The explicit clear() calls are
// not strictly required (member destruction would release the maps anyway),
// but make the teardown order explicit.
PipelineCache::~PipelineCache()
{
spirvShaders.clear();
computePrograms.clear();
}
void PipelineCache::destroy(const VkAllocationCallbacks* pAllocator)
{
vk::deallocate(data, pAllocator);
......@@ -70,10 +206,42 @@ VkResult PipelineCache::merge(uint32_t srcCacheCount, const VkPipelineCache* pSr
{
for(uint32_t i = 0; i < srcCacheCount; i++)
{
// TODO (b/123588002): merge pSrcCaches[i];
PipelineCache* srcCache = Cast(pSrcCaches[i]);
{
std::unique_lock<std::mutex> lock(spirvShadersMutex);
spirvShaders.insert(srcCache->spirvShaders.begin(), srcCache->spirvShaders.end());
}
{
std::unique_lock<std::mutex> lock(computeProgramsMutex);
computePrograms.insert(srcCache->computePrograms.begin(), srcCache->computePrograms.end());
}
}
return VK_SUCCESS;
}
// Looks up a cached shader. Returns a pointer to the cache-owned shared_ptr,
// or nullptr when nothing is cached under 'key'. Callers serialize access
// through getShaderMutex() (see Pipeline::compileShaders()).
const std::shared_ptr<sw::SpirvShader>* PipelineCache::operator[](const PipelineCache::SpirvShaderKey& key) const
{
	auto entry = spirvShaders.find(key);
	if(entry == spirvShaders.end())
	{
		return nullptr;
	}
	return &(entry->second);
}
// Caches 'shader' under 'key', replacing any existing entry.
// Callers serialize access through getShaderMutex().
void PipelineCache::insert(const PipelineCache::SpirvShaderKey& key, const std::shared_ptr<sw::SpirvShader> &shader)
{
spirvShaders[key] = shader;
}
// Looks up a cached compute program. Returns a pointer to the cache-owned
// shared_ptr, or nullptr when nothing is cached under 'key'. Callers
// serialize access through getProgramMutex().
const std::shared_ptr<sw::ComputeProgram>* PipelineCache::operator[](const PipelineCache::ComputeProgramKey& key) const
{
	auto entry = computePrograms.find(key);
	if(entry == computePrograms.end())
	{
		return nullptr;
	}
	return &(entry->second);
}
// Caches 'computeProgram' under 'key', replacing any existing entry.
// Callers serialize access through getProgramMutex().
void PipelineCache::insert(const PipelineCache::ComputeProgramKey& key, const std::shared_ptr<sw::ComputeProgram> &computeProgram)
{
computePrograms[key] = computeProgram;
}
} // namespace vk
......@@ -16,14 +16,30 @@
#define VK_PIPELINE_CACHE_HPP_
#include "VkObject.hpp"
#include <cstring>
#include <functional>
#include <memory>
#include <map>
#include <mutex>
#include <vector>
namespace sw
{
class ComputeProgram;
class SpirvShader;
}
namespace vk
{
class PipelineLayout;
class RenderPass;
class PipelineCache : public Object<PipelineCache, VkPipelineCache>
{
public:
PipelineCache(const VkPipelineCacheCreateInfo* pCreateInfo, void* mem);
virtual ~PipelineCache();
void destroy(const VkAllocationCallbacks* pAllocator);
static size_t ComputeRequiredAllocationSize(const VkPipelineCacheCreateInfo* pCreateInfo);
......@@ -31,6 +47,73 @@ public:
VkResult getData(size_t* pDataSize, void* pData);
VkResult merge(uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches);
struct SpirvShaderKey
{
struct SpecializationInfo
{
SpecializationInfo(const VkSpecializationInfo* specializationInfo);
~SpecializationInfo();
bool operator<(const SpecializationInfo& specializationInfo) const;
VkSpecializationInfo* get() const { return info; }
private:
VkSpecializationInfo* info = nullptr;
};
SpirvShaderKey(const VkShaderStageFlagBits pipelineStage,
const std::string& entryPointName,
const std::vector<uint32_t>& insns,
const vk::RenderPass *renderPass,
const uint32_t subpassIndex,
const VkSpecializationInfo* specializationInfo);
bool operator<(const SpirvShaderKey &other) const;
const VkShaderStageFlagBits& getPipelineStage() const { return pipelineStage; }
const std::string& getEntryPointName() const { return entryPointName; }
const std::vector<uint32_t>& getInsns() const { return insns; }
const vk::RenderPass *getRenderPass() const { return renderPass; }
uint32_t getSubpassIndex() const { return subpassIndex; }
const VkSpecializationInfo *getSpecializationInfo() const { return specializationInfo.get(); }
private:
const VkShaderStageFlagBits pipelineStage;
const std::string entryPointName;
const std::vector<uint32_t> insns;
const vk::RenderPass *renderPass;
const uint32_t subpassIndex;
const SpecializationInfo specializationInfo;
};
std::mutex& getShaderMutex() { return spirvShadersMutex; }
const std::shared_ptr<sw::SpirvShader>* operator[](const PipelineCache::SpirvShaderKey& key) const;
void insert(const PipelineCache::SpirvShaderKey& key, const std::shared_ptr<sw::SpirvShader> &shader);
// Key identifying a generated compute program: the compiled shader plus the
// pipeline layout it was generated against. Both are compared by pointer
// identity.
struct ComputeProgramKey
{
	ComputeProgramKey(const sw::SpirvShader* shader, const vk::PipelineLayout* layout) :
		shader(shader), layout(layout)
	{}

	// Strict weak ordering, as required by std::map. The previous form,
	// "(shader < other.shader) || (layout < other.layout)", was not a valid
	// ordering (a < b and b < a could both hold), which yields undefined
	// std::map behavior — lexicographic comparison fixes it.
	bool operator<(const ComputeProgramKey &other) const
	{
		if(shader != other.shader)
		{
			return shader < other.shader;
		}
		return layout < other.layout;
	}

	const sw::SpirvShader* getShader() const { return shader; }
	const vk::PipelineLayout* getLayout() const { return layout; }

private:
	const sw::SpirvShader* shader;
	const vk::PipelineLayout* layout;
};
std::mutex& getProgramMutex() { return computeProgramsMutex; }
const std::shared_ptr<sw::ComputeProgram>* operator[](const PipelineCache::ComputeProgramKey& key) const;
void insert(const PipelineCache::ComputeProgramKey& key, const std::shared_ptr<sw::ComputeProgram> &computeProgram);
private:
struct CacheHeader
{
......@@ -43,6 +126,12 @@ private:
size_t dataSize = 0;
uint8_t* data = nullptr;
std::mutex spirvShadersMutex;
std::map<SpirvShaderKey, std::shared_ptr<sw::SpirvShader>> spirvShaders;
std::mutex computeProgramsMutex;
std::map<ComputeProgramKey, std::shared_ptr<sw::ComputeProgram>> computePrograms;
};
static inline PipelineCache* Cast(VkPipelineCache object)
......
......@@ -1190,15 +1190,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines(VkDevice device, VkPipe
TRACE("(VkDevice device = %p, VkPipelineCache pipelineCache = %p, uint32_t createInfoCount = %d, const VkGraphicsPipelineCreateInfo* pCreateInfos = %p, const VkAllocationCallbacks* pAllocator = %p, VkPipeline* pPipelines = %p)",
device, static_cast<void*>(pipelineCache), int(createInfoCount), pCreateInfos, pAllocator, pPipelines);
// TODO (b/123588002): Optimize based on pipelineCache.
VkResult errorResult = VK_SUCCESS;
for(uint32_t i = 0; i < createInfoCount; i++)
{
VkResult result = vk::GraphicsPipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
if(result == VK_SUCCESS)
{
static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i], vk::Cast(pipelineCache));
}
else
{
......@@ -1223,15 +1221,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines(VkDevice device, VkPipel
TRACE("(VkDevice device = %p, VkPipelineCache pipelineCache = %p, uint32_t createInfoCount = %d, const VkComputePipelineCreateInfo* pCreateInfos = %p, const VkAllocationCallbacks* pAllocator = %p, VkPipeline* pPipelines = %p)",
device, static_cast<void*>(pipelineCache), int(createInfoCount), pCreateInfos, pAllocator, pPipelines);
// TODO (b/123588002): Optimize based on pipelineCache.
VkResult errorResult = VK_SUCCESS;
for(uint32_t i = 0; i < createInfoCount; i++)
{
VkResult result = vk::ComputePipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
if(result == VK_SUCCESS)
{
static_cast<vk::ComputePipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
static_cast<vk::ComputePipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i], vk::Cast(pipelineCache));
}
else
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment