Commit 02fa7313 by Tobin Ehlis Committed by Commit Bot

Vulkan:Initial worker thread disabled by default

Created new CommandProcessor class that can be run as a worker thread. CommandProcessor runs within RendererVk as a worker thread that takes a CommandBufferHelper (CBH) ptr as the interface and processes that CBH into a primary command buffer. Main thread has a queue of CBH to draw from. After submitting a CBH to the worker, it pulls next CBH from the queue. Worker thread releases CBH back to the main thread queue when done. Synchronization goes two ways: 1. Work submitted to worker thread is managed with a mutex and condition variable based around the work queue. 2. Available CBH ptrs for the main thread have a mutex and condition variable that manages the CBH queue. The worker thread is disabled by default, and, when enabled, it will currently behave and perform as the non-threaded code. This is because the kNumCommandBuffers const in ContextVk.h is set to 2. With only 2 command buffers, they will be assigned to the inside and outside RenderPass command buffers respectively. Then, as soon as one is submitted, the main thread will stall waiting for it to be completed and put back into the queue mentioned in #2 above. The next step is to move command submission to the worker thread and update the number of command buffers so that processing/submission will occur in parallel with the main thread. Right now there is a race condition issue when attempting to run in parallel because the main thread updates and submits the same primary command buffers that are used in the worker thread, which is in violation of the Vulkan spec. The follow-on CL will fix this issue as the main thread will only touch SecondaryCommandBuffers and the worker thread will be the only thread touching the primary command buffers. Bug: b/154030730 Change-Id: Ib0c518bbd7ca9a3a7e789f4e1f2f7131ddc0509e Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2174719 Commit-Queue: Tobin Ehlis <tobine@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent f5dace0f
...@@ -317,6 +317,13 @@ struct FeaturesVk : FeatureSetBase ...@@ -317,6 +317,13 @@ struct FeaturesVk : FeatureSetBase
"Single barrier call is preferred over multiple calls with " "Single barrier call is preferred over multiple calls with "
"fine grained pipeline stage dependency information", "fine grained pipeline stage dependency information",
&members, "http://anglebug.com/4633"}; &members, "http://anglebug.com/4633"};
// Enable parallel thread that processes and submits vulkan command buffers.
// Currently off by default to enable testing.
Feature enableCommandProcessingThread = {
"enable_command_processing_thread", FeatureCategory::VulkanFeatures,
"Enable parallel processing and submission of Vulkan commands in worker thread", &members,
"http://anglebug.com/4324"};
}; };
inline FeaturesVk::FeaturesVk() = default; inline FeaturesVk::FeaturesVk() = default;
......
...@@ -37,6 +37,13 @@ PoolAllocator::PoolAllocator(int growthIncrement, int allocationAlignment) ...@@ -37,6 +37,13 @@ PoolAllocator::PoolAllocator(int growthIncrement, int allocationAlignment)
#endif #endif
mLocked(false) mLocked(false)
{ {
initialize(growthIncrement, allocationAlignment);
}
void PoolAllocator::initialize(int pageSize, int alignment)
{
mPageSize = pageSize;
mAlignment = alignment;
#if !defined(ANGLE_DISABLE_POOL_ALLOC) #if !defined(ANGLE_DISABLE_POOL_ALLOC)
if (mAlignment == 1) if (mAlignment == 1)
{ {
......
...@@ -125,7 +125,7 @@ class PoolAllocator : angle::NonCopyable ...@@ -125,7 +125,7 @@ class PoolAllocator : angle::NonCopyable
public: public:
static const int kDefaultAlignment = 16; static const int kDefaultAlignment = 16;
// //
// Create PoolAllocator. If alignment is be set to 1 byte then fastAllocate() // Create PoolAllocator. If alignment is set to 1 byte then fastAllocate()
// function can be used to make allocations with less overhead. // function can be used to make allocations with less overhead.
// //
PoolAllocator(int growthIncrement = 8 * 1024, int allocationAlignment = kDefaultAlignment); PoolAllocator(int growthIncrement = 8 * 1024, int allocationAlignment = kDefaultAlignment);
...@@ -136,6 +136,11 @@ class PoolAllocator : angle::NonCopyable ...@@ -136,6 +136,11 @@ class PoolAllocator : angle::NonCopyable
~PoolAllocator(); ~PoolAllocator();
// //
// Initialize page size and alignment after construction
//
void initialize(int pageSize, int alignment);
//
// Call push() to establish a new place to pop memory to. Does not // Call push() to establish a new place to pop memory to. Does not
// have to be called to get things started. // have to be called to get things started.
// //
......
...@@ -20,6 +20,8 @@ declare_args() { ...@@ -20,6 +20,8 @@ declare_args() {
_vulkan_backend_sources = [ _vulkan_backend_sources = [
"BufferVk.cpp", "BufferVk.cpp",
"BufferVk.h", "BufferVk.h",
"CommandProcessor.cpp",
"CommandProcessor.h",
"CompilerVk.cpp", "CompilerVk.cpp",
"CompilerVk.h", "CompilerVk.h",
"ContextVk.cpp", "ContextVk.cpp",
......
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// CommandProcessor.cpp:
// Implements the class methods for CommandProcessor.
//
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/trace.h"
namespace rx
{
// The worker is flagged idle from the start, so waitForWorkComplete() (which waits for an
// empty queue and an idle worker) does not block before any task has been queued.
CommandProcessor::CommandProcessor() : mWorkerThreadIdle(true) {}
// Enqueue one task for the worker loop and wake the worker.
// A task either carries a non-empty command buffer helper or is the exit task, whose
// commandBuffer pointer is null.
void CommandProcessor::queueCommands(const vk::CommandProcessorTask &commands)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queueCommands");

    // mWorkerMutex guards mCommandsQueue; it is released when workerLock goes out of scope.
    std::unique_lock<std::mutex> workerLock(mWorkerMutex);

    // Queuing a real (non-null) helper that has nothing recorded is a caller error.
    ASSERT(!(commands.commandBuffer != nullptr && commands.commandBuffer->empty()));

    mCommandsQueue.push(commands);
    mWorkAvailableCondition.notify_one();
}
// Worker-thread entry point. Repeatedly waits for a task on mCommandsQueue, flushes the task's
// CommandBufferHelper into the task's primary command buffer, and then hands the helper back
// to its ContextVk.
//
// Returns angle::Result::Continue after the exit task (all pointers null) is received, or the
// failing result if flushToPrimary() reports an error.
//
// On every exit path the worker is marked idle and all idle-waiters are notified, so that a
// waitForWorkComplete() call issued after the loop has ended cannot block on a stale
// "busy" flag.
angle::Result CommandProcessor::processCommandProcessorTasks()
{
    angle::Result result = angle::Result::Continue;

    while (true)
    {
        std::unique_lock<std::mutex> lock(mWorkerMutex);
        // Publish the idle state before sleeping. More than one thread may be blocked in
        // waitForWorkComplete(), so wake all of them rather than a single waiter.
        mWorkerThreadIdle = true;
        mWorkerIdleCondition.notify_all();
        // Only wake if notified and command queue is not empty
        mWorkAvailableCondition.wait(lock, [this] { return !mCommandsQueue.empty(); });
        mWorkerThreadIdle = false;
        vk::CommandProcessorTask task = mCommandsQueue.front();
        mCommandsQueue.pop();
        // Do the actual command processing without holding the queue lock.
        lock.unlock();

        // Either both ptrs should be null or non-null
        ASSERT((task.commandBuffer != nullptr && task.contextVk != nullptr) ||
               (task.commandBuffer == nullptr && task.contextVk == nullptr));
        // A work block with null ptrs signals worker thread to exit
        if (task.commandBuffer == nullptr && task.contextVk == nullptr)
        {
            break;
        }

        ASSERT(!task.commandBuffer->empty());
        // TODO: Will need some way to synchronize error reporting between threads
        result = task.commandBuffer->flushToPrimary(task.contextVk, task.primaryCB);
        if (result != angle::Result::Continue)
        {
            // Stop processing, but fall through to the idle bookkeeping below instead of
            // returning with mWorkerThreadIdle still false.
            break;
        }
        ASSERT(task.commandBuffer->empty());
        task.commandBuffer->releaseToContextQueue(task.contextVk);
    }

    // The loop is done; nothing else will be processed by this thread.
    {
        std::lock_guard<std::mutex> lock(mWorkerMutex);
        mWorkerThreadIdle = true;
    }
    mWorkerIdleCondition.notify_all();

    return result;
}
// Block the calling thread until the task queue is empty and the worker reports itself idle.
void CommandProcessor::waitForWorkComplete()
{
    ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkerThreadIdle");

    std::unique_lock<std::mutex> workerLock(mWorkerMutex);
    // Keep sleeping while there is queued work or the worker is still busy with a task.
    while (!mCommandsQueue.empty() || !mWorkerThreadIdle)
    {
        mWorkerIdleCondition.wait(workerLock);
    }
    // Worker thread is idle and command queue is empty so good to continue; workerLock is
    // released when it goes out of scope.
}
// Stop the worker loop and join its thread.
//
// commandProcessorThread: the thread running processCommandProcessorTasks(). If it is not
// joinable (never started, or already joined by an earlier shutdown), this is a no-op: there
// is no worker to drain, and queuing an exit task that nobody consumes would leave the queue
// permanently non-empty for later waitForWorkComplete() callers.
void CommandProcessor::shutdown(std::thread *commandProcessorThread)
{
    if (!commandProcessorThread->joinable())
    {
        return;
    }

    // Let the worker finish everything already queued before asking it to exit.
    waitForWorkComplete();
    const vk::CommandProcessorTask endTask = vk::kEndCommandProcessorThread;
    queueCommands(endTask);
    commandProcessorThread->join();
}
} // namespace rx
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// CommandProcessor.h:
// A class to process and submit Vulkan command buffers that can be
// used in an asynchronous worker thread.
//
#ifndef LIBANGLE_RENDERER_VULKAN_COMMAND_PROCESSOR_H_
#define LIBANGLE_RENDERER_VULKAN_COMMAND_PROCESSOR_H_
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include "libANGLE/renderer/vulkan/vk_headers.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h"
namespace rx
{
namespace vk
{
// CommandProcessorTask is used to queue a task to the worker thread when
// enableCommandProcessingThread feature is true.
// The typical task includes pointers in all values and the worker thread will
// process the SecondaryCommandBuffer commands in commandBuffer into the primaryCB.
// There is a special task in which all of the pointers are null that will trigger
// the worker thread to exit, and is sent when the renderer instance shuts down.
struct CommandProcessorTask
{
// Context handed to flushToPrimary() and used to return commandBuffer to the context's queue.
ContextVk *contextVk;
// TODO: b/153666475 Remove primaryCB in threading phase2.
vk::PrimaryCommandBuffer *primaryCB;
// Command buffer helper whose recorded commands are flushed into primaryCB.
CommandBufferHelper *commandBuffer;
};
// Exit task (all pointers null): tells the worker loop to stop.
static const CommandProcessorTask kEndCommandProcessorThread = {nullptr, nullptr, nullptr};
} // namespace vk
// Processes queued vk::CommandProcessorTask items on a worker thread. Other threads queue
// tasks with queueCommands() and can block until the worker has drained them with
// waitForWorkComplete().
class CommandProcessor : angle::NonCopyable
{
public:
CommandProcessor();
~CommandProcessor() = default;
// Main worker loop that should be launched in its own thread. The
// loop waits for work to be submitted from a separate thread and runs
// until it receives the exit task (a task whose pointers are all null).
angle::Result processCommandProcessorTasks();
// Called from a thread other than the worker to queue a task that is
// then processed by the processCommandProcessorTasks() loop.
void queueCommands(const vk::CommandProcessorTask &commands);
// Used by separate thread to wait for worker thread to complete all
// outstanding work, i.e. until the queue is empty and the worker is idle.
void waitForWorkComplete();
// Stop the command processor loop by queuing the exit task and joining
// the given worker thread.
void shutdown(std::thread *commandProcessorThread);
private:
// Pending tasks; accessed under mWorkerMutex.
std::queue<vk::CommandProcessorTask> mCommandsQueue;
// Guards mCommandsQueue and is the mutex both condition variables wait on.
std::mutex mWorkerMutex;
// Signal worker thread when work is available
std::condition_variable mWorkAvailableCondition;
// Signal waiting thread(s) when the worker goes idle
std::condition_variable mWorkerIdleCondition;
// True while the worker is waiting for work rather than processing a task. Read by the
// waitForWorkComplete() predicate, so it is part of the synchronization, not only a
// debugging aid.
bool mWorkerThreadIdle;
};
} // namespace rx
#endif // LIBANGLE_RENDERER_VULKAN_COMMAND_PROCESSOR_H_
...@@ -130,7 +130,6 @@ constexpr VkColorComponentFlags kAllColorChannelsMask = ...@@ -130,7 +130,6 @@ constexpr VkColorComponentFlags kAllColorChannelsMask =
constexpr VkBufferUsageFlags kVertexBufferUsage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; constexpr VkBufferUsageFlags kVertexBufferUsage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values); constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values);
constexpr size_t kDefaultBufferSize = kDefaultValueSize * 16; constexpr size_t kDefaultBufferSize = kDefaultValueSize * 16;
constexpr size_t kDefaultPoolAllocatorPageSize = 16 * 1024;
constexpr size_t kDriverUniformsAllocatorPageSize = 4 * 1024; constexpr size_t kDriverUniformsAllocatorPageSize = 4 * 1024;
constexpr size_t kInFlightCommandsLimit = 100u; constexpr size_t kInFlightCommandsLimit = 100u;
...@@ -670,7 +669,6 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk ...@@ -670,7 +669,6 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mIsAnyHostVisibleBufferWritten(false), mIsAnyHostVisibleBufferWritten(false),
mEmulateSeamfulCubeMapSampling(false), mEmulateSeamfulCubeMapSampling(false),
mUseOldRewriteStructSamplers(false), mUseOldRewriteStructSamplers(false),
mPoolAllocator(kDefaultPoolAllocatorPageSize, 1),
mOutsideRenderPassCommands(nullptr), mOutsideRenderPassCommands(nullptr),
mRenderPassCommands(nullptr), mRenderPassCommands(nullptr),
mHasPrimaryCommands(false), mHasPrimaryCommands(false),
...@@ -889,15 +887,20 @@ angle::Result ContextVk::initialize() ...@@ -889,15 +887,20 @@ angle::Result ContextVk::initialize()
mUseOldRewriteStructSamplers = shouldUseOldRewriteStructSamplers(); mUseOldRewriteStructSamplers = shouldUseOldRewriteStructSamplers();
// Push a scope in the pool allocator so we can easily reinitialize on flush. // Prepare command buffer queue by:
mPoolAllocator.push(); // 1. Initializing each command buffer (as non-renderpass initially)
mOutsideRenderPassCommands = &mCommandBuffers[0]; // 2. Put a pointer to each command buffer into queue
mRenderPassCommands = &mCommandBuffers[1]; for (vk::CommandBufferHelper &commandBuffer : mCommandBuffers)
{
// TODO: b/157508684 Don't cache feature in class like this, just check when needed
commandBuffer.initialize(false,
mRenderer->getFeatures().preferAggregateBarrierCalls.enabled);
recycleCommandBuffer(&commandBuffer);
}
// Now assign initial command buffers from queue
getNextAvailableCommandBuffer(&mOutsideRenderPassCommands, false);
getNextAvailableCommandBuffer(&mRenderPassCommands, true);
mOutsideRenderPassCommands->initialize(
&mPoolAllocator, false, mRenderer->getFeatures().preferAggregateBarrierCalls.enabled);
mRenderPassCommands->initialize(&mPoolAllocator, true,
mRenderer->getFeatures().preferAggregateBarrierCalls.enabled);
ANGLE_TRY(startPrimaryCommandBuffer()); ANGLE_TRY(startPrimaryCommandBuffer());
if (mGpuEventsEnabled) if (mGpuEventsEnabled)
...@@ -3796,13 +3799,15 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore) ...@@ -3796,13 +3799,15 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
TRACE_EVENT_PHASE_END, eventName)); TRACE_EVENT_PHASE_END, eventName));
} }
ANGLE_TRY(flushOutsideRenderPassCommands()); ANGLE_TRY(flushOutsideRenderPassCommands());
ANGLE_VK_TRY(this, mPrimaryCommands.end());
// Free secondary command pool allocations and restart command buffers with the new page. if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
mPoolAllocator.pop(); {
mPoolAllocator.push(); // Worker thread must complete adding any commands that were just flushed above to the
mOutsideRenderPassCommands->reset(); // primary command buffer before we can End the primary command buffer below.
mRenderPassCommands->reset(); mRenderer->waitForWorkerThreadIdle();
}
ANGLE_VK_TRY(this, mPrimaryCommands.end());
Serial serial = getCurrentQueueSerial(); Serial serial = getCurrentQueueSerial();
mResourceUseList.releaseResourceUsesAndUpdateSerials(serial); mResourceUseList.releaseResourceUsesAndUpdateSerials(serial);
...@@ -4247,7 +4252,17 @@ angle::Result ContextVk::endRenderPass() ...@@ -4247,7 +4252,17 @@ angle::Result ContextVk::endRenderPass()
mRenderPassCommands->pauseTransformFeedbackIfStarted(); mRenderPassCommands->pauseTransformFeedbackIfStarted();
ANGLE_TRY(mRenderPassCommands->flushToPrimary(this, &mPrimaryCommands)); if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
vk::CommandProcessorTask task = {this, &mPrimaryCommands, mRenderPassCommands};
queueCommandsToWorker(task);
getNextAvailableCommandBuffer(&mRenderPassCommands, true);
}
else
{
ANGLE_TRY(mRenderPassCommands->flushToPrimary(this, &mPrimaryCommands));
}
mHasPrimaryCommands = true; mHasPrimaryCommands = true;
if (mGpuEventsEnabled) if (mGpuEventsEnabled)
...@@ -4268,6 +4283,30 @@ void ContextVk::onRenderPassImageWrite(VkImageAspectFlags aspectFlags, ...@@ -4268,6 +4283,30 @@ void ContextVk::onRenderPassImageWrite(VkImageAspectFlags aspectFlags,
mRenderPassCommands->imageWrite(&mResourceUseList, aspectFlags, imageLayout, image); mRenderPassCommands->imageWrite(&mResourceUseList, aspectFlags, imageLayout, image);
} }
// Take the next free CommandBufferHelper from mAvailableCommandBuffers, blocking until one
// has been recycled if the queue is currently empty.
//
// commandBuffer: out-parameter that receives the helper.
// hasRenderPass: render-pass state to set on the helper before it is used.
void ContextVk::getNextAvailableCommandBuffer(vk::CommandBufferHelper **commandBuffer,
                                              bool hasRenderPass)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::getNextAvailableCommandBuffer");

    {
        std::unique_lock<std::mutex> queueLock(mCommandBufferQueueMutex);
        // Sleep until at least one helper is available.
        while (mAvailableCommandBuffers.empty())
        {
            mAvailableCommandBufferCondition.wait(queueLock);
        }

        *commandBuffer = mAvailableCommandBuffers.front();
        // Helpers are expected to come back from the queue with nothing recorded in them.
        ASSERT((*commandBuffer)->empty());
        mAvailableCommandBuffers.pop();
    }  // queue lock released here

    (*commandBuffer)->setHasRenderPass(hasRenderPass);
}
// Return an empty CommandBufferHelper to the queue of available helpers and wake one thread
// blocked in getNextAvailableCommandBuffer().
//
// commandBuffer: helper to make available again; it must already be empty.
void ContextVk::recycleCommandBuffer(vk::CommandBufferHelper *commandBuffer)
{
    // Label the trace event after this function; the previous label named
    // RendererVk::waitForWorkerThreadIdle, which mis-attributed this work in traces.
    ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::recycleCommandBuffer");
    std::lock_guard<std::mutex> queueLock(mCommandBufferQueueMutex);
    ASSERT(commandBuffer->empty());
    mAvailableCommandBuffers.push(commandBuffer);
    mAvailableCommandBufferCondition.notify_one();
}
angle::Result ContextVk::syncExternalMemory() angle::Result ContextVk::syncExternalMemory()
{ {
vk::CommandBuffer *commandBuffer; vk::CommandBuffer *commandBuffer;
...@@ -4350,7 +4389,16 @@ angle::Result ContextVk::flushOutsideRenderPassCommands() ...@@ -4350,7 +4389,16 @@ angle::Result ContextVk::flushOutsideRenderPassCommands()
{ {
if (!mOutsideRenderPassCommands->empty()) if (!mOutsideRenderPassCommands->empty())
{ {
ANGLE_TRY(mOutsideRenderPassCommands->flushToPrimary(this, &mPrimaryCommands)); if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
vk::CommandProcessorTask task = {this, &mPrimaryCommands, mOutsideRenderPassCommands};
queueCommandsToWorker(task);
getNextAvailableCommandBuffer(&mOutsideRenderPassCommands, false);
}
else
{
ANGLE_TRY(mOutsideRenderPassCommands->flushToPrimary(this, &mPrimaryCommands));
}
mHasPrimaryCommands = true; mHasPrimaryCommands = true;
} }
return angle::Result::Continue; return angle::Result::Continue;
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#ifndef LIBANGLE_RENDERER_VULKAN_CONTEXTVK_H_ #ifndef LIBANGLE_RENDERER_VULKAN_CONTEXTVK_H_
#define LIBANGLE_RENDERER_VULKAN_CONTEXTVK_H_ #define LIBANGLE_RENDERER_VULKAN_CONTEXTVK_H_
#include <condition_variable>
#include "common/PackedEnums.h" #include "common/PackedEnums.h"
#include "libANGLE/renderer/ContextImpl.h" #include "libANGLE/renderer/ContextImpl.h"
#include "libANGLE/renderer/renderer_utils.h" #include "libANGLE/renderer/renderer_utils.h"
...@@ -526,6 +528,14 @@ class ContextVk : public ContextImpl, public vk::Context ...@@ -526,6 +528,14 @@ class ContextVk : public ContextImpl, public vk::Context
void updateOverlayOnPresent(); void updateOverlayOnPresent();
// Submit commands to worker thread for processing.
// Thin forwarder to mRenderer->queueCommands(); the task is passed through unchanged.
ANGLE_INLINE void queueCommandsToWorker(const vk::CommandProcessorTask &commands)
{
mRenderer->queueCommands(commands);
}
// When worker thread completes, it releases command buffers back to context queue
void recycleCommandBuffer(vk::CommandBufferHelper *commandBuffer);
private: private:
// Dirty bits. // Dirty bits.
enum DirtyBitType : size_t enum DirtyBitType : size_t
...@@ -790,6 +800,9 @@ class ContextVk : public ContextImpl, public vk::Context ...@@ -790,6 +800,9 @@ class ContextVk : public ContextImpl, public vk::Context
void initIndexTypeMap(); void initIndexTypeMap();
// Pull an available CBH ptr from the CBH queue and set to specified hasRenderPass state
void getNextAvailableCommandBuffer(vk::CommandBufferHelper **commandBuffer, bool hasRenderPass);
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers; std::array<DirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers;
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers; std::array<DirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers;
...@@ -906,20 +919,23 @@ class ContextVk : public ContextImpl, public vk::Context ...@@ -906,20 +919,23 @@ class ContextVk : public ContextImpl, public vk::Context
// http://anglebug.com/2701 // http://anglebug.com/2701
vk::Shared<vk::Fence> mSubmitFence; vk::Shared<vk::Fence> mSubmitFence;
// Pool allocator used for command graph but may be expanded to other allocations
angle::PoolAllocator mPoolAllocator;
// When the command graph is disabled we record commands completely linearly. We have plans to // When the command graph is disabled we record commands completely linearly. We have plans to
// reorder independent draws so that we can create fewer RenderPasses in some scenarios. // reorder independent draws so that we can create fewer RenderPasses in some scenarios.
// Currently we just point the inside/outside RenderPass command buffers to respective fixed // We have a queue of CommandBufferHelpers (CBHs) that is drawn from for the two active command
// command buffers in the mCommandBuffers array. In the near future when we move to a worker // buffers in the main thread. The two active command buffers are the inside and outside
// thread there will a larger pool of command buffers and command buffer pointers will be // RenderPass command buffers.
// assigned from a queue based on availability.
constexpr static size_t kNumCommandBuffers = 2; constexpr static size_t kNumCommandBuffers = 2;
std::array<vk::CommandBufferHelper, kNumCommandBuffers> mCommandBuffers; std::array<vk::CommandBufferHelper, kNumCommandBuffers> mCommandBuffers;
// Lock access to the command buffer queue
std::mutex mCommandBufferQueueMutex;
std::queue<vk::CommandBufferHelper *> mAvailableCommandBuffers;
std::condition_variable mAvailableCommandBufferCondition;
vk::CommandBufferHelper *mOutsideRenderPassCommands; vk::CommandBufferHelper *mOutsideRenderPassCommands;
vk::CommandBufferHelper *mRenderPassCommands; vk::CommandBufferHelper *mRenderPassCommands;
vk::PrimaryCommandBuffer mPrimaryCommands; vk::PrimaryCommandBuffer mPrimaryCommands;
// Function recycleCommandBuffer() is public above
bool mHasPrimaryCommands; bool mHasPrimaryCommands;
// Internal shader library. // Internal shader library.
......
...@@ -470,6 +470,12 @@ RendererVk::~RendererVk() ...@@ -470,6 +470,12 @@ RendererVk::~RendererVk()
void RendererVk::onDestroy() void RendererVk::onDestroy()
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
// Shutdown worker thread
mCommandProcessor.shutdown(&mCommandProcessorThread);
}
// Force all commands to finish by flushing all queues. // Force all commands to finish by flushing all queues.
for (VkQueue queue : mQueues) for (VkQueue queue : mQueues)
{ {
...@@ -833,6 +839,11 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk, ...@@ -833,6 +839,11 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk,
// Initialize the format table. // Initialize the format table.
mFormatTable.initialize(this, &mNativeTextureCaps, &mNativeCaps.compressedTextureFormats); mFormatTable.initialize(this, &mNativeTextureCaps, &mNativeCaps.compressedTextureFormats);
if (getFeatures().enableCommandProcessingThread.enabled)
{
mCommandProcessorThread =
std::thread(&CommandProcessor::processCommandProcessorTasks, &mCommandProcessor);
}
return angle::Result::Continue; return angle::Result::Continue;
} }
...@@ -1695,6 +1706,9 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev ...@@ -1695,6 +1706,9 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev
ANGLE_FEATURE_CONDITION(&mFeatures, preferAggregateBarrierCalls, isNvidia || isAMD || isIntel); ANGLE_FEATURE_CONDITION(&mFeatures, preferAggregateBarrierCalls, isNvidia || isAMD || isIntel);
// Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, enableCommandProcessingThread, false);
angle::PlatformMethods *platform = ANGLEPlatformCurrent(); angle::PlatformMethods *platform = ANGLEPlatformCurrent();
platform->overrideFeaturesVk(platform, &mFeatures); platform->overrideFeaturesVk(platform, &mFeatures);
...@@ -1925,6 +1939,14 @@ angle::Result RendererVk::queueSubmit(vk::Context *context, ...@@ -1925,6 +1939,14 @@ angle::Result RendererVk::queueSubmit(vk::Context *context,
const vk::Fence *fence, const vk::Fence *fence,
Serial *serialOut) Serial *serialOut)
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
// For initial threading phase 1 code make sure any outstanding command processing
// is complete.
// TODO: b/153666475 For phase2 investigate if this is required as most submits will take
// place through worker thread except for one-off submits below.
mCommandProcessor.waitForWorkComplete();
}
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
VkFence handle = fence ? fence->getHandle() : VK_NULL_HANDLE; VkFence handle = fence ? fence->getHandle() : VK_NULL_HANDLE;
...@@ -1960,6 +1982,11 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context, ...@@ -1960,6 +1982,11 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context,
angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriority priority) angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriority priority)
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
// First make sure command processor is complete when waiting for queue idle.
mCommandProcessor.waitForWorkComplete();
}
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
ANGLE_VK_TRY(context, vkQueueWaitIdle(mQueues[priority])); ANGLE_VK_TRY(context, vkQueueWaitIdle(mQueues[priority]));
...@@ -1972,6 +1999,11 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori ...@@ -1972,6 +1999,11 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori
angle::Result RendererVk::deviceWaitIdle(vk::Context *context) angle::Result RendererVk::deviceWaitIdle(vk::Context *context)
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
// First make sure command processor is complete when waiting for device idle.
mCommandProcessor.waitForWorkComplete();
}
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
ANGLE_VK_TRY(context, vkDeviceWaitIdle(mDevice)); ANGLE_VK_TRY(context, vkDeviceWaitIdle(mDevice));
...@@ -1987,6 +2019,13 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority, ...@@ -1987,6 +2019,13 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority,
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent"); ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent");
if (getFeatures().enableCommandProcessingThread.enabled)
{
// First make sure command processor is complete before queue present as
// present may have dependencies on that thread.
mCommandProcessor.waitForWorkComplete();
}
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
{ {
......
...@@ -10,9 +10,12 @@ ...@@ -10,9 +10,12 @@
#ifndef LIBANGLE_RENDERER_VULKAN_RENDERERVK_H_ #ifndef LIBANGLE_RENDERER_VULKAN_RENDERERVK_H_
#define LIBANGLE_RENDERER_VULKAN_RENDERERVK_H_ #define LIBANGLE_RENDERER_VULKAN_RENDERERVK_H_
#include <condition_variable>
#include <deque> #include <deque>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <queue>
#include <thread>
#include "vk_ext_provoking_vertex.h" #include "vk_ext_provoking_vertex.h"
...@@ -22,6 +25,7 @@ ...@@ -22,6 +25,7 @@
#include "common/vulkan/vulkan_icd.h" #include "common/vulkan/vulkan_icd.h"
#include "libANGLE/BlobCache.h" #include "libANGLE/BlobCache.h"
#include "libANGLE/Caps.h" #include "libANGLE/Caps.h"
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/renderer/vulkan/QueryVk.h" #include "libANGLE/renderer/vulkan/QueryVk.h"
#include "libANGLE/renderer/vulkan/ResourceVk.h" #include "libANGLE/renderer/vulkan/ResourceVk.h"
#include "libANGLE/renderer/vulkan/UtilsVk.h" #include "libANGLE/renderer/vulkan/UtilsVk.h"
...@@ -250,6 +254,13 @@ class RendererVk : angle::NonCopyable ...@@ -250,6 +254,13 @@ class RendererVk : angle::NonCopyable
SamplerCache &getSamplerCache() { return mSamplerCache; } SamplerCache &getSamplerCache() { return mSamplerCache; }
vk::ActiveHandleCounter &getActiveHandleCounts() { return mActiveHandleCounts; } vk::ActiveHandleCounter &getActiveHandleCounts() { return mActiveHandleCounts; }
// Queue commands to worker thread for processing.
// Forwards the task unchanged to mCommandProcessor.queueCommands().
void queueCommands(const vk::CommandProcessorTask &commands)
{
mCommandProcessor.queueCommands(commands);
}
// Block until the command processor has finished all queued work
// (forwards to mCommandProcessor.waitForWorkComplete()).
void waitForWorkerThreadIdle() { mCommandProcessor.waitForWorkComplete(); }
private: private:
angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex); angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex);
void ensureCapsInitialized() const; void ensureCapsInitialized() const;
...@@ -363,6 +374,10 @@ class RendererVk : angle::NonCopyable ...@@ -363,6 +374,10 @@ class RendererVk : angle::NonCopyable
}; };
std::deque<PendingOneOffCommands> mPendingOneOffCommands; std::deque<PendingOneOffCommands> mPendingOneOffCommands;
// Worker Thread
CommandProcessor mCommandProcessor;
std::thread mCommandProcessorThread;
// track whether we initialized (or released) glslang // track whether we initialized (or released) glslang
bool mGlslangInitialized; bool mGlslangInitialized;
......
...@@ -669,6 +669,7 @@ class SecondaryCommandBuffer final : angle::NonCopyable ...@@ -669,6 +669,7 @@ class SecondaryCommandBuffer final : angle::NonCopyable
void initialize(angle::PoolAllocator *allocator) void initialize(angle::PoolAllocator *allocator)
{ {
ASSERT(allocator); ASSERT(allocator);
ASSERT(mCommands.empty());
mAllocator = allocator; mAllocator = allocator;
allocateNewBlock(); allocateNewBlock();
// Set first command to Invalid to start // Set first command to Invalid to start
......
...@@ -63,6 +63,8 @@ constexpr angle::PackedEnumMap<PipelineStage, VkPipelineStageFlagBits> kPipeline ...@@ -63,6 +63,8 @@ constexpr angle::PackedEnumMap<PipelineStage, VkPipelineStageFlagBits> kPipeline
{PipelineStage::BottomOfPipe, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT}, {PipelineStage::BottomOfPipe, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT},
{PipelineStage::Host, VK_PIPELINE_STAGE_HOST_BIT}}; {PipelineStage::Host, VK_PIPELINE_STAGE_HOST_BIT}};
constexpr size_t kDefaultPoolAllocatorPageSize = 16 * 1024;
struct ImageMemoryBarrierData struct ImageMemoryBarrierData
{ {
// The Vk layout corresponding to the ImageLayout key. // The Vk layout corresponding to the ImageLayout key.
...@@ -529,11 +531,12 @@ CommandBufferHelper::~CommandBufferHelper() ...@@ -529,11 +531,12 @@ CommandBufferHelper::~CommandBufferHelper()
mFramebuffer.setHandle(VK_NULL_HANDLE); mFramebuffer.setHandle(VK_NULL_HANDLE);
} }
void CommandBufferHelper::initialize(angle::PoolAllocator *poolAllocator, void CommandBufferHelper::initialize(bool isRenderPassCommandBuffer, bool mergeBarriers)
bool isRenderPassCommandBuffer,
bool mergeBarriers)
{ {
mCommandBuffer.initialize(poolAllocator); mAllocator.initialize(kDefaultPoolAllocatorPageSize, 1);
// Push a scope into the pool allocator so we can easily free and re-init on reset()
mAllocator.push();
mCommandBuffer.initialize(&mAllocator);
mIsRenderPassCommandBuffer = isRenderPassCommandBuffer; mIsRenderPassCommandBuffer = isRenderPassCommandBuffer;
mMergeBarriers = mergeBarriers; mMergeBarriers = mergeBarriers;
} }
...@@ -667,8 +670,8 @@ void CommandBufferHelper::beginTransformFeedback(size_t validBufferCount, ...@@ -667,8 +670,8 @@ void CommandBufferHelper::beginTransformFeedback(size_t validBufferCount,
angle::Result CommandBufferHelper::flushToPrimary(ContextVk *contextVk, angle::Result CommandBufferHelper::flushToPrimary(ContextVk *contextVk,
vk::PrimaryCommandBuffer *primary) vk::PrimaryCommandBuffer *primary)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandBufferHelper::flushToPrimary");
ASSERT(!empty()); ASSERT(!empty());
if (kEnableCommandStreamDiagnostics) if (kEnableCommandStreamDiagnostics)
{ {
addCommandDiagnostics(contextVk); addCommandDiagnostics(contextVk);
...@@ -819,6 +822,8 @@ void CommandBufferHelper::addCommandDiagnostics(ContextVk *contextVk) ...@@ -819,6 +822,8 @@ void CommandBufferHelper::addCommandDiagnostics(ContextVk *contextVk)
void CommandBufferHelper::reset() void CommandBufferHelper::reset()
{ {
mAllocator.pop();
mAllocator.push();
mCommandBuffer.reset(); mCommandBuffer.reset();
if (mIsRenderPassCommandBuffer) if (mIsRenderPassCommandBuffer)
{ {
...@@ -832,6 +837,11 @@ void CommandBufferHelper::reset() ...@@ -832,6 +837,11 @@ void CommandBufferHelper::reset()
ASSERT(mRebindTransformFeedbackBuffers == false); ASSERT(mRebindTransformFeedbackBuffers == false);
} }
// Hand this helper back to contextVk by calling ContextVk::recycleCommandBuffer(this), making
// it available for reuse by that context.
void CommandBufferHelper::releaseToContextQueue(ContextVk *contextVk)
{
contextVk->recycleCommandBuffer(this);
}
void CommandBufferHelper::resumeTransformFeedbackIfStarted() void CommandBufferHelper::resumeTransformFeedbackIfStarted()
{ {
ASSERT(mIsRenderPassCommandBuffer); ASSERT(mIsRenderPassCommandBuffer);
......
...@@ -828,9 +828,7 @@ struct CommandBufferHelper : angle::NonCopyable ...@@ -828,9 +828,7 @@ struct CommandBufferHelper : angle::NonCopyable
~CommandBufferHelper(); ~CommandBufferHelper();
// General Functions (non-renderPass specific) // General Functions (non-renderPass specific)
void initialize(angle::PoolAllocator *poolAllocator, void initialize(bool isRenderPassCommandBuffer, bool mergeBarriers);
bool canHaveRenderPass,
bool mergeBarriers);
void bufferRead(vk::ResourceUseList *resourceUseList, void bufferRead(vk::ResourceUseList *resourceUseList,
VkAccessFlags readAccessType, VkAccessFlags readAccessType,
...@@ -858,8 +856,9 @@ struct CommandBufferHelper : angle::NonCopyable ...@@ -858,8 +856,9 @@ struct CommandBufferHelper : angle::NonCopyable
void executeBarriers(vk::PrimaryCommandBuffer *primary); void executeBarriers(vk::PrimaryCommandBuffer *primary);
bool empty() const { return (!mCommandBuffer.empty() || mRenderPassStarted) ? false : true; } bool empty() const { return (!mCommandBuffer.empty() || mRenderPassStarted) ? false : true; }
void setHasRenderPass(bool hasRenderPass) { mIsRenderPassCommandBuffer = hasRenderPass; }
void reset(); void reset();
void releaseToContextQueue(ContextVk *contextVk);
// RenderPass related functions // RenderPass related functions
bool started() const bool started() const
...@@ -932,6 +931,9 @@ struct CommandBufferHelper : angle::NonCopyable ...@@ -932,6 +931,9 @@ struct CommandBufferHelper : angle::NonCopyable
private: private:
void addCommandDiagnostics(ContextVk *contextVk); void addCommandDiagnostics(ContextVk *contextVk);
// Allocator used by this class. Using a pool allocator per CBH to avoid threading issues
// that occur w/ shared allocator between multiple CBHs.
angle::PoolAllocator mAllocator;
// General state (non-renderPass related) // General state (non-renderPass related)
PipelineBarrierArray mPipelineBarriers; PipelineBarrierArray mPipelineBarriers;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment