Commit ed876984 by Courtney Goeltzenleuchter Committed by Commit Bot

Vulkan: functionally complete worker thread

Working on enhancing worker thread to completely own primary command buffers. This will include not only processing SCBs from main thread into a primary, but also submitting those command buffers to the queue. The CommandProcessor is a vk::Context so it can handle errors in the worker thread. When the main thread submits tasks to the worker thread it also syncs any outstanding errors from the worker. Include asynchronousCommandProcessing feature that will control whether the worker thread task does it's work in parallel or not. If false, we wait for the thread to complete it's work before letting the main thread continue. If true, the thread can execute in parallel with the main thread. Bug: b/154030730 Bug: b/161912801 Change-Id: I00f8f013d6cbb2af12a172c4f7927855db2f0ebf Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2328992 Commit-Queue: Courtney Goeltzenleuchter <courtneygo@google.com> Reviewed-by: 's avatarTim Van Patten <timvp@google.com> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org>
parent 2882e1af
...@@ -343,6 +343,13 @@ struct FeaturesVk : FeatureSetBase ...@@ -343,6 +343,13 @@ struct FeaturesVk : FeatureSetBase
"Enable parallel processing and submission of Vulkan commands in worker thread", &members, "Enable parallel processing and submission of Vulkan commands in worker thread", &members,
"http://anglebug.com/4324"}; "http://anglebug.com/4324"};
// Enable parallel thread execution when enableCommandProcessingThread is enabled.
// Currently off by default.
Feature asynchronousCommandProcessing = {"asynchronous_command_processing",
FeatureCategory::VulkanFeatures,
"Enable/Disable parallel processing of worker thread",
&members, "http://anglebug.com/4324"};
// Whether the VkDevice supports the VK_KHR_shader_float16_int8 extension and has the // Whether the VkDevice supports the VK_KHR_shader_float16_int8 extension and has the
// shaderFloat16 feature. // shaderFloat16 feature.
Feature supportsShaderFloat16 = {"supports_shader_float16", FeatureCategory::VulkanFeatures, Feature supportsShaderFloat16 = {"supports_shader_float16", FeatureCategory::VulkanFeatures,
......
...@@ -8,12 +8,145 @@ ...@@ -8,12 +8,145 @@
// //
#include "libANGLE/renderer/vulkan/CommandProcessor.h" #include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/trace.h" #include "libANGLE/trace.h"
namespace rx namespace rx
{ {
namespace
{
constexpr size_t kInFlightCommandsLimit = 100u;
constexpr bool kOutputVmaStatsString = false;
void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
const vk::PrimaryCommandBuffer &commandBuffer,
const std::vector<VkSemaphore> &waitSemaphores,
std::vector<VkPipelineStageFlags> *waitSemaphoreStageMasks,
const vk::Semaphore *signalSemaphore)
{
// Verify that the submitInfo has been zero'd out.
ASSERT(submitInfo->signalSemaphoreCount == 0);
submitInfo->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo->commandBufferCount = commandBuffer.valid() ? 1 : 0;
submitInfo->pCommandBuffers = commandBuffer.ptr();
if (waitSemaphoreStageMasks->size() < waitSemaphores.size())
{
waitSemaphoreStageMasks->resize(waitSemaphores.size(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
submitInfo->waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size());
submitInfo->pWaitSemaphores = waitSemaphores.data();
submitInfo->pWaitDstStageMask = waitSemaphoreStageMasks->data();
if (signalSemaphore)
{
submitInfo->signalSemaphoreCount = 1;
submitInfo->pSignalSemaphores = signalSemaphore->ptr();
}
else
{
submitInfo->signalSemaphoreCount = 0;
submitInfo->pSignalSemaphores = nullptr;
}
}
} // namespace
namespace vk namespace vk
{ {
void CommandProcessorTask::initTask()
{
mTask = CustomTask::Invalid;
mContextVk = nullptr;
mRenderPass = nullptr;
mCommandBuffer = nullptr;
mSemaphore = nullptr;
mOneOffFence = nullptr;
mOneOffCommandBufferVk = VK_NULL_HANDLE;
}
// CommandProcessorTask implementation
void CommandProcessorTask::initProcessCommands(ContextVk *contextVk,
CommandBufferHelper *commandBuffer,
vk::RenderPass *renderPass)
{
mTask = vk::CustomTask::ProcessCommands;
mContextVk = contextVk;
mCommandBuffer = commandBuffer;
mRenderPass = renderPass;
}
void CommandProcessorTask::initPresent(egl::ContextPriority priority, VkPresentInfoKHR presentInfo)
{
mTask = vk::CustomTask::Present;
mPresentInfo = presentInfo;
mPriority = priority;
}
void CommandProcessorTask::initFinishToSerial(Serial serial)
{
// Note: sometimes the serial is not valid and that's okay, the finish will early exit in the
// TaskProcessor::finishToSerial
mTask = vk::CustomTask::FinishToSerial;
mSerial = serial;
}
void CommandProcessorTask::initFlushAndQueueSubmit(
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
const vk::Semaphore *semaphore,
egl::ContextPriority priority,
vk::GarbageList &&currentGarbage,
vk::ResourceUseList &&currentResources)
{
mTask = vk::CustomTask::FlushAndQueueSubmit;
mWaitSemaphores = std::move(waitSemaphores);
mWaitSemaphoreStageMasks = std::move(waitSemaphoreStageMasks);
mSemaphore = semaphore;
mGarbage = std::move(currentGarbage);
mResourceUseList = std::move(currentResources);
mPriority = priority;
}
void CommandProcessorTask::initOneOffQueueSubmit(VkCommandBuffer oneOffCommandBufferVk,
egl::ContextPriority priority,
const vk::Fence *fence)
{
mTask = vk::CustomTask::OneOffQueueSubmit;
mOneOffCommandBufferVk = oneOffCommandBufferVk;
mOneOffFence = fence;
mPriority = priority;
}
CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs)
{
if (this == &rhs)
{
return *this;
}
mContextVk = rhs.mContextVk;
mRenderPass = rhs.mRenderPass;
mCommandBuffer = rhs.mCommandBuffer;
std::swap(mTask, rhs.mTask);
std::swap(mWaitSemaphores, rhs.mWaitSemaphores);
std::swap(mWaitSemaphoreStageMasks, rhs.mWaitSemaphoreStageMasks);
mSemaphore = rhs.mSemaphore;
mOneOffFence = rhs.mOneOffFence;
std::swap(mGarbage, rhs.mGarbage);
std::swap(mSerial, rhs.mSerial);
std::swap(mPresentInfo, rhs.mPresentInfo);
std::swap(mPriority, rhs.mPriority);
std::swap(mResourceUseList, rhs.mResourceUseList);
mOneOffCommandBufferVk = rhs.mOneOffCommandBufferVk;
// clear rhs now that everything has moved.
rhs.initTask();
return *this;
}
// CommandBatch implementation. // CommandBatch implementation.
CommandBatch::CommandBatch() = default; CommandBatch::CommandBatch() = default;
...@@ -39,67 +172,612 @@ void CommandBatch::destroy(VkDevice device) ...@@ -39,67 +172,612 @@ void CommandBatch::destroy(VkDevice device)
commandPool.destroy(device); commandPool.destroy(device);
fence.reset(device); fence.reset(device);
} }
} // namespace vk
CommandProcessor::CommandProcessor() : mWorkerThreadIdle(true) {} // TaskProcessor implementation.
TaskProcessor::TaskProcessor() = default;
TaskProcessor::~TaskProcessor() = default;
void TaskProcessor::destroy(VkDevice device)
{
mPrimaryCommandPool.destroy(device);
ASSERT(mInFlightCommands.empty() && mGarbageQueue.empty());
}
angle::Result TaskProcessor::init(vk::Context *context, std::thread::id threadId)
{
mThreadId = threadId;
// Initialize the command pool now that we know the queue family index.
ANGLE_TRY(mPrimaryCommandPool.init(context, context->getRenderer()->getQueueFamilyIndex()));
return angle::Result::Continue;
}
angle::Result TaskProcessor::checkCompletedCommands(vk::Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::checkCompletedCommands");
VkDevice device = context->getDevice();
RendererVk *rendererVk = context->getRenderer();
int finishedCount = 0;
for (vk::CommandBatch &batch : mInFlightCommands)
{
VkResult result = batch.fence.get().getStatus(device);
if (result == VK_NOT_READY)
{
break;
}
ANGLE_VK_TRY(context, result);
rendererVk->onCompletedSerial(batch.serial);
rendererVk->resetSharedFence(&batch.fence);
ANGLE_TRACE_EVENT0("gpu.angle", "command buffer recycling");
batch.commandPool.destroy(device);
ANGLE_TRY(releasePrimaryCommandBuffer(context, std::move(batch.primaryCommands)));
++finishedCount;
}
if (finishedCount > 0)
{
auto beginIter = mInFlightCommands.begin();
mInFlightCommands.erase(beginIter, beginIter + finishedCount);
}
Serial lastCompleted = rendererVk->getLastCompletedQueueSerial();
size_t freeIndex = 0;
for (; freeIndex < mGarbageQueue.size(); ++freeIndex)
{
vk::GarbageAndSerial &garbageList = mGarbageQueue[freeIndex];
if (garbageList.getSerial() <= lastCompleted)
{
for (vk::GarbageObject &garbage : garbageList.get())
{
garbage.destroy(rendererVk);
}
}
else
{
break;
}
}
// Remove the entries from the garbage list - they should be ready to go.
if (freeIndex > 0)
{
mGarbageQueue.erase(mGarbageQueue.begin(), mGarbageQueue.begin() + freeIndex);
}
return angle::Result::Continue;
}
angle::Result TaskProcessor::releaseToCommandBatch(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer,
vk::CommandPool *commandPool,
vk::CommandBatch *batch)
{
batch->primaryCommands = std::move(commandBuffer);
if (commandPool->valid())
{
batch->commandPool = std::move(*commandPool);
// Recreate CommandPool
VkCommandPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
poolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
poolInfo.queueFamilyIndex = context->getRenderer()->getQueueFamilyIndex();
ANGLE_VK_TRY(context, commandPool->init(context->getDevice(), poolInfo));
}
return angle::Result::Continue;
}
angle::Result TaskProcessor::allocatePrimaryCommandBuffer(
vk::Context *context,
vk::PrimaryCommandBuffer *commandBufferOut)
{
return mPrimaryCommandPool.allocate(context, commandBufferOut);
}
angle::Result TaskProcessor::releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer)
{
ASSERT(mPrimaryCommandPool.valid());
return mPrimaryCommandPool.collect(context, std::move(commandBuffer));
}
void TaskProcessor::handleDeviceLost(vk::Context *context)
{
VkDevice device = context->getDevice();
for (vk::CommandBatch &batch : mInFlightCommands)
{
// On device loss we need to wait for fence to be signaled before destroying it
VkResult status =
batch.fence.get().wait(device, context->getRenderer()->getMaxFenceWaitTimeNs());
// If the wait times out, it is probably not possible to recover from lost device
ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
// On device lost, here simply destroy the CommandBuffer, it will be fully cleared later by
// CommandPool::destroy
batch.primaryCommands.destroy(device);
batch.commandPool.destroy(device);
batch.fence.reset(device);
}
mInFlightCommands.clear();
}
// If there are any inflight commands worker will look for fence that corresponds to the request
// serial or the last available fence and wait on that fence. Will then do necessary cleanup work.
// This can cause the worker thread to block.
// TODO: https://issuetracker.google.com/issues/170312581 - A more optimal solution might be to do
// the wait in CommandProcessor rather than the worker thread. That would require protecting access
// to mInFlightCommands
angle::Result TaskProcessor::finishToSerial(vk::Context *context, Serial serial)
{
RendererVk *rendererVk = context->getRenderer();
uint64_t timeout = rendererVk->getMaxFenceWaitTimeNs();
if (mInFlightCommands.empty())
{
// No outstanding work, nothing to wait for.
return angle::Result::Continue;
}
// Find the first batch with serial equal to or bigger than given serial (note that
// the batch serials are unique, otherwise upper-bound would have been necessary).
size_t batchIndex = mInFlightCommands.size() - 1;
for (size_t i = 0; i < mInFlightCommands.size(); ++i)
{
if (mInFlightCommands[i].serial >= serial)
{
batchIndex = i;
break;
}
}
const vk::CommandBatch &batch = mInFlightCommands[batchIndex];
// Wait for it finish
VkDevice device = context->getDevice();
ANGLE_VK_TRY(context, batch.fence.get().wait(device, timeout));
// Clean up finished batches.
return checkCompletedCommands(context);
}
VkResult TaskProcessor::present(VkQueue queue, const VkPresentInfoKHR &presentInfo)
{
ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR");
return vkQueuePresentKHR(queue, &presentInfo);
}
angle::Result TaskProcessor::submitFrame(vk::Context *context,
VkQueue queue,
const VkSubmitInfo &submitInfo,
const vk::Shared<vk::Fence> &sharedFence,
vk::GarbageList *currentGarbage,
vk::CommandPool *commandPool,
vk::PrimaryCommandBuffer &&commandBuffer,
const Serial &queueSerial)
{
ASSERT(std::this_thread::get_id() == mThreadId);
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::submitFrame");
VkDevice device = context->getDevice();
vk::DeviceScoped<vk::CommandBatch> scopedBatch(device);
vk::CommandBatch &batch = scopedBatch.get();
batch.fence.copy(device, sharedFence);
batch.serial = queueSerial;
ANGLE_TRY(queueSubmit(context, queue, submitInfo, &batch.fence.get()));
if (!currentGarbage->empty())
{
mGarbageQueue.emplace_back(std::move(*currentGarbage), queueSerial);
}
// Store the primary CommandBuffer and command pool used for secondary CommandBuffers
// in the in-flight list.
ANGLE_TRY(releaseToCommandBatch(context, std::move(commandBuffer), commandPool, &batch));
mInFlightCommands.emplace_back(scopedBatch.release());
ANGLE_TRY(checkCompletedCommands(context));
// CPU should be throttled to avoid mInFlightCommands from growing too fast. Important for
// off-screen scenarios.
while (mInFlightCommands.size() > kInFlightCommandsLimit)
{
ANGLE_TRY(finishToSerial(context, mInFlightCommands[0].serial));
}
return angle::Result::Continue;
}
void CommandProcessor::queueCommands(const vk::CommandProcessorTask &commands) vk::Shared<vk::Fence> TaskProcessor::getLastSubmittedFenceWithLock(VkDevice device) const
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommands"); vk::Shared<vk::Fence> fence;
// Note: this must be called when the work queue is empty and while holding mWorkerMutex to
// ensure that worker isn't touching mInFlightCommands
if (!mInFlightCommands.empty())
{
fence.copy(device, mInFlightCommands.back().fence);
}
return fence;
}
angle::Result TaskProcessor::queueSubmit(vk::Context *context,
VkQueue queue,
const VkSubmitInfo &submitInfo,
const vk::Fence *fence)
{
ASSERT(std::this_thread::get_id() == mThreadId);
if (kOutputVmaStatsString)
{
context->getRenderer()->outputVmaStatString();
}
// Don't need a QueueMutex since all queue accesses are serialized through the worker.
VkFence handle = fence ? fence->getHandle() : VK_NULL_HANDLE;
ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, handle));
return angle::Result::Continue;
}
void CommandProcessor::handleError(VkResult errorCode,
const char *file,
const char *function,
unsigned int line)
{
ASSERT(errorCode != VK_SUCCESS);
std::stringstream errorStream;
errorStream << "Internal Vulkan error (" << errorCode << "): " << VulkanResultString(errorCode)
<< ".";
if (errorCode == VK_ERROR_DEVICE_LOST)
{
WARN() << errorStream.str();
handleDeviceLost();
}
std::lock_guard<std::mutex> queueLock(mErrorMutex);
vk::Error error = {errorCode, file, function, line};
mErrors.emplace(error);
}
CommandProcessor::CommandProcessor(RendererVk *renderer)
: vk::Context(renderer),
mWorkerThreadIdle(false),
mCommandProcessorLastSubmittedSerial(mQueueSerialFactory.generate()),
mCommandProcessorCurrentQueueSerial(mQueueSerialFactory.generate())
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
while (!mErrors.empty())
{
mErrors.pop();
}
}
CommandProcessor::~CommandProcessor() = default;
bool CommandProcessor::isRobustResourceInitEnabled() const
{
// Unused for worker thread, just return false.
return false;
}
vk::Error CommandProcessor::getAndClearPendingError()
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
vk::Error tmpError({VK_SUCCESS, nullptr, nullptr, 0});
if (!mErrors.empty())
{
tmpError = mErrors.front();
mErrors.pop();
}
return tmpError;
}
void CommandProcessor::queueCommand(vk::Context *context, vk::CommandProcessorTask *task)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand");
{
// Grab the worker mutex so that we put things on the queue in the same order as we give out
// serials.
std::lock_guard<std::mutex> queueLock(mWorkerMutex); std::lock_guard<std::mutex> queueLock(mWorkerMutex);
ASSERT(commands.commandBuffer == nullptr || !commands.commandBuffer->empty());
mCommandsQueue.push(commands); if (task->getTaskCommand() == vk::CustomTask::FlushAndQueueSubmit ||
task->getTaskCommand() == vk::CustomTask::OneOffQueueSubmit)
{
std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
// Flush submits work, so give it the current serial and generate a new one.
Serial queueSerial = mCommandProcessorCurrentQueueSerial;
task->setQueueSerial(queueSerial);
mCommandProcessorLastSubmittedSerial = mCommandProcessorCurrentQueueSerial;
mCommandProcessorCurrentQueueSerial = mQueueSerialFactory.generate();
task->getResourceUseList().releaseResourceUsesAndUpdateSerials(queueSerial);
}
mTasks.emplace(std::move(*task));
mWorkAvailableCondition.notify_one(); mWorkAvailableCondition.notify_one();
}
if (getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
return;
}
// parallel task processing disabled so wait for work to complete.
waitForWorkComplete(context);
}
void CommandProcessor::processTasks()
{
while (true)
{
bool exitThread = false;
angle::Result result = processTasksImpl(&exitThread);
if (exitThread)
{
// We are doing a controlled exit of the thread, break out of the while loop.
break;
}
if (result != angle::Result::Continue)
{
// TODO: https://issuetracker.google.com/issues/170311829 - follow-up on error handling
// ContextVk::commandProcessorSyncErrorsAndQueueCommand and WindowSurfaceVk::destroy
// do error processing, is anything required here? Don't think so, mostly need to
// continue the worker thread until it's been told to exit.
UNREACHABLE();
}
}
} }
void CommandProcessor::processCommandProcessorTasks() angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
{ {
// Initialization prior to work thread loop
ANGLE_TRY(mTaskProcessor.init(this, std::this_thread::get_id()));
// Allocate and begin primary command buffer
ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(this, &mPrimaryCommandBuffer));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(this, mPrimaryCommandBuffer.begin(beginInfo));
while (true) while (true)
{ {
std::unique_lock<std::mutex> lock(mWorkerMutex); std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.notify_one(); if (mTasks.empty())
{
mWorkerThreadIdle = true; mWorkerThreadIdle = true;
mWorkerIdleCondition.notify_all();
// Only wake if notified and command queue is not empty // Only wake if notified and command queue is not empty
mWorkAvailableCondition.wait(lock, [this] { return !mCommandsQueue.empty(); }); mWorkAvailableCondition.wait(lock, [this] { return !mTasks.empty(); });
}
mWorkerThreadIdle = false; mWorkerThreadIdle = false;
vk::CommandProcessorTask task = mCommandsQueue.front(); vk::CommandProcessorTask task(std::move(mTasks.front()));
mCommandsQueue.pop(); mTasks.pop();
lock.unlock(); lock.unlock();
// Either both ptrs should be null or non-null
ASSERT((task.commandBuffer != nullptr && task.contextVk != nullptr) || switch (task.getTaskCommand())
(task.commandBuffer == nullptr && task.contextVk == nullptr)); {
// A work block with null ptrs signals worker thread to exit case vk::CustomTask::Exit:
if (task.commandBuffer == nullptr && task.contextVk == nullptr) {
ANGLE_TRY(mTaskProcessor.finishToSerial(this, Serial::Infinite()));
*exitThread = true;
// Shutting down so cleanup
mTaskProcessor.destroy(mRenderer->getDevice());
mCommandPool.destroy(mRenderer->getDevice());
mPrimaryCommandBuffer.destroy(mRenderer->getDevice());
mWorkerThreadIdle = true;
mWorkerIdleCondition.notify_one();
return angle::Result::Continue;
}
default:
ANGLE_TRY(processTask(&task));
break;
}
}
UNREACHABLE();
return angle::Result::Stop;
}
angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
{
switch (task->getTaskCommand())
{ {
case vk::CustomTask::FlushAndQueueSubmit:
{
// End command buffer
ANGLE_VK_TRY(this, mPrimaryCommandBuffer.end());
// 1. Create submitInfo
VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, mPrimaryCommandBuffer, task->getWaitSemaphores(),
&task->getWaitSemaphoreStageMasks(), task->getSemaphore());
// 2. Get shared submit fence. It's possible there are other users of this fence that
// must wait for the work to be submitted before waiting on the fence. Reset the fence
// immediately so we are sure to get a fresh one next time.
vk::Shared<vk::Fence> fence;
ANGLE_TRY(mRenderer->getNextSubmitFence(&fence, true));
// 3. Call submitFrame()
ANGLE_TRY(mTaskProcessor.submitFrame(
this, getRenderer()->getVkQueue(task->getPriority()), submitInfo, fence,
&task->getGarbage(), &mCommandPool, std::move(mPrimaryCommandBuffer),
task->getQueueSerial()));
// 4. Allocate & begin new primary command buffer
ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(this, &mPrimaryCommandBuffer));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(this, mPrimaryCommandBuffer.begin(beginInfo));
// Free this local reference
getRenderer()->resetSharedFence(&fence);
ASSERT(task->getGarbage().empty());
break; break;
} }
case vk::CustomTask::OneOffQueueSubmit:
{
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
if (task->getOneOffCommandBufferVk() != VK_NULL_HANDLE)
{
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &task->getOneOffCommandBufferVk();
}
ASSERT(!task.commandBuffer->empty()); // TODO: https://issuetracker.google.com/issues/170328907 - vkQueueSubmit should be
// TODO: Will need some way to synchronize error reporting between threads // owned by TaskProcessor to ensure proper synchronization
(void)(task.commandBuffer->flushToPrimary(task.contextVk, task.primaryCB)); ANGLE_TRY(mTaskProcessor.queueSubmit(this,
ASSERT(task.commandBuffer->empty()); getRenderer()->getVkQueue(task->getPriority()),
task.commandBuffer->releaseToContextQueue(task.contextVk); submitInfo, task->getOneOffFence()));
ANGLE_TRY(mTaskProcessor.checkCompletedCommands(this));
break;
} }
case vk::CustomTask::FinishToSerial:
{
ANGLE_TRY(mTaskProcessor.finishToSerial(this, task->getQueueSerial()));
break;
}
case vk::CustomTask::Present:
{
VkResult result = mTaskProcessor.present(getRenderer()->getVkQueue(task->getPriority()),
task->getPresentInfo());
if (ANGLE_UNLIKELY(result != VK_SUCCESS))
{
// Save the error so that we can handle it (e.g. VK_OUT_OF_DATE)
// Don't leave processing loop, don't consider errors from present to be fatal.
// TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to
// properly parallelize present
handleError(result, __FILE__, __FUNCTION__, __LINE__);
}
break;
}
case vk::CustomTask::ProcessCommands:
{
ASSERT(!task->getCommandBuffer()->empty());
ANGLE_TRY(task->getCommandBuffer()->flushToPrimary(
getRenderer()->getFeatures(), &mPrimaryCommandBuffer, task->getRenderPass()));
ASSERT(task->getCommandBuffer()->empty());
task->getCommandBuffer()->releaseToContextQueue(task->getContextVk());
break;
}
default:
UNREACHABLE();
break;
}
return angle::Result::Continue;
} }
void CommandProcessor::waitForWorkComplete() void CommandProcessor::waitForWorkComplete(vk::Context *context)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkerThreadIdle"); ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkerThreadIdle");
std::unique_lock<std::mutex> lock(mWorkerMutex); std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
[this] { return (mCommandsQueue.empty() && mWorkerThreadIdle); });
// Worker thread is idle and command queue is empty so good to continue // Worker thread is idle and command queue is empty so good to continue
lock.unlock();
if (!context)
{
return;
}
// Sync any errors to the context
while (hasPendingError())
{
vk::Error workerError = getAndClearPendingError();
if (workerError.mErrorCode != VK_SUCCESS)
{
context->handleError(workerError.mErrorCode, workerError.mFile, workerError.mFunction,
workerError.mLine);
}
}
} }
void CommandProcessor::shutdown(std::thread *commandProcessorThread) void CommandProcessor::shutdown(std::thread *commandProcessorThread)
{ {
waitForWorkComplete(); vk::CommandProcessorTask endTask;
const vk::CommandProcessorTask endTask = vk::kEndCommandProcessorThread; endTask.initTask(vk::CustomTask::Exit);
queueCommands(endTask); queueCommand(nullptr, &endTask);
waitForWorkComplete(nullptr);
if (commandProcessorThread->joinable()) if (commandProcessorThread->joinable())
{ {
commandProcessorThread->join(); commandProcessorThread->join();
} }
} }
// Return the fence for the last submit. This may mean waiting on the worker to process tasks to
// actually get to the last submit
vk::Shared<vk::Fence> CommandProcessor::getLastSubmittedFence() const
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::getLastSubmittedFence");
std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
// Worker thread is idle and command queue is empty so good to continue
return mTaskProcessor.getLastSubmittedFenceWithLock(getDevice());
}
Serial CommandProcessor::getLastSubmittedSerial()
{
std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
return mCommandProcessorLastSubmittedSerial;
}
Serial CommandProcessor::getCurrentQueueSerial()
{
std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
return mCommandProcessorCurrentQueueSerial;
}
// Wait until all commands up to and including serial have been processed
void CommandProcessor::finishToSerial(vk::Context *context, Serial serial)
{
vk::CommandProcessorTask finishToSerial;
finishToSerial.initFinishToSerial(serial);
queueCommand(context, &finishToSerial);
// Wait until the worker is idle. At that point we know that the finishToSerial command has
// completed executing, including any associated state cleanup.
waitForWorkComplete(context);
}
void CommandProcessor::handleDeviceLost()
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost");
std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
// Worker thread is idle and command queue is empty so good to continue
mTaskProcessor.handleDeviceLost(this);
}
void CommandProcessor::finishAllWork(vk::Context *context)
{
// Wait for GPU work to finish
finishToSerial(context, Serial::Infinite());
}
} // namespace vk
} // namespace rx } // namespace rx
...@@ -17,27 +17,120 @@ ...@@ -17,27 +17,120 @@
#include <thread> #include <thread>
#include "common/vulkan/vk_headers.h" #include "common/vulkan/vk_headers.h"
#include "libANGLE/renderer/vulkan/PersistentCommandPool.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h" #include "libANGLE/renderer/vulkan/vk_helpers.h"
namespace rx namespace rx
{ {
class RendererVk;
class CommandProcessor;
namespace vk namespace vk
{ {
// CommandProcessorTask is used to queue a task to the worker thread when // CommandProcessorTask is used to queue a task to the worker thread when
// enableCommandProcessingThread feature is true. // enableCommandProcessingThread feature is true.
// The typical task includes pointers in all values and the worker thread will // Issuing the CustomTask::Exit command will cause the worker thread to clean up it's resources and
// process the SecondaryCommandBuffer commands in cbh into the primaryCB. // shut down. This command is sent when the renderer instance shuts down. Custom tasks are:
// There is a special task in which all of the pointers are null that will trigger
// the worker thread to exit, and is sent when the renderer instance shuts down. enum CustomTask
struct CommandProcessorTask
{ {
ContextVk *contextVk; Invalid = 0,
// TODO: b/153666475 Removed primaryCB in threading phase2. // Process SecondaryCommandBuffer commands into the primary CommandBuffer.
vk::PrimaryCommandBuffer *primaryCB; ProcessCommands,
CommandBufferHelper *commandBuffer; // End the current command buffer and submit commands to the queue
FlushAndQueueSubmit,
// Submit custom command buffer, excludes some state management
OneOffQueueSubmit,
// Finish queue commands up to given serial value, process garbage
FinishToSerial,
// Execute QueuePresent
Present,
// Exit the command processor thread
Exit,
}; };
static const CommandProcessorTask kEndCommandProcessorThread = {nullptr, nullptr, nullptr}; class CommandProcessorTask
{
public:
CommandProcessorTask() { initTask(); }
void initTask();
void initTask(CustomTask command) { mTask = command; }
void initProcessCommands(ContextVk *contextVk,
CommandBufferHelper *commandBuffer,
vk::RenderPass *renderPass);
void initPresent(egl::ContextPriority priority, VkPresentInfoKHR presentInfo);
void initFinishToSerial(Serial serial);
void initFlushAndQueueSubmit(std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
const vk::Semaphore *semaphore,
egl::ContextPriority priority,
vk::GarbageList &&currentGarbage,
vk::ResourceUseList &&currentResources);
void initOneOffQueueSubmit(VkCommandBuffer oneOffCommandBufferVk,
egl::ContextPriority priority,
const vk::Fence *fence);
CommandProcessorTask &operator=(CommandProcessorTask &&rhs);
CommandProcessorTask(CommandProcessorTask &&other) : CommandProcessorTask()
{
*this = std::move(other);
}
void setQueueSerial(Serial serial) { mSerial = serial; }
Serial getQueueSerial() const { return mSerial; }
vk::ResourceUseList &getResourceUseList() { return mResourceUseList; }
vk::CustomTask getTaskCommand() { return mTask; }
std::vector<VkSemaphore> &getWaitSemaphores() { return mWaitSemaphores; }
std::vector<VkPipelineStageFlags> &getWaitSemaphoreStageMasks()
{
return mWaitSemaphoreStageMasks;
}
const vk::Semaphore *getSemaphore() { return mSemaphore; }
vk::GarbageList &getGarbage() { return mGarbage; }
egl::ContextPriority getPriority() const { return mPriority; }
const VkCommandBuffer &getOneOffCommandBufferVk() const { return mOneOffCommandBufferVk; }
const vk::Fence *getOneOffFence() { return mOneOffFence; }
const VkPresentInfoKHR &getPresentInfo() const { return mPresentInfo; }
vk::RenderPass *getRenderPass() const { return mRenderPass; }
CommandBufferHelper *getCommandBuffer() const { return mCommandBuffer; }
ContextVk *getContextVk() const { return mContextVk; }
private:
CustomTask mTask;
// ProcessCommands
ContextVk *mContextVk;
vk::RenderPass *mRenderPass;
CommandBufferHelper *mCommandBuffer;
// Flush data
std::vector<VkSemaphore> mWaitSemaphores;
std::vector<VkPipelineStageFlags> mWaitSemaphoreStageMasks;
const vk::Semaphore *mSemaphore;
vk::GarbageList mGarbage;
vk::ResourceUseList mResourceUseList;
// FinishToSerial & Flush command data
Serial mSerial;
// Present command data
VkPresentInfoKHR mPresentInfo;
// Used by OneOffQueueSubmit
VkCommandBuffer mOneOffCommandBufferVk;
const vk::Fence *mOneOffFence;
// Flush, Present & QueueWaitIdle data
egl::ContextPriority mPriority;
};
struct CommandBatch final : angle::NonCopyable struct CommandBatch final : angle::NonCopyable
{ {
...@@ -54,37 +147,139 @@ struct CommandBatch final : angle::NonCopyable ...@@ -54,37 +147,139 @@ struct CommandBatch final : angle::NonCopyable
vk::Shared<vk::Fence> fence; vk::Shared<vk::Fence> fence;
Serial serial; Serial serial;
}; };
} // namespace vk
class CommandProcessor : angle::NonCopyable class TaskProcessor : angle::NonCopyable
{
public:
TaskProcessor();
~TaskProcessor();
angle::Result init(vk::Context *context, std::thread::id threadId);
void destroy(VkDevice device);
angle::Result allocatePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer *commandBufferOut);
angle::Result releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer);
void clearAllGarbage(vk::Context *context);
angle::Result finishToSerial(vk::Context *context, Serial serial);
VkResult present(VkQueue queue, const VkPresentInfoKHR &presentInfo);
angle::Result submitFrame(vk::Context *context,
VkQueue queue,
const VkSubmitInfo &submitInfo,
const vk::Shared<vk::Fence> &sharedFence,
vk::GarbageList *currentGarbage,
vk::CommandPool *commandPool,
vk::PrimaryCommandBuffer &&commandBuffer,
const Serial &queueSerial);
angle::Result queueSubmit(vk::Context *context,
VkQueue queue,
const VkSubmitInfo &submitInfo,
const vk::Fence *fence);
vk::Shared<vk::Fence> getLastSubmittedFenceWithLock(VkDevice device) const;
// Check to see which batches have finished completion (forward progress for
// mLastCompletedQueueSerial, for example for when the application busy waits on a query
// result). It would be nice if we didn't have to expose this for QueryVk::getResult.
angle::Result checkCompletedCommands(vk::Context *context);
void handleDeviceLost(vk::Context *context);
private:
angle::Result releaseToCommandBatch(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer,
vk::CommandPool *commandPool,
vk::CommandBatch *batch);
vk::GarbageQueue mGarbageQueue;
std::vector<vk::CommandBatch> mInFlightCommands;
// Keeps a free list of reusable primary command buffers.
vk::PersistentCommandPool mPrimaryCommandPool;
std::thread::id mThreadId;
};
class CommandProcessor : public vk::Context
{ {
public: public:
CommandProcessor(); CommandProcessor(RendererVk *renderer);
~CommandProcessor() = default; ~CommandProcessor() override;
// Main worker loop that should be launched in its own thread. The void handleError(VkResult result,
// loop waits for work to be submitted from a separate thread. const char *file,
void processCommandProcessorTasks(); const char *function,
// Called asynchronously from workLoop() thread to queue work that is unsigned int line) override;
// then processed by the workLoop() thread
void queueCommands(const vk::CommandProcessorTask &commands); bool isRobustResourceInitEnabled() const override;
// Used by separate thread to wait for worker thread to complete all
// outstanding work. // Entry point for command processor thread, calls processTasksImpl to do the
void waitForWorkComplete(); // work. called by RendererVk::initialization on main thread
// Stop the command processor loop void processTasks();
// Called asynchronously from main thread to queue work that is then processed by the worker
// thread
void queueCommand(vk::Context *context, vk::CommandProcessorTask *task);
// Used by main thread to wait for worker thread to complete all outstanding work.
void waitForWorkComplete(vk::Context *context);
Serial getCurrentQueueSerial();
Serial getLastSubmittedSerial();
// Wait until desired serial has been processed.
void finishToSerial(vk::Context *context, Serial serial);
vk::Shared<vk::Fence> getLastSubmittedFence() const;
void handleDeviceLost();
bool hasPendingError() const
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
return !mErrors.empty();
}
vk::Error getAndClearPendingError();
// Stop the command processor thread
void shutdown(std::thread *commandProcessorThread); void shutdown(std::thread *commandProcessorThread);
void finishAllWork(vk::Context *context);
private: private:
std::queue<vk::CommandProcessorTask> mCommandsQueue; // Command processor thread, called by processTasks. The loop waits for work to
std::mutex mWorkerMutex; // be submitted from a separate thread.
angle::Result processTasksImpl(bool *exitThread);
// Command processor thread, process a task
angle::Result processTask(vk::CommandProcessorTask *task);
std::queue<vk::CommandProcessorTask> mTasks;
mutable std::mutex mWorkerMutex;
// Signal worker thread when work is available // Signal worker thread when work is available
std::condition_variable mWorkAvailableCondition; std::condition_variable mWorkAvailableCondition;
// Signal main thread when all work completed // Signal main thread when all work completed
std::condition_variable mWorkerIdleCondition; mutable std::condition_variable mWorkerIdleCondition;
// Track worker thread Idle state for assertion purposes // Track worker thread Idle state for assertion purposes
bool mWorkerThreadIdle; bool mWorkerThreadIdle;
// Command pool to allocate processor thread primary command buffers from
vk::CommandPool mCommandPool;
vk::PrimaryCommandBuffer mPrimaryCommandBuffer;
TaskProcessor mTaskProcessor;
AtomicSerialFactory mQueueSerialFactory;
std::mutex mCommandProcessorQueueSerialMutex;
Serial mCommandProcessorLastSubmittedSerial;
Serial mCommandProcessorCurrentQueueSerial;
mutable std::mutex mErrorMutex;
std::queue<vk::Error> mErrors;
}; };
} // namespace vk
} // namespace rx } // namespace rx
#endif // LIBANGLE_RENDERER_VULKAN_COMMAND_PROCESSOR_H_ #endif // LIBANGLE_RENDERER_VULKAN_COMMAND_PROCESSOR_H_
...@@ -480,7 +480,6 @@ void CommandQueue::clearAllGarbage(RendererVk *renderer) ...@@ -480,7 +480,6 @@ void CommandQueue::clearAllGarbage(RendererVk *renderer)
} }
angle::Result CommandQueue::allocatePrimaryCommandBuffer(vk::Context *context, angle::Result CommandQueue::allocatePrimaryCommandBuffer(vk::Context *context,
const vk::CommandPool &commandPool,
vk::PrimaryCommandBuffer *commandBufferOut) vk::PrimaryCommandBuffer *commandBufferOut)
{ {
return mPrimaryCommandPool.allocate(context, commandBufferOut); return mPrimaryCommandPool.allocate(context, commandBufferOut);
...@@ -523,6 +522,8 @@ bool CommandQueue::hasInFlightCommands() const ...@@ -523,6 +522,8 @@ bool CommandQueue::hasInFlightCommands() const
angle::Result CommandQueue::finishToSerial(vk::Context *context, Serial serial, uint64_t timeout) angle::Result CommandQueue::finishToSerial(vk::Context *context, Serial serial, uint64_t timeout)
{ {
ASSERT(!context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled);
if (mInFlightCommands.empty()) if (mInFlightCommands.empty())
{ {
return angle::Result::Continue; return angle::Result::Continue;
...@@ -607,6 +608,8 @@ angle::Result CommandQueue::submitFrame(vk::Context *context, ...@@ -607,6 +608,8 @@ angle::Result CommandQueue::submitFrame(vk::Context *context,
vk::Shared<vk::Fence> CommandQueue::getLastSubmittedFence(const vk::Context *context) const vk::Shared<vk::Fence> CommandQueue::getLastSubmittedFence(const vk::Context *context) const
{ {
ASSERT(!context->getRenderer()->getFeatures().enableCommandProcessingThread.enabled);
vk::Shared<vk::Fence> fence; vk::Shared<vk::Fence> fence;
if (!mInFlightCommands.empty()) if (!mInFlightCommands.empty())
{ {
...@@ -958,7 +961,7 @@ angle::Result ContextVk::initialize() ...@@ -958,7 +961,7 @@ angle::Result ContextVk::initialize()
angle::Result ContextVk::startPrimaryCommandBuffer() angle::Result ContextVk::startPrimaryCommandBuffer()
{ {
ANGLE_TRY(mCommandQueue.allocatePrimaryCommandBuffer(this, mCommandPool, &mPrimaryCommands)); ANGLE_TRY(mCommandQueue.allocatePrimaryCommandBuffer(this, &mPrimaryCommands));
VkCommandBufferBeginInfo beginInfo = {}; VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
...@@ -1407,6 +1410,8 @@ angle::Result ContextVk::handleDirtyGraphicsPipeline(const gl::Context *context, ...@@ -1407,6 +1410,8 @@ angle::Result ContextVk::handleDirtyGraphicsPipeline(const gl::Context *context,
commandBuffer->bindGraphicsPipeline(mCurrentGraphicsPipeline->getPipeline()); commandBuffer->bindGraphicsPipeline(mCurrentGraphicsPipeline->getPipeline());
// Update the queue serial for the pipeline object. // Update the queue serial for the pipeline object.
ASSERT(mCurrentGraphicsPipeline && mCurrentGraphicsPipeline->valid()); ASSERT(mCurrentGraphicsPipeline && mCurrentGraphicsPipeline->valid());
// TODO: https://issuetracker.google.com/issues/169788986: Need to change this so that we get
// the actual serial used when this work is submitted.
mCurrentGraphicsPipeline->updateSerial(getCurrentQueueSerial()); mCurrentGraphicsPipeline->updateSerial(getCurrentQueueSerial());
return angle::Result::Continue; return angle::Result::Continue;
} }
...@@ -1421,6 +1426,8 @@ angle::Result ContextVk::handleDirtyComputePipeline(const gl::Context *context, ...@@ -1421,6 +1426,8 @@ angle::Result ContextVk::handleDirtyComputePipeline(const gl::Context *context,
} }
commandBuffer->bindComputePipeline(mCurrentComputePipeline->get()); commandBuffer->bindComputePipeline(mCurrentComputePipeline->get());
// TODO: https://issuetracker.google.com/issues/169788986: Need to change this so that we get
// the actual serial used when this work is submitted.
mCurrentComputePipeline->updateSerial(getCurrentQueueSerial()); mCurrentComputePipeline->updateSerial(getCurrentQueueSerial());
return angle::Result::Continue; return angle::Result::Continue;
...@@ -1750,10 +1757,30 @@ void ContextVk::addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffe ...@@ -1750,10 +1757,30 @@ void ContextVk::addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffe
} }
} }
void ContextVk::commandProcessorSyncErrors()
{
while (mRenderer->hasPendingError())
{
vk::Error error = mRenderer->getAndClearPendingError();
if (error.mErrorCode != VK_SUCCESS)
{
handleError(error.mErrorCode, error.mFile, error.mFunction, error.mLine);
}
}
}
void ContextVk::commandProcessorSyncErrorsAndQueueCommand(vk::CommandProcessorTask *command)
{
commandProcessorSyncErrors();
mRenderer->queueCommand(this, command);
}
angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo, angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo,
vk::ResourceUseList *resourceList, vk::ResourceUseList *resourceList,
vk::PrimaryCommandBuffer &&commandBuffer) vk::PrimaryCommandBuffer &&commandBuffer)
{ {
ASSERT(!getRenderer()->getFeatures().enableCommandProcessingThread.enabled);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics) if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{ {
dumpCommandStreamDiagnostics(); dumpCommandStreamDiagnostics();
...@@ -1932,6 +1959,8 @@ angle::Result ContextVk::synchronizeCpuGpuTime() ...@@ -1932,6 +1959,8 @@ angle::Result ContextVk::synchronizeCpuGpuTime()
double TeS = platform->monotonicallyIncreasingTime(platform); double TeS = platform->monotonicallyIncreasingTime(platform);
// Get the query results // Get the query results
// Note: This LastSubmittedQueueSerial may include more work then was submitted above if
// another thread had submitted work.
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial())); ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
uint64_t gpuTimestampCycles = 0; uint64_t gpuTimestampCycles = 0;
...@@ -2086,13 +2115,22 @@ void ContextVk::flushGpuEvents(double nextSyncGpuTimestampS, double nextSyncCpuT ...@@ -2086,13 +2115,22 @@ void ContextVk::flushGpuEvents(double nextSyncGpuTimestampS, double nextSyncCpuT
void ContextVk::clearAllGarbage() void ContextVk::clearAllGarbage()
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::clearAllGarbage"); ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::finishAllWork");
for (vk::GarbageObject &garbage : mCurrentGarbage) for (vk::GarbageObject &garbage : mCurrentGarbage)
{ {
garbage.destroy(mRenderer); garbage.destroy(mRenderer);
} }
mCurrentGarbage.clear(); mCurrentGarbage.clear();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
// Issue command to CommandProcessor to ensure all work is complete, which will return any
// garbage items as well.
mRenderer->finishAllWork(this);
}
else
{
mCommandQueue.clearAllGarbage(mRenderer); mCommandQueue.clearAllGarbage(mRenderer);
}
} }
void ContextVk::handleDeviceLost() void ContextVk::handleDeviceLost()
...@@ -2100,7 +2138,14 @@ void ContextVk::handleDeviceLost() ...@@ -2100,7 +2138,14 @@ void ContextVk::handleDeviceLost()
mOutsideRenderPassCommands->reset(); mOutsideRenderPassCommands->reset();
mRenderPassCommands->reset(); mRenderPassCommands->reset();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
mRenderer->handleDeviceLost();
}
else
{
mCommandQueue.handleDeviceLost(mRenderer); mCommandQueue.handleDeviceLost(mRenderer);
}
clearAllGarbage(); clearAllGarbage();
mRenderer->notifyDeviceLost(); mRenderer->notifyDeviceLost();
...@@ -3283,6 +3328,10 @@ angle::Result ContextVk::onMakeCurrent(const gl::Context *context) ...@@ -3283,6 +3328,10 @@ angle::Result ContextVk::onMakeCurrent(const gl::Context *context)
angle::Result ContextVk::onUnMakeCurrent(const gl::Context *context) angle::Result ContextVk::onUnMakeCurrent(const gl::Context *context)
{ {
ANGLE_TRY(flushImpl(nullptr)); ANGLE_TRY(flushImpl(nullptr));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
mRenderer->waitForCommandProcessorIdle(this);
}
mCurrentWindowSurface = nullptr; mCurrentWindowSurface = nullptr;
return angle::Result::Continue; return angle::Result::Continue;
} }
...@@ -4286,32 +4335,66 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore) ...@@ -4286,32 +4335,66 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
mDefaultUniformStorage.releaseInFlightBuffersToResourceUseList(this); mDefaultUniformStorage.releaseInFlightBuffersToResourceUseList(this);
mStagingBuffer.releaseInFlightBuffersToResourceUseList(this); mStagingBuffer.releaseInFlightBuffersToResourceUseList(this);
// TODO: https://issuetracker.google.com/issues/170329600 - Verify that
// waitForSwapchainImageIfNecessary makes sense both w/ & w/o threading. I believe they do, but
// want confirmation.
waitForSwapchainImageIfNecessary();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{ {
// Worker thread must complete adding any commands that were just flushed above to the // Some tasks from ContextVk::submitFrame() that run before CommandQueue::submitFrame()
// primary command buffer before we can End the primary command buffer below. gl::RunningGraphWidget *renderPassCount =
mRenderer->waitForWorkerThreadIdle(); mState.getOverlay()->getRunningGraphWidget(gl::WidgetId::VulkanRenderPassCount);
renderPassCount->add(mRenderPassCommands->getAndResetCounter());
renderPassCount->next();
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
dumpCommandStreamDiagnostics();
} }
ANGLE_VK_TRY(this, mPrimaryCommands.end()); // Send a flush command to worker thread that will:
// 1. Create submitInfo
// 2. Call submitFrame()
// 3. Allocate new primary command buffer
vk::CommandProcessorTask flushAndQueueSubmit;
flushAndQueueSubmit.initFlushAndQueueSubmit(
std::move(mWaitSemaphores), std::move(mWaitSemaphoreStageMasks), signalSemaphore,
mContextPriority, std::move(mCurrentGarbage), std::move(mResourceUseList));
waitForSwapchainImageIfNecessary(); commandProcessorSyncErrorsAndQueueCommand(&flushAndQueueSubmit);
// Some tasks from ContextVk::submitFrame() that run after CommandQueue::submitFrame()
onRenderPassFinished();
mComputeDirtyBits |= mNewComputeCommandBufferDirtyBits;
if (mGpuEventsEnabled)
{
ANGLE_TRY(checkCompletedGpuEvents());
}
}
else
{
ANGLE_VK_TRY(this, mPrimaryCommands.end());
VkSubmitInfo submitInfo = {}; VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, mPrimaryCommands, mWaitSemaphores, mWaitSemaphoreStageMasks, InitializeSubmitInfo(&submitInfo, mPrimaryCommands, mWaitSemaphores,
signalSemaphore); mWaitSemaphoreStageMasks, signalSemaphore);
ANGLE_TRY(submitFrame(submitInfo, &mResourceUseList, std::move(mPrimaryCommands))); ANGLE_TRY(submitFrame(submitInfo, &mResourceUseList, std::move(mPrimaryCommands)));
ANGLE_TRY(startPrimaryCommandBuffer()); ANGLE_TRY(startPrimaryCommandBuffer());
mWaitSemaphores.clear();
mWaitSemaphoreStageMasks.clear();
}
mPerfCounters.renderPasses = 0; mPerfCounters.renderPasses = 0;
mPerfCounters.writeDescriptorSets = 0; mPerfCounters.writeDescriptorSets = 0;
mPerfCounters.flushedOutsideRenderPassCommandBuffers = 0; mPerfCounters.flushedOutsideRenderPassCommandBuffers = 0;
mPerfCounters.resolveImageCommands = 0; mPerfCounters.resolveImageCommands = 0;
mWaitSemaphores.clear(); ASSERT(mWaitSemaphores.empty());
mWaitSemaphoreStageMasks.clear(); ASSERT(mWaitSemaphoreStageMasks.empty());
mPerfCounters.primaryBuffers++; mPerfCounters.primaryBuffers++;
...@@ -4331,8 +4414,15 @@ angle::Result ContextVk::finishImpl() ...@@ -4331,8 +4414,15 @@ angle::Result ContextVk::finishImpl()
ANGLE_TRY(flushImpl(nullptr)); ANGLE_TRY(flushImpl(nullptr));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
}
else
{
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial())); ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
ASSERT(!mCommandQueue.hasInFlightCommands()); ASSERT(!mCommandQueue.hasInFlightCommands());
}
clearAllGarbage(); clearAllGarbage();
...@@ -4373,17 +4463,25 @@ bool ContextVk::isSerialInUse(Serial serial) const ...@@ -4373,17 +4463,25 @@ bool ContextVk::isSerialInUse(Serial serial) const
angle::Result ContextVk::checkCompletedCommands() angle::Result ContextVk::checkCompletedCommands()
{ {
ASSERT(!mRenderer->getFeatures().enableCommandProcessingThread.enabled);
return mCommandQueue.checkCompletedCommands(this); return mCommandQueue.checkCompletedCommands(this);
} }
angle::Result ContextVk::finishToSerial(Serial serial) angle::Result ContextVk::finishToSerial(Serial serial)
{ {
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
mRenderer->finishToSerial(this, serial);
return angle::Result::Continue;
}
return mCommandQueue.finishToSerial(this, serial, mRenderer->getMaxFenceWaitTimeNs()); return mCommandQueue.finishToSerial(this, serial, mRenderer->getMaxFenceWaitTimeNs());
} }
angle::Result ContextVk::getCompatibleRenderPass(const vk::RenderPassDesc &desc, angle::Result ContextVk::getCompatibleRenderPass(const vk::RenderPassDesc &desc,
vk::RenderPass **renderPassOut) vk::RenderPass **renderPassOut)
{ {
// Note: Each context has it's own RenderPassCache so no locking needed.
return mRenderPassCache.getCompatibleRenderPass(this, desc, renderPassOut); return mRenderPassCache.getCompatibleRenderPass(this, desc, renderPassOut);
} }
...@@ -4391,6 +4489,7 @@ angle::Result ContextVk::getRenderPassWithOps(const vk::RenderPassDesc &desc, ...@@ -4391,6 +4489,7 @@ angle::Result ContextVk::getRenderPassWithOps(const vk::RenderPassDesc &desc,
const vk::AttachmentOpsArray &ops, const vk::AttachmentOpsArray &ops,
vk::RenderPass **renderPassOut) vk::RenderPass **renderPassOut)
{ {
// Note: Each context has it's own RenderPassCache so no locking needed.
return mRenderPassCache.getRenderPassWithOps(this, desc, ops, renderPassOut); return mRenderPassCache.getRenderPassWithOps(this, desc, ops, renderPassOut);
} }
...@@ -4400,12 +4499,13 @@ angle::Result ContextVk::ensureSubmitFenceInitialized() ...@@ -4400,12 +4499,13 @@ angle::Result ContextVk::ensureSubmitFenceInitialized()
{ {
return angle::Result::Continue; return angle::Result::Continue;
} }
return mRenderer->newSharedFence(this, &mSubmitFence); return mRenderer->newSharedFence(this, &mSubmitFence);
} }
angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut) angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut)
{ {
ASSERT(!getRenderer()->getFeatures().enableCommandProcessingThread.enabled);
ANGLE_TRY(ensureSubmitFenceInitialized()); ANGLE_TRY(ensureSubmitFenceInitialized());
ASSERT(!sharedFenceOut->isReferenced()); ASSERT(!sharedFenceOut->isReferenced());
...@@ -4415,6 +4515,10 @@ angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOu ...@@ -4415,6 +4515,10 @@ angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOu
vk::Shared<vk::Fence> ContextVk::getLastSubmittedFence() const vk::Shared<vk::Fence> ContextVk::getLastSubmittedFence() const
{ {
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
return mRenderer->getLastSubmittedFence();
}
return mCommandQueue.getLastSubmittedFence(this); return mCommandQueue.getLastSubmittedFence(this);
} }
...@@ -4817,17 +4921,26 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass() ...@@ -4817,17 +4921,26 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
mRenderPassCommands->endRenderPass(this); mRenderPassCommands->endRenderPass(this);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
mRenderPassCommands->addCommandDiagnostics(this);
}
vk::RenderPass *renderPass = nullptr;
ANGLE_TRY(mRenderPassCommands->getRenderPassWithOps(this, &renderPass));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{ {
mRenderPassCommands->markClosed(); mRenderPassCommands->markClosed();
vk::CommandProcessorTask task = {this, &mPrimaryCommands, mRenderPassCommands}; vk::CommandProcessorTask flushToPrimary;
flushToPrimary.initProcessCommands(this, mRenderPassCommands, renderPass);
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushInsideRenderPassCommands"); ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushInsideRenderPassCommands");
queueCommandsToWorker(task); commandProcessorSyncErrorsAndQueueCommand(&flushToPrimary);
getNextAvailableCommandBuffer(&mRenderPassCommands, true); getNextAvailableCommandBuffer(&mRenderPassCommands, true);
} }
else else
{ {
ANGLE_TRY(mRenderPassCommands->flushToPrimary(this, &mPrimaryCommands)); ANGLE_TRY(mRenderPassCommands->flushToPrimary(this->getFeatures(), &mPrimaryCommands,
renderPass));
} }
mHasPrimaryCommands = true; mHasPrimaryCommands = true;
...@@ -4958,17 +5071,26 @@ angle::Result ContextVk::flushOutsideRenderPassCommands() ...@@ -4958,17 +5071,26 @@ angle::Result ContextVk::flushOutsideRenderPassCommands()
addOverlayUsedBuffersCount(mOutsideRenderPassCommands); addOverlayUsedBuffersCount(mOutsideRenderPassCommands);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
mOutsideRenderPassCommands->addCommandDiagnostics(this);
}
vk::RenderPass *renderPass = nullptr;
ANGLE_TRY(mOutsideRenderPassCommands->getRenderPassWithOps(this, &renderPass));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{ {
mOutsideRenderPassCommands->markClosed(); mOutsideRenderPassCommands->markClosed();
vk::CommandProcessorTask task = {this, &mPrimaryCommands, mOutsideRenderPassCommands}; vk::CommandProcessorTask flushToPrimary;
flushToPrimary.initProcessCommands(this, mOutsideRenderPassCommands, renderPass);
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushOutsideRenderPassCommands"); ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushOutsideRenderPassCommands");
queueCommandsToWorker(task); commandProcessorSyncErrorsAndQueueCommand(&flushToPrimary);
getNextAvailableCommandBuffer(&mOutsideRenderPassCommands, false); getNextAvailableCommandBuffer(&mOutsideRenderPassCommands, false);
} }
else else
{ {
ANGLE_TRY(mOutsideRenderPassCommands->flushToPrimary(this, &mPrimaryCommands)); ANGLE_TRY(mOutsideRenderPassCommands->flushToPrimary(getFeatures(), &mPrimaryCommands,
renderPass));
} }
mHasPrimaryCommands = true; mHasPrimaryCommands = true;
mPerfCounters.flushedOutsideRenderPassCommandBuffers++; mPerfCounters.flushedOutsideRenderPassCommandBuffers++;
......
...@@ -47,7 +47,6 @@ class CommandQueue final : angle::NonCopyable ...@@ -47,7 +47,6 @@ class CommandQueue final : angle::NonCopyable
bool hasInFlightCommands() const; bool hasInFlightCommands() const;
angle::Result allocatePrimaryCommandBuffer(vk::Context *context, angle::Result allocatePrimaryCommandBuffer(vk::Context *context,
const vk::CommandPool &commandPool,
vk::PrimaryCommandBuffer *commandBufferOut); vk::PrimaryCommandBuffer *commandBufferOut);
angle::Result releasePrimaryCommandBuffer(vk::Context *context, angle::Result releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer); vk::PrimaryCommandBuffer &&commandBuffer);
...@@ -653,11 +652,10 @@ class ContextVk : public ContextImpl, public vk::Context ...@@ -653,11 +652,10 @@ class ContextVk : public ContextImpl, public vk::Context
void updateOverlayOnPresent(); void updateOverlayOnPresent();
void addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffer); void addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffer);
// Submit commands to worker thread for processing // Sync any errors from the command processor
ANGLE_INLINE void queueCommandsToWorker(const vk::CommandProcessorTask &commands) void commandProcessorSyncErrors();
{ // Sync any error from worker thread and queue up next command for processing
mRenderer->queueCommands(commands); void commandProcessorSyncErrorsAndQueueCommand(vk::CommandProcessorTask *command);
}
// When worker thread completes, it releases command buffers back to context queue // When worker thread completes, it releases command buffers back to context queue
void recycleCommandBuffer(vk::CommandBufferHelper *commandBuffer); void recycleCommandBuffer(vk::CommandBufferHelper *commandBuffer);
...@@ -1085,6 +1083,7 @@ class ContextVk : public ContextImpl, public vk::Context ...@@ -1085,6 +1083,7 @@ class ContextVk : public ContextImpl, public vk::Context
// We use a single pool for recording commands. We also keep a free list for pool recycling. // We use a single pool for recording commands. We also keep a free list for pool recycling.
vk::CommandPool mCommandPool; vk::CommandPool mCommandPool;
// TODO: This can be killed once threading is enabled https://issuetracker.google.com/153666475
CommandQueue mCommandQueue; CommandQueue mCommandQueue;
vk::GarbageList mCurrentGarbage; vk::GarbageList mCurrentGarbage;
......
...@@ -180,18 +180,30 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait) ...@@ -180,18 +180,30 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait)
// finite time. // finite time.
// Note regarding time-elapsed: end should have been called after begin, so flushing when end // Note regarding time-elapsed: end should have been called after begin, so flushing when end
// has pending work should flush begin too. // has pending work should flush begin too.
if (mQueryHelper.hasPendingWork(contextVk)) // TODO: https://issuetracker.google.com/169788986 - can't guarantee hasPendingWork() works when
// using threaded worker
if (mQueryHelper.hasPendingWork(contextVk) ||
contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{ {
ANGLE_TRY(contextVk->flushImpl(nullptr)); ANGLE_TRY(contextVk->flushImpl(nullptr));
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
// TODO: https://issuetracker.google.com/170312581 - For now just stalling here
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
ASSERT(!mQueryHelperTimeElapsedBegin.hasPendingWork(contextVk)); ASSERT(!mQueryHelperTimeElapsedBegin.hasPendingWork(contextVk));
ASSERT(!mQueryHelper.hasPendingWork(contextVk)); ASSERT(!mQueryHelper.hasPendingWork(contextVk));
} }
// If the command buffer this query is being written to is still in flight, its reset command if (!contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
// may not have been performed by the GPU yet. To avoid a race condition in this case, wait {
// for the batch to finish first before querying (or return not-ready if not waiting). // If the command buffer this query is being written to is still in flight, its reset
// command may not have been performed by the GPU yet. To avoid a race condition in this
// case, wait for the batch to finish first before querying (or return not-ready if not
// waiting).
ANGLE_TRY(contextVk->checkCompletedCommands()); ANGLE_TRY(contextVk->checkCompletedCommands());
}
if (contextVk->isSerialInUse(mQueryHelper.getStoredQueueSerial())) if (contextVk->isSerialInUse(mQueryHelper.getStoredQueueSerial()))
{ {
if (!wait) if (!wait)
......
...@@ -467,6 +467,7 @@ RendererVk::RendererVk() ...@@ -467,6 +467,7 @@ RendererVk::RendererVk()
mPipelineCacheVkUpdateTimeout(kPipelineCacheVkUpdatePeriod), mPipelineCacheVkUpdateTimeout(kPipelineCacheVkUpdatePeriod),
mPipelineCacheDirty(false), mPipelineCacheDirty(false),
mPipelineCacheInitialized(false), mPipelineCacheInitialized(false),
mCommandProcessor(this),
mGlslangInitialized(false) mGlslangInitialized(false)
{ {
VkFormatProperties invalid = {0, 0, kInvalidFormatFeatureFlags}; VkFormatProperties invalid = {0, 0, kInvalidFormatFeatureFlags};
...@@ -536,6 +537,10 @@ void RendererVk::onDestroy() ...@@ -536,6 +537,10 @@ void RendererVk::onDestroy()
std::lock_guard<std::mutex> lock(mFenceRecyclerMutex); std::lock_guard<std::mutex> lock(mFenceRecyclerMutex);
mFenceRecycler.destroy(mDevice); mFenceRecycler.destroy(mDevice);
} }
{
std::lock_guard<decltype(mNextSubmitFenceMutex)> lock(mNextSubmitFenceMutex);
mNextSubmitFence.reset(mDevice);
}
mPipelineCache.destroy(mDevice); mPipelineCache.destroy(mDevice);
mSamplerCache.destroy(this); mSamplerCache.destroy(this);
...@@ -580,7 +585,7 @@ void RendererVk::notifyDeviceLost() ...@@ -580,7 +585,7 @@ void RendererVk::notifyDeviceLost()
{ {
{ {
std::lock_guard<std::mutex> lock(mQueueSerialMutex); std::lock_guard<std::mutex> lock(mQueueSerialMutex);
mLastCompletedQueueSerial = mLastSubmittedQueueSerial; mLastCompletedQueueSerial = getLastSubmittedQueueSerial();
} }
mDeviceLost = true; mDeviceLost = true;
mDisplay->notifyDeviceLost(); mDisplay->notifyDeviceLost();
...@@ -909,7 +914,8 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk, ...@@ -909,7 +914,8 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk,
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().enableCommandProcessingThread.enabled)
{ {
mCommandProcessorThread = mCommandProcessorThread =
std::thread(&CommandProcessor::processCommandProcessorTasks, &mCommandProcessor); std::thread(&vk::CommandProcessor::processTasks, &mCommandProcessor);
mCommandProcessor.waitForWorkComplete(nullptr);
} }
return angle::Result::Continue; return angle::Result::Continue;
...@@ -1907,6 +1913,9 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev ...@@ -1907,6 +1913,9 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev
// Currently disabled by default: http://anglebug.com/4324 // Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, enableCommandProcessingThread, false); ANGLE_FEATURE_CONDITION(&mFeatures, enableCommandProcessingThread, false);
// Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, asynchronousCommandProcessing, false);
ANGLE_FEATURE_CONDITION(&mFeatures, supportsYUVSamplerConversion, ANGLE_FEATURE_CONDITION(&mFeatures, supportsYUVSamplerConversion,
mSamplerYcbcrConversionFeatures.samplerYcbcrConversion != VK_FALSE); mSamplerYcbcrConversionFeatures.samplerYcbcrConversion != VK_FALSE);
...@@ -2183,15 +2192,8 @@ bool RendererVk::hasBufferFormatFeatureBits(VkFormat format, ...@@ -2183,15 +2192,8 @@ bool RendererVk::hasBufferFormatFeatureBits(VkFormat format,
return hasFormatFeatureBits<&VkFormatProperties::bufferFeatures>(format, featureBits); return hasFormatFeatureBits<&VkFormatProperties::bufferFeatures>(format, featureBits);
} }
angle::Result RendererVk::queueSubmit(vk::Context *context, void RendererVk::outputVmaStatString()
egl::ContextPriority priority,
const VkSubmitInfo &submitInfo,
vk::ResourceUseList *resourceList,
const vk::Fence *fence,
Serial *serialOut)
{ {
if (kOutputVmaStatsString)
{
// Output the VMA stats string // Output the VMA stats string
// This JSON string can be passed to VmaDumpVis.py to generate a visualization of the // This JSON string can be passed to VmaDumpVis.py to generate a visualization of the
// allocations the VMA has performed. // allocations the VMA has performed.
...@@ -2199,16 +2201,22 @@ angle::Result RendererVk::queueSubmit(vk::Context *context, ...@@ -2199,16 +2201,22 @@ angle::Result RendererVk::queueSubmit(vk::Context *context,
mAllocator.buildStatsString(&statsString, true); mAllocator.buildStatsString(&statsString, true);
INFO() << std::endl << statsString << std::endl; INFO() << std::endl << statsString << std::endl;
mAllocator.freeStatsString(statsString); mAllocator.freeStatsString(statsString);
} }
if (getFeatures().enableCommandProcessingThread.enabled) angle::Result RendererVk::queueSubmit(vk::Context *context,
egl::ContextPriority priority,
const VkSubmitInfo &submitInfo,
vk::ResourceUseList *resourceList,
const vk::Fence *fence,
Serial *serialOut)
{
if (kOutputVmaStatsString)
{ {
// For initial threading phase 1 code make sure any outstanding command processing outputVmaStatString();
// is complete.
// TODO: b/153666475 For phase2 investigate if this is required as most submits will take
// place through worker thread except for one-off submits below.
mCommandProcessor.waitForWorkComplete();
} }
ASSERT(!getFeatures().enableCommandProcessingThread.enabled);
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
std::lock_guard<std::mutex> serialLock(mQueueSerialMutex); std::lock_guard<std::mutex> serialLock(mQueueSerialMutex);
...@@ -2235,12 +2243,26 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context, ...@@ -2235,12 +2243,26 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context,
const vk::Fence *fence, const vk::Fence *fence,
Serial *serialOut) Serial *serialOut)
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
vk::CommandProcessorTask oneOffQueueSubmit;
oneOffQueueSubmit.initOneOffQueueSubmit(primary.getHandle(), priority, fence);
queueCommand(context, &oneOffQueueSubmit);
waitForCommandProcessorIdle(context);
*serialOut = getLastSubmittedQueueSerial();
ANGLE_TRY(cleanupGarbage(false));
}
else
{
VkSubmitInfo submitInfo = {}; VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1; submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = primary.ptr(); submitInfo.pCommandBuffers = primary.ptr();
ANGLE_TRY(queueSubmit(context, priority, submitInfo, nullptr, fence, serialOut)); ANGLE_TRY(queueSubmit(context, priority, submitInfo, nullptr, fence, serialOut));
}
mPendingOneOffCommands.push_back({*serialOut, std::move(primary)}); mPendingOneOffCommands.push_back({*serialOut, std::move(primary)});
...@@ -2251,8 +2273,8 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori ...@@ -2251,8 +2273,8 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori
{ {
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().enableCommandProcessingThread.enabled)
{ {
// First make sure command processor is complete when waiting for queue idle. // Wait for all pending commands to get sent before issuing vkQueueWaitIdle
mCommandProcessor.waitForWorkComplete(); waitForCommandProcessorIdle(context);
} }
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
...@@ -2268,8 +2290,8 @@ angle::Result RendererVk::deviceWaitIdle(vk::Context *context) ...@@ -2268,8 +2290,8 @@ angle::Result RendererVk::deviceWaitIdle(vk::Context *context)
{ {
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().enableCommandProcessingThread.enabled)
{ {
// First make sure command processor is complete when waiting for device idle. // Wait for all pending commands to get sent before issuing vkQueueWaitIdle
mCommandProcessor.waitForWorkComplete(); waitForCommandProcessorIdle(context);
} }
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
...@@ -2286,12 +2308,7 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority, ...@@ -2286,12 +2308,7 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority,
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent"); ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent");
if (getFeatures().enableCommandProcessingThread.enabled) ASSERT(!getFeatures().enableCommandProcessingThread.enabled);
{
// First make sure command processor is complete before queue present as
// present may have dependencies on that thread.
mCommandProcessor.waitForWorkComplete();
}
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
...@@ -2329,6 +2346,30 @@ angle::Result RendererVk::newSharedFence(vk::Context *context, ...@@ -2329,6 +2346,30 @@ angle::Result RendererVk::newSharedFence(vk::Context *context,
return angle::Result::Continue; return angle::Result::Continue;
} }
// Return a shared fence to be used for the next submit
// Fence may be shared with a Sync object.
// reset indicates that nextSubmitFence should be reset before returning. This ensures that the next
// request for a submit fence gets a fresh fence.
// TODO: https://issuetracker.google.com/issues/170312581 - move to CommandProcessor as part of
// fence ownership follow-up task.
angle::Result RendererVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut, bool reset)
{
std::lock_guard<decltype(mNextSubmitFenceMutex)> lock(mNextSubmitFenceMutex);
if (!mNextSubmitFence.isReferenced())
{
ANGLE_TRY(newSharedFence(&mCommandProcessor, &mNextSubmitFence));
}
ASSERT(!sharedFenceOut->isReferenced());
sharedFenceOut->copy(getDevice(), mNextSubmitFence);
if (reset)
{
resetSharedFence(&mNextSubmitFence);
}
return angle::Result::Continue;
}
template <VkFormatFeatureFlags VkFormatProperties::*features> template <VkFormatFeatureFlags VkFormatProperties::*features>
VkFormatFeatureFlags RendererVk::getFormatFeatureBits(VkFormat format, VkFormatFeatureFlags RendererVk::getFormatFeatureBits(VkFormat format,
const VkFormatFeatureFlags featureBits) const const VkFormatFeatureFlags featureBits) const
......
...@@ -168,6 +168,7 @@ class RendererVk : angle::NonCopyable ...@@ -168,6 +168,7 @@ class RendererVk : angle::NonCopyable
return mPriorities[priority]; return mPriorities[priority];
} }
// Queue submit that originates from the main thread
angle::Result queueSubmit(vk::Context *context, angle::Result queueSubmit(vk::Context *context,
egl::ContextPriority priority, egl::ContextPriority priority,
const VkSubmitInfo &submitInfo, const VkSubmitInfo &submitInfo,
...@@ -197,6 +198,8 @@ class RendererVk : angle::NonCopyable ...@@ -197,6 +198,8 @@ class RendererVk : angle::NonCopyable
sharedFenceIn->resetAndRecycle(&mFenceRecycler); sharedFenceIn->resetAndRecycle(&mFenceRecycler);
} }
angle::Result getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut, bool reset);
template <typename... ArgsT> template <typename... ArgsT>
void collectGarbageAndReinit(vk::SharedResourceUse *use, ArgsT... garbageIn) void collectGarbageAndReinit(vk::SharedResourceUse *use, ArgsT... garbageIn)
{ {
...@@ -224,6 +227,12 @@ class RendererVk : angle::NonCopyable ...@@ -224,6 +227,12 @@ class RendererVk : angle::NonCopyable
} }
} }
vk::Shared<vk::Fence> getLastSubmittedFence() const
{
return mCommandProcessor.getLastSubmittedFence();
}
void handleDeviceLost() { mCommandProcessor.handleDeviceLost(); }
static constexpr size_t kMaxExtensionNames = 200; static constexpr size_t kMaxExtensionNames = 200;
using ExtensionNameList = angle::FixedVector<const char *, kMaxExtensionNames>; using ExtensionNameList = angle::FixedVector<const char *, kMaxExtensionNames>;
...@@ -241,11 +250,19 @@ class RendererVk : angle::NonCopyable ...@@ -241,11 +250,19 @@ class RendererVk : angle::NonCopyable
ANGLE_INLINE Serial getCurrentQueueSerial() ANGLE_INLINE Serial getCurrentQueueSerial()
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
return mCommandProcessor.getCurrentQueueSerial();
}
std::lock_guard<std::mutex> lock(mQueueSerialMutex); std::lock_guard<std::mutex> lock(mQueueSerialMutex);
return mCurrentQueueSerial; return mCurrentQueueSerial;
} }
ANGLE_INLINE Serial getLastSubmittedQueueSerial() ANGLE_INLINE Serial getLastSubmittedQueueSerial()
{ {
if (getFeatures().enableCommandProcessingThread.enabled)
{
return mCommandProcessor.getLastSubmittedSerial();
}
std::lock_guard<std::mutex> lock(mQueueSerialMutex); std::lock_guard<std::mutex> lock(mQueueSerialMutex);
return mLastSubmittedQueueSerial; return mLastSubmittedQueueSerial;
} }
...@@ -264,11 +281,24 @@ class RendererVk : angle::NonCopyable ...@@ -264,11 +281,24 @@ class RendererVk : angle::NonCopyable
vk::ActiveHandleCounter &getActiveHandleCounts() { return mActiveHandleCounts; } vk::ActiveHandleCounter &getActiveHandleCounts() { return mActiveHandleCounts; }
// Queue commands to worker thread for processing // Queue commands to worker thread for processing
void queueCommands(const vk::CommandProcessorTask &commands) void queueCommand(vk::Context *context, vk::CommandProcessorTask *command)
{ {
mCommandProcessor.queueCommands(commands); mCommandProcessor.queueCommand(context, command);
}
bool hasPendingError() const { return mCommandProcessor.hasPendingError(); }
vk::Error getAndClearPendingError() { return mCommandProcessor.getAndClearPendingError(); }
void waitForCommandProcessorIdle(vk::Context *context)
{
mCommandProcessor.waitForWorkComplete(context);
} }
void waitForWorkerThreadIdle() { mCommandProcessor.waitForWorkComplete(); }
void finishToSerial(vk::Context *context, Serial serial)
{
mCommandProcessor.finishToSerial(context, serial);
}
void finishAllWork(vk::Context *context) { mCommandProcessor.finishAllWork(context); }
VkQueue getVkQueue(egl::ContextPriority priority) const { return mQueues[priority]; }
bool getEnableValidationLayers() const { return mEnableValidationLayers; } bool getEnableValidationLayers() const { return mEnableValidationLayers; }
...@@ -276,6 +306,8 @@ class RendererVk : angle::NonCopyable ...@@ -276,6 +306,8 @@ class RendererVk : angle::NonCopyable
void setGlobalDebugAnnotator(); void setGlobalDebugAnnotator();
void outputVmaStatString();
private: private:
angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex); angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex);
void ensureCapsInitialized() const; void ensureCapsInitialized() const;
...@@ -390,9 +422,13 @@ class RendererVk : angle::NonCopyable ...@@ -390,9 +422,13 @@ class RendererVk : angle::NonCopyable
}; };
std::deque<PendingOneOffCommands> mPendingOneOffCommands; std::deque<PendingOneOffCommands> mPendingOneOffCommands;
// Worker Thread // Command Processor Thread
CommandProcessor mCommandProcessor; vk::CommandProcessor mCommandProcessor;
std::thread mCommandProcessorThread; std::thread mCommandProcessorThread;
// mNextSubmitFence is the fence that's going to be signaled at the next submission. This is
// used to support SyncVk objects, which may outlive the context (as EGLSync objects).
vk::Shared<vk::Fence> mNextSubmitFence;
std::mutex mNextSubmitFenceMutex;
// track whether we initialized (or released) glslang // track whether we initialized (or released) glslang
bool mGlslangInitialized; bool mGlslangInitialized;
......
...@@ -102,11 +102,22 @@ ResourceUseList::ResourceUseList() ...@@ -102,11 +102,22 @@ ResourceUseList::ResourceUseList()
mResourceUses.reserve(kDefaultResourceUseCount); mResourceUses.reserve(kDefaultResourceUseCount);
} }
ResourceUseList::ResourceUseList(ResourceUseList &&other)
{
*this = std::move(other);
}
ResourceUseList::~ResourceUseList() ResourceUseList::~ResourceUseList()
{ {
ASSERT(mResourceUses.empty()); ASSERT(mResourceUses.empty());
} }
ResourceUseList &ResourceUseList::operator=(ResourceUseList &&rhs)
{
std::swap(mResourceUses, rhs.mResourceUses);
return *this;
}
void ResourceUseList::releaseResourceUses() void ResourceUseList::releaseResourceUses()
{ {
for (SharedResourceUse &use : mResourceUses) for (SharedResourceUse &use : mResourceUses)
......
...@@ -136,7 +136,9 @@ class ResourceUseList final : angle::NonCopyable ...@@ -136,7 +136,9 @@ class ResourceUseList final : angle::NonCopyable
{ {
public: public:
ResourceUseList(); ResourceUseList();
ResourceUseList(ResourceUseList &&other);
virtual ~ResourceUseList(); virtual ~ResourceUseList();
ResourceUseList &operator=(ResourceUseList &&rhs);
void add(const SharedResourceUse &resourceUse); void add(const SharedResourceUse &resourceUse);
......
...@@ -1186,6 +1186,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1186,6 +1186,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
bool *presentOutOfDate) bool *presentOutOfDate)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present"); ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present");
RendererVk *renderer = contextVk->getRenderer();
// Throttle the submissions to avoid getting too far ahead of the GPU. // Throttle the submissions to avoid getting too far ahead of the GPU.
SwapHistory &swap = mSwapHistory[mCurrentSwapHistoryIndex]; SwapHistory &swap = mSwapHistory[mCurrentSwapHistoryIndex];
...@@ -1194,7 +1195,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1194,7 +1195,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
if (swap.sharedFence.isReferenced()) if (swap.sharedFence.isReferenced())
{ {
ANGLE_TRY(swap.waitFence(contextVk)); ANGLE_TRY(swap.waitFence(contextVk));
swap.destroy(contextVk->getRenderer()); swap.destroy(renderer);
} }
} }
...@@ -1316,6 +1317,8 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1316,6 +1317,8 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
} }
// Update the swap history for this presentation // Update the swap history for this presentation
// TODO: https://issuetracker.google.com/issues/170312581 - this will force us to flush worker
// queue to get the fence.
swap.sharedFence = contextVk->getLastSubmittedFence(); swap.sharedFence = contextVk->getLastSubmittedFence();
ASSERT(!mAcquireImageSemaphore.valid()); ASSERT(!mAcquireImageSemaphore.valid());
...@@ -1323,13 +1326,47 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1323,13 +1326,47 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
mCurrentSwapHistoryIndex = mCurrentSwapHistoryIndex =
mCurrentSwapHistoryIndex == mSwapHistory.size() ? 0 : mCurrentSwapHistoryIndex; mCurrentSwapHistoryIndex == mSwapHistory.size() ? 0 : mCurrentSwapHistoryIndex;
VkResult result = contextVk->getRenderer()->queuePresent(contextVk->getPriority(), presentInfo); VkResult result;
if (renderer->getFeatures().enableCommandProcessingThread.enabled)
{
vk::CommandProcessorTask present;
present.initPresent(contextVk->getPriority(), presentInfo);
// Make sure everything has been submitted (and errors handled)
renderer->waitForCommandProcessorIdle(contextVk);
// Submit queuePresent all by itself (ignoring interference from other threads for now)
renderer->queueCommand(contextVk, &present);
// TODO: https://issuetracker.google.com/issues/170329600 - Just stalling here for now, but
// really want to let main thread continue
// need to figure out how to handle work below off-thread and sync to main
// Also, need to fix lifetime of presentInfo data when main thread continues.
// There is a bunch of work happening after present to deal with swapchain recreation.
// Will that require moving a large chunk of swapImpl to the CommandProcessor?
// That will likely require serializing access to the WindowSurfaceVk object in order
// to have current content.
result = VK_SUCCESS;
// wait for the queuePresent to be submitted and intentionally set the context to nullptr so
// that we can catch any error. Note this doesn't prevent another context from grabbing the
// error. Will be fixed properly in a follow-up as part of present work.
renderer->waitForCommandProcessorIdle(nullptr);
if (renderer->hasPendingError())
{
vk::Error error = renderer->getAndClearPendingError();
result = error.mErrorCode;
}
}
else
{
result = renderer->queuePresent(contextVk->getPriority(), presentInfo);
}
// If OUT_OF_DATE is returned, it's ok, we just need to recreate the swapchain before // If OUT_OF_DATE is returned, it's ok, we just need to recreate the swapchain before
// continuing. // continuing.
// If VK_SUBOPTIMAL_KHR is returned it's because the device orientation changed and we should // If VK_SUBOPTIMAL_KHR is returned it's because the device orientation changed and we should
// recreate the swapchain with a new window orientation. // recreate the swapchain with a new window orientation.
if (contextVk->getFeatures().enablePreRotateSurfaces.enabled) if (renderer->getFeatures().enablePreRotateSurfaces.enabled)
{ {
// Also check for VK_SUBOPTIMAL_KHR. // Also check for VK_SUBOPTIMAL_KHR.
*presentOutOfDate = ((result == VK_ERROR_OUT_OF_DATE_KHR) || (result == VK_SUBOPTIMAL_KHR)); *presentOutOfDate = ((result == VK_ERROR_OUT_OF_DATE_KHR) || (result == VK_SUBOPTIMAL_KHR));
......
...@@ -32,6 +32,12 @@ SyncHelper::~SyncHelper() {} ...@@ -32,6 +32,12 @@ SyncHelper::~SyncHelper() {}
void SyncHelper::releaseToRenderer(RendererVk *renderer) void SyncHelper::releaseToRenderer(RendererVk *renderer)
{ {
renderer->collectGarbageAndReinit(&mUse, &mEvent); renderer->collectGarbageAndReinit(&mUse, &mEvent);
// TODO: https://issuetracker.google.com/170312581 - Currently just stalling on worker thread
// here to try and avoid race condition. If this works, need some alternate solution
if (renderer->getFeatures().enableCommandProcessingThread.enabled)
{
renderer->waitForCommandProcessorIdle(nullptr);
}
mFence.reset(renderer->getDevice()); mFence.reset(renderer->getDevice());
} }
...@@ -48,7 +54,17 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk) ...@@ -48,7 +54,17 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk)
DeviceScoped<Event> event(device); DeviceScoped<Event> event(device);
ANGLE_VK_TRY(contextVk, event.get().init(device, eventCreateInfo)); ANGLE_VK_TRY(contextVk, event.get().init(device, eventCreateInfo));
// TODO: https://issuetracker.google.com/170312581 - For now wait for worker thread to finish
// then get next fence from renderer
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
ANGLE_TRY(contextVk->getRenderer()->getNextSubmitFence(&mFence, false));
}
else
{
ANGLE_TRY(contextVk->getNextSubmitFence(&mFence)); ANGLE_TRY(contextVk->getNextSubmitFence(&mFence));
}
mEvent = event.release(); mEvent = event.release();
...@@ -90,9 +106,17 @@ angle::Result SyncHelper::clientWait(Context *context, ...@@ -90,9 +106,17 @@ angle::Result SyncHelper::clientWait(Context *context,
ANGLE_TRY(contextVk->flushImpl(nullptr)); ANGLE_TRY(contextVk->flushImpl(nullptr));
} }
// If we are using worker need to wait for the commands to be issued before waiting on the
// fence.
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
// Wait on the fence that's expected to be signaled on the first vkQueueSubmit after // Wait on the fence that's expected to be signaled on the first vkQueueSubmit after
// `initialize` was called. The first fence is the fence created to signal this sync. // `initialize` was called. The first fence is the fence created to signal this sync.
ASSERT(mFence.get().valid()); ASSERT(mFence.get().valid());
// TODO: https://issuetracker.google.com/170312581 - Wait could be command to worker
VkResult status = mFence.get().wait(renderer->getDevice(), timeout); VkResult status = mFence.get().wait(renderer->getDevice(), timeout);
// Check for errors, but don't consider timeout as such. // Check for errors, but don't consider timeout as such.
...@@ -189,11 +213,24 @@ angle::Result SyncHelperNativeFence::initializeWithFd(ContextVk *contextVk, int ...@@ -189,11 +213,24 @@ angle::Result SyncHelperNativeFence::initializeWithFd(ContextVk *contextVk, int
retain(&contextVk->getResourceUseList()); retain(&contextVk->getResourceUseList());
if (renderer->getFeatures().enableCommandProcessingThread.enabled)
{
CommandProcessorTask oneOffQueueSubmit;
oneOffQueueSubmit.initOneOffQueueSubmit(VK_NULL_HANDLE, contextVk->getPriority(),
&fence.get());
renderer->queueCommand(contextVk, &oneOffQueueSubmit);
// TODO: https://issuetracker.google.com/170312581 - wait for now
renderer->waitForCommandProcessorIdle(contextVk);
}
else
{
Serial serialOut; Serial serialOut;
VkSubmitInfo submitInfo = {}; VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
ANGLE_TRY(renderer->queueSubmit(contextVk, contextVk->getPriority(), submitInfo, nullptr,
&fence.get(), &serialOut)); ANGLE_TRY(renderer->queueSubmit(contextVk, contextVk->getPriority(), submitInfo,
nullptr, &fence.get(), &serialOut));
}
VkFenceGetFdInfoKHR fenceGetFdInfo = {}; VkFenceGetFdInfoKHR fenceGetFdInfo = {};
fenceGetFdInfo.sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR; fenceGetFdInfo.sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR;
...@@ -253,6 +290,14 @@ angle::Result SyncHelperNativeFence::clientWait(Context *context, ...@@ -253,6 +290,14 @@ angle::Result SyncHelperNativeFence::clientWait(Context *context,
{ {
ANGLE_TRY(contextVk->flushImpl(nullptr)); ANGLE_TRY(contextVk->flushImpl(nullptr));
} }
// If we are using worker need to wait for the commands to be issued before waiting on the
// fence.
if (contextVk->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
// Wait for mFenceWithFd to be signaled. // Wait for mFenceWithFd to be signaled.
VkResult status = mFenceWithFd.wait(renderer->getDevice(), timeout); VkResult status = mFenceWithFd.wait(renderer->getDevice(), timeout);
......
...@@ -969,6 +969,7 @@ angle::Result UtilsVk::setupProgram(ContextVk *contextVk, ...@@ -969,6 +969,7 @@ angle::Result UtilsVk::setupProgram(ContextVk *contextVk,
vk::PipelineAndSerial *pipelineAndSerial; vk::PipelineAndSerial *pipelineAndSerial;
program->setShader(gl::ShaderType::Compute, fsCsShader); program->setShader(gl::ShaderType::Compute, fsCsShader);
ANGLE_TRY(program->getComputePipeline(contextVk, pipelineLayout.get(), &pipelineAndSerial)); ANGLE_TRY(program->getComputePipeline(contextVk, pipelineLayout.get(), &pipelineAndSerial));
// TODO: https://issuetracker.google.com/issues/169788986: Update serial handling.
pipelineAndSerial->updateSerial(serial); pipelineAndSerial->updateSerial(serial);
commandBuffer->bindComputePipeline(pipelineAndSerial->get()); commandBuffer->bindComputePipeline(pipelineAndSerial->get());
} }
......
...@@ -916,7 +916,8 @@ void CommandBufferHelper::restoreStencilContent() ...@@ -916,7 +916,8 @@ void CommandBufferHelper::restoreStencilContent()
} }
} }
void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBuffer *primary) void CommandBufferHelper::executeBarriers(const angle::FeaturesVk &features,
PrimaryCommandBuffer *primary)
{ {
// make a local copy for faster access // make a local copy for faster access
PipelineStagesMask mask = mPipelineBarrierMask; PipelineStagesMask mask = mPipelineBarrierMask;
...@@ -925,7 +926,7 @@ void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBu ...@@ -925,7 +926,7 @@ void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBu
return; return;
} }
if (contextVk->getFeatures().preferAggregateBarrierCalls.enabled) if (features.preferAggregateBarrierCalls.enabled)
{ {
PipelineStagesMask::Iterator iter = mask.begin(); PipelineStagesMask::Iterator iter = mask.begin();
PipelineBarrier &barrier = mPipelineBarriers[*iter]; PipelineBarrier &barrier = mPipelineBarriers[*iter];
...@@ -1152,24 +1153,31 @@ void CommandBufferHelper::endTransformFeedback() ...@@ -1152,24 +1153,31 @@ void CommandBufferHelper::endTransformFeedback()
mValidTransformFeedbackBufferCount = 0; mValidTransformFeedbackBufferCount = 0;
} }
angle::Result CommandBufferHelper::flushToPrimary(ContextVk *contextVk, angle::Result CommandBufferHelper::getRenderPassWithOps(ContextVk *contextVk,
PrimaryCommandBuffer *primary) RenderPass **renderPass)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandBufferHelper::flushToPrimary"); *renderPass = nullptr;
ASSERT(!empty()); if (mIsRenderPassCommandBuffer)
if (kEnableCommandStreamDiagnostics)
{ {
addCommandDiagnostics(contextVk); ANGLE_TRY(contextVk->getRenderPassWithOps(mRenderPassDesc, mAttachmentOps, renderPass));
} }
return angle::Result::Continue;
}
angle::Result CommandBufferHelper::flushToPrimary(const angle::FeaturesVk &features,
PrimaryCommandBuffer *primary,
RenderPass *renderPass)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandBufferHelper::flushToPrimary");
ASSERT(!empty());
// Commands that are added to primary before beginRenderPass command // Commands that are added to primary before beginRenderPass command
executeBarriers(contextVk, primary); executeBarriers(features, primary);
if (mIsRenderPassCommandBuffer) if (mIsRenderPassCommandBuffer)
{ {
mCommandBuffer.executeQueuedResetQueryPoolCommands(primary->getHandle()); mCommandBuffer.executeQueuedResetQueryPoolCommands(primary->getHandle());
// Pull a RenderPass from the cache. ASSERT(renderPass != nullptr);
RenderPass *renderPass = nullptr;
ANGLE_TRY(contextVk->getRenderPassWithOps(mRenderPassDesc, mAttachmentOps, &renderPass));
VkRenderPassBeginInfo beginInfo = {}; VkRenderPassBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; beginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
......
...@@ -962,9 +962,12 @@ class CommandBufferHelper : angle::NonCopyable ...@@ -962,9 +962,12 @@ class CommandBufferHelper : angle::NonCopyable
CommandBuffer &getCommandBuffer() { return mCommandBuffer; } CommandBuffer &getCommandBuffer() { return mCommandBuffer; }
angle::Result flushToPrimary(ContextVk *contextVk, PrimaryCommandBuffer *primary); angle::Result getRenderPassWithOps(ContextVk *contextVk, RenderPass **renderPass);
angle::Result flushToPrimary(const angle::FeaturesVk &features,
PrimaryCommandBuffer *primary,
RenderPass *renderPass);
void executeBarriers(ContextVk *contextVk, PrimaryCommandBuffer *primary); void executeBarriers(const angle::FeaturesVk &features, PrimaryCommandBuffer *primary);
void setHasRenderPass(bool hasRenderPass) { mIsRenderPassCommandBuffer = hasRenderPass; } void setHasRenderPass(bool hasRenderPass) { mIsRenderPassCommandBuffer = hasRenderPass; }
...@@ -1129,9 +1132,9 @@ class CommandBufferHelper : angle::NonCopyable ...@@ -1129,9 +1132,9 @@ class CommandBufferHelper : angle::NonCopyable
bool isReadOnlyDepthMode() const { return mReadOnlyDepthStencilMode; } bool isReadOnlyDepthMode() const { return mReadOnlyDepthStencilMode; }
private:
void addCommandDiagnostics(ContextVk *contextVk); void addCommandDiagnostics(ContextVk *contextVk);
private:
bool onDepthStencilAccess(ResourceAccess access, bool onDepthStencilAccess(ResourceAccess access,
uint32_t *cmdCountInvalidated, uint32_t *cmdCountInvalidated,
uint32_t *cmdCountDisabled); uint32_t *cmdCountDisabled);
......
...@@ -949,7 +949,7 @@ gl::LevelIndex GetLevelIndex(vk::LevelIndex levelVk, gl::LevelIndex baseLevel); ...@@ -949,7 +949,7 @@ gl::LevelIndex GetLevelIndex(vk::LevelIndex levelVk, gl::LevelIndex baseLevel);
auto ANGLE_LOCAL_VAR = command; \ auto ANGLE_LOCAL_VAR = command; \
if (ANGLE_UNLIKELY(ANGLE_LOCAL_VAR != VK_SUCCESS)) \ if (ANGLE_UNLIKELY(ANGLE_LOCAL_VAR != VK_SUCCESS)) \
{ \ { \
context->handleError(ANGLE_LOCAL_VAR, __FILE__, ANGLE_FUNCTION, __LINE__); \ (context)->handleError(ANGLE_LOCAL_VAR, __FILE__, ANGLE_FUNCTION, __LINE__); \
return angle::Result::Stop; \ return angle::Result::Stop; \
} \ } \
} while (0) } while (0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment