Commit ed876984 by Courtney Goeltzenleuchter Committed by Commit Bot

Vulkan: functionally complete worker thread

Working on enhancing worker thread to completely own primary command buffers. This will include not only processing SCBs from main thread into a primary, but also submitting those command buffers to the queue. The CommandProcessor is a vk::Context so it can handle errors in the worker thread. When the main thread submits tasks to the worker thread it also syncs any outstanding errors from the worker. Include asynchronousCommandProcessing feature that will control whether the worker thread task does its work in parallel or not. If false, we wait for the thread to complete its work before letting the main thread continue. If true, the thread can execute in parallel with the main thread. Bug: b/154030730 Bug: b/161912801 Change-Id: I00f8f013d6cbb2af12a172c4f7927855db2f0ebf Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2328992 Commit-Queue: Courtney Goeltzenleuchter <courtneygo@google.com> Reviewed-by: Tim Van Patten <timvp@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 2882e1af
......@@ -343,6 +343,13 @@ struct FeaturesVk : FeatureSetBase
"Enable parallel processing and submission of Vulkan commands in worker thread", &members,
"http://anglebug.com/4324"};
// Enable parallel thread execution when enableCommandProcessingThread is enabled.
// Currently off by default.
Feature asynchronousCommandProcessing = {"asynchronous_command_processing",
FeatureCategory::VulkanFeatures,
"Enable/Disable parallel processing of worker thread",
&members, "http://anglebug.com/4324"};
// Whether the VkDevice supports the VK_KHR_shader_float16_int8 extension and has the
// shaderFloat16 feature.
Feature supportsShaderFloat16 = {"supports_shader_float16", FeatureCategory::VulkanFeatures,
......
......@@ -8,12 +8,145 @@
//
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/trace.h"
namespace rx
{
namespace
{
// Throttle limit: submitFrame() blocks (finishToSerial on the oldest batch) once more
// than this many command batches are in flight.
constexpr size_t kInFlightCommandsLimit = 100u;
// Debug aid: when true, queueSubmit() dumps VMA allocator statistics before submitting.
constexpr bool kOutputVmaStatsString = false;
// Populate a zero-initialized VkSubmitInfo with the command buffer, wait semaphores
// (growing the stage-mask array to match), and an optional signal semaphore.
void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
                          const vk::PrimaryCommandBuffer &commandBuffer,
                          const std::vector<VkSemaphore> &waitSemaphores,
                          std::vector<VkPipelineStageFlags> *waitSemaphoreStageMasks,
                          const vk::Semaphore *signalSemaphore)
{
    // Verify that the submitInfo has been zero'd out.
    ASSERT(submitInfo->signalSemaphoreCount == 0);
    submitInfo->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

    submitInfo->commandBufferCount = commandBuffer.valid() ? 1 : 0;
    submitInfo->pCommandBuffers    = commandBuffer.ptr();

    // Every wait semaphore needs a matching stage mask; newly-added entries wait on all stages.
    const size_t waitCount = waitSemaphores.size();
    if (waitSemaphoreStageMasks->size() < waitCount)
    {
        waitSemaphoreStageMasks->resize(waitCount, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
    }
    submitInfo->waitSemaphoreCount = static_cast<uint32_t>(waitCount);
    submitInfo->pWaitSemaphores    = waitSemaphores.data();
    submitInfo->pWaitDstStageMask  = waitSemaphoreStageMasks->data();

    // Optionally signal exactly one semaphore when this submission completes.
    const bool hasSignal             = (signalSemaphore != nullptr);
    submitInfo->signalSemaphoreCount = hasSignal ? 1 : 0;
    submitInfo->pSignalSemaphores    = hasSignal ? signalSemaphore->ptr() : nullptr;
}
} // namespace
namespace vk
{
// Reset the task to a known-empty state; CustomTask::Invalid marks it as unused.
void CommandProcessorTask::initTask()
{
    mTask                  = CustomTask::Invalid;
    mOneOffCommandBufferVk = VK_NULL_HANDLE;
    mOneOffFence           = nullptr;
    mSemaphore             = nullptr;
    mCommandBuffer         = nullptr;
    mRenderPass            = nullptr;
    mContextVk             = nullptr;
}
// CommandProcessorTask implementation
// Set up a ProcessCommands task: flush `commandBuffer` (a secondary command buffer helper)
// into the worker-owned primary, optionally inside `renderPass`.
void CommandProcessorTask::initProcessCommands(ContextVk *contextVk,
                                               CommandBufferHelper *commandBuffer,
                                               vk::RenderPass *renderPass)
{
    mRenderPass    = renderPass;
    mCommandBuffer = commandBuffer;
    mContextVk     = contextVk;
    mTask          = vk::CustomTask::ProcessCommands;
}
// Set up a Present task: execute QueuePresent with `presentInfo` at the given priority.
void CommandProcessorTask::initPresent(egl::ContextPriority priority, VkPresentInfoKHR presentInfo)
{
    mPriority    = priority;
    mPresentInfo = presentInfo;
    mTask        = vk::CustomTask::Present;
}
// Set up a FinishToSerial task.
// Note: sometimes the serial is not valid and that's okay, the finish will early exit in the
// TaskProcessor::finishToSerial
void CommandProcessorTask::initFinishToSerial(Serial serial)
{
    mSerial = serial;
    mTask   = vk::CustomTask::FinishToSerial;
}
// Set up a FlushAndQueueSubmit task. Vectors and lists are moved into the task; the
// semaphore pointer is borrowed.
void CommandProcessorTask::initFlushAndQueueSubmit(
    std::vector<VkSemaphore> &&waitSemaphores,
    std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
    const vk::Semaphore *semaphore,
    egl::ContextPriority priority,
    vk::GarbageList &&currentGarbage,
    vk::ResourceUseList &&currentResources)
{
    mTask                    = vk::CustomTask::FlushAndQueueSubmit;
    mPriority                = priority;
    mSemaphore               = semaphore;
    mWaitSemaphores          = std::move(waitSemaphores);
    mWaitSemaphoreStageMasks = std::move(waitSemaphoreStageMasks);
    mGarbage                 = std::move(currentGarbage);
    mResourceUseList         = std::move(currentResources);
}
// Set up a OneOffQueueSubmit task: submit a raw VkCommandBuffer with an optional fence.
void CommandProcessorTask::initOneOffQueueSubmit(VkCommandBuffer oneOffCommandBufferVk,
                                                 egl::ContextPriority priority,
                                                 const vk::Fence *fence)
{
    mPriority              = priority;
    mOneOffFence           = fence;
    mOneOffCommandBufferVk = oneOffCommandBufferVk;
    mTask                  = vk::CustomTask::OneOffQueueSubmit;
}
// Move-assignment: takes over rhs's payload, then resets rhs to the Invalid state.
// Note the mix: raw (borrowed) pointers and the VkCommandBuffer handle are copied,
// while owned containers/values are swapped so rhs carries away *this's old contents.
CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs)
{
    // Self-move guard.
    if (this == &rhs)
    {
        return *this;
    }
    mContextVk = rhs.mContextVk;
    mRenderPass = rhs.mRenderPass;
    mCommandBuffer = rhs.mCommandBuffer;
    std::swap(mTask, rhs.mTask);
    std::swap(mWaitSemaphores, rhs.mWaitSemaphores);
    std::swap(mWaitSemaphoreStageMasks, rhs.mWaitSemaphoreStageMasks);
    mSemaphore = rhs.mSemaphore;
    mOneOffFence = rhs.mOneOffFence;
    std::swap(mGarbage, rhs.mGarbage);
    std::swap(mSerial, rhs.mSerial);
    std::swap(mPresentInfo, rhs.mPresentInfo);
    std::swap(mPriority, rhs.mPriority);
    std::swap(mResourceUseList, rhs.mResourceUseList);
    mOneOffCommandBufferVk = rhs.mOneOffCommandBufferVk;
    // clear rhs now that everything has moved.
    rhs.initTask();
    return *this;
}
// CommandBatch implementation. A batch pairs a submitted primary command buffer with its
// fence, command pool, and serial (see the CommandBatch struct in the header).
CommandBatch::CommandBatch() = default;
......@@ -39,67 +172,612 @@ void CommandBatch::destroy(VkDevice device)
commandPool.destroy(device);
fence.reset(device);
}
} // namespace vk
CommandProcessor::CommandProcessor() : mWorkerThreadIdle(true) {}
// TaskProcessor implementation. Construction/destruction are trivial; real setup and
// teardown happen in init() and destroy().
TaskProcessor::TaskProcessor() = default;
TaskProcessor::~TaskProcessor() = default;
// Free the persistent primary command pool. All in-flight batches and garbage must have
// been drained before this is called (asserted below).
void TaskProcessor::destroy(VkDevice device)
{
    mPrimaryCommandPool.destroy(device);
    ASSERT(mInFlightCommands.empty() && mGarbageQueue.empty());
}
// One-time setup on the worker: remember the worker thread's id (used to assert that
// submissions only happen on that thread) and create the primary command pool.
angle::Result TaskProcessor::init(vk::Context *context, std::thread::id threadId)
{
    mThreadId = threadId;
    // Initialize the command pool now that we know the queue family index.
    ANGLE_TRY(mPrimaryCommandPool.init(context, context->getRenderer()->getQueueFamilyIndex()));
    return angle::Result::Continue;
}
// Retire every in-flight batch whose fence has signaled (batches complete in submission
// order, so stop at the first still-pending fence), then destroy any queued garbage whose
// serial has completed.
angle::Result TaskProcessor::checkCompletedCommands(vk::Context *context)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::checkCompletedCommands");
    VkDevice device = context->getDevice();
    RendererVk *rendererVk = context->getRenderer();
    int finishedCount = 0;
    for (vk::CommandBatch &batch : mInFlightCommands)
    {
        VkResult result = batch.fence.get().getStatus(device);
        if (result == VK_NOT_READY)
        {
            // First unfinished batch; everything after it is unfinished too.
            break;
        }
        ANGLE_VK_TRY(context, result);
        // Advance the renderer's completed serial and recycle the batch's fence.
        rendererVk->onCompletedSerial(batch.serial);
        rendererVk->resetSharedFence(&batch.fence);
        ANGLE_TRACE_EVENT0("gpu.angle", "command buffer recycling");
        batch.commandPool.destroy(device);
        // Return the primary command buffer to the persistent pool for reuse.
        ANGLE_TRY(releasePrimaryCommandBuffer(context, std::move(batch.primaryCommands)));
        ++finishedCount;
    }
    // Retired batches are contiguous at the front; erase them in one shot.
    if (finishedCount > 0)
    {
        auto beginIter = mInFlightCommands.begin();
        mInFlightCommands.erase(beginIter, beginIter + finishedCount);
    }
    Serial lastCompleted = rendererVk->getLastCompletedQueueSerial();
    // Garbage lists are ordered by serial; destroy every list whose serial has completed.
    size_t freeIndex = 0;
    for (; freeIndex < mGarbageQueue.size(); ++freeIndex)
    {
        vk::GarbageAndSerial &garbageList = mGarbageQueue[freeIndex];
        if (garbageList.getSerial() <= lastCompleted)
        {
            for (vk::GarbageObject &garbage : garbageList.get())
            {
                garbage.destroy(rendererVk);
            }
        }
        else
        {
            break;
        }
    }
    // Remove the entries from the garbage list - they should be ready to go.
    if (freeIndex > 0)
    {
        mGarbageQueue.erase(mGarbageQueue.begin(), mGarbageQueue.begin() + freeIndex);
    }
    return angle::Result::Continue;
}
// Move the finished primary command buffer (and, if valid, the secondary-command pool)
// into `batch`, recreating a fresh transient pool in commandPool's place.
angle::Result TaskProcessor::releaseToCommandBatch(vk::Context *context,
                                                   vk::PrimaryCommandBuffer &&commandBuffer,
                                                   vk::CommandPool *commandPool,
                                                   vk::CommandBatch *batch)
{
    batch->primaryCommands = std::move(commandBuffer);

    if (!commandPool->valid())
    {
        return angle::Result::Continue;
    }

    // Hand the pool to the batch and recreate a new one for subsequent recording.
    batch->commandPool = std::move(*commandPool);

    VkCommandPoolCreateInfo createInfo = {};
    createInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    createInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
    createInfo.queueFamilyIndex        = context->getRenderer()->getQueueFamilyIndex();
    ANGLE_VK_TRY(context, commandPool->init(context->getDevice(), createInfo));

    return angle::Result::Continue;
}
// Hand out a primary command buffer from the persistent pool (recycles previously
// released buffers when available).
angle::Result TaskProcessor::allocatePrimaryCommandBuffer(
    vk::Context *context,
    vk::PrimaryCommandBuffer *commandBufferOut)
{
    return mPrimaryCommandPool.allocate(context, commandBufferOut);
}
// Return a retired primary command buffer to the persistent pool for later reuse.
angle::Result TaskProcessor::releasePrimaryCommandBuffer(vk::Context *context,
                                                         vk::PrimaryCommandBuffer &&commandBuffer)
{
    ASSERT(mPrimaryCommandPool.valid());
    return mPrimaryCommandPool.collect(context, std::move(commandBuffer));
}
// Device-loss cleanup: wait for each in-flight batch's fence (best effort) and destroy
// the batch's resources, then drop the whole in-flight list.
// NOTE: the original listing had a stray interleaved line from the removed
// CommandProcessor::queueCommands signature inside this loop; it is deleted here.
void TaskProcessor::handleDeviceLost(vk::Context *context)
{
    VkDevice device = context->getDevice();
    for (vk::CommandBatch &batch : mInFlightCommands)
    {
        // On device loss we need to wait for fence to be signaled before destroying it
        VkResult status =
            batch.fence.get().wait(device, context->getRenderer()->getMaxFenceWaitTimeNs());
        // If the wait times out, it is probably not possible to recover from lost device
        ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
        // On device lost, here simply destroy the CommandBuffer, it will be fully cleared later
        // by CommandPool::destroy
        batch.primaryCommands.destroy(device);
        batch.commandPool.destroy(device);
        batch.fence.reset(device);
    }
    mInFlightCommands.clear();
}
// If there are any inflight commands worker will look for fence that corresponds to the request
// serial or the last available fence and wait on that fence. Will then do necessary cleanup work.
// This can cause the worker thread to block.
// TODO: https://issuetracker.google.com/issues/170312581 - A more optimal solution might be to do
// the wait in CommandProcessor rather than the worker thread. That would require protecting access
// to mInFlightCommands
// NOTE: the original listing interleaved five lines of the removed
// CommandProcessor::queueCommands body here; they are deleted in this reconstruction.
angle::Result TaskProcessor::finishToSerial(vk::Context *context, Serial serial)
{
    RendererVk *rendererVk = context->getRenderer();
    uint64_t timeout = rendererVk->getMaxFenceWaitTimeNs();

    if (mInFlightCommands.empty())
    {
        // No outstanding work, nothing to wait for.
        return angle::Result::Continue;
    }

    // Find the first batch with serial equal to or bigger than given serial (note that
    // the batch serials are unique, otherwise upper-bound would have been necessary).
    size_t batchIndex = mInFlightCommands.size() - 1;
    for (size_t i = 0; i < mInFlightCommands.size(); ++i)
    {
        if (mInFlightCommands[i].serial >= serial)
        {
            batchIndex = i;
            break;
        }
    }
    const vk::CommandBatch &batch = mInFlightCommands[batchIndex];

    // Wait for it finish
    VkDevice device = context->getDevice();
    ANGLE_VK_TRY(context, batch.fence.get().wait(device, timeout));

    // Clean up finished batches.
    return checkCompletedCommands(context);
}
void CommandProcessor::processCommandProcessorTasks()
VkResult TaskProcessor::present(VkQueue queue, const VkPresentInfoKHR &presentInfo)
{
ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR");
return vkQueuePresentKHR(queue, &presentInfo);
}
// Submit the frame's primary command buffer and track it as an in-flight batch that owns
// the fence, the secondary-command pool, and this submission's deferred garbage.
angle::Result TaskProcessor::submitFrame(vk::Context *context,
                                         VkQueue queue,
                                         const VkSubmitInfo &submitInfo,
                                         const vk::Shared<vk::Fence> &sharedFence,
                                         vk::GarbageList *currentGarbage,
                                         vk::CommandPool *commandPool,
                                         vk::PrimaryCommandBuffer &&commandBuffer,
                                         const Serial &queueSerial)
{
    // Submissions are only legal from the worker thread.
    ASSERT(std::this_thread::get_id() == mThreadId);
    ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::submitFrame");
    VkDevice device = context->getDevice();
    // NOTE(review): DeviceScoped appears to destroy the batch if we error out before
    // release() below — confirm against DeviceScoped's definition.
    vk::DeviceScoped<vk::CommandBatch> scopedBatch(device);
    vk::CommandBatch &batch = scopedBatch.get();
    batch.fence.copy(device, sharedFence);
    batch.serial = queueSerial;
    ANGLE_TRY(queueSubmit(context, queue, submitInfo, &batch.fence.get()));
    // Defer destruction of this submission's garbage until its serial completes.
    if (!currentGarbage->empty())
    {
        mGarbageQueue.emplace_back(std::move(*currentGarbage), queueSerial);
    }
    // Store the primary CommandBuffer and command pool used for secondary CommandBuffers
    // in the in-flight list.
    ANGLE_TRY(releaseToCommandBatch(context, std::move(commandBuffer), commandPool, &batch));
    mInFlightCommands.emplace_back(scopedBatch.release());
    // Opportunistically retire any batches that have already finished.
    ANGLE_TRY(checkCompletedCommands(context));
    // CPU should be throttled to avoid mInFlightCommands from growing too fast. Important for
    // off-screen scenarios.
    while (mInFlightCommands.size() > kInFlightCommandsLimit)
    {
        ANGLE_TRY(finishToSerial(context, mInFlightCommands[0].serial));
    }
    return angle::Result::Continue;
}
// Note: this must be called when the work queue is empty and while holding mWorkerMutex to
// ensure that worker isn't touching mInFlightCommands
vk::Shared<vk::Fence> TaskProcessor::getLastSubmittedFenceWithLock(VkDevice device) const
{
    vk::Shared<vk::Fence> lastFence;
    if (mInFlightCommands.empty())
    {
        // Nothing submitted yet: return an empty shared fence.
        return lastFence;
    }
    lastFence.copy(device, mInFlightCommands.back().fence);
    return lastFence;
}
// Raw vkQueueSubmit wrapper. Must run on the worker thread; all queue access is
// serialized through the worker so no queue mutex is needed.
angle::Result TaskProcessor::queueSubmit(vk::Context *context,
                                         VkQueue queue,
                                         const VkSubmitInfo &submitInfo,
                                         const vk::Fence *fence)
{
    ASSERT(std::this_thread::get_id() == mThreadId);

    if (kOutputVmaStatsString)
    {
        // Debug-only allocator statistics dump.
        context->getRenderer()->outputVmaStatString();
    }

    const VkFence fenceHandle = (fence != nullptr) ? fence->getHandle() : VK_NULL_HANDLE;
    ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, fenceHandle));
    return angle::Result::Continue;
}
// vk::Context error hook for the worker thread: log device-lost immediately, and record
// every error in mErrors for the main thread to pick up on its next sync.
void CommandProcessor::handleError(VkResult errorCode,
                                   const char *file,
                                   const char *function,
                                   unsigned int line)
{
    ASSERT(errorCode != VK_SUCCESS);

    std::stringstream errorMessage;
    errorMessage << "Internal Vulkan error (" << errorCode << "): " << VulkanResultString(errorCode)
                 << ".";

    if (errorCode == VK_ERROR_DEVICE_LOST)
    {
        WARN() << errorMessage.str();
        handleDeviceLost();
    }

    std::lock_guard<std::mutex> queueLock(mErrorMutex);
    mErrors.emplace(vk::Error{errorCode, file, function, line});
}
// Construct the worker-thread context. Pre-generates the first submitted/current queue
// serials and starts with an empty error queue.
CommandProcessor::CommandProcessor(RendererVk *renderer)
    : vk::Context(renderer),
      mWorkerThreadIdle(false),
      mCommandProcessorLastSubmittedSerial(mQueueSerialFactory.generate()),
      mCommandProcessorCurrentQueueSerial(mQueueSerialFactory.generate())
{
    std::lock_guard<std::mutex> queueLock(mErrorMutex);
    // Drop any stale errors by swapping in a freshly-constructed queue.
    std::queue<vk::Error> emptyQueue;
    std::swap(mErrors, emptyQueue);
}
// Out-of-line defaulted destructor.
CommandProcessor::~CommandProcessor() = default;
// vk::Context override; the worker never enables robust resource init.
bool CommandProcessor::isRobustResourceInitEnabled() const
{
    // Unused for worker thread, just return false.
    return false;
}
// Pop and return the oldest worker-recorded error, or a VK_SUCCESS-coded sentinel when
// no error is pending.
vk::Error CommandProcessor::getAndClearPendingError()
{
    std::lock_guard<std::mutex> queueLock(mErrorMutex);
    if (mErrors.empty())
    {
        return vk::Error({VK_SUCCESS, nullptr, nullptr, 0});
    }
    vk::Error frontError = mErrors.front();
    mErrors.pop();
    return frontError;
}
// Queue a task for the worker thread. Submit-type tasks are stamped with a queue serial
// (under mCommandProcessorQueueSerialMutex) before being enqueued. When the
// asynchronousCommandProcessing feature is disabled, blocks until the worker drains the
// queue. `context` may be null (e.g. during shutdown); it is only used for error sync.
void CommandProcessor::queueCommand(vk::Context *context, vk::CommandProcessorTask *task)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand");
    {
        // Grab the worker mutex so that we put things on the queue in the same order as we give out
        // serials.
        std::lock_guard<std::mutex> queueLock(mWorkerMutex);
        if (task->getTaskCommand() == vk::CustomTask::FlushAndQueueSubmit ||
            task->getTaskCommand() == vk::CustomTask::OneOffQueueSubmit)
        {
            std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
            // Flush submits work, so give it the current serial and generate a new one.
            Serial queueSerial = mCommandProcessorCurrentQueueSerial;
            task->setQueueSerial(queueSerial);
            mCommandProcessorLastSubmittedSerial = mCommandProcessorCurrentQueueSerial;
            mCommandProcessorCurrentQueueSerial = mQueueSerialFactory.generate();
            // Tag every resource used by this submission with its serial.
            task->getResourceUseList().releaseResourceUsesAndUpdateSerials(queueSerial);
        }
        // The task is moved into the queue; the caller's task object is left moved-from.
        mTasks.emplace(std::move(*task));
        mWorkAvailableCondition.notify_one();
    }
    if (getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
    {
        return;
    }
    // parallel task processing disabled so wait for work to complete.
    waitForWorkComplete(context);
}
void CommandProcessor::processTasks()
{
while (true)
{
bool exitThread = false;
angle::Result result = processTasksImpl(&exitThread);
if (exitThread)
{
// We are doing a controlled exit of the thread, break out of the while loop.
break;
}
if (result != angle::Result::Continue)
{
// TODO: https://issuetracker.google.com/issues/170311829 - follow-up on error handling
// ContextVk::commandProcessorSyncErrorsAndQueueCommand and WindowSurfaceVk::destroy
// do error processing, is anything required here? Don't think so, mostly need to
// continue the worker thread until it's been told to exit.
UNREACHABLE();
}
}
}
// Worker thread main loop body: initialize the TaskProcessor and the first primary
// command buffer, then pop and execute tasks until an Exit task arrives.
// NOTE: the original listing interleaved lines from the removed processCommandProcessorTasks
// implementation (old mCommandsQueue wait/pop and the null-pointer exit check); those
// residue lines are deleted in this reconstruction.
angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
{
    // Initialization prior to work thread loop
    ANGLE_TRY(mTaskProcessor.init(this, std::this_thread::get_id()));

    // Allocate and begin primary command buffer
    ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(this, &mPrimaryCommandBuffer));
    VkCommandBufferBeginInfo beginInfo = {};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    beginInfo.pInheritanceInfo = nullptr;
    ANGLE_VK_TRY(this, mPrimaryCommandBuffer.begin(beginInfo));

    while (true)
    {
        std::unique_lock<std::mutex> lock(mWorkerMutex);
        if (mTasks.empty())
        {
            // Advertise idleness so waitForWorkComplete() and friends can make progress.
            mWorkerThreadIdle = true;
            mWorkerIdleCondition.notify_all();
            // Only wake if notified and command queue is not empty
            mWorkAvailableCondition.wait(lock, [this] { return !mTasks.empty(); });
        }
        mWorkerThreadIdle = false;
        vk::CommandProcessorTask task(std::move(mTasks.front()));
        mTasks.pop();
        lock.unlock();

        switch (task.getTaskCommand())
        {
            case vk::CustomTask::Exit:
            {
                // Drain all outstanding GPU work before tearing down.
                ANGLE_TRY(mTaskProcessor.finishToSerial(this, Serial::Infinite()));
                *exitThread = true;
                // Shutting down so cleanup
                mTaskProcessor.destroy(mRenderer->getDevice());
                mCommandPool.destroy(mRenderer->getDevice());
                mPrimaryCommandBuffer.destroy(mRenderer->getDevice());
                mWorkerThreadIdle = true;
                mWorkerIdleCondition.notify_one();
                return angle::Result::Continue;
            }
            default:
                ANGLE_TRY(processTask(&task));
                break;
        }
    }

    UNREACHABLE();
    return angle::Result::Stop;
}
// Execute one non-Exit task on the worker thread (Exit is handled by processTasksImpl).
// NOTE: the original listing interleaved old-version lines (task.commandBuffer->...,
// task.primaryCB) inside the OneOffQueueSubmit case; those residue lines are deleted.
angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
{
    switch (task->getTaskCommand())
    {
        case vk::CustomTask::FlushAndQueueSubmit:
        {
            // End command buffer
            ANGLE_VK_TRY(this, mPrimaryCommandBuffer.end());

            // 1. Create submitInfo
            VkSubmitInfo submitInfo = {};
            InitializeSubmitInfo(&submitInfo, mPrimaryCommandBuffer, task->getWaitSemaphores(),
                                 &task->getWaitSemaphoreStageMasks(), task->getSemaphore());

            // 2. Get shared submit fence. It's possible there are other users of this fence that
            // must wait for the work to be submitted before waiting on the fence. Reset the fence
            // immediately so we are sure to get a fresh one next time.
            vk::Shared<vk::Fence> fence;
            ANGLE_TRY(mRenderer->getNextSubmitFence(&fence, true));

            // 3. Call submitFrame()
            ANGLE_TRY(mTaskProcessor.submitFrame(
                this, getRenderer()->getVkQueue(task->getPriority()), submitInfo, fence,
                &task->getGarbage(), &mCommandPool, std::move(mPrimaryCommandBuffer),
                task->getQueueSerial()));

            // 4. Allocate & begin new primary command buffer
            ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(this, &mPrimaryCommandBuffer));
            VkCommandBufferBeginInfo beginInfo = {};
            beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
            beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
            beginInfo.pInheritanceInfo = nullptr;
            ANGLE_VK_TRY(this, mPrimaryCommandBuffer.begin(beginInfo));

            // Free this local reference
            getRenderer()->resetSharedFence(&fence);

            ASSERT(task->getGarbage().empty());
            break;
        }
        case vk::CustomTask::OneOffQueueSubmit:
        {
            VkSubmitInfo submitInfo = {};
            submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
            if (task->getOneOffCommandBufferVk() != VK_NULL_HANDLE)
            {
                submitInfo.commandBufferCount = 1;
                submitInfo.pCommandBuffers = &task->getOneOffCommandBufferVk();
            }
            // TODO: https://issuetracker.google.com/issues/170328907 - vkQueueSubmit should be
            // owned by TaskProcessor to ensure proper synchronization
            ANGLE_TRY(mTaskProcessor.queueSubmit(this,
                                                 getRenderer()->getVkQueue(task->getPriority()),
                                                 submitInfo, task->getOneOffFence()));
            ANGLE_TRY(mTaskProcessor.checkCompletedCommands(this));
            break;
        }
        case vk::CustomTask::FinishToSerial:
        {
            ANGLE_TRY(mTaskProcessor.finishToSerial(this, task->getQueueSerial()));
            break;
        }
        case vk::CustomTask::Present:
        {
            VkResult result = mTaskProcessor.present(getRenderer()->getVkQueue(task->getPriority()),
                                                     task->getPresentInfo());
            if (ANGLE_UNLIKELY(result != VK_SUCCESS))
            {
                // Save the error so that we can handle it (e.g. VK_OUT_OF_DATE)
                // Don't leave processing loop, don't consider errors from present to be fatal.
                // TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to
                // properly parallelize present
                handleError(result, __FILE__, __FUNCTION__, __LINE__);
            }
            break;
        }
        case vk::CustomTask::ProcessCommands:
        {
            // Flush the secondary command buffer into the worker-owned primary, then return
            // it to the context's recycle queue.
            ASSERT(!task->getCommandBuffer()->empty());
            ANGLE_TRY(task->getCommandBuffer()->flushToPrimary(
                getRenderer()->getFeatures(), &mPrimaryCommandBuffer, task->getRenderPass()));
            ASSERT(task->getCommandBuffer()->empty());
            task->getCommandBuffer()->releaseToContextQueue(task->getContextVk());
            break;
        }
        default:
            UNREACHABLE();
            break;
    }

    return angle::Result::Continue;
}
void CommandProcessor::waitForWorkComplete()
void CommandProcessor::waitForWorkComplete(vk::Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkerThreadIdle");
std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock,
[this] { return (mCommandsQueue.empty() && mWorkerThreadIdle); });
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
// Worker thread is idle and command queue is empty so good to continue
lock.unlock();
if (!context)
{
return;
}
// Sync any errors to the context
while (hasPendingError())
{
vk::Error workerError = getAndClearPendingError();
if (workerError.mErrorCode != VK_SUCCESS)
{
context->handleError(workerError.mErrorCode, workerError.mFile, workerError.mFunction,
workerError.mLine);
}
}
}
// Stop the worker: queue an Exit task, wait for the worker to process it, then join the
// thread. Null contexts are passed since no context is available during shutdown.
// NOTE: the original listing interleaved three lines of the removed shutdown body
// (waitForWorkComplete(); kEndCommandProcessorThread; queueCommands); deleted here.
void CommandProcessor::shutdown(std::thread *commandProcessorThread)
{
    vk::CommandProcessorTask endTask;
    endTask.initTask(vk::CustomTask::Exit);
    queueCommand(nullptr, &endTask);
    waitForWorkComplete(nullptr);
    if (commandProcessorThread->joinable())
    {
        commandProcessorThread->join();
    }
}
// Return the fence for the last submit. This may mean waiting on the worker to process tasks to
// actually get to the last submit
vk::Shared<vk::Fence> CommandProcessor::getLastSubmittedFence() const
{
    ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::getLastSubmittedFence");
    std::unique_lock<std::mutex> lock(mWorkerMutex);
    // Block until the task queue is drained and the worker reports idle.
    mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
    // Worker thread is idle and command queue is empty so good to continue.
    // mWorkerMutex is still held here, as getLastSubmittedFenceWithLock requires.
    return mTaskProcessor.getLastSubmittedFenceWithLock(getDevice());
}
// Serial stamped on the most recent submit-type task (see queueCommand).
Serial CommandProcessor::getLastSubmittedSerial()
{
    std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
    return mCommandProcessorLastSubmittedSerial;
}
// Serial that will be assigned to the next submit-type task (see queueCommand).
Serial CommandProcessor::getCurrentQueueSerial()
{
    std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
    return mCommandProcessorCurrentQueueSerial;
}
// Wait until all commands up to and including serial have been processed
void CommandProcessor::finishToSerial(vk::Context *context, Serial serial)
{
    // Queue a FinishToSerial task; the worker performs the actual fence wait.
    vk::CommandProcessorTask finishToSerial;
    finishToSerial.initFinishToSerial(serial);
    queueCommand(context, &finishToSerial);
    // Wait until the worker is idle. At that point we know that the finishToSerial command has
    // completed executing, including any associated state cleanup.
    waitForWorkComplete(context);
}
// Device-loss entry point: drain the task queue, then delegate cleanup of in-flight
// batches to the TaskProcessor.
void CommandProcessor::handleDeviceLost()
{
    ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost");
    std::unique_lock<std::mutex> lock(mWorkerMutex);
    mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
    // Worker thread is idle and command queue is empty so good to continue
    mTaskProcessor.handleDeviceLost(this);
}
// Convenience wrapper: finish everything ever submitted (Serial::Infinite()).
void CommandProcessor::finishAllWork(vk::Context *context)
{
    // Wait for GPU work to finish
    finishToSerial(context, Serial::Infinite());
}
} // namespace vk
} // namespace rx
......@@ -17,27 +17,120 @@
#include <thread>
#include "common/vulkan/vk_headers.h"
#include "libANGLE/renderer/vulkan/PersistentCommandPool.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h"
namespace rx
{
class RendererVk;
class CommandProcessor;
namespace vk
{
// CommandProcessorTask is used to queue a task to the worker thread when
// enableCommandProcessingThread feature is true.
// Issuing the CustomTask::Exit command will cause the worker thread to clean up its resources and
// shut down. This command is sent when the renderer instance shuts down. Custom tasks are:
// NOTE: the original listing interleaved lines from the removed CommandProcessorTask
// struct and the kEndCommandProcessorThread sentinel here; those are deleted in this
// reconstruction of the new enum.
enum CustomTask
{
    Invalid = 0,
    // Process SecondaryCommandBuffer commands into the primary CommandBuffer.
    ProcessCommands,
    // End the current command buffer and submit commands to the queue
    FlushAndQueueSubmit,
    // Submit custom command buffer, excludes some state management
    OneOffQueueSubmit,
    // Finish queue commands up to given serial value, process garbage
    FinishToSerial,
    // Execute QueuePresent
    Present,
    // Exit the command processor thread
    Exit,
};
// Plain-data task object handed from the main thread to the worker. Exactly one init*
// method is called per task; which fields are meaningful depends on mTask.
class CommandProcessorTask
{
  public:
    CommandProcessorTask() { initTask(); }

    // Reset all fields to the empty/Invalid state.
    void initTask();

    // Initialize a payload-free task (e.g. CustomTask::Exit).
    void initTask(CustomTask command) { mTask = command; }

    // ProcessCommands: flush `commandBuffer` into the worker-owned primary.
    void initProcessCommands(ContextVk *contextVk,
                             CommandBufferHelper *commandBuffer,
                             vk::RenderPass *renderPass);

    // Present: run QueuePresent with presentInfo at the given context priority.
    void initPresent(egl::ContextPriority priority, VkPresentInfoKHR presentInfo);

    // FinishToSerial: wait for commands up to `serial` to complete.
    void initFinishToSerial(Serial serial);

    // FlushAndQueueSubmit: end the current primary and submit it with this payload.
    void initFlushAndQueueSubmit(std::vector<VkSemaphore> &&waitSemaphores,
                                 std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
                                 const vk::Semaphore *semaphore,
                                 egl::ContextPriority priority,
                                 vk::GarbageList &&currentGarbage,
                                 vk::ResourceUseList &&currentResources);

    // OneOffQueueSubmit: submit a caller-provided VkCommandBuffer with an optional fence.
    void initOneOffQueueSubmit(VkCommandBuffer oneOffCommandBufferVk,
                               egl::ContextPriority priority,
                               const vk::Fence *fence);

    // Move-only: moving resets the source via initTask() (see operator= in the .cpp).
    CommandProcessorTask &operator=(CommandProcessorTask &&rhs);

    CommandProcessorTask(CommandProcessorTask &&other) : CommandProcessorTask()
    {
        *this = std::move(other);
    }

    // Queue serial stamped by CommandProcessor::queueCommand for submit-type tasks.
    void setQueueSerial(Serial serial) { mSerial = serial; }
    Serial getQueueSerial() const { return mSerial; }
    vk::ResourceUseList &getResourceUseList() { return mResourceUseList; }
    vk::CustomTask getTaskCommand() { return mTask; }
    std::vector<VkSemaphore> &getWaitSemaphores() { return mWaitSemaphores; }
    std::vector<VkPipelineStageFlags> &getWaitSemaphoreStageMasks()
    {
        return mWaitSemaphoreStageMasks;
    }
    const vk::Semaphore *getSemaphore() { return mSemaphore; }
    vk::GarbageList &getGarbage() { return mGarbage; }
    egl::ContextPriority getPriority() const { return mPriority; }
    const VkCommandBuffer &getOneOffCommandBufferVk() const { return mOneOffCommandBufferVk; }
    const vk::Fence *getOneOffFence() { return mOneOffFence; }
    const VkPresentInfoKHR &getPresentInfo() const { return mPresentInfo; }
    vk::RenderPass *getRenderPass() const { return mRenderPass; }
    CommandBufferHelper *getCommandBuffer() const { return mCommandBuffer; }
    ContextVk *getContextVk() const { return mContextVk; }

  private:
    CustomTask mTask;

    // ProcessCommands
    ContextVk *mContextVk;
    vk::RenderPass *mRenderPass;
    CommandBufferHelper *mCommandBuffer;

    // Flush data
    std::vector<VkSemaphore> mWaitSemaphores;
    std::vector<VkPipelineStageFlags> mWaitSemaphoreStageMasks;
    const vk::Semaphore *mSemaphore;
    vk::GarbageList mGarbage;
    vk::ResourceUseList mResourceUseList;

    // FinishToSerial & Flush command data
    Serial mSerial;

    // Present command data
    VkPresentInfoKHR mPresentInfo;

    // Used by OneOffQueueSubmit
    VkCommandBuffer mOneOffCommandBufferVk;
    const vk::Fence *mOneOffFence;

    // Flush, Present & QueueWaitIdle data
    egl::ContextPriority mPriority;
};
struct CommandBatch final : angle::NonCopyable
{
......@@ -54,37 +147,139 @@ struct CommandBatch final : angle::NonCopyable
vk::Shared<vk::Fence> fence;
Serial serial;
};
} // namespace vk
class CommandProcessor : angle::NonCopyable
// Owns queue-submission state for the worker thread: the in-flight batch list, the
// serial-ordered garbage queue, and the persistent pool of primary command buffers.
class TaskProcessor : angle::NonCopyable
{
  public:
    TaskProcessor();
    ~TaskProcessor();
    // Records the worker thread id and initializes the primary command pool.
    angle::Result init(vk::Context *context, std::thread::id threadId);
    // Must only be called after in-flight work and garbage are fully drained (asserted).
    void destroy(VkDevice device);

    // Acquire/recycle primary command buffers from mPrimaryCommandPool.
    angle::Result allocatePrimaryCommandBuffer(vk::Context *context,
                                               vk::PrimaryCommandBuffer *commandBufferOut);
    angle::Result releasePrimaryCommandBuffer(vk::Context *context,
                                              vk::PrimaryCommandBuffer &&commandBuffer);

    void clearAllGarbage(vk::Context *context);

    // Block until commands up to `serial` complete, then retire finished batches.
    // Can block the worker thread on a fence wait.
    angle::Result finishToSerial(vk::Context *context, Serial serial);

    // Thin wrapper over vkQueuePresentKHR; returns the raw VkResult.
    VkResult present(VkQueue queue, const VkPresentInfoKHR &presentInfo);

    // Submit a frame's primary command buffer and track it as an in-flight batch.
    angle::Result submitFrame(vk::Context *context,
                              VkQueue queue,
                              const VkSubmitInfo &submitInfo,
                              const vk::Shared<vk::Fence> &sharedFence,
                              vk::GarbageList *currentGarbage,
                              vk::CommandPool *commandPool,
                              vk::PrimaryCommandBuffer &&commandBuffer,
                              const Serial &queueSerial);

    // Raw vkQueueSubmit; asserts it runs on the worker thread (mThreadId).
    angle::Result queueSubmit(vk::Context *context,
                              VkQueue queue,
                              const VkSubmitInfo &submitInfo,
                              const vk::Fence *fence);

    // Caller must hold the worker mutex with the work queue drained.
    vk::Shared<vk::Fence> getLastSubmittedFenceWithLock(VkDevice device) const;

    // Check to see which batches have finished completion (forward progress for
    // mLastCompletedQueueSerial, for example for when the application busy waits on a query
    // result). It would be nice if we didn't have to expose this for QueryVk::getResult.
    angle::Result checkCompletedCommands(vk::Context *context);

    // Waits for in-flight fences (best effort) and destroys batch resources on device loss.
    void handleDeviceLost(vk::Context *context);

  private:
    // Store commandBuffer/commandPool into `batch`, recreating a fresh transient pool.
    angle::Result releaseToCommandBatch(vk::Context *context,
                                        vk::PrimaryCommandBuffer &&commandBuffer,
                                        vk::CommandPool *commandPool,
                                        vk::CommandBatch *batch);

    // Garbage lists ordered by submission serial; destroyed as serials complete.
    vk::GarbageQueue mGarbageQueue;
    // Submitted batches, oldest first, awaiting fence completion.
    std::vector<vk::CommandBatch> mInFlightCommands;

    // Keeps a free list of reusable primary command buffers.
    vk::PersistentCommandPool mPrimaryCommandPool;

    // Worker thread id, asserted against in queueSubmit/submitFrame.
    std::thread::id mThreadId;
};
class CommandProcessor : public vk::Context
{
public:
CommandProcessor();
~CommandProcessor() = default;
// Main worker loop that should be launched in its own thread. The
// loop waits for work to be submitted from a separate thread.
void processCommandProcessorTasks();
// Called asynchronously from workLoop() thread to queue work that is
// then processed by the workLoop() thread
void queueCommands(const vk::CommandProcessorTask &commands);
// Used by separate thread to wait for worker thread to complete all
// outstanding work.
void waitForWorkComplete();
// Stop the command processor loop
CommandProcessor(RendererVk *renderer);
~CommandProcessor() override;
void handleError(VkResult result,
const char *file,
const char *function,
unsigned int line) override;
bool isRobustResourceInitEnabled() const override;
// Entry point for command processor thread, calls processTasksImpl to do the
// work. called by RendererVk::initialization on main thread
void processTasks();
// Called asynchronously from main thread to queue work that is then processed by the worker
// thread
void queueCommand(vk::Context *context, vk::CommandProcessorTask *task);
// Used by main thread to wait for worker thread to complete all outstanding work.
void waitForWorkComplete(vk::Context *context);
Serial getCurrentQueueSerial();
Serial getLastSubmittedSerial();
// Wait until desired serial has been processed.
void finishToSerial(vk::Context *context, Serial serial);
vk::Shared<vk::Fence> getLastSubmittedFence() const;
void handleDeviceLost();
bool hasPendingError() const
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
return !mErrors.empty();
}
vk::Error getAndClearPendingError();
// Stop the command processor thread
void shutdown(std::thread *commandProcessorThread);
void finishAllWork(vk::Context *context);
private:
std::queue<vk::CommandProcessorTask> mCommandsQueue;
std::mutex mWorkerMutex;
// Command processor thread, called by processTasks. The loop waits for work to
// be submitted from a separate thread.
angle::Result processTasksImpl(bool *exitThread);
// Command processor thread, process a task
angle::Result processTask(vk::CommandProcessorTask *task);
std::queue<vk::CommandProcessorTask> mTasks;
mutable std::mutex mWorkerMutex;
// Signal worker thread when work is available
std::condition_variable mWorkAvailableCondition;
// Signal main thread when all work completed
std::condition_variable mWorkerIdleCondition;
mutable std::condition_variable mWorkerIdleCondition;
// Track worker thread Idle state for assertion purposes
bool mWorkerThreadIdle;
// Command pool to allocate processor thread primary command buffers from
vk::CommandPool mCommandPool;
vk::PrimaryCommandBuffer mPrimaryCommandBuffer;
TaskProcessor mTaskProcessor;
AtomicSerialFactory mQueueSerialFactory;
std::mutex mCommandProcessorQueueSerialMutex;
Serial mCommandProcessorLastSubmittedSerial;
Serial mCommandProcessorCurrentQueueSerial;
mutable std::mutex mErrorMutex;
std::queue<vk::Error> mErrors;
};
} // namespace vk
} // namespace rx
#endif // LIBANGLE_RENDERER_VULKAN_COMMAND_PROCESSOR_H_
......@@ -480,7 +480,6 @@ void CommandQueue::clearAllGarbage(RendererVk *renderer)
}
angle::Result CommandQueue::allocatePrimaryCommandBuffer(vk::Context *context,
const vk::CommandPool &commandPool,
vk::PrimaryCommandBuffer *commandBufferOut)
{
return mPrimaryCommandPool.allocate(context, commandBufferOut);
......@@ -523,6 +522,8 @@ bool CommandQueue::hasInFlightCommands() const
angle::Result CommandQueue::finishToSerial(vk::Context *context, Serial serial, uint64_t timeout)
{
ASSERT(!context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled);
if (mInFlightCommands.empty())
{
return angle::Result::Continue;
......@@ -607,6 +608,8 @@ angle::Result CommandQueue::submitFrame(vk::Context *context,
vk::Shared<vk::Fence> CommandQueue::getLastSubmittedFence(const vk::Context *context) const
{
ASSERT(!context->getRenderer()->getFeatures().enableCommandProcessingThread.enabled);
vk::Shared<vk::Fence> fence;
if (!mInFlightCommands.empty())
{
......@@ -958,7 +961,7 @@ angle::Result ContextVk::initialize()
angle::Result ContextVk::startPrimaryCommandBuffer()
{
ANGLE_TRY(mCommandQueue.allocatePrimaryCommandBuffer(this, mCommandPool, &mPrimaryCommands));
ANGLE_TRY(mCommandQueue.allocatePrimaryCommandBuffer(this, &mPrimaryCommands));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
......@@ -1407,6 +1410,8 @@ angle::Result ContextVk::handleDirtyGraphicsPipeline(const gl::Context *context,
commandBuffer->bindGraphicsPipeline(mCurrentGraphicsPipeline->getPipeline());
// Update the queue serial for the pipeline object.
ASSERT(mCurrentGraphicsPipeline && mCurrentGraphicsPipeline->valid());
// TODO: https://issuetracker.google.com/issues/169788986: Need to change this so that we get
// the actual serial used when this work is submitted.
mCurrentGraphicsPipeline->updateSerial(getCurrentQueueSerial());
return angle::Result::Continue;
}
......@@ -1421,6 +1426,8 @@ angle::Result ContextVk::handleDirtyComputePipeline(const gl::Context *context,
}
commandBuffer->bindComputePipeline(mCurrentComputePipeline->get());
// TODO: https://issuetracker.google.com/issues/169788986: Need to change this so that we get
// the actual serial used when this work is submitted.
mCurrentComputePipeline->updateSerial(getCurrentQueueSerial());
return angle::Result::Continue;
......@@ -1750,10 +1757,30 @@ void ContextVk::addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffe
}
}
void ContextVk::commandProcessorSyncErrors()
{
while (mRenderer->hasPendingError())
{
vk::Error error = mRenderer->getAndClearPendingError();
if (error.mErrorCode != VK_SUCCESS)
{
handleError(error.mErrorCode, error.mFile, error.mFunction, error.mLine);
}
}
}
// Forward any pending worker-thread errors to this context, then hand |command| to the
// CommandProcessor for processing on the worker thread.
void ContextVk::commandProcessorSyncErrorsAndQueueCommand(vk::CommandProcessorTask *command)
{
    commandProcessorSyncErrors();
    mRenderer->queueCommand(this, command);
}
angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo,
vk::ResourceUseList *resourceList,
vk::PrimaryCommandBuffer &&commandBuffer)
{
ASSERT(!getRenderer()->getFeatures().enableCommandProcessingThread.enabled);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
dumpCommandStreamDiagnostics();
......@@ -1932,6 +1959,8 @@ angle::Result ContextVk::synchronizeCpuGpuTime()
double TeS = platform->monotonicallyIncreasingTime(platform);
// Get the query results
// Note: This LastSubmittedQueueSerial may include more work than was submitted above if
// another thread had submitted work.
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
uint64_t gpuTimestampCycles = 0;
......@@ -2086,13 +2115,22 @@ void ContextVk::flushGpuEvents(double nextSyncGpuTimestampS, double nextSyncCpuT
void ContextVk::clearAllGarbage()
{
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::clearAllGarbage");
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::finishAllWork");
for (vk::GarbageObject &garbage : mCurrentGarbage)
{
garbage.destroy(mRenderer);
}
mCurrentGarbage.clear();
mCommandQueue.clearAllGarbage(mRenderer);
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
// Issue command to CommandProcessor to ensure all work is complete, which will return any
// garbage items as well.
mRenderer->finishAllWork(this);
}
else
{
mCommandQueue.clearAllGarbage(mRenderer);
}
}
void ContextVk::handleDeviceLost()
......@@ -2100,7 +2138,14 @@ void ContextVk::handleDeviceLost()
mOutsideRenderPassCommands->reset();
mRenderPassCommands->reset();
mCommandQueue.handleDeviceLost(mRenderer);
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
mRenderer->handleDeviceLost();
}
else
{
mCommandQueue.handleDeviceLost(mRenderer);
}
clearAllGarbage();
mRenderer->notifyDeviceLost();
......@@ -3283,6 +3328,10 @@ angle::Result ContextVk::onMakeCurrent(const gl::Context *context)
// Flush outstanding work and detach this context from its window surface when it is
// unmade current.
angle::Result ContextVk::onUnMakeCurrent(const gl::Context *context)
{
    ANGLE_TRY(flushImpl(nullptr));
    const bool useWorkerThread = mRenderer->getFeatures().enableCommandProcessingThread.enabled;
    if (useWorkerThread)
    {
        // Drain the command processor so no queued tasks still reference this context.
        mRenderer->waitForCommandProcessorIdle(this);
    }
    mCurrentWindowSurface = nullptr;
    return angle::Result::Continue;
}
......@@ -4286,32 +4335,66 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
mDefaultUniformStorage.releaseInFlightBuffersToResourceUseList(this);
mStagingBuffer.releaseInFlightBuffersToResourceUseList(this);
// TODO: https://issuetracker.google.com/issues/170329600 - Verify that
// waitForSwapchainImageIfNecessary makes sense both w/ & w/o threading. I believe they do, but
// want confirmation.
waitForSwapchainImageIfNecessary();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
// Worker thread must complete adding any commands that were just flushed above to the
// primary command buffer before we can End the primary command buffer below.
mRenderer->waitForWorkerThreadIdle();
}
// Some tasks from ContextVk::submitFrame() that run before CommandQueue::submitFrame()
gl::RunningGraphWidget *renderPassCount =
mState.getOverlay()->getRunningGraphWidget(gl::WidgetId::VulkanRenderPassCount);
renderPassCount->add(mRenderPassCommands->getAndResetCounter());
renderPassCount->next();
ANGLE_VK_TRY(this, mPrimaryCommands.end());
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
dumpCommandStreamDiagnostics();
}
waitForSwapchainImageIfNecessary();
// Send a flush command to worker thread that will:
// 1. Create submitInfo
// 2. Call submitFrame()
// 3. Allocate new primary command buffer
vk::CommandProcessorTask flushAndQueueSubmit;
flushAndQueueSubmit.initFlushAndQueueSubmit(
std::move(mWaitSemaphores), std::move(mWaitSemaphoreStageMasks), signalSemaphore,
mContextPriority, std::move(mCurrentGarbage), std::move(mResourceUseList));
VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, mPrimaryCommands, mWaitSemaphores, mWaitSemaphoreStageMasks,
signalSemaphore);
commandProcessorSyncErrorsAndQueueCommand(&flushAndQueueSubmit);
ANGLE_TRY(submitFrame(submitInfo, &mResourceUseList, std::move(mPrimaryCommands)));
// Some tasks from ContextVk::submitFrame() that run after CommandQueue::submitFrame()
onRenderPassFinished();
mComputeDirtyBits |= mNewComputeCommandBufferDirtyBits;
ANGLE_TRY(startPrimaryCommandBuffer());
if (mGpuEventsEnabled)
{
ANGLE_TRY(checkCompletedGpuEvents());
}
}
else
{
ANGLE_VK_TRY(this, mPrimaryCommands.end());
VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, mPrimaryCommands, mWaitSemaphores,
mWaitSemaphoreStageMasks, signalSemaphore);
ANGLE_TRY(submitFrame(submitInfo, &mResourceUseList, std::move(mPrimaryCommands)));
ANGLE_TRY(startPrimaryCommandBuffer());
mWaitSemaphores.clear();
mWaitSemaphoreStageMasks.clear();
}
mPerfCounters.renderPasses = 0;
mPerfCounters.writeDescriptorSets = 0;
mPerfCounters.flushedOutsideRenderPassCommandBuffers = 0;
mPerfCounters.resolveImageCommands = 0;
mWaitSemaphores.clear();
mWaitSemaphoreStageMasks.clear();
ASSERT(mWaitSemaphores.empty());
ASSERT(mWaitSemaphoreStageMasks.empty());
mPerfCounters.primaryBuffers++;
......@@ -4331,8 +4414,15 @@ angle::Result ContextVk::finishImpl()
ANGLE_TRY(flushImpl(nullptr));
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
ASSERT(!mCommandQueue.hasInFlightCommands());
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
}
else
{
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
ASSERT(!mCommandQueue.hasInFlightCommands());
}
clearAllGarbage();
......@@ -4373,17 +4463,25 @@ bool ContextVk::isSerialInUse(Serial serial) const
// Poll the in-flight command batches for completion so finished serials/resources can be
// reclaimed. Only valid on the non-threaded path: with the command processor thread enabled,
// completion tracking is owned by the worker.
angle::Result ContextVk::checkCompletedCommands()
{
    ASSERT(!mRenderer->getFeatures().enableCommandProcessingThread.enabled);
    return mCommandQueue.checkCompletedCommands(this);
}
// Block until all work up to and including |serial| has completed, routing through either
// the context-owned CommandQueue or the command processor thread.
angle::Result ContextVk::finishToSerial(Serial serial)
{
    if (!mRenderer->getFeatures().enableCommandProcessingThread.enabled)
    {
        return mCommandQueue.finishToSerial(this, serial, mRenderer->getMaxFenceWaitTimeNs());
    }

    // Threaded path: the CommandProcessor owns submission, so it performs the wait.
    mRenderer->finishToSerial(this, serial);
    return angle::Result::Continue;
}
// Look up (or create) a render pass compatible with |desc| in this context's cache.
angle::Result ContextVk::getCompatibleRenderPass(const vk::RenderPassDesc &desc,
                                                 vk::RenderPass **renderPassOut)
{
    // Note: Each context has its own RenderPassCache so no locking needed.
    return mRenderPassCache.getCompatibleRenderPass(this, desc, renderPassOut);
}
......@@ -4391,6 +4489,7 @@ angle::Result ContextVk::getRenderPassWithOps(const vk::RenderPassDesc &desc,
const vk::AttachmentOpsArray &ops,
vk::RenderPass **renderPassOut)
{
// Note: Each context has its own RenderPassCache so no locking needed.
return mRenderPassCache.getRenderPassWithOps(this, desc, ops, renderPassOut);
}
......@@ -4400,12 +4499,13 @@ angle::Result ContextVk::ensureSubmitFenceInitialized()
{
return angle::Result::Continue;
}
return mRenderer->newSharedFence(this, &mSubmitFence);
}
angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut)
{
ASSERT(!getRenderer()->getFeatures().enableCommandProcessingThread.enabled);
ANGLE_TRY(ensureSubmitFenceInitialized());
ASSERT(!sharedFenceOut->isReferenced());
......@@ -4415,6 +4515,10 @@ angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOu
// Fence signaled by the most recent queue submission. Its owner depends on whether the
// command processor thread is handling submissions.
vk::Shared<vk::Fence> ContextVk::getLastSubmittedFence() const
{
    const bool useWorkerThread = mRenderer->getFeatures().enableCommandProcessingThread.enabled;
    return useWorkerThread ? mRenderer->getLastSubmittedFence()
                           : mCommandQueue.getLastSubmittedFence(this);
}
......@@ -4817,17 +4921,26 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
mRenderPassCommands->endRenderPass(this);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
mRenderPassCommands->addCommandDiagnostics(this);
}
vk::RenderPass *renderPass = nullptr;
ANGLE_TRY(mRenderPassCommands->getRenderPassWithOps(this, &renderPass));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
mRenderPassCommands->markClosed();
vk::CommandProcessorTask task = {this, &mPrimaryCommands, mRenderPassCommands};
vk::CommandProcessorTask flushToPrimary;
flushToPrimary.initProcessCommands(this, mRenderPassCommands, renderPass);
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushInsideRenderPassCommands");
queueCommandsToWorker(task);
commandProcessorSyncErrorsAndQueueCommand(&flushToPrimary);
getNextAvailableCommandBuffer(&mRenderPassCommands, true);
}
else
{
ANGLE_TRY(mRenderPassCommands->flushToPrimary(this, &mPrimaryCommands));
ANGLE_TRY(mRenderPassCommands->flushToPrimary(this->getFeatures(), &mPrimaryCommands,
renderPass));
}
mHasPrimaryCommands = true;
......@@ -4958,17 +5071,26 @@ angle::Result ContextVk::flushOutsideRenderPassCommands()
addOverlayUsedBuffersCount(mOutsideRenderPassCommands);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{
mOutsideRenderPassCommands->addCommandDiagnostics(this);
}
vk::RenderPass *renderPass = nullptr;
ANGLE_TRY(mOutsideRenderPassCommands->getRenderPassWithOps(this, &renderPass));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled)
{
mOutsideRenderPassCommands->markClosed();
vk::CommandProcessorTask task = {this, &mPrimaryCommands, mOutsideRenderPassCommands};
vk::CommandProcessorTask flushToPrimary;
flushToPrimary.initProcessCommands(this, mOutsideRenderPassCommands, renderPass);
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushOutsideRenderPassCommands");
queueCommandsToWorker(task);
commandProcessorSyncErrorsAndQueueCommand(&flushToPrimary);
getNextAvailableCommandBuffer(&mOutsideRenderPassCommands, false);
}
else
{
ANGLE_TRY(mOutsideRenderPassCommands->flushToPrimary(this, &mPrimaryCommands));
ANGLE_TRY(mOutsideRenderPassCommands->flushToPrimary(getFeatures(), &mPrimaryCommands,
renderPass));
}
mHasPrimaryCommands = true;
mPerfCounters.flushedOutsideRenderPassCommandBuffers++;
......
......@@ -47,7 +47,6 @@ class CommandQueue final : angle::NonCopyable
bool hasInFlightCommands() const;
angle::Result allocatePrimaryCommandBuffer(vk::Context *context,
const vk::CommandPool &commandPool,
vk::PrimaryCommandBuffer *commandBufferOut);
angle::Result releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer);
......@@ -653,11 +652,10 @@ class ContextVk : public ContextImpl, public vk::Context
void updateOverlayOnPresent();
void addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffer);
// Submit commands to worker thread for processing
ANGLE_INLINE void queueCommandsToWorker(const vk::CommandProcessorTask &commands)
{
mRenderer->queueCommands(commands);
}
// Sync any errors from the command processor
void commandProcessorSyncErrors();
// Sync any error from worker thread and queue up next command for processing
void commandProcessorSyncErrorsAndQueueCommand(vk::CommandProcessorTask *command);
// When worker thread completes, it releases command buffers back to context queue
void recycleCommandBuffer(vk::CommandBufferHelper *commandBuffer);
......@@ -1085,6 +1083,7 @@ class ContextVk : public ContextImpl, public vk::Context
// We use a single pool for recording commands. We also keep a free list for pool recycling.
vk::CommandPool mCommandPool;
// TODO: This can be killed once threading is enabled https://issuetracker.google.com/153666475
CommandQueue mCommandQueue;
vk::GarbageList mCurrentGarbage;
......
......@@ -180,18 +180,30 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait)
// finite time.
// Note regarding time-elapsed: end should have been called after begin, so flushing when end
// has pending work should flush begin too.
if (mQueryHelper.hasPendingWork(contextVk))
// TODO: https://issuetracker.google.com/169788986 - can't guarantee hasPendingWork() works when
// using threaded worker
if (mQueryHelper.hasPendingWork(contextVk) ||
contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
ANGLE_TRY(contextVk->flushImpl(nullptr));
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
// TODO: https://issuetracker.google.com/170312581 - For now just stalling here
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
ASSERT(!mQueryHelperTimeElapsedBegin.hasPendingWork(contextVk));
ASSERT(!mQueryHelper.hasPendingWork(contextVk));
}
// If the command buffer this query is being written to is still in flight, its reset command
// may not have been performed by the GPU yet. To avoid a race condition in this case, wait
// for the batch to finish first before querying (or return not-ready if not waiting).
ANGLE_TRY(contextVk->checkCompletedCommands());
if (!contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
// If the command buffer this query is being written to is still in flight, its reset
// command may not have been performed by the GPU yet. To avoid a race condition in this
// case, wait for the batch to finish first before querying (or return not-ready if not
// waiting).
ANGLE_TRY(contextVk->checkCompletedCommands());
}
if (contextVk->isSerialInUse(mQueryHelper.getStoredQueueSerial()))
{
if (!wait)
......
......@@ -467,6 +467,7 @@ RendererVk::RendererVk()
mPipelineCacheVkUpdateTimeout(kPipelineCacheVkUpdatePeriod),
mPipelineCacheDirty(false),
mPipelineCacheInitialized(false),
mCommandProcessor(this),
mGlslangInitialized(false)
{
VkFormatProperties invalid = {0, 0, kInvalidFormatFeatureFlags};
......@@ -536,6 +537,10 @@ void RendererVk::onDestroy()
std::lock_guard<std::mutex> lock(mFenceRecyclerMutex);
mFenceRecycler.destroy(mDevice);
}
{
std::lock_guard<decltype(mNextSubmitFenceMutex)> lock(mNextSubmitFenceMutex);
mNextSubmitFence.reset(mDevice);
}
mPipelineCache.destroy(mDevice);
mSamplerCache.destroy(this);
......@@ -580,7 +585,7 @@ void RendererVk::notifyDeviceLost()
{
{
std::lock_guard<std::mutex> lock(mQueueSerialMutex);
mLastCompletedQueueSerial = mLastSubmittedQueueSerial;
mLastCompletedQueueSerial = getLastSubmittedQueueSerial();
}
mDeviceLost = true;
mDisplay->notifyDeviceLost();
......@@ -909,7 +914,8 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk,
if (getFeatures().enableCommandProcessingThread.enabled)
{
mCommandProcessorThread =
std::thread(&CommandProcessor::processCommandProcessorTasks, &mCommandProcessor);
std::thread(&vk::CommandProcessor::processTasks, &mCommandProcessor);
mCommandProcessor.waitForWorkComplete(nullptr);
}
return angle::Result::Continue;
......@@ -1907,6 +1913,9 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev
// Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, enableCommandProcessingThread, false);
// Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, asynchronousCommandProcessing, false);
ANGLE_FEATURE_CONDITION(&mFeatures, supportsYUVSamplerConversion,
mSamplerYcbcrConversionFeatures.samplerYcbcrConversion != VK_FALSE);
......@@ -2183,6 +2192,17 @@ bool RendererVk::hasBufferFormatFeatureBits(VkFormat format,
return hasFormatFeatureBits<&VkFormatProperties::bufferFeatures>(format, featureBits);
}
// Log the Vulkan Memory Allocator statistics as a JSON string. The output can be fed to
// VmaDumpVis.py to generate a visualization of the allocations VMA has performed.
void RendererVk::outputVmaStatString()
{
    char *statsJson = nullptr;
    // true requests the detailed stats (per VMA's buildStatsString second parameter).
    mAllocator.buildStatsString(&statsJson, true);
    INFO() << std::endl << statsJson << std::endl;
    // The string is allocated by VMA and must be handed back to it.
    mAllocator.freeStatsString(statsJson);
}
angle::Result RendererVk::queueSubmit(vk::Context *context,
egl::ContextPriority priority,
const VkSubmitInfo &submitInfo,
......@@ -2192,23 +2212,11 @@ angle::Result RendererVk::queueSubmit(vk::Context *context,
{
if (kOutputVmaStatsString)
{
// Output the VMA stats string
// This JSON string can be passed to VmaDumpVis.py to generate a visualization of the
// allocations the VMA has performed.
char *statsString;
mAllocator.buildStatsString(&statsString, true);
INFO() << std::endl << statsString << std::endl;
mAllocator.freeStatsString(statsString);
outputVmaStatString();
}
if (getFeatures().enableCommandProcessingThread.enabled)
{
// For initial threading phase 1 code make sure any outstanding command processing
// is complete.
// TODO: b/153666475 For phase2 investigate if this is required as most submits will take
// place through worker thread except for one-off submits below.
mCommandProcessor.waitForWorkComplete();
}
ASSERT(!getFeatures().enableCommandProcessingThread.enabled);
{
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
std::lock_guard<std::mutex> serialLock(mQueueSerialMutex);
......@@ -2235,12 +2243,26 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context,
const vk::Fence *fence,
Serial *serialOut)
{
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = primary.ptr();
if (getFeatures().enableCommandProcessingThread.enabled)
{
vk::CommandProcessorTask oneOffQueueSubmit;
oneOffQueueSubmit.initOneOffQueueSubmit(primary.getHandle(), priority, fence);
queueCommand(context, &oneOffQueueSubmit);
waitForCommandProcessorIdle(context);
*serialOut = getLastSubmittedQueueSerial();
ANGLE_TRY(cleanupGarbage(false));
}
else
{
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = primary.ptr();
ANGLE_TRY(queueSubmit(context, priority, submitInfo, nullptr, fence, serialOut));
ANGLE_TRY(queueSubmit(context, priority, submitInfo, nullptr, fence, serialOut));
}
mPendingOneOffCommands.push_back({*serialOut, std::move(primary)});
......@@ -2251,8 +2273,8 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori
{
if (getFeatures().enableCommandProcessingThread.enabled)
{
// First make sure command processor is complete when waiting for queue idle.
mCommandProcessor.waitForWorkComplete();
// Wait for all pending commands to get sent before issuing vkQueueWaitIdle
waitForCommandProcessorIdle(context);
}
{
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
......@@ -2268,8 +2290,8 @@ angle::Result RendererVk::deviceWaitIdle(vk::Context *context)
{
if (getFeatures().enableCommandProcessingThread.enabled)
{
// First make sure command processor is complete when waiting for device idle.
mCommandProcessor.waitForWorkComplete();
// Wait for all pending commands to get sent before issuing vkQueueWaitIdle
waitForCommandProcessorIdle(context);
}
{
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
......@@ -2286,12 +2308,7 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority,
{
ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent");
if (getFeatures().enableCommandProcessingThread.enabled)
{
// First make sure command processor is complete before queue present as
// present may have dependencies on that thread.
mCommandProcessor.waitForWorkComplete();
}
ASSERT(!getFeatures().enableCommandProcessingThread.enabled);
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
......@@ -2329,6 +2346,30 @@ angle::Result RendererVk::newSharedFence(vk::Context *context,
return angle::Result::Continue;
}
// Return a shared fence to be used for the next submit.
// The fence may be shared with a Sync object (which can outlive the context, as EGLSync
// objects do).
// |reset| indicates that mNextSubmitFence should be reset before returning. This ensures that
// the next request for a submit fence gets a fresh fence.
// TODO: https://issuetracker.google.com/issues/170312581 - move to CommandProcessor as part of
// fence ownership follow-up task.
angle::Result RendererVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut, bool reset)
{
    std::lock_guard<decltype(mNextSubmitFenceMutex)> lock(mNextSubmitFenceMutex);
    // Lazily create the shared fence on the first request since the last reset.
    if (!mNextSubmitFence.isReferenced())
    {
        // Errors are attributed to the CommandProcessor context.
        ANGLE_TRY(newSharedFence(&mCommandProcessor, &mNextSubmitFence));
    }
    ASSERT(!sharedFenceOut->isReferenced());
    // Hand the caller an additional reference to the same fence.
    sharedFenceOut->copy(getDevice(), mNextSubmitFence);
    if (reset)
    {
        resetSharedFence(&mNextSubmitFence);
    }
    return angle::Result::Continue;
}
template <VkFormatFeatureFlags VkFormatProperties::*features>
VkFormatFeatureFlags RendererVk::getFormatFeatureBits(VkFormat format,
const VkFormatFeatureFlags featureBits) const
......
......@@ -168,6 +168,7 @@ class RendererVk : angle::NonCopyable
return mPriorities[priority];
}
// Queue submit that originates from the main thread
angle::Result queueSubmit(vk::Context *context,
egl::ContextPriority priority,
const VkSubmitInfo &submitInfo,
......@@ -197,6 +198,8 @@ class RendererVk : angle::NonCopyable
sharedFenceIn->resetAndRecycle(&mFenceRecycler);
}
angle::Result getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut, bool reset);
template <typename... ArgsT>
void collectGarbageAndReinit(vk::SharedResourceUse *use, ArgsT... garbageIn)
{
......@@ -224,6 +227,12 @@ class RendererVk : angle::NonCopyable
}
}
    // Fence from the most recent submission performed by the CommandProcessor.
    vk::Shared<vk::Fence> getLastSubmittedFence() const
    {
        return mCommandProcessor.getLastSubmittedFence();
    }
    // Forward device-lost handling to the CommandProcessor.
    void handleDeviceLost() { mCommandProcessor.handleDeviceLost(); }
static constexpr size_t kMaxExtensionNames = 200;
using ExtensionNameList = angle::FixedVector<const char *, kMaxExtensionNames>;
......@@ -241,11 +250,19 @@ class RendererVk : angle::NonCopyable
ANGLE_INLINE Serial getCurrentQueueSerial()
{
if (getFeatures().enableCommandProcessingThread.enabled)
{
return mCommandProcessor.getCurrentQueueSerial();
}
std::lock_guard<std::mutex> lock(mQueueSerialMutex);
return mCurrentQueueSerial;
}
ANGLE_INLINE Serial getLastSubmittedQueueSerial()
{
if (getFeatures().enableCommandProcessingThread.enabled)
{
return mCommandProcessor.getLastSubmittedSerial();
}
std::lock_guard<std::mutex> lock(mQueueSerialMutex);
return mLastSubmittedQueueSerial;
}
......@@ -264,11 +281,24 @@ class RendererVk : angle::NonCopyable
vk::ActiveHandleCounter &getActiveHandleCounts() { return mActiveHandleCounts; }
// Queue commands to worker thread for processing
void queueCommands(const vk::CommandProcessorTask &commands)
void queueCommand(vk::Context *context, vk::CommandProcessorTask *command)
{
mCommandProcessor.queueCommands(commands);
mCommandProcessor.queueCommand(context, command);
}
bool hasPendingError() const { return mCommandProcessor.hasPendingError(); }
vk::Error getAndClearPendingError() { return mCommandProcessor.getAndClearPendingError(); }
    // Block the calling thread until the CommandProcessor has drained its task queue.
    // Worker errors surfaced during the wait are attributed to |context|.
    void waitForCommandProcessorIdle(vk::Context *context)
    {
        mCommandProcessor.waitForWorkComplete(context);
    }
void waitForWorkerThreadIdle() { mCommandProcessor.waitForWorkComplete(); }
    // Ask the CommandProcessor to wait until work up to |serial| has completed.
    void finishToSerial(vk::Context *context, Serial serial)
    {
        mCommandProcessor.finishToSerial(context, serial);
    }
    // Drain and complete every outstanding task owned by the CommandProcessor.
    void finishAllWork(vk::Context *context) { mCommandProcessor.finishAllWork(context); }
VkQueue getVkQueue(egl::ContextPriority priority) const { return mQueues[priority]; }
bool getEnableValidationLayers() const { return mEnableValidationLayers; }
......@@ -276,6 +306,8 @@ class RendererVk : angle::NonCopyable
void setGlobalDebugAnnotator();
void outputVmaStatString();
private:
angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex);
void ensureCapsInitialized() const;
......@@ -390,9 +422,13 @@ class RendererVk : angle::NonCopyable
};
std::deque<PendingOneOffCommands> mPendingOneOffCommands;
// Worker Thread
CommandProcessor mCommandProcessor;
// Command Processor Thread
vk::CommandProcessor mCommandProcessor;
std::thread mCommandProcessorThread;
// mNextSubmitFence is the fence that's going to be signaled at the next submission. This is
// used to support SyncVk objects, which may outlive the context (as EGLSync objects).
vk::Shared<vk::Fence> mNextSubmitFence;
std::mutex mNextSubmitFenceMutex;
// track whether we initialized (or released) glslang
bool mGlslangInitialized;
......
......@@ -102,11 +102,22 @@ ResourceUseList::ResourceUseList()
mResourceUses.reserve(kDefaultResourceUseCount);
}
// Move constructor: delegates to the move-assignment operator, which swaps this
// freshly-constructed (empty) list with |other|'s, leaving |other| empty.
ResourceUseList::ResourceUseList(ResourceUseList &&other)
{
    *this = std::move(other);
}
// All uses must have been released (releaseResourceUses) before destruction.
ResourceUseList::~ResourceUseList()
{
    ASSERT(mResourceUses.empty());
}
// Move assignment implemented via swap; the previous contents of this list end up in |rhs|.
ResourceUseList &ResourceUseList::operator=(ResourceUseList &&rhs)
{
    std::swap(mResourceUses, rhs.mResourceUses);
    return *this;
}
void ResourceUseList::releaseResourceUses()
{
for (SharedResourceUse &use : mResourceUses)
......
......@@ -136,7 +136,9 @@ class ResourceUseList final : angle::NonCopyable
{
public:
ResourceUseList();
ResourceUseList(ResourceUseList &&other);
virtual ~ResourceUseList();
ResourceUseList &operator=(ResourceUseList &&rhs);
void add(const SharedResourceUse &resourceUse);
......
......@@ -1186,6 +1186,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
bool *presentOutOfDate)
{
ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present");
RendererVk *renderer = contextVk->getRenderer();
// Throttle the submissions to avoid getting too far ahead of the GPU.
SwapHistory &swap = mSwapHistory[mCurrentSwapHistoryIndex];
......@@ -1194,7 +1195,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
if (swap.sharedFence.isReferenced())
{
ANGLE_TRY(swap.waitFence(contextVk));
swap.destroy(contextVk->getRenderer());
swap.destroy(renderer);
}
}
......@@ -1316,6 +1317,8 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
}
// Update the swap history for this presentation
// TODO: https://issuetracker.google.com/issues/170312581 - this will force us to flush worker
// queue to get the fence.
swap.sharedFence = contextVk->getLastSubmittedFence();
ASSERT(!mAcquireImageSemaphore.valid());
......@@ -1323,13 +1326,47 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
mCurrentSwapHistoryIndex =
mCurrentSwapHistoryIndex == mSwapHistory.size() ? 0 : mCurrentSwapHistoryIndex;
VkResult result = contextVk->getRenderer()->queuePresent(contextVk->getPriority(), presentInfo);
VkResult result;
if (renderer->getFeatures().enableCommandProcessingThread.enabled)
{
vk::CommandProcessorTask present;
present.initPresent(contextVk->getPriority(), presentInfo);
// Make sure everything has been submitted (and errors handled)
renderer->waitForCommandProcessorIdle(contextVk);
// Submit queuePresent all by itself (ignoring interference from other threads for now)
renderer->queueCommand(contextVk, &present);
// TODO: https://issuetracker.google.com/issues/170329600 - Just stalling here for now, but
// really want to let main thread continue
// need to figure out how to handle work below off-thread and sync to main
// Also, need to fix lifetime of presentInfo data when main thread continues.
// There is a bunch of work happening after present to deal with swapchain recreation.
// Will that require moving a large chunk of swapImpl to the CommandProcessor?
// That will likely require serializing access to the WindowSurfaceVk object in order
// to have current content.
result = VK_SUCCESS;
// wait for the queuePresent to be submitted and intentionally set the context to nullptr so
// that we can catch any error. Note this doesn't prevent another context from grabbing the
// error. Will be fixed properly in a follow-up as part of present work.
renderer->waitForCommandProcessorIdle(nullptr);
if (renderer->hasPendingError())
{
vk::Error error = renderer->getAndClearPendingError();
result = error.mErrorCode;
}
}
else
{
result = renderer->queuePresent(contextVk->getPriority(), presentInfo);
}
// If OUT_OF_DATE is returned, it's ok, we just need to recreate the swapchain before
// continuing.
// If VK_SUBOPTIMAL_KHR is returned it's because the device orientation changed and we should
// recreate the swapchain with a new window orientation.
if (contextVk->getFeatures().enablePreRotateSurfaces.enabled)
if (renderer->getFeatures().enablePreRotateSurfaces.enabled)
{
// Also check for VK_SUBOPTIMAL_KHR.
*presentOutOfDate = ((result == VK_ERROR_OUT_OF_DATE_KHR) || (result == VK_SUBOPTIMAL_KHR));
......
......@@ -32,6 +32,12 @@ SyncHelper::~SyncHelper() {}
void SyncHelper::releaseToRenderer(RendererVk *renderer)
{
    // Hand the tracked resource use and the event over to the renderer's
    // garbage collector for deferred destruction.
    renderer->collectGarbageAndReinit(&mUse, &mEvent);

    // TODO: https://issuetracker.google.com/170312581 - Currently just stalling on worker thread
    // here to try and avoid race condition. If this works, need some alternate solution
    const bool commandProcessorEnabled =
        renderer->getFeatures().enableCommandProcessingThread.enabled;
    if (commandProcessorEnabled)
    {
        renderer->waitForCommandProcessorIdle(nullptr);
    }

    mFence.reset(renderer->getDevice());
}
......@@ -48,7 +54,17 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk)
DeviceScoped<Event> event(device);
ANGLE_VK_TRY(contextVk, event.get().init(device, eventCreateInfo));
ANGLE_TRY(contextVk->getNextSubmitFence(&mFence));
// TODO: https://issuetracker.google.com/170312581 - For now wait for worker thread to finish
// then get next fence from renderer
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
ANGLE_TRY(contextVk->getRenderer()->getNextSubmitFence(&mFence, false));
}
else
{
ANGLE_TRY(contextVk->getNextSubmitFence(&mFence));
}
mEvent = event.release();
......@@ -90,9 +106,17 @@ angle::Result SyncHelper::clientWait(Context *context,
ANGLE_TRY(contextVk->flushImpl(nullptr));
}
// If we are using worker need to wait for the commands to be issued before waiting on the
// fence.
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
// Wait on the fence that's expected to be signaled on the first vkQueueSubmit after
// `initialize` was called. The first fence is the fence created to signal this sync.
ASSERT(mFence.get().valid());
// TODO: https://issuetracker.google.com/170312581 - Wait could be command to worker
VkResult status = mFence.get().wait(renderer->getDevice(), timeout);
// Check for errors, but don't consider timeout as such.
......@@ -189,11 +213,24 @@ angle::Result SyncHelperNativeFence::initializeWithFd(ContextVk *contextVk, int
retain(&contextVk->getResourceUseList());
Serial serialOut;
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
ANGLE_TRY(renderer->queueSubmit(contextVk, contextVk->getPriority(), submitInfo, nullptr,
&fence.get(), &serialOut));
if (renderer->getFeatures().enableCommandProcessingThread.enabled)
{
CommandProcessorTask oneOffQueueSubmit;
oneOffQueueSubmit.initOneOffQueueSubmit(VK_NULL_HANDLE, contextVk->getPriority(),
&fence.get());
renderer->queueCommand(contextVk, &oneOffQueueSubmit);
// TODO: https://issuetracker.google.com/170312581 - wait for now
renderer->waitForCommandProcessorIdle(contextVk);
}
else
{
Serial serialOut;
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
ANGLE_TRY(renderer->queueSubmit(contextVk, contextVk->getPriority(), submitInfo,
nullptr, &fence.get(), &serialOut));
}
VkFenceGetFdInfoKHR fenceGetFdInfo = {};
fenceGetFdInfo.sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR;
......@@ -253,6 +290,14 @@ angle::Result SyncHelperNativeFence::clientWait(Context *context,
{
ANGLE_TRY(contextVk->flushImpl(nullptr));
}
// If we are using worker need to wait for the commands to be issued before waiting on the
// fence.
if (contextVk->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
// Wait for mFenceWithFd to be signaled.
VkResult status = mFenceWithFd.wait(renderer->getDevice(), timeout);
......
......@@ -969,6 +969,7 @@ angle::Result UtilsVk::setupProgram(ContextVk *contextVk,
vk::PipelineAndSerial *pipelineAndSerial;
program->setShader(gl::ShaderType::Compute, fsCsShader);
ANGLE_TRY(program->getComputePipeline(contextVk, pipelineLayout.get(), &pipelineAndSerial));
// TODO: https://issuetracker.google.com/issues/169788986: Update serial handling.
pipelineAndSerial->updateSerial(serial);
commandBuffer->bindComputePipeline(pipelineAndSerial->get());
}
......
......@@ -916,7 +916,8 @@ void CommandBufferHelper::restoreStencilContent()
}
}
void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBuffer *primary)
void CommandBufferHelper::executeBarriers(const angle::FeaturesVk &features,
PrimaryCommandBuffer *primary)
{
// make a local copy for faster access
PipelineStagesMask mask = mPipelineBarrierMask;
......@@ -925,7 +926,7 @@ void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBu
return;
}
if (contextVk->getFeatures().preferAggregateBarrierCalls.enabled)
if (features.preferAggregateBarrierCalls.enabled)
{
PipelineStagesMask::Iterator iter = mask.begin();
PipelineBarrier &barrier = mPipelineBarriers[*iter];
......@@ -1152,24 +1153,31 @@ void CommandBufferHelper::endTransformFeedback()
mValidTransformFeedbackBufferCount = 0;
}
angle::Result CommandBufferHelper::flushToPrimary(ContextVk *contextVk,
PrimaryCommandBuffer *primary)
angle::Result CommandBufferHelper::getRenderPassWithOps(ContextVk *contextVk,
RenderPass **renderPass)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandBufferHelper::flushToPrimary");
ASSERT(!empty());
if (kEnableCommandStreamDiagnostics)
*renderPass = nullptr;
if (mIsRenderPassCommandBuffer)
{
addCommandDiagnostics(contextVk);
ANGLE_TRY(contextVk->getRenderPassWithOps(mRenderPassDesc, mAttachmentOps, renderPass));
}
return angle::Result::Continue;
}
angle::Result CommandBufferHelper::flushToPrimary(const angle::FeaturesVk &features,
PrimaryCommandBuffer *primary,
RenderPass *renderPass)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandBufferHelper::flushToPrimary");
ASSERT(!empty());
// Commands that are added to primary before beginRenderPass command
executeBarriers(contextVk, primary);
executeBarriers(features, primary);
if (mIsRenderPassCommandBuffer)
{
mCommandBuffer.executeQueuedResetQueryPoolCommands(primary->getHandle());
// Pull a RenderPass from the cache.
RenderPass *renderPass = nullptr;
ANGLE_TRY(contextVk->getRenderPassWithOps(mRenderPassDesc, mAttachmentOps, &renderPass));
ASSERT(renderPass != nullptr);
VkRenderPassBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
......
......@@ -962,9 +962,12 @@ class CommandBufferHelper : angle::NonCopyable
CommandBuffer &getCommandBuffer() { return mCommandBuffer; }
angle::Result flushToPrimary(ContextVk *contextVk, PrimaryCommandBuffer *primary);
angle::Result getRenderPassWithOps(ContextVk *contextVk, RenderPass **renderPass);
angle::Result flushToPrimary(const angle::FeaturesVk &features,
PrimaryCommandBuffer *primary,
RenderPass *renderPass);
void executeBarriers(ContextVk *contextVk, PrimaryCommandBuffer *primary);
void executeBarriers(const angle::FeaturesVk &features, PrimaryCommandBuffer *primary);
void setHasRenderPass(bool hasRenderPass) { mIsRenderPassCommandBuffer = hasRenderPass; }
......@@ -1129,9 +1132,9 @@ class CommandBufferHelper : angle::NonCopyable
bool isReadOnlyDepthMode() const { return mReadOnlyDepthStencilMode; }
private:
void addCommandDiagnostics(ContextVk *contextVk);
private:
bool onDepthStencilAccess(ResourceAccess access,
uint32_t *cmdCountInvalidated,
uint32_t *cmdCountDisabled);
......
......@@ -943,15 +943,15 @@ gl::LevelIndex GetLevelIndex(vk::LevelIndex levelVk, gl::LevelIndex baseLevel);
} // namespace rx
#define ANGLE_VK_TRY(context, command) \
do \
{ \
auto ANGLE_LOCAL_VAR = command; \
if (ANGLE_UNLIKELY(ANGLE_LOCAL_VAR != VK_SUCCESS)) \
{ \
context->handleError(ANGLE_LOCAL_VAR, __FILE__, ANGLE_FUNCTION, __LINE__); \
return angle::Result::Stop; \
} \
// Evaluate a Vulkan |command| exactly once; on any result other than
// VK_SUCCESS, report the error through the context and stop.  Both macro
// parameters are parenthesized so that arguments containing operators (or
// commas inside parentheses) expand correctly.
#define ANGLE_VK_TRY(context, command)                                                   \
    do                                                                                   \
    {                                                                                    \
        auto ANGLE_LOCAL_VAR = (command);                                                \
        if (ANGLE_UNLIKELY(ANGLE_LOCAL_VAR != VK_SUCCESS))                               \
        {                                                                                \
            (context)->handleError(ANGLE_LOCAL_VAR, __FILE__, ANGLE_FUNCTION, __LINE__); \
            return angle::Result::Stop;                                                  \
        }                                                                                \
    } while (0)
#define ANGLE_VK_CHECK(context, test, error) ANGLE_VK_TRY(context, test ? VK_SUCCESS : error)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment