Commit 6bc362c4 by Courtney Goeltzenleuchter Committed by Commit Bot

Allow single threaded CommandProcessor

In replacing the legacy CommandQueue code with the threading-capable CommandProcessor, it would be good to be able to run the CommandProcessor in a single-threaded environment. This CL changes the meaning of the commandProcessor and asynchronousCommandProcessing feature flags so that enabling commandProcessor only changes the code paths to use the command processor, but the work is still done as part of the submitting thread (e.g. ContextVk). Enabling asynchronousCommandProcessing will cause a separate worker thread to be spawned which will asynchronously process the commands. This allows us to switch to the CommandProcessor without threading and then enable threading once performance issues are resolved. Bug: b/161912801 Bug: b/170329600 Change-Id: I534862b109a7e7708108190b7c3e894071d4c2ed Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2483580 Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Tim Van Patten <timvp@google.com> Commit-Queue: Courtney Goeltzenleuchter <courtneygo@google.com>
parent c06a424f
...@@ -336,14 +336,14 @@ struct FeaturesVk : FeatureSetBase ...@@ -336,14 +336,14 @@ struct FeaturesVk : FeatureSetBase
"fine grained pipeline stage dependency information", "fine grained pipeline stage dependency information",
&members, "http://anglebug.com/4633"}; &members, "http://anglebug.com/4633"};
// Enable parallel thread that processes and submits vulkan command buffers. // Tell Vulkan back-end to use CommandProcessor class to dispatch work to the GPU. The work will
// Currently off by default to enable testing. // happen asynchronously in a different thread if asynchronousCommandProcessing is true.
Feature enableCommandProcessingThread = { // Otherwise use Renderer::CommandQueue to dispatch work.
"enable_command_processing_thread", FeatureCategory::VulkanFeatures, Feature commandProcessor = {"command_processor", FeatureCategory::VulkanFeatures,
"Enable parallel processing and submission of Vulkan commands in worker thread", &members, "Use CommandProcessor class to dispatch work to GPU.", &members,
"http://anglebug.com/4324"}; "http://anglebug.com/4324"};
// Enable parallel thread execution when enableCommandProcessingThread is enabled. // Enable parallel thread execution when commandProcessor is enabled.
// Currently off by default. // Currently off by default.
Feature asynchronousCommandProcessing = {"asynchronous_command_processing", Feature asynchronousCommandProcessing = {"asynchronous_command_processing",
FeatureCategory::VulkanFeatures, FeatureCategory::VulkanFeatures,
......
...@@ -57,13 +57,19 @@ namespace vk ...@@ -57,13 +57,19 @@ namespace vk
{ {
void CommandProcessorTask::initTask() void CommandProcessorTask::initTask()
{ {
mTask = CustomTask::Invalid; mTask = CustomTask::Invalid;
mContextVk = nullptr; mContextVk = nullptr;
mRenderPass = nullptr; mRenderPass = nullptr;
mCommandBuffer = nullptr; mCommandBuffer = nullptr;
mSemaphore = nullptr; mSemaphore = nullptr;
mOneOffFence = nullptr; mOneOffFence = nullptr;
mOneOffCommandBufferVk = VK_NULL_HANDLE; mPresentInfo = {};
mPresentInfo.pResults = nullptr;
mPresentInfo.pSwapchains = nullptr;
mPresentInfo.pImageIndices = nullptr;
mPresentInfo.pNext = nullptr;
mPresentInfo.pWaitSemaphores = nullptr;
mOneOffCommandBufferVk = VK_NULL_HANDLE;
} }
// CommandProcessorTask implementation // CommandProcessorTask implementation
...@@ -77,11 +83,75 @@ void CommandProcessorTask::initProcessCommands(ContextVk *contextVk, ...@@ -77,11 +83,75 @@ void CommandProcessorTask::initProcessCommands(ContextVk *contextVk,
mRenderPass = renderPass; mRenderPass = renderPass;
} }
void CommandProcessorTask::initPresent(egl::ContextPriority priority, VkPresentInfoKHR presentInfo) void CommandProcessorTask::copyPresentInfo(const VkPresentInfoKHR &other)
{ {
mTask = vk::CustomTask::Present; if (other.sType == VK_NULL_HANDLE)
mPresentInfo = presentInfo; {
mPriority = priority; return;
}
mPresentInfo.sType = other.sType;
mPresentInfo.pNext = other.pNext;
if (other.swapchainCount > 0)
{
ASSERT(other.swapchainCount == 1);
mPresentInfo.swapchainCount = 1;
mSwapchain = other.pSwapchains[0];
mPresentInfo.pSwapchains = &mSwapchain;
mImageIndex = other.pImageIndices[0];
mPresentInfo.pImageIndices = &mImageIndex;
}
if (other.waitSemaphoreCount > 0)
{
ASSERT(other.waitSemaphoreCount == 1);
mPresentInfo.waitSemaphoreCount = 1;
mWaitSemaphore = other.pWaitSemaphores[0];
mPresentInfo.pWaitSemaphores = &mWaitSemaphore;
}
mPresentInfo.pResults = other.pResults;
void *pNext = const_cast<void *>(other.pNext);
while (pNext != nullptr)
{
VkStructureType sType = *reinterpret_cast<VkStructureType *>(pNext);
switch (sType)
{
case VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR:
{
const VkPresentRegionsKHR *presentRegions =
reinterpret_cast<VkPresentRegionsKHR *>(pNext);
mPresentRegion = *presentRegions->pRegions;
mRects.resize(mPresentRegion.rectangleCount);
for (uint32_t i = 0; i < mPresentRegion.rectangleCount; i++)
{
mRects[i] = presentRegions->pRegions->pRectangles[i];
}
mPresentRegion.pRectangles = mRects.data();
mPresentRegions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
mPresentRegions.pNext = presentRegions->pNext;
mPresentRegions.swapchainCount = 1;
mPresentRegions.pRegions = &mPresentRegion;
mPresentInfo.pNext = &mPresentRegions;
pNext = const_cast<void *>(presentRegions->pNext);
break;
}
default:
ERR() << "Unknown sType: " << sType << " in VkPresentInfoKHR.pNext chain";
UNREACHABLE();
break;
}
}
}
void CommandProcessorTask::initPresent(egl::ContextPriority priority, VkPresentInfoKHR &presentInfo)
{
mTask = vk::CustomTask::Present;
mPriority = priority;
copyPresentInfo(presentInfo);
} }
void CommandProcessorTask::initFinishToSerial(Serial serial) void CommandProcessorTask::initFinishToSerial(Serial serial)
...@@ -136,11 +206,12 @@ CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs ...@@ -136,11 +206,12 @@ CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs
mOneOffFence = rhs.mOneOffFence; mOneOffFence = rhs.mOneOffFence;
std::swap(mGarbage, rhs.mGarbage); std::swap(mGarbage, rhs.mGarbage);
std::swap(mSerial, rhs.mSerial); std::swap(mSerial, rhs.mSerial);
std::swap(mPresentInfo, rhs.mPresentInfo);
std::swap(mPriority, rhs.mPriority); std::swap(mPriority, rhs.mPriority);
std::swap(mResourceUseList, rhs.mResourceUseList); std::swap(mResourceUseList, rhs.mResourceUseList);
mOneOffCommandBufferVk = rhs.mOneOffCommandBufferVk; mOneOffCommandBufferVk = rhs.mOneOffCommandBufferVk;
copyPresentInfo(rhs.mPresentInfo);
// clear rhs now that everything has moved. // clear rhs now that everything has moved.
rhs.initTask(); rhs.initTask();
...@@ -194,9 +265,32 @@ angle::Result TaskProcessor::init(vk::Context *context, std::thread::id threadId ...@@ -194,9 +265,32 @@ angle::Result TaskProcessor::init(vk::Context *context, std::thread::id threadId
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::Result TaskProcessor::checkCompletedCommands(vk::Context *context) angle::Result TaskProcessor::lockAndCheckCompletedCommands(vk::Context *context)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::checkCompletedCommands"); ASSERT(isValidWorkerThread(context));
std::lock_guard<std::mutex> inFlightLock(mInFlightCommandsMutex);
return checkCompletedCommandsNoLock(context);
}
VkResult TaskProcessor::getLastAndClearPresentResult(VkSwapchainKHR swapchain)
{
std::unique_lock<std::mutex> lock(mSwapchainStatusMutex);
if (mSwapchainStatus.find(swapchain) == mSwapchainStatus.end())
{
// Wake when required swapchain status becomes available
mSwapchainStatusCondition.wait(lock, [this, swapchain] {
return mSwapchainStatus.find(swapchain) != mSwapchainStatus.end();
});
}
VkResult result = mSwapchainStatus[swapchain];
mSwapchainStatus.erase(swapchain);
return result;
}
angle::Result TaskProcessor::checkCompletedCommandsNoLock(vk::Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::checkCompletedCommandsNoLock");
VkDevice device = context->getDevice(); VkDevice device = context->getDevice();
RendererVk *rendererVk = context->getRenderer(); RendererVk *rendererVk = context->getRenderer();
...@@ -260,6 +354,8 @@ angle::Result TaskProcessor::releaseToCommandBatch(vk::Context *context, ...@@ -260,6 +354,8 @@ angle::Result TaskProcessor::releaseToCommandBatch(vk::Context *context,
vk::CommandPool *commandPool, vk::CommandPool *commandPool,
vk::CommandBatch *batch) vk::CommandBatch *batch)
{ {
ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::releaseToCommandBatch");
batch->primaryCommands = std::move(commandBuffer); batch->primaryCommands = std::move(commandBuffer);
if (commandPool->valid()) if (commandPool->valid())
...@@ -281,19 +377,26 @@ angle::Result TaskProcessor::allocatePrimaryCommandBuffer( ...@@ -281,19 +377,26 @@ angle::Result TaskProcessor::allocatePrimaryCommandBuffer(
vk::Context *context, vk::Context *context,
vk::PrimaryCommandBuffer *commandBufferOut) vk::PrimaryCommandBuffer *commandBufferOut)
{ {
ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::allocatePrimaryCommandBuffer");
return mPrimaryCommandPool.allocate(context, commandBufferOut); return mPrimaryCommandPool.allocate(context, commandBufferOut);
} }
angle::Result TaskProcessor::releasePrimaryCommandBuffer(vk::Context *context, angle::Result TaskProcessor::releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer) vk::PrimaryCommandBuffer &&commandBuffer)
{ {
ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::releasePrimaryCommandBuffer");
ASSERT(mPrimaryCommandPool.valid()); ASSERT(mPrimaryCommandPool.valid());
return mPrimaryCommandPool.collect(context, std::move(commandBuffer)); return mPrimaryCommandPool.collect(context, std::move(commandBuffer));
} }
void TaskProcessor::handleDeviceLost(vk::Context *context) void TaskProcessor::handleDeviceLost(vk::Context *context)
{ {
ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::handleDeviceLost");
VkDevice device = context->getDevice(); VkDevice device = context->getDevice();
std::lock_guard<std::mutex> inFlightLock(mInFlightCommandsMutex);
for (vk::CommandBatch &batch : mInFlightCommands) for (vk::CommandBatch &batch : mInFlightCommands)
{ {
...@@ -321,8 +424,11 @@ void TaskProcessor::handleDeviceLost(vk::Context *context) ...@@ -321,8 +424,11 @@ void TaskProcessor::handleDeviceLost(vk::Context *context)
// to mInFlightCommands // to mInFlightCommands
angle::Result TaskProcessor::finishToSerial(vk::Context *context, Serial serial) angle::Result TaskProcessor::finishToSerial(vk::Context *context, Serial serial)
{ {
ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::finishToSerial");
RendererVk *rendererVk = context->getRenderer(); RendererVk *rendererVk = context->getRenderer();
uint64_t timeout = rendererVk->getMaxFenceWaitTimeNs(); uint64_t timeout = rendererVk->getMaxFenceWaitTimeNs();
std::unique_lock<std::mutex> inFlightLock(mInFlightCommandsMutex);
if (mInFlightCommands.empty()) if (mInFlightCommands.empty())
{ {
...@@ -343,18 +449,31 @@ angle::Result TaskProcessor::finishToSerial(vk::Context *context, Serial serial) ...@@ -343,18 +449,31 @@ angle::Result TaskProcessor::finishToSerial(vk::Context *context, Serial serial)
} }
const vk::CommandBatch &batch = mInFlightCommands[batchIndex]; const vk::CommandBatch &batch = mInFlightCommands[batchIndex];
// Don't need to hold the lock while waiting for the fence
inFlightLock.unlock();
// Wait for it finish // Wait for it finish
VkDevice device = context->getDevice(); VkDevice device = context->getDevice();
ANGLE_VK_TRY(context, batch.fence.get().wait(device, timeout)); ANGLE_VK_TRY(context, batch.fence.get().wait(device, timeout));
// Clean up finished batches. // Clean up finished batches.
return checkCompletedCommands(context); return lockAndCheckCompletedCommands(context);
} }
VkResult TaskProcessor::present(VkQueue queue, const VkPresentInfoKHR &presentInfo) VkResult TaskProcessor::present(VkQueue queue, const VkPresentInfoKHR &presentInfo)
{ {
std::lock_guard<std::mutex> lock(mSwapchainStatusMutex);
ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR"); ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR");
return vkQueuePresentKHR(queue, &presentInfo); VkResult result = vkQueuePresentKHR(queue, &presentInfo);
// Verify that we are presenting one and only one swapchain
ASSERT(presentInfo.swapchainCount == 1);
ASSERT(presentInfo.pResults == nullptr);
mSwapchainStatus[presentInfo.pSwapchains[0]] = result;
mSwapchainStatusCondition.notify_all();
return result;
} }
angle::Result TaskProcessor::submitFrame(vk::Context *context, angle::Result TaskProcessor::submitFrame(vk::Context *context,
...@@ -366,7 +485,7 @@ angle::Result TaskProcessor::submitFrame(vk::Context *context, ...@@ -366,7 +485,7 @@ angle::Result TaskProcessor::submitFrame(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer, vk::PrimaryCommandBuffer &&commandBuffer,
const Serial &queueSerial) const Serial &queueSerial)
{ {
ASSERT(std::this_thread::get_id() == mThreadId); ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::submitFrame"); ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::submitFrame");
VkDevice device = context->getDevice(); VkDevice device = context->getDevice();
...@@ -387,15 +506,19 @@ angle::Result TaskProcessor::submitFrame(vk::Context *context, ...@@ -387,15 +506,19 @@ angle::Result TaskProcessor::submitFrame(vk::Context *context,
// in the in-flight list. // in the in-flight list.
ANGLE_TRY(releaseToCommandBatch(context, std::move(commandBuffer), commandPool, &batch)); ANGLE_TRY(releaseToCommandBatch(context, std::move(commandBuffer), commandPool, &batch));
std::unique_lock<std::mutex> inFlightLock(mInFlightCommandsMutex);
mInFlightCommands.emplace_back(scopedBatch.release()); mInFlightCommands.emplace_back(scopedBatch.release());
ANGLE_TRY(checkCompletedCommands(context)); ANGLE_TRY(checkCompletedCommandsNoLock(context));
// CPU should be throttled to avoid mInFlightCommands from growing too fast. Important for // CPU should be throttled to avoid mInFlightCommands from growing too fast. Important for
// off-screen scenarios. // off-screen scenarios.
while (mInFlightCommands.size() > kInFlightCommandsLimit) if (mInFlightCommands.size() > kInFlightCommandsLimit)
{ {
ANGLE_TRY(finishToSerial(context, mInFlightCommands[0].serial)); size_t numCommandsToFinish = mInFlightCommands.size() - kInFlightCommandsLimit;
Serial finishSerial = mInFlightCommands[numCommandsToFinish].serial;
inFlightLock.unlock();
return finishToSerial(context, finishSerial);
} }
return angle::Result::Continue; return angle::Result::Continue;
...@@ -404,8 +527,8 @@ angle::Result TaskProcessor::submitFrame(vk::Context *context, ...@@ -404,8 +527,8 @@ angle::Result TaskProcessor::submitFrame(vk::Context *context,
vk::Shared<vk::Fence> TaskProcessor::getLastSubmittedFenceWithLock(VkDevice device) const vk::Shared<vk::Fence> TaskProcessor::getLastSubmittedFenceWithLock(VkDevice device) const
{ {
vk::Shared<vk::Fence> fence; vk::Shared<vk::Fence> fence;
// Note: this must be called when the work queue is empty and while holding mWorkerMutex to std::lock_guard<std::mutex> inFlightLock(mInFlightCommandsMutex);
// ensure that worker isn't touching mInFlightCommands
if (!mInFlightCommands.empty()) if (!mInFlightCommands.empty())
{ {
fence.copy(device, mInFlightCommands.back().fence); fence.copy(device, mInFlightCommands.back().fence);
...@@ -419,7 +542,10 @@ angle::Result TaskProcessor::queueSubmit(vk::Context *context, ...@@ -419,7 +542,10 @@ angle::Result TaskProcessor::queueSubmit(vk::Context *context,
const VkSubmitInfo &submitInfo, const VkSubmitInfo &submitInfo,
const vk::Fence *fence) const vk::Fence *fence)
{ {
ASSERT(std::this_thread::get_id() == mThreadId); ASSERT(isValidWorkerThread(context));
ANGLE_TRACE_EVENT0("gpu.angle", "TaskProcessor::queueSubmit");
ASSERT((context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled == false) ||
std::this_thread::get_id() == mThreadId);
if (kOutputVmaStatsString) if (kOutputVmaStatsString)
{ {
context->getRenderer()->outputVmaStatString(); context->getRenderer()->outputVmaStatString();
...@@ -429,7 +555,14 @@ angle::Result TaskProcessor::queueSubmit(vk::Context *context, ...@@ -429,7 +555,14 @@ angle::Result TaskProcessor::queueSubmit(vk::Context *context,
VkFence handle = fence ? fence->getHandle() : VK_NULL_HANDLE; VkFence handle = fence ? fence->getHandle() : VK_NULL_HANDLE;
ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, handle)); ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, handle));
return angle::Result::Continue; // Now that we've submitted work, clean up RendererVk garbage
return context->getRenderer()->cleanupGarbage(false);
}
bool TaskProcessor::isValidWorkerThread(vk::Context *context) const
{
return (context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled == false) ||
std::this_thread::get_id() == mThreadId;
} }
void CommandProcessor::handleError(VkResult errorCode, void CommandProcessor::handleError(VkResult errorCode,
...@@ -484,35 +617,55 @@ vk::Error CommandProcessor::getAndClearPendingError() ...@@ -484,35 +617,55 @@ vk::Error CommandProcessor::getAndClearPendingError()
void CommandProcessor::queueCommand(vk::Context *context, vk::CommandProcessorTask *task) void CommandProcessor::queueCommand(vk::Context *context, vk::CommandProcessorTask *task)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand"); ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand");
{ // Grab the worker mutex so that we put things on the queue in the same order as we give out
// Grab the worker mutex so that we put things on the queue in the same order as we give out // serials.
// serials. std::lock_guard<std::mutex> queueLock(mWorkerMutex);
std::lock_guard<std::mutex> queueLock(mWorkerMutex);
if (task->getTaskCommand() == vk::CustomTask::FlushAndQueueSubmit || if (task->getTaskCommand() == vk::CustomTask::FlushAndQueueSubmit ||
task->getTaskCommand() == vk::CustomTask::OneOffQueueSubmit) task->getTaskCommand() == vk::CustomTask::OneOffQueueSubmit)
{ {
std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex); std::lock_guard<std::mutex> lock(mCommandProcessorQueueSerialMutex);
// Flush submits work, so give it the current serial and generate a new one. // Flush submits work, so give it the current serial and generate a new one.
Serial queueSerial = mCommandProcessorCurrentQueueSerial; Serial queueSerial = mCommandProcessorCurrentQueueSerial;
task->setQueueSerial(queueSerial); task->setQueueSerial(queueSerial);
mCommandProcessorLastSubmittedSerial = mCommandProcessorCurrentQueueSerial; mCommandProcessorLastSubmittedSerial = mCommandProcessorCurrentQueueSerial;
mCommandProcessorCurrentQueueSerial = mQueueSerialFactory.generate(); mCommandProcessorCurrentQueueSerial = mQueueSerialFactory.generate();
task->getResourceUseList().releaseResourceUsesAndUpdateSerials(queueSerial); task->getResourceUseList().releaseResourceUsesAndUpdateSerials(queueSerial);
} }
if (context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
mTasks.emplace(std::move(*task)); mTasks.emplace(std::move(*task));
mWorkAvailableCondition.notify_one(); mWorkAvailableCondition.notify_one();
} }
else
if (getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{ {
return; angle::Result result = processTask(context, task);
if (ANGLE_UNLIKELY(IsError(result)))
{
// TODO: Ignore error, similar to ANGLE_CONTEXT_TRY.
// Vulkan errors will get passed back to the calling context. We are still in the
// context's thread so no mutex needed.
return;
}
} }
}
angle::Result CommandProcessor::initTaskProcessor(vk::Context *context)
{
// Initialization prior to work thread loop
ANGLE_TRY(mTaskProcessor.init(context, std::this_thread::get_id()));
// Allocate and begin primary command buffer
ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(context, &mPrimaryCommandBuffer));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
// parallel task processing disabled so wait for work to complete. ANGLE_VK_TRY(context, mPrimaryCommandBuffer.begin(beginInfo));
waitForWorkComplete(context);
return angle::Result::Continue;
} }
void CommandProcessor::processTasks() void CommandProcessor::processTasks()
...@@ -540,16 +693,7 @@ void CommandProcessor::processTasks() ...@@ -540,16 +693,7 @@ void CommandProcessor::processTasks()
angle::Result CommandProcessor::processTasksImpl(bool *exitThread) angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
{ {
// Initialization prior to work thread loop ANGLE_TRY(initTaskProcessor(this));
ANGLE_TRY(mTaskProcessor.init(this, std::this_thread::get_id()));
// Allocate and begin primary command buffer
ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(this, &mPrimaryCommandBuffer));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(this, mPrimaryCommandBuffer.begin(beginInfo));
while (true) while (true)
{ {
...@@ -566,23 +710,15 @@ angle::Result CommandProcessor::processTasksImpl(bool *exitThread) ...@@ -566,23 +710,15 @@ angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
mTasks.pop(); mTasks.pop();
lock.unlock(); lock.unlock();
switch (task.getTaskCommand()) ANGLE_TRY(processTask(this, &task));
if (task.getTaskCommand() == vk::CustomTask::Exit)
{ {
case vk::CustomTask::Exit:
{ *exitThread = true;
ANGLE_TRY(mTaskProcessor.finishToSerial(this, Serial::Infinite())); lock.lock();
*exitThread = true; mWorkerThreadIdle = true;
// Shutting down so cleanup mWorkerIdleCondition.notify_one();
mTaskProcessor.destroy(mRenderer->getDevice()); return angle::Result::Continue;
mCommandPool.destroy(mRenderer->getDevice());
mPrimaryCommandBuffer.destroy(mRenderer->getDevice());
mWorkerThreadIdle = true;
mWorkerIdleCondition.notify_one();
return angle::Result::Continue;
}
default:
ANGLE_TRY(processTask(&task));
break;
} }
} }
...@@ -590,14 +726,24 @@ angle::Result CommandProcessor::processTasksImpl(bool *exitThread) ...@@ -590,14 +726,24 @@ angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
return angle::Result::Stop; return angle::Result::Stop;
} }
angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task) angle::Result CommandProcessor::processTask(vk::Context *context, vk::CommandProcessorTask *task)
{ {
switch (task->getTaskCommand()) switch (task->getTaskCommand())
{ {
case vk::CustomTask::Exit:
{
ANGLE_TRY(mTaskProcessor.finishToSerial(context, Serial::Infinite()));
// Shutting down so cleanup
mTaskProcessor.destroy(mRenderer->getDevice());
mCommandPool.destroy(mRenderer->getDevice());
mPrimaryCommandBuffer.destroy(mRenderer->getDevice());
break;
}
case vk::CustomTask::FlushAndQueueSubmit: case vk::CustomTask::FlushAndQueueSubmit:
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::FlushAndQueueSubmit");
// End command buffer // End command buffer
ANGLE_VK_TRY(this, mPrimaryCommandBuffer.end()); ANGLE_VK_TRY(context, mPrimaryCommandBuffer.end());
// 1. Create submitInfo // 1. Create submitInfo
VkSubmitInfo submitInfo = {}; VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, mPrimaryCommandBuffer, task->getWaitSemaphores(), InitializeSubmitInfo(&submitInfo, mPrimaryCommandBuffer, task->getWaitSemaphores(),
...@@ -611,17 +757,17 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task) ...@@ -611,17 +757,17 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
// 3. Call submitFrame() // 3. Call submitFrame()
ANGLE_TRY(mTaskProcessor.submitFrame( ANGLE_TRY(mTaskProcessor.submitFrame(
this, getRenderer()->getVkQueue(task->getPriority()), submitInfo, fence, context, getRenderer()->getVkQueue(task->getPriority()), submitInfo, fence,
&task->getGarbage(), &mCommandPool, std::move(mPrimaryCommandBuffer), &task->getGarbage(), &mCommandPool, std::move(mPrimaryCommandBuffer),
task->getQueueSerial())); task->getQueueSerial()));
// 4. Allocate & begin new primary command buffer // 4. Allocate & begin new primary command buffer
ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(this, &mPrimaryCommandBuffer)); ANGLE_TRY(mTaskProcessor.allocatePrimaryCommandBuffer(context, &mPrimaryCommandBuffer));
VkCommandBufferBeginInfo beginInfo = {}; VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr; beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(this, mPrimaryCommandBuffer.begin(beginInfo)); ANGLE_VK_TRY(context, mPrimaryCommandBuffer.begin(beginInfo));
// Free this local reference // Free this local reference
getRenderer()->resetSharedFence(&fence); getRenderer()->resetSharedFence(&fence);
...@@ -631,6 +777,7 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task) ...@@ -631,6 +777,7 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
} }
case vk::CustomTask::OneOffQueueSubmit: case vk::CustomTask::OneOffQueueSubmit:
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::OneOffQueueSubmit");
VkSubmitInfo submitInfo = {}; VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
if (task->getOneOffCommandBufferVk() != VK_NULL_HANDLE) if (task->getOneOffCommandBufferVk() != VK_NULL_HANDLE)
...@@ -641,28 +788,32 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task) ...@@ -641,28 +788,32 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
// TODO: https://issuetracker.google.com/issues/170328907 - vkQueueSubmit should be // TODO: https://issuetracker.google.com/issues/170328907 - vkQueueSubmit should be
// owned by TaskProcessor to ensure proper synchronization // owned by TaskProcessor to ensure proper synchronization
ANGLE_TRY(mTaskProcessor.queueSubmit(this, ANGLE_TRY(mTaskProcessor.queueSubmit(context,
getRenderer()->getVkQueue(task->getPriority()), getRenderer()->getVkQueue(task->getPriority()),
submitInfo, task->getOneOffFence())); submitInfo, task->getOneOffFence()));
ANGLE_TRY(mTaskProcessor.checkCompletedCommands(this)); ANGLE_TRY(mTaskProcessor.lockAndCheckCompletedCommands(context));
break; break;
} }
case vk::CustomTask::FinishToSerial: case vk::CustomTask::FinishToSerial:
{ {
ANGLE_TRY(mTaskProcessor.finishToSerial(this, task->getQueueSerial())); ANGLE_TRY(mTaskProcessor.finishToSerial(context, task->getQueueSerial()));
break; break;
} }
case vk::CustomTask::Present: case vk::CustomTask::Present:
{ {
VkResult result = mTaskProcessor.present(getRenderer()->getVkQueue(task->getPriority()), VkResult result = mTaskProcessor.present(getRenderer()->getVkQueue(task->getPriority()),
task->getPresentInfo()); task->getPresentInfo());
if (ANGLE_UNLIKELY(result != VK_SUCCESS)) if (ANGLE_UNLIKELY(result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR))
{
// We get to ignore these as they are not fatal
}
else if (ANGLE_UNLIKELY(result != VK_SUCCESS))
{ {
// Save the error so that we can handle it (e.g. VK_OUT_OF_DATE) // Save the error so that we can handle it.
// Don't leave processing loop, don't consider errors from present to be fatal. // Don't leave processing loop, don't consider errors from present to be fatal.
// TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to // TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to
// properly parallelize present // properly parallelize present
handleError(result, __FILE__, __FUNCTION__, __LINE__); context->handleError(result, __FILE__, __FUNCTION__, __LINE__);
} }
break; break;
} }
...@@ -675,6 +826,11 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task) ...@@ -675,6 +826,11 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
task->getCommandBuffer()->releaseToContextQueue(task->getContextVk()); task->getCommandBuffer()->releaseToContextQueue(task->getContextVk());
break; break;
} }
case vk::CustomTask::CheckCompletedCommands:
{
ANGLE_TRY(mTaskProcessor.lockAndCheckCompletedCommands(this));
break;
}
default: default:
UNREACHABLE(); UNREACHABLE();
break; break;
...@@ -683,9 +839,17 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task) ...@@ -683,9 +839,17 @@ angle::Result CommandProcessor::processTask(vk::CommandProcessorTask *task)
return angle::Result::Continue; return angle::Result::Continue;
} }
void CommandProcessor::checkCompletedCommands(vk::Context *context)
{
vk::CommandProcessorTask checkCompletedTask;
checkCompletedTask.initTask(vk::CustomTask::CheckCompletedCommands);
queueCommand(this, &checkCompletedTask);
}
void CommandProcessor::waitForWorkComplete(vk::Context *context) void CommandProcessor::waitForWorkComplete(vk::Context *context)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkerThreadIdle"); ASSERT(getRenderer()->getFeatures().asynchronousCommandProcessing.enabled);
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkComplete");
std::unique_lock<std::mutex> lock(mWorkerMutex); std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); }); mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
// Worker thread is idle and command queue is empty so good to continue // Worker thread is idle and command queue is empty so good to continue
...@@ -707,26 +871,33 @@ void CommandProcessor::waitForWorkComplete(vk::Context *context) ...@@ -707,26 +871,33 @@ void CommandProcessor::waitForWorkComplete(vk::Context *context)
} }
} }
// TODO: https://issuetracker.google.com/170311829 - Add vk::Context so that queueCommand has
// someplace to send errors.
void CommandProcessor::shutdown(std::thread *commandProcessorThread) void CommandProcessor::shutdown(std::thread *commandProcessorThread)
{ {
vk::CommandProcessorTask endTask; vk::CommandProcessorTask endTask;
endTask.initTask(vk::CustomTask::Exit); endTask.initTask(vk::CustomTask::Exit);
queueCommand(nullptr, &endTask); queueCommand(this, &endTask);
waitForWorkComplete(nullptr); if (this->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
if (commandProcessorThread->joinable())
{ {
commandProcessorThread->join(); waitForWorkComplete(nullptr);
if (commandProcessorThread->joinable())
{
commandProcessorThread->join();
}
} }
} }
// Return the fence for the last submit. This may mean waiting on the worker to process tasks to // Return the fence for the last submit. This may mean waiting on the worker to process tasks to
// actually get to the last submit // actually get to the last submit
vk::Shared<vk::Fence> CommandProcessor::getLastSubmittedFence() const vk::Shared<vk::Fence> CommandProcessor::getLastSubmittedFence(const vk::Context *context) const
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::getLastSubmittedFence"); ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::getLastSubmittedFence");
std::unique_lock<std::mutex> lock(mWorkerMutex); std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); }); if (context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
}
// Worker thread is idle and command queue is empty so good to continue // Worker thread is idle and command queue is empty so good to continue
return mTaskProcessor.getLastSubmittedFenceWithLock(getDevice()); return mTaskProcessor.getLastSubmittedFenceWithLock(getDevice());
...@@ -747,20 +918,27 @@ Serial CommandProcessor::getCurrentQueueSerial() ...@@ -747,20 +918,27 @@ Serial CommandProcessor::getCurrentQueueSerial()
// Wait until all commands up to and including serial have been processed // Wait until all commands up to and including serial have been processed
void CommandProcessor::finishToSerial(vk::Context *context, Serial serial) void CommandProcessor::finishToSerial(vk::Context *context, Serial serial)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::finishToSerial");
vk::CommandProcessorTask finishToSerial; vk::CommandProcessorTask finishToSerial;
finishToSerial.initFinishToSerial(serial); finishToSerial.initFinishToSerial(serial);
queueCommand(context, &finishToSerial); queueCommand(context, &finishToSerial);
// Wait until the worker is idle. At that point we know that the finishToSerial command has // Wait until the worker is idle. At that point we know that the finishToSerial command has
// completed executing, including any associated state cleanup. // completed executing, including any associated state cleanup.
waitForWorkComplete(context); if (context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
waitForWorkComplete(context);
}
} }
void CommandProcessor::handleDeviceLost() void CommandProcessor::handleDeviceLost()
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost"); ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost");
std::unique_lock<std::mutex> lock(mWorkerMutex); std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); }); if (getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
}
// Worker thread is idle and command queue is empty so good to continue // Worker thread is idle and command queue is empty so good to continue
mTaskProcessor.handleDeviceLost(this); mTaskProcessor.handleDeviceLost(this);
...@@ -768,6 +946,7 @@ void CommandProcessor::handleDeviceLost() ...@@ -768,6 +946,7 @@ void CommandProcessor::handleDeviceLost()
void CommandProcessor::finishAllWork(vk::Context *context) void CommandProcessor::finishAllWork(vk::Context *context)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::finishAllWork");
// Wait for GPU work to finish // Wait for GPU work to finish
finishToSerial(context, Serial::Infinite()); finishToSerial(context, Serial::Infinite());
} }
......
...@@ -27,10 +27,11 @@ class CommandProcessor; ...@@ -27,10 +27,11 @@ class CommandProcessor;
namespace vk namespace vk
{ {
// CommandProcessorTask is used to queue a task to the worker thread when // CommandProcessor is used to dispatch work to the GPU when commandProcessor feature is true.
// enableCommandProcessingThread feature is true. // If asynchronousCommandProcessing is enabled the work will be queued and handled by a worker
// Issuing the CustomTask::Exit command will cause the worker thread to clean up its resources and // thread asynchronous to the context. Issuing the CustomTask::Exit command will cause the worker
// shut down. This command is sent when the renderer instance shuts down. Custom tasks are: // thread to clean up its resources and shut down. This command is sent when the renderer instance
// shuts down. Custom tasks are:
enum CustomTask enum CustomTask
{ {
...@@ -45,6 +46,10 @@ enum CustomTask ...@@ -45,6 +46,10 @@ enum CustomTask
FinishToSerial, FinishToSerial,
// Execute QueuePresent // Execute QueuePresent
Present, Present,
// Do cleanup processing on completed commands
// TODO: https://issuetracker.google.com/170312581 - should be able to remove
// checkCompletedCommands command with fence refactor.
CheckCompletedCommands,
// Exit the command processor thread // Exit the command processor thread
Exit, Exit,
}; };
...@@ -62,7 +67,7 @@ class CommandProcessorTask ...@@ -62,7 +67,7 @@ class CommandProcessorTask
CommandBufferHelper *commandBuffer, CommandBufferHelper *commandBuffer,
vk::RenderPass *renderPass); vk::RenderPass *renderPass);
void initPresent(egl::ContextPriority priority, VkPresentInfoKHR presentInfo); void initPresent(egl::ContextPriority priority, VkPresentInfoKHR &presentInfo);
void initFinishToSerial(Serial serial); void initFinishToSerial(Serial serial);
...@@ -104,6 +109,8 @@ class CommandProcessorTask ...@@ -104,6 +109,8 @@ class CommandProcessorTask
ContextVk *getContextVk() const { return mContextVk; } ContextVk *getContextVk() const { return mContextVk; }
private: private:
void copyPresentInfo(const VkPresentInfoKHR &other);
CustomTask mTask; CustomTask mTask;
// ProcessCommands // ProcessCommands
...@@ -123,6 +130,13 @@ class CommandProcessorTask ...@@ -123,6 +130,13 @@ class CommandProcessorTask
// Present command data // Present command data
VkPresentInfoKHR mPresentInfo; VkPresentInfoKHR mPresentInfo;
VkSwapchainKHR mSwapchain;
VkSemaphore mWaitSemaphore;
uint32_t mImageIndex;
// Used by Present if supportsIncrementalPresent is enabled
VkPresentRegionKHR mPresentRegion;
VkPresentRegionsKHR mPresentRegions;
std::vector<VkRectLayerKHR> mRects;
// Used by OneOffQueueSubmit // Used by OneOffQueueSubmit
VkCommandBuffer mOneOffCommandBufferVk; VkCommandBuffer mOneOffCommandBufferVk;
...@@ -162,8 +176,6 @@ class TaskProcessor : angle::NonCopyable ...@@ -162,8 +176,6 @@ class TaskProcessor : angle::NonCopyable
angle::Result releasePrimaryCommandBuffer(vk::Context *context, angle::Result releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer); vk::PrimaryCommandBuffer &&commandBuffer);
void clearAllGarbage(vk::Context *context);
angle::Result finishToSerial(vk::Context *context, Serial serial); angle::Result finishToSerial(vk::Context *context, Serial serial);
VkResult present(VkQueue queue, const VkPresentInfoKHR &presentInfo); VkResult present(VkQueue queue, const VkPresentInfoKHR &presentInfo);
...@@ -183,25 +195,39 @@ class TaskProcessor : angle::NonCopyable ...@@ -183,25 +195,39 @@ class TaskProcessor : angle::NonCopyable
vk::Shared<vk::Fence> getLastSubmittedFenceWithLock(VkDevice device) const; vk::Shared<vk::Fence> getLastSubmittedFenceWithLock(VkDevice device) const;
// Check to see which batches have finished completion (forward progress for
// mLastCompletedQueueSerial, for example for when the application busy waits on a query
// result). It would be nice if we didn't have to expose this for QueryVk::getResult.
angle::Result checkCompletedCommands(vk::Context *context);
void handleDeviceLost(vk::Context *context); void handleDeviceLost(vk::Context *context);
// Called by CommandProcessor to process any completed work
angle::Result lockAndCheckCompletedCommands(vk::Context *context);
VkResult getLastAndClearPresentResult(VkSwapchainKHR swapchain);
private: private:
bool isValidWorkerThread(vk::Context *context) const;
angle::Result releaseToCommandBatch(vk::Context *context, angle::Result releaseToCommandBatch(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer, vk::PrimaryCommandBuffer &&commandBuffer,
vk::CommandPool *commandPool, vk::CommandPool *commandPool,
vk::CommandBatch *batch); vk::CommandBatch *batch);
// Check to see which batches have finished completion (forward progress for
// mLastCompletedQueueSerial, for example for when the application busy waits on a query
// result). It would be nice if we didn't have to expose this for QueryVk::getResult.
angle::Result checkCompletedCommandsNoLock(vk::Context *context);
vk::GarbageQueue mGarbageQueue; vk::GarbageQueue mGarbageQueue;
mutable std::mutex mInFlightCommandsMutex;
std::vector<vk::CommandBatch> mInFlightCommands; std::vector<vk::CommandBatch> mInFlightCommands;
// Keeps a free list of reusable primary command buffers. // Keeps a free list of reusable primary command buffers.
vk::PersistentCommandPool mPrimaryCommandPool; vk::PersistentCommandPool mPrimaryCommandPool;
std::thread::id mThreadId; std::thread::id mThreadId;
// Track present info
std::mutex mSwapchainStatusMutex;
std::condition_variable mSwapchainStatusCondition;
std::map<VkSwapchainKHR, VkResult> mSwapchainStatus;
}; };
class CommandProcessor : public vk::Context class CommandProcessor : public vk::Context
...@@ -210,6 +236,8 @@ class CommandProcessor : public vk::Context ...@@ -210,6 +236,8 @@ class CommandProcessor : public vk::Context
CommandProcessor(RendererVk *renderer); CommandProcessor(RendererVk *renderer);
~CommandProcessor() override; ~CommandProcessor() override;
angle::Result initTaskProcessor(vk::Context *context);
void handleError(VkResult result, void handleError(VkResult result,
const char *file, const char *file,
const char *function, const char *function,
...@@ -223,6 +251,8 @@ class CommandProcessor : public vk::Context ...@@ -223,6 +251,8 @@ class CommandProcessor : public vk::Context
// thread // thread
void queueCommand(vk::Context *context, vk::CommandProcessorTask *task); void queueCommand(vk::Context *context, vk::CommandProcessorTask *task);
void checkCompletedCommands(vk::Context *context);
// Used by main thread to wait for worker thread to complete all outstanding work. // Used by main thread to wait for worker thread to complete all outstanding work.
void waitForWorkComplete(vk::Context *context); void waitForWorkComplete(vk::Context *context);
Serial getCurrentQueueSerial(); Serial getCurrentQueueSerial();
...@@ -231,7 +261,7 @@ class CommandProcessor : public vk::Context ...@@ -231,7 +261,7 @@ class CommandProcessor : public vk::Context
// Wait until desired serial has been processed. // Wait until desired serial has been processed.
void finishToSerial(vk::Context *context, Serial serial); void finishToSerial(vk::Context *context, Serial serial);
vk::Shared<vk::Fence> getLastSubmittedFence() const; vk::Shared<vk::Fence> getLastSubmittedFence(const vk::Context *context) const;
void handleDeviceLost(); void handleDeviceLost();
bool hasPendingError() const bool hasPendingError() const
...@@ -246,13 +276,18 @@ class CommandProcessor : public vk::Context ...@@ -246,13 +276,18 @@ class CommandProcessor : public vk::Context
void finishAllWork(vk::Context *context); void finishAllWork(vk::Context *context);
VkResult getLastPresentResult(VkSwapchainKHR swapchain)
{
return mTaskProcessor.getLastAndClearPresentResult(swapchain);
}
private: private:
// Command processor thread, called by processTasks. The loop waits for work to // Command processor thread, called by processTasks. The loop waits for work to
// be submitted from a separate thread. // be submitted from a separate thread.
angle::Result processTasksImpl(bool *exitThread); angle::Result processTasksImpl(bool *exitThread);
// Command processor thread, process a task // Command processor thread, process a task
angle::Result processTask(vk::CommandProcessorTask *task); angle::Result processTask(vk::Context *context, vk::CommandProcessorTask *task);
std::queue<vk::CommandProcessorTask> mTasks; std::queue<vk::CommandProcessorTask> mTasks;
mutable std::mutex mWorkerMutex; mutable std::mutex mWorkerMutex;
......
...@@ -399,7 +399,8 @@ angle::Result CommandQueue::init(vk::Context *context) ...@@ -399,7 +399,8 @@ angle::Result CommandQueue::init(vk::Context *context)
angle::Result CommandQueue::checkCompletedCommands(vk::Context *context) angle::Result CommandQueue::checkCompletedCommands(vk::Context *context)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::checkCompletedCommands"); ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::checkCompletedCommandsNoLock");
ASSERT(!context->getRenderer()->getFeatures().commandProcessor.enabled);
RendererVk *renderer = context->getRenderer(); RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice(); VkDevice device = renderer->getDevice();
...@@ -522,6 +523,7 @@ angle::Result CommandQueue::allocatePrimaryCommandBuffer(vk::Context *context, ...@@ -522,6 +523,7 @@ angle::Result CommandQueue::allocatePrimaryCommandBuffer(vk::Context *context,
angle::Result CommandQueue::releasePrimaryCommandBuffer(vk::Context *context, angle::Result CommandQueue::releasePrimaryCommandBuffer(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer) vk::PrimaryCommandBuffer &&commandBuffer)
{ {
ASSERT(!context->getRenderer()->getFeatures().commandProcessor.enabled);
ASSERT(mPrimaryCommandPool.valid()); ASSERT(mPrimaryCommandPool.valid());
ANGLE_TRY(mPrimaryCommandPool.collect(context, std::move(commandBuffer))); ANGLE_TRY(mPrimaryCommandPool.collect(context, std::move(commandBuffer)));
...@@ -558,7 +560,7 @@ angle::Result CommandQueue::finishToSerial(vk::Context *context, ...@@ -558,7 +560,7 @@ angle::Result CommandQueue::finishToSerial(vk::Context *context,
Serial finishSerial, Serial finishSerial,
uint64_t timeout) uint64_t timeout)
{ {
ASSERT(!context->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled); ASSERT(!context->getRenderer()->getFeatures().commandProcessor.enabled);
if (mInFlightCommands.empty()) if (mInFlightCommands.empty())
{ {
...@@ -612,6 +614,7 @@ angle::Result CommandQueue::submitFrame(vk::Context *context, ...@@ -612,6 +614,7 @@ angle::Result CommandQueue::submitFrame(vk::Context *context,
vk::PrimaryCommandBuffer &&commandBuffer) vk::PrimaryCommandBuffer &&commandBuffer)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitFrame"); ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitFrame");
ASSERT(!context->getRenderer()->getFeatures().commandProcessor.enabled);
RendererVk *renderer = context->getRenderer(); RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice(); VkDevice device = renderer->getDevice();
...@@ -649,7 +652,7 @@ angle::Result CommandQueue::submitFrame(vk::Context *context, ...@@ -649,7 +652,7 @@ angle::Result CommandQueue::submitFrame(vk::Context *context,
vk::Shared<vk::Fence> CommandQueue::getLastSubmittedFence(const vk::Context *context) const vk::Shared<vk::Fence> CommandQueue::getLastSubmittedFence(const vk::Context *context) const
{ {
ASSERT(!context->getRenderer()->getFeatures().enableCommandProcessingThread.enabled); ASSERT(!context->getRenderer()->getFeatures().commandProcessor.enabled);
vk::Shared<vk::Fence> fence; vk::Shared<vk::Fence> fence;
if (!mInFlightCommands.empty()) if (!mInFlightCommands.empty())
...@@ -844,6 +847,9 @@ void ContextVk::onDestroy(const gl::Context *context) ...@@ -844,6 +847,9 @@ void ContextVk::onDestroy(const gl::Context *context)
mGpuEventQueryPool.destroy(device); mGpuEventQueryPool.destroy(device);
mCommandPool.destroy(device); mCommandPool.destroy(device);
mPrimaryCommands.destroy(device); mPrimaryCommands.destroy(device);
// This will clean up any outstanding buffer allocations
(void)mRenderer->cleanupGarbage(false);
} }
angle::Result ContextVk::getIncompleteTexture(const gl::Context *context, angle::Result ContextVk::getIncompleteTexture(const gl::Context *context,
...@@ -1861,7 +1867,7 @@ angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo, ...@@ -1861,7 +1867,7 @@ angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo,
vk::ResourceUseList *resourceList, vk::ResourceUseList *resourceList,
vk::PrimaryCommandBuffer &&commandBuffer) vk::PrimaryCommandBuffer &&commandBuffer)
{ {
ASSERT(!getRenderer()->getFeatures().enableCommandProcessingThread.enabled); ASSERT(!getRenderer()->getFeatures().commandProcessor.enabled);
if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics) if (vk::CommandBufferHelper::kEnableCommandStreamDiagnostics)
{ {
...@@ -2203,7 +2209,7 @@ void ContextVk::clearAllGarbage() ...@@ -2203,7 +2209,7 @@ void ContextVk::clearAllGarbage()
garbage.destroy(mRenderer); garbage.destroy(mRenderer);
} }
mCurrentGarbage.clear(); mCurrentGarbage.clear();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
// Issue command to CommandProcessor to ensure all work is complete, which will return any // Issue command to CommandProcessor to ensure all work is complete, which will return any
// garbage items as well. // garbage items as well.
...@@ -2220,7 +2226,7 @@ void ContextVk::handleDeviceLost() ...@@ -2220,7 +2226,7 @@ void ContextVk::handleDeviceLost()
mOutsideRenderPassCommands->reset(); mOutsideRenderPassCommands->reset();
mRenderPassCommands->reset(); mRenderPassCommands->reset();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
mRenderer->handleDeviceLost(); mRenderer->handleDeviceLost();
} }
...@@ -3410,9 +3416,10 @@ angle::Result ContextVk::onMakeCurrent(const gl::Context *context) ...@@ -3410,9 +3416,10 @@ angle::Result ContextVk::onMakeCurrent(const gl::Context *context)
angle::Result ContextVk::onUnMakeCurrent(const gl::Context *context) angle::Result ContextVk::onUnMakeCurrent(const gl::Context *context)
{ {
ANGLE_TRY(flushImpl(nullptr)); ANGLE_TRY(flushImpl(nullptr));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
mRenderer->waitForCommandProcessorIdle(this); ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::onUnMakeCurrent");
mRenderer->finishAllWork(this);
} }
mCurrentWindowSurface = nullptr; mCurrentWindowSurface = nullptr;
return angle::Result::Continue; return angle::Result::Continue;
...@@ -4427,12 +4434,9 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore) ...@@ -4427,12 +4434,9 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
mDefaultUniformStorage.releaseInFlightBuffersToResourceUseList(this); mDefaultUniformStorage.releaseInFlightBuffersToResourceUseList(this);
mStagingBuffer.releaseInFlightBuffersToResourceUseList(this); mStagingBuffer.releaseInFlightBuffersToResourceUseList(this);
// TODO: https://issuetracker.google.com/issues/170329600 - Verify that
// waitForSwapchainImageIfNecessary makes sense both w/ & w/o threading. I believe they do, but
// want confirmation.
waitForSwapchainImageIfNecessary(); waitForSwapchainImageIfNecessary();
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
// Some tasks from ContextVk::submitFrame() that run before CommandQueue::submitFrame() // Some tasks from ContextVk::submitFrame() that run before CommandQueue::submitFrame()
gl::RunningGraphWidget *renderPassCount = gl::RunningGraphWidget *renderPassCount =
...@@ -4506,7 +4510,7 @@ angle::Result ContextVk::finishImpl() ...@@ -4506,7 +4510,7 @@ angle::Result ContextVk::finishImpl()
ANGLE_TRY(flushImpl(nullptr)); ANGLE_TRY(flushImpl(nullptr));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial())); ANGLE_TRY(finishToSerial(getLastSubmittedQueueSerial()));
} }
...@@ -4555,14 +4559,21 @@ bool ContextVk::isSerialInUse(Serial serial) const ...@@ -4555,14 +4559,21 @@ bool ContextVk::isSerialInUse(Serial serial) const
angle::Result ContextVk::checkCompletedCommands() angle::Result ContextVk::checkCompletedCommands()
{ {
ASSERT(!mRenderer->getFeatures().enableCommandProcessingThread.enabled); if (mRenderer->getFeatures().commandProcessor.enabled)
{
// TODO: https://issuetracker.google.com/169788986 - would be better if we could just wait
// for the work we need but that requires QueryHelper to use the actual serial for the
// query.
mRenderer->checkCompletedCommands(this);
return angle::Result::Continue;
}
return mCommandQueue.checkCompletedCommands(this); return mCommandQueue.checkCompletedCommands(this);
} }
angle::Result ContextVk::finishToSerial(Serial serial) angle::Result ContextVk::finishToSerial(Serial serial)
{ {
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
mRenderer->finishToSerial(this, serial); mRenderer->finishToSerial(this, serial);
return angle::Result::Continue; return angle::Result::Continue;
...@@ -4596,7 +4607,7 @@ angle::Result ContextVk::ensureSubmitFenceInitialized() ...@@ -4596,7 +4607,7 @@ angle::Result ContextVk::ensureSubmitFenceInitialized()
angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut) angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOut)
{ {
ASSERT(!getRenderer()->getFeatures().enableCommandProcessingThread.enabled); ASSERT(!getRenderer()->getFeatures().commandProcessor.enabled);
ANGLE_TRY(ensureSubmitFenceInitialized()); ANGLE_TRY(ensureSubmitFenceInitialized());
...@@ -4607,9 +4618,9 @@ angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOu ...@@ -4607,9 +4618,9 @@ angle::Result ContextVk::getNextSubmitFence(vk::Shared<vk::Fence> *sharedFenceOu
vk::Shared<vk::Fence> ContextVk::getLastSubmittedFence() const vk::Shared<vk::Fence> ContextVk::getLastSubmittedFence() const
{ {
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
return mRenderer->getLastSubmittedFence(); return mRenderer->getLastSubmittedFence(this);
} }
return mCommandQueue.getLastSubmittedFence(this); return mCommandQueue.getLastSubmittedFence(this);
} }
...@@ -5020,7 +5031,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass() ...@@ -5020,7 +5031,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
vk::RenderPass *renderPass = nullptr; vk::RenderPass *renderPass = nullptr;
ANGLE_TRY(mRenderPassCommands->getRenderPassWithOps(this, &renderPass)); ANGLE_TRY(mRenderPassCommands->getRenderPassWithOps(this, &renderPass));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
mRenderPassCommands->markClosed(); mRenderPassCommands->markClosed();
vk::CommandProcessorTask flushToPrimary; vk::CommandProcessorTask flushToPrimary;
...@@ -5170,7 +5181,7 @@ angle::Result ContextVk::flushOutsideRenderPassCommands() ...@@ -5170,7 +5181,7 @@ angle::Result ContextVk::flushOutsideRenderPassCommands()
vk::RenderPass *renderPass = nullptr; vk::RenderPass *renderPass = nullptr;
ANGLE_TRY(mOutsideRenderPassCommands->getRenderPassWithOps(this, &renderPass)); ANGLE_TRY(mOutsideRenderPassCommands->getRenderPassWithOps(this, &renderPass));
if (mRenderer->getFeatures().enableCommandProcessingThread.enabled) if (mRenderer->getFeatures().commandProcessor.enabled)
{ {
mOutsideRenderPassCommands->markClosed(); mOutsideRenderPassCommands->markClosed();
vk::CommandProcessorTask flushToPrimary; vk::CommandProcessorTask flushToPrimary;
......
...@@ -168,6 +168,8 @@ angle::Result QueryVk::queryCounter(const gl::Context *context) ...@@ -168,6 +168,8 @@ angle::Result QueryVk::queryCounter(const gl::Context *context)
angle::Result QueryVk::getResult(const gl::Context *context, bool wait) angle::Result QueryVk::getResult(const gl::Context *context, bool wait)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "QueryVk::getResult");
if (mCachedResultValid) if (mCachedResultValid)
{ {
return angle::Result::Continue; return angle::Result::Continue;
...@@ -182,11 +184,10 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait) ...@@ -182,11 +184,10 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait)
// has pending work should flush begin too. // has pending work should flush begin too.
// TODO: https://issuetracker.google.com/169788986 - can't guarantee hasPendingWork() works when // TODO: https://issuetracker.google.com/169788986 - can't guarantee hasPendingWork() works when
// using threaded worker // using threaded worker
if (mQueryHelper.hasPendingWork(contextVk) || if (mQueryHelper.hasPendingWork(contextVk))
contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled)
{ {
ANGLE_TRY(contextVk->flushImpl(nullptr)); ANGLE_TRY(contextVk->flushImpl(nullptr));
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled) if (contextVk->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{ {
// TODO: https://issuetracker.google.com/170312581 - For now just stalling here // TODO: https://issuetracker.google.com/170312581 - For now just stalling here
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk); contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
...@@ -196,14 +197,12 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait) ...@@ -196,14 +197,12 @@ angle::Result QueryVk::getResult(const gl::Context *context, bool wait)
ASSERT(!mQueryHelper.hasPendingWork(contextVk)); ASSERT(!mQueryHelper.hasPendingWork(contextVk));
} }
if (!contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled) ANGLE_TRY(contextVk->checkCompletedCommands());
{
// If the command buffer this query is being written to is still in flight, its reset // If the command buffer this query is being written to is still in flight, its reset
// command may not have been performed by the GPU yet. To avoid a race condition in this // command may not have been performed by the GPU yet. To avoid a race condition in this
// case, wait for the batch to finish first before querying (or return not-ready if not // case, wait for the batch to finish first before querying (or return not-ready if not
// waiting). // waiting).
ANGLE_TRY(contextVk->checkCompletedCommands());
}
if (contextVk->isSerialInUse(mQueryHelper.getStoredQueueSerial())) if (contextVk->isSerialInUse(mQueryHelper.getStoredQueueSerial()))
{ {
if (!wait) if (!wait)
......
...@@ -505,7 +505,7 @@ void RendererVk::releaseSharedResources(vk::ResourceUseList *resourceList) ...@@ -505,7 +505,7 @@ void RendererVk::releaseSharedResources(vk::ResourceUseList *resourceList)
void RendererVk::onDestroy() void RendererVk::onDestroy()
{ {
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().commandProcessor.enabled)
{ {
// Shutdown worker thread // Shutdown worker thread
mCommandProcessor.shutdown(&mCommandProcessorThread); mCommandProcessor.shutdown(&mCommandProcessorThread);
...@@ -914,11 +914,19 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk, ...@@ -914,11 +914,19 @@ angle::Result RendererVk::initialize(DisplayVk *displayVk,
setGlobalDebugAnnotator(); setGlobalDebugAnnotator();
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().commandProcessor.enabled)
{ {
mCommandProcessorThread = if (getFeatures().asynchronousCommandProcessing.enabled)
std::thread(&vk::CommandProcessor::processTasks, &mCommandProcessor); {
mCommandProcessor.waitForWorkComplete(nullptr); ASSERT(getFeatures().commandProcessor.enabled);
mCommandProcessorThread =
std::thread(&vk::CommandProcessor::processTasks, &mCommandProcessor);
waitForCommandProcessorIdle(displayVk);
}
else
{
ANGLE_TRY(mCommandProcessor.initTaskProcessor(displayVk));
}
} }
return angle::Result::Continue; return angle::Result::Continue;
...@@ -1931,7 +1939,7 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev ...@@ -1931,7 +1939,7 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const ExtensionNameList &dev
ANGLE_FEATURE_CONDITION(&mFeatures, preferAggregateBarrierCalls, isNvidia || isAMD || isIntel); ANGLE_FEATURE_CONDITION(&mFeatures, preferAggregateBarrierCalls, isNvidia || isAMD || isIntel);
// Currently disabled by default: http://anglebug.com/4324 // Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, enableCommandProcessingThread, false); ANGLE_FEATURE_CONDITION(&mFeatures, commandProcessor, false);
// Currently disabled by default: http://anglebug.com/4324 // Currently disabled by default: http://anglebug.com/4324
ANGLE_FEATURE_CONDITION(&mFeatures, asynchronousCommandProcessing, false); ANGLE_FEATURE_CONDITION(&mFeatures, asynchronousCommandProcessing, false);
...@@ -2238,7 +2246,7 @@ angle::Result RendererVk::queueSubmit(vk::Context *context, ...@@ -2238,7 +2246,7 @@ angle::Result RendererVk::queueSubmit(vk::Context *context,
outputVmaStatString(); outputVmaStatString();
} }
ASSERT(!getFeatures().enableCommandProcessingThread.enabled); ASSERT(!getFeatures().commandProcessor.enabled);
{ {
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
...@@ -2266,15 +2274,20 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context, ...@@ -2266,15 +2274,20 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context,
const vk::Fence *fence, const vk::Fence *fence,
Serial *serialOut) Serial *serialOut)
{ {
if (getFeatures().enableCommandProcessingThread.enabled) ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queueSubmitOneOff");
if (getFeatures().commandProcessor.enabled)
{ {
vk::CommandProcessorTask oneOffQueueSubmit; vk::CommandProcessorTask oneOffQueueSubmit;
oneOffQueueSubmit.initOneOffQueueSubmit(primary.getHandle(), priority, fence); oneOffQueueSubmit.initOneOffQueueSubmit(primary.getHandle(), priority, fence);
queueCommand(context, &oneOffQueueSubmit); queueCommand(context, &oneOffQueueSubmit);
waitForCommandProcessorIdle(context); // TODO: https://issuetracker.google.com/170312581 - should go away with improved fence
// management
if (getFeatures().asynchronousCommandProcessing.enabled)
{
waitForCommandProcessorIdle(context);
}
*serialOut = getLastSubmittedQueueSerial(); *serialOut = getLastSubmittedQueueSerial();
ANGLE_TRY(cleanupGarbage(false));
} }
else else
{ {
...@@ -2294,7 +2307,8 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context, ...@@ -2294,7 +2307,8 @@ angle::Result RendererVk::queueSubmitOneOff(vk::Context *context,
angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriority priority) angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriority priority)
{ {
if (getFeatures().enableCommandProcessingThread.enabled) ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queueWaitIdle");
if (getFeatures().asynchronousCommandProcessing.enabled)
{ {
// Wait for all pending commands to get sent before issuing vkQueueWaitIdle // Wait for all pending commands to get sent before issuing vkQueueWaitIdle
waitForCommandProcessorIdle(context); waitForCommandProcessorIdle(context);
...@@ -2311,7 +2325,9 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori ...@@ -2311,7 +2325,9 @@ angle::Result RendererVk::queueWaitIdle(vk::Context *context, egl::ContextPriori
angle::Result RendererVk::deviceWaitIdle(vk::Context *context) angle::Result RendererVk::deviceWaitIdle(vk::Context *context)
{ {
if (getFeatures().enableCommandProcessingThread.enabled) ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::deviceWaitIdle");
if (getFeatures().asynchronousCommandProcessing.enabled)
{ {
// Wait for all pending commands to get sent before issuing vkQueueWaitIdle // Wait for all pending commands to get sent before issuing vkQueueWaitIdle
waitForCommandProcessorIdle(context); waitForCommandProcessorIdle(context);
...@@ -2331,7 +2347,7 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority, ...@@ -2331,7 +2347,7 @@ VkResult RendererVk::queuePresent(egl::ContextPriority priority,
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent"); ANGLE_TRACE_EVENT0("gpu.angle", "RendererVk::queuePresent");
ASSERT(!getFeatures().enableCommandProcessingThread.enabled); ASSERT(!getFeatures().commandProcessor.enabled);
std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex); std::lock_guard<decltype(mQueueMutex)> lock(mQueueMutex);
......
...@@ -227,9 +227,9 @@ class RendererVk : angle::NonCopyable ...@@ -227,9 +227,9 @@ class RendererVk : angle::NonCopyable
} }
} }
vk::Shared<vk::Fence> getLastSubmittedFence() const vk::Shared<vk::Fence> getLastSubmittedFence(const vk::Context *context) const
{ {
return mCommandProcessor.getLastSubmittedFence(); return mCommandProcessor.getLastSubmittedFence(context);
} }
void handleDeviceLost() { mCommandProcessor.handleDeviceLost(); } void handleDeviceLost() { mCommandProcessor.handleDeviceLost(); }
...@@ -250,7 +250,7 @@ class RendererVk : angle::NonCopyable ...@@ -250,7 +250,7 @@ class RendererVk : angle::NonCopyable
ANGLE_INLINE Serial getCurrentQueueSerial() ANGLE_INLINE Serial getCurrentQueueSerial()
{ {
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().commandProcessor.enabled)
{ {
return mCommandProcessor.getCurrentQueueSerial(); return mCommandProcessor.getCurrentQueueSerial();
} }
...@@ -259,7 +259,7 @@ class RendererVk : angle::NonCopyable ...@@ -259,7 +259,7 @@ class RendererVk : angle::NonCopyable
} }
ANGLE_INLINE Serial getLastSubmittedQueueSerial() ANGLE_INLINE Serial getLastSubmittedQueueSerial()
{ {
if (getFeatures().enableCommandProcessingThread.enabled) if (getFeatures().commandProcessor.enabled)
{ {
return mCommandProcessor.getLastSubmittedSerial(); return mCommandProcessor.getLastSubmittedSerial();
} }
...@@ -274,6 +274,11 @@ class RendererVk : angle::NonCopyable ...@@ -274,6 +274,11 @@ class RendererVk : angle::NonCopyable
void onCompletedSerial(Serial serial); void onCompletedSerial(Serial serial);
// Returns the result that the command processor reports for the most recent present on
// |swapchain|. This is a pure forwarder; all bookkeeping lives in mCommandProcessor.
VkResult getLastPresentResult(VkSwapchainKHR swapchain)
{
    const VkResult lastPresentResult = mCommandProcessor.getLastPresentResult(swapchain);
    return lastPresentResult;
}
bool enableDebugUtils() const { return mEnableDebugUtils; } bool enableDebugUtils() const { return mEnableDebugUtils; }
SamplerCache &getSamplerCache() { return mSamplerCache; } SamplerCache &getSamplerCache() { return mSamplerCache; }
...@@ -289,6 +294,7 @@ class RendererVk : angle::NonCopyable ...@@ -289,6 +294,7 @@ class RendererVk : angle::NonCopyable
vk::Error getAndClearPendingError() { return mCommandProcessor.getAndClearPendingError(); } vk::Error getAndClearPendingError() { return mCommandProcessor.getAndClearPendingError(); }
void waitForCommandProcessorIdle(vk::Context *context) void waitForCommandProcessorIdle(vk::Context *context)
{ {
ASSERT(getFeatures().asynchronousCommandProcessing.enabled);
mCommandProcessor.waitForWorkComplete(context); mCommandProcessor.waitForWorkComplete(context);
} }
...@@ -297,6 +303,11 @@ class RendererVk : angle::NonCopyable ...@@ -297,6 +303,11 @@ class RendererVk : angle::NonCopyable
mCommandProcessor.finishToSerial(context, serial); mCommandProcessor.finishToSerial(context, serial);
} }
// Forwards to the command processor's checkCompletedCommands() using the caller's context.
void checkCompletedCommands(vk::Context *context) { mCommandProcessor.checkCompletedCommands(context); }
void finishAllWork(vk::Context *context) { mCommandProcessor.finishAllWork(context); } void finishAllWork(vk::Context *context) { mCommandProcessor.finishAllWork(context); }
VkQueue getVkQueue(egl::ContextPriority priority) const { return mQueues[priority]; } VkQueue getVkQueue(egl::ContextPriority priority) const { return mQueues[priority]; }
...@@ -308,6 +319,8 @@ class RendererVk : angle::NonCopyable ...@@ -308,6 +319,8 @@ class RendererVk : angle::NonCopyable
void outputVmaStatString(); void outputVmaStatString();
angle::Result cleanupGarbage(bool block);
private: private:
angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex); angle::Result initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex);
void ensureCapsInitialized() const; void ensureCapsInitialized() const;
...@@ -327,8 +340,6 @@ class RendererVk : angle::NonCopyable ...@@ -327,8 +340,6 @@ class RendererVk : angle::NonCopyable
template <VkFormatFeatureFlags VkFormatProperties::*features> template <VkFormatFeatureFlags VkFormatProperties::*features>
bool hasFormatFeatureBits(VkFormat format, const VkFormatFeatureFlags featureBits) const; bool hasFormatFeatureBits(VkFormat format, const VkFormatFeatureFlags featureBits) const;
angle::Result cleanupGarbage(bool block);
egl::Display *mDisplay; egl::Display *mDisplay;
mutable bool mCapsInitialized; mutable bool mCapsInitialized;
......
...@@ -464,6 +464,8 @@ void SwapHistory::destroy(RendererVk *renderer) ...@@ -464,6 +464,8 @@ void SwapHistory::destroy(RendererVk *renderer)
angle::Result SwapHistory::waitFence(ContextVk *contextVk) angle::Result SwapHistory::waitFence(ContextVk *contextVk)
{ {
ASSERT(sharedFence.isReferenced()); ASSERT(sharedFence.isReferenced());
// TODO: https://issuetracker.google.com/170312581 - This wait needs to be synchronized with
// worker thread
ANGLE_VK_TRY(contextVk, sharedFence.get().wait(contextVk->getDevice(), ANGLE_VK_TRY(contextVk, sharedFence.get().wait(contextVk->getDevice(),
std::numeric_limits<uint64_t>::max())); std::numeric_limits<uint64_t>::max()));
return angle::Result::Continue; return angle::Result::Continue;
...@@ -1224,6 +1226,35 @@ egl::Error WindowSurfaceVk::swap(const gl::Context *context) ...@@ -1224,6 +1226,35 @@ egl::Error WindowSurfaceVk::swap(const gl::Context *context)
return angle::ToEGL(result, displayVk, EGL_BAD_SURFACE); return angle::ToEGL(result, displayVk, EGL_BAD_SURFACE);
} }
// Classifies the VkResult of a present operation.
//
// |context|          - used to look up renderer features and to report errors.
// |result|           - the VkResult produced by the present.
// |presentOutOfDate| - out parameter; set to true when the swapchain must be recreated.
//
// VK_ERROR_OUT_OF_DATE_KHR is never treated as a failure: the swapchain just needs to be
// recreated before continuing. VK_SUBOPTIMAL_KHR is also never treated as a failure, but it only
// marks the swapchain as out of date when enablePreRotateSurfaces is on (the device orientation
// changed and the swapchain should be recreated with the new window orientation); otherwise it is
// ignored for now. Every other result is passed through ANGLE_VK_TRY.
angle::Result WindowSurfaceVk::computePresentOutOfDate(vk::Context *context,
                                                       VkResult result,
                                                       bool *presentOutOfDate)
{
    const bool preRotationEnabled =
        context->getRenderer()->getFeatures().enablePreRotateSurfaces.enabled;
    const bool isOutOfDate  = (result == VK_ERROR_OUT_OF_DATE_KHR);
    const bool isSuboptimal = (result == VK_SUBOPTIMAL_KHR);

    // Suboptimal only forces a swapchain recreation when pre-rotation is supported.
    *presentOutOfDate = isOutOfDate || (preRotationEnabled && isSuboptimal);

    // Neither of the two "soft" results may reach the error check.
    if (!isOutOfDate && !isSuboptimal)
    {
        ANGLE_VK_TRY(context, result);
    }

    return angle::Result::Continue;
}
angle::Result WindowSurfaceVk::present(ContextVk *contextVk, angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
EGLint *rects, EGLint *rects,
EGLint n_rects, EGLint n_rects,
...@@ -1239,6 +1270,8 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1239,6 +1270,8 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present: Throttle CPU"); ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present: Throttle CPU");
if (swap.sharedFence.isReferenced()) if (swap.sharedFence.isReferenced())
{ {
// TODO: https://issuetracker.google.com/170312581 - This wait needs to be sure to
// happen after work has been submitted
ANGLE_TRY(swap.waitFence(contextVk)); ANGLE_TRY(swap.waitFence(contextVk));
swap.destroy(renderer); swap.destroy(renderer);
} }
...@@ -1354,7 +1387,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1354,7 +1387,7 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
presentRegion.pRectangles = vkRects.data(); presentRegion.pRectangles = vkRects.data();
presentRegions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR; presentRegions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
presentRegions.pNext = nullptr; presentRegions.pNext = presentInfo.pNext;
presentRegions.swapchainCount = 1; presentRegions.swapchainCount = 1;
presentRegions.pRegions = &presentRegion; presentRegions.pRegions = &presentRegion;
...@@ -1372,63 +1405,24 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk, ...@@ -1372,63 +1405,24 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
mCurrentSwapHistoryIndex == mSwapHistory.size() ? 0 : mCurrentSwapHistoryIndex; mCurrentSwapHistoryIndex == mSwapHistory.size() ? 0 : mCurrentSwapHistoryIndex;
VkResult result; VkResult result;
if (renderer->getFeatures().enableCommandProcessingThread.enabled) if (renderer->getFeatures().commandProcessor.enabled)
{ {
vk::CommandProcessorTask present; vk::CommandProcessorTask present;
present.initPresent(contextVk->getPriority(), presentInfo); present.initPresent(contextVk->getPriority(), presentInfo);
// Make sure everything has been submitted (and errors handled) ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present");
renderer->waitForCommandProcessorIdle(contextVk);
// Submit queuePresent all by itself (ignoring interference from other threads for now)
renderer->queueCommand(contextVk, &present); renderer->queueCommand(contextVk, &present);
// TODO: https://issuetracker.google.com/issues/170329600 - Just stalling here for now, but // Always return success, when we call acquireNextImage we'll check the return code. This
// really want to let main thread continue // allows the app to continue working until we really need to know the return code from
// need to figure out how to handle work below off-thread and sync to main // present.
// Also, need to fix lifetime of presentInfo data when main thread continues.
// There is a bunch of work happening after present to deal with swapchain recreation.
// Will that require moving a large chunk of swapImpl to the CommandProcessor?
// That will likely require serializing access to the WindowSurfaceVk object in order
// to have current content.
result = VK_SUCCESS; result = VK_SUCCESS;
// wait for the queuePresent to be submitted and intentionally set the context to nullptr so
// that we can catch any error. Note this doesn't prevent another context from grabbing the
// error. Will be fixed properly in a follow-up as part of present work.
renderer->waitForCommandProcessorIdle(nullptr);
if (renderer->hasPendingError())
{
vk::Error error = renderer->getAndClearPendingError();
result = error.mErrorCode;
}
} }
else else
{ {
result = renderer->queuePresent(contextVk->getPriority(), presentInfo); result = renderer->queuePresent(contextVk->getPriority(), presentInfo);
} }
// If OUT_OF_DATE is returned, it's ok, we just need to recreate the swapchain before ANGLE_TRY(computePresentOutOfDate(contextVk, result, presentOutOfDate));
// continuing.
// If VK_SUBOPTIMAL_KHR is returned it's because the device orientation changed and we should
// recreate the swapchain with a new window orientation.
if (renderer->getFeatures().enablePreRotateSurfaces.enabled)
{
// Also check for VK_SUBOPTIMAL_KHR.
*presentOutOfDate = ((result == VK_ERROR_OUT_OF_DATE_KHR) || (result == VK_SUBOPTIMAL_KHR));
if (!*presentOutOfDate)
{
ANGLE_VK_TRY(contextVk, result);
}
}
else
{
// We aren't quite ready for that so just ignore for now.
*presentOutOfDate = result == VK_ERROR_OUT_OF_DATE_KHR;
if (!*presentOutOfDate && result != VK_SUBOPTIMAL_KHR)
{
ANGLE_VK_TRY(contextVk, result);
}
}
return angle::Result::Continue; return angle::Result::Continue;
} }
...@@ -1488,6 +1482,15 @@ angle::Result WindowSurfaceVk::doDeferredAcquireNextImage(const gl::Context *con ...@@ -1488,6 +1482,15 @@ angle::Result WindowSurfaceVk::doDeferredAcquireNextImage(const gl::Context *con
ContextVk *contextVk = vk::GetImpl(context); ContextVk *contextVk = vk::GetImpl(context);
DisplayVk *displayVk = vk::GetImpl(context->getDisplay()); DisplayVk *displayVk = vk::GetImpl(context->getDisplay());
if (contextVk->getFeatures().commandProcessor.enabled)
{
VkResult result = contextVk->getRenderer()->getLastPresentResult(mSwapchain);
// Now that we have the result from the last present need to determine if it's out of date
// or not.
ANGLE_TRY(computePresentOutOfDate(contextVk, result, &presentOutOfDate));
}
ANGLE_TRY(checkForOutOfDateSwapchain(contextVk, presentOutOfDate)); ANGLE_TRY(checkForOutOfDateSwapchain(contextVk, presentOutOfDate));
{ {
......
...@@ -289,6 +289,9 @@ class WindowSurfaceVk : public SurfaceVk ...@@ -289,6 +289,9 @@ class WindowSurfaceVk : public SurfaceVk
// Called when a swapchain image whose acquisition was deferred must be acquired. This method // Called when a swapchain image whose acquisition was deferred must be acquired. This method
// will recreate the swapchain (if needed) and call the acquireNextSwapchainImage() method. // will recreate the swapchain (if needed) and call the acquireNextSwapchainImage() method.
angle::Result doDeferredAcquireNextImage(const gl::Context *context, bool presentOutOfDate); angle::Result doDeferredAcquireNextImage(const gl::Context *context, bool presentOutOfDate);
angle::Result computePresentOutOfDate(vk::Context *context,
VkResult result,
bool *presentOutOfDate);
angle::Result present(ContextVk *contextVk, angle::Result present(ContextVk *contextVk,
EGLint *rects, EGLint *rects,
EGLint n_rects, EGLint n_rects,
......
...@@ -34,8 +34,9 @@ void SyncHelper::releaseToRenderer(RendererVk *renderer) ...@@ -34,8 +34,9 @@ void SyncHelper::releaseToRenderer(RendererVk *renderer)
renderer->collectGarbageAndReinit(&mUse, &mEvent); renderer->collectGarbageAndReinit(&mUse, &mEvent);
// TODO: https://issuetracker.google.com/170312581 - Currently just stalling on worker thread // TODO: https://issuetracker.google.com/170312581 - Currently just stalling on worker thread
// here to try and avoid race condition. If this works, need some alternate solution // here to try and avoid race condition. If this works, need some alternate solution
if (renderer->getFeatures().enableCommandProcessingThread.enabled) if (renderer->getFeatures().asynchronousCommandProcessing.enabled)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "SyncHelper::releaseToRenderer");
renderer->waitForCommandProcessorIdle(nullptr); renderer->waitForCommandProcessorIdle(nullptr);
} }
mFence.reset(renderer->getDevice()); mFence.reset(renderer->getDevice());
...@@ -56,9 +57,12 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk) ...@@ -56,9 +57,12 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk)
ANGLE_VK_TRY(contextVk, event.get().init(device, eventCreateInfo)); ANGLE_VK_TRY(contextVk, event.get().init(device, eventCreateInfo));
// TODO: https://issuetracker.google.com/170312581 - For now wait for worker thread to finish // TODO: https://issuetracker.google.com/170312581 - For now wait for worker thread to finish
// then get next fence from renderer // then get next fence from renderer
if (contextVk->getRenderer()->getFeatures().enableCommandProcessingThread.enabled) if (contextVk->getRenderer()->getFeatures().commandProcessor.enabled)
{ {
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk); if (contextVk->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
}
ANGLE_TRY(contextVk->getRenderer()->getNextSubmitFence(&mFence, false)); ANGLE_TRY(contextVk->getRenderer()->getNextSubmitFence(&mFence, false));
} }
else else
...@@ -106,10 +110,11 @@ angle::Result SyncHelper::clientWait(Context *context, ...@@ -106,10 +110,11 @@ angle::Result SyncHelper::clientWait(Context *context,
ANGLE_TRY(contextVk->flushImpl(nullptr)); ANGLE_TRY(contextVk->flushImpl(nullptr));
} }
// If we are using worker need to wait for the commands to be issued before waiting on the // TODO: https://issuetracker.google.com/170312581 - If we are using worker need to wait for the
// fence. // commands to be issued before waiting on the fence.
if (renderer->getFeatures().enableCommandProcessingThread.enabled) if (renderer->getFeatures().asynchronousCommandProcessing.enabled)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "SyncHelper::clientWait");
renderer->waitForCommandProcessorIdle(contextVk); renderer->waitForCommandProcessorIdle(contextVk);
} }
...@@ -213,14 +218,18 @@ angle::Result SyncHelperNativeFence::initializeWithFd(ContextVk *contextVk, int ...@@ -213,14 +218,18 @@ angle::Result SyncHelperNativeFence::initializeWithFd(ContextVk *contextVk, int
retain(&contextVk->getResourceUseList()); retain(&contextVk->getResourceUseList());
if (renderer->getFeatures().enableCommandProcessingThread.enabled) if (renderer->getFeatures().commandProcessor.enabled)
{ {
CommandProcessorTask oneOffQueueSubmit; CommandProcessorTask oneOffQueueSubmit;
oneOffQueueSubmit.initOneOffQueueSubmit(VK_NULL_HANDLE, contextVk->getPriority(), oneOffQueueSubmit.initOneOffQueueSubmit(VK_NULL_HANDLE, contextVk->getPriority(),
&fence.get()); &fence.get());
renderer->queueCommand(contextVk, &oneOffQueueSubmit); renderer->queueCommand(contextVk, &oneOffQueueSubmit);
// TODO: https://issuetracker.google.com/170312581 - wait for now // TODO: https://issuetracker.google.com/170312581 - wait for now
renderer->waitForCommandProcessorIdle(contextVk); if (renderer->getFeatures().asynchronousCommandProcessing.enabled)
{
ANGLE_TRACE_EVENT0("gpu.angle", "SyncHelperNativeFence::initializeWithFd");
renderer->waitForCommandProcessorIdle(contextVk);
}
} }
else else
{ {
...@@ -291,10 +300,11 @@ angle::Result SyncHelperNativeFence::clientWait(Context *context, ...@@ -291,10 +300,11 @@ angle::Result SyncHelperNativeFence::clientWait(Context *context,
ANGLE_TRY(contextVk->flushImpl(nullptr)); ANGLE_TRY(contextVk->flushImpl(nullptr));
} }
// If we are using worker need to wait for the commands to be issued before waiting on the // TODO: https://issuetracker.google.com/170312581 - If we are using worker need to wait for the
// fence. // commands to be issued before waiting on the fence.
if (contextVk->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled) if (contextVk->getRenderer()->getFeatures().asynchronousCommandProcessing.enabled)
{ {
ANGLE_TRACE_EVENT0("gpu.angle", "SyncHelperNativeFence::clientWait");
contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk); contextVk->getRenderer()->waitForCommandProcessorIdle(contextVk);
} }
......
...@@ -2324,8 +2324,9 @@ void QueryHelper::writeTimestamp(ContextVk *contextVk, CommandBuffer *commandBuf ...@@ -2324,8 +2324,9 @@ void QueryHelper::writeTimestamp(ContextVk *contextVk, CommandBuffer *commandBuf
bool QueryHelper::hasPendingWork(ContextVk *contextVk) bool QueryHelper::hasPendingWork(ContextVk *contextVk)
{ {
// If the renderer has a queue serial higher than the stored one, the command buffers that // TODO: https://issuetracker.google.com/169788986 - this is not a valid statement with
// recorded this query have already been submitted, so there is no pending work. // CommandProcessor: If the renderer has a queue serial higher than the stored one, the command
// buffers that recorded this query have already been submitted, so there is no pending work.
return mMostRecentSerial.valid() && (mMostRecentSerial == contextVk->getCurrentQueueSerial()); return mMostRecentSerial.valid() && (mMostRecentSerial == contextVk->getCurrentQueueSerial());
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment