Commit 5cf7472d by Tim Van Patten Committed by Commit Bot

Vulkan: Ignore glFlush to reduce vkQueueSubmits in Asphalt 9

Multithreaded apps can use the following pattern: glDrawElements() glFenceSync() glFlush() glWaitSync() This currently results in a vkQueueSubmit for every glFlush() to ensure that the work has landed in the command queue in the correct order. However, ANGLE can instead avoid the vkQueueSubmit during the glFlush() in this situation by instead flushing the ContextVk's commands and ending the render pass to ensure the commands are submitted in the correct order to the renderer. This improves performance for Asphalt 9 by reducing frame times from 150-200msec to 35-55msec. Specifically, ANGLE will call flushCommandsAndEndRenderPass() when there is a sync object pending a flush or if the ContextVk is currently shared. Additionally, on all devices except Qualcomm, ANGLE can ignore all other glFlush() calls entirely and return immediately. For Qualcomm devices, ANGLE is still required to perform a full flush (resulting in a vkQueueSubmit), since ignoring the glFlush() reduces the Manhattan 3.0 offscreen score by ~3%. Bug: angleproject:5306 Bug: angleproject:5425 Change-Id: I9d747caf5bf306166be0fec630a78caf41208c27 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2552718 Commit-Queue: Tim Van Patten <timvp@google.com> Reviewed-by: 's avatarCharlie Lao <cclao@google.com> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org>
parent 4abf3788
......@@ -396,7 +396,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mRenderPassCommands(nullptr),
mGpuEventsEnabled(false),
mSyncObjectPendingFlush(false),
mDeferredFlushCount(0),
mEGLSyncObjectPendingFlush(false),
mHasDeferredFlush(false),
mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
mGpuEventTimestampOrigin(0),
mPerfCounters{},
......@@ -700,14 +701,29 @@ angle::Result ContextVk::initialize()
angle::Result ContextVk::flush(const gl::Context *context)
{
// If we are in middle of renderpass and it is not a shared context, then we will defer the
// glFlush call here until the renderpass ends. If sync object has been used, we must respect
// glFlush call, otherwise we a wait for sync object without GL_SYNC_FLUSH_COMMANDS_BIT may
// never come back.
if (mRenderer->getFeatures().deferFlushUntilEndRenderPass.enabled && !context->isShared() &&
!mSyncObjectPendingFlush && hasStartedRenderPass())
// If a sync object has been used or this is a shared context, then we need to flush the
// commands and end the render pass to make sure the sync object (and any preceding commands)
// lands in the correct place within the command stream.
// EGL sync objects can span across context share groups, so don't defer flushes if there's one
// pending a flush.
if (mSyncObjectPendingFlush && context->isShared() && !mEGLSyncObjectPendingFlush)
{
mDeferredFlushCount++;
// Flush the commands to create a sync point in the command stream.
ANGLE_TRY(flushCommandsAndEndRenderPass());
// Move the resources to the share group, so they are released during the next vkQueueSubmit
// performed by any context in the share group. Note that this relies heavily on the global
// mutex to guarantee that no two contexts are modifying the lists at the same time.
getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
mHasDeferredFlush = true;
return angle::Result::Continue;
}
// EGL sync objects can span across context share groups, so don't defer flushes if there's one
// pending a flush.
if (!mEGLSyncObjectPendingFlush &&
mRenderer->getFeatures().deferFlushUntilEndRenderPass.enabled && hasStartedRenderPass())
{
mHasDeferredFlush = true;
return angle::Result::Continue;
}
......@@ -1569,9 +1585,11 @@ angle::Result ContextVk::submitFrame(const vk::Semaphore *signalSemaphore)
dumpCommandStreamDiagnostics();
}
ANGLE_TRY(mRenderer->submitFrame(
this, mContextPriority, std::move(mWaitSemaphores), std::move(mWaitSemaphoreStageMasks),
signalSemaphore, std::move(mResourceUseList), std::move(mCurrentGarbage), &mCommandPool));
getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
ANGLE_TRY(mRenderer->submitFrame(this, mContextPriority, std::move(mWaitSemaphores),
std::move(mWaitSemaphoreStageMasks), signalSemaphore,
getShareGroupVk()->releaseResourceUseLists(),
std::move(mCurrentGarbage), &mCommandPool));
onRenderPassFinished();
mComputeDirtyBits |= mNewComputeCommandBufferDirtyBits;
......@@ -4256,9 +4274,9 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
{
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushImpl");
// We must set this to zero before calling flushCommandsAndEndRenderPass to prevent it from
// We must set this to false before calling flushCommandsAndEndRenderPass to prevent it from
// calling back to flushImpl.
mDeferredFlushCount = 0;
mHasDeferredFlush = false;
mSyncObjectPendingFlush = false;
ANGLE_TRY(flushCommandsAndEndRenderPass());
......@@ -4651,7 +4669,6 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
if (!mRenderPassCommands->started())
{
ASSERT(!mDeferredFlushCount);
onRenderPassFinished();
return angle::Result::Continue;
}
......@@ -4707,7 +4724,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
ANGLE_TRY(flushOutsideRenderPassCommands());
}
if (mDeferredFlushCount > 0)
if (mHasDeferredFlush)
{
// If we have deferred glFlush call in the middle of renderpass, flush them now.
ANGLE_TRY(flushImpl(nullptr));
......
......@@ -569,6 +569,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
vk::PerfCounters &getPerfCounters() { return mPerfCounters; }
void onSyncHelperInitialize() { mSyncObjectPendingFlush = true; }
void onEGLSyncHelperInitialize() { mEGLSyncObjectPendingFlush = true; }
// When UtilsVk issues a draw call on the currently running render pass, the pipelines and
// descriptor sets it binds need to be undone.
......@@ -1000,7 +1001,8 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
// Track SyncHelper object been added into secondary command buffer that has not been flushed to
// vulkan.
bool mSyncObjectPendingFlush;
uint32_t mDeferredFlushCount;
bool mEGLSyncObjectPendingFlush;
bool mHasDeferredFlush;
// Semaphores that must be waited on in the next submission.
std::vector<VkSemaphore> mWaitSemaphores;
......
......@@ -12,6 +12,7 @@
#include "common/MemoryBuffer.h"
#include "libANGLE/renderer/DisplayImpl.h"
#include "libANGLE/renderer/vulkan/ResourceVk.h"
#include "libANGLE/renderer/vulkan/vk_cache_utils.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
......@@ -34,6 +35,15 @@ class ShareGroupVk : public ShareGroupImpl
DescriptorSetLayoutCache &getDescriptorSetLayoutCache() { return mDescriptorSetLayoutCache; }
ShareContextSet *getShareContextSet() { return &mShareContextSet; }
std::vector<vk::ResourceUseList> &&releaseResourceUseLists()
{
return std::move(mResourceUseLists);
}
void acquireResourceUseList(vk::ResourceUseList &&resourceUseList)
{
mResourceUseLists.emplace_back(std::move(resourceUseList));
}
private:
// ANGLE uses a PipelineLayout cache to store compatible pipeline layouts.
PipelineLayoutCache mPipelineLayoutCache;
......@@ -43,6 +53,10 @@ class ShareGroupVk : public ShareGroupImpl
// The list of contexts within the share group
ShareContextSet mShareContextSet;
// List of resources currently used that need to be freed when any ContextVk in this
// ShareGroupVk submits the next command.
std::vector<vk::ResourceUseList> mResourceUseLists;
};
class DisplayVk : public DisplayImpl, public vk::Context
......
......@@ -29,7 +29,7 @@ void FenceNVVk::onDestroy(const gl::Context *context)
angle::Result FenceNVVk::set(const gl::Context *context, GLenum condition)
{
ASSERT(condition == GL_ALL_COMPLETED_NV);
return mFenceSync.initialize(vk::GetImpl(context));
return mFenceSync.initialize(vk::GetImpl(context), false);
}
angle::Result FenceNVVk::test(const gl::Context *context, GLboolean *outFinished)
......
......@@ -2511,7 +2511,7 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
const vk::Semaphore *signalSemaphore,
vk::ResourceUseList &&resourceUseList,
std::vector<vk::ResourceUseList> &&resourceUseLists,
vk::GarbageList &&currentGarbage,
vk::CommandPool *commandPool)
{
......@@ -2538,7 +2538,10 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
waitSemaphores.clear();
waitSemaphoreStageMasks.clear();
resourceUseList.releaseResourceUsesAndUpdateSerials(submitQueueSerial);
for (vk::ResourceUseList &it : resourceUseLists)
{
it.releaseResourceUsesAndUpdateSerials(submitQueueSerial);
}
return angle::Result::Continue;
}
......
......@@ -332,7 +332,7 @@ class RendererVk : angle::NonCopyable
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
const vk::Semaphore *signalSemaphore,
vk::ResourceUseList &&resourceUseList,
std::vector<vk::ResourceUseList> &&resourceUseLists,
vk::GarbageList &&currentGarbage,
vk::CommandPool *commandPool);
......
......@@ -83,7 +83,7 @@ void SyncHelper::releaseToRenderer(RendererVk *renderer)
renderer->collectGarbageAndReinit(&mUse, &mEvent);
}
angle::Result SyncHelper::initialize(ContextVk *contextVk)
angle::Result SyncHelper::initialize(ContextVk *contextVk, bool isEglSyncObject)
{
ASSERT(!mEvent.valid());
......@@ -108,7 +108,14 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk)
commandBuffer->setEvent(mEvent.getHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
retain(&contextVk->getResourceUseList());
contextVk->onSyncHelperInitialize();
if (isEglSyncObject)
{
contextVk->onEGLSyncHelperInitialize();
}
else
{
contextVk->onSyncHelperInitialize();
}
return angle::Result::Continue;
}
......@@ -137,19 +144,13 @@ angle::Result SyncHelper::clientWait(Context *context,
return angle::Result::Continue;
}
if (flushCommands && contextVk)
// We defer (ignore) flushes, so it's possible that the glFence's signal operation is pending
// submission.
if ((flushCommands && contextVk) || usedInRecordedCommands())
{
ANGLE_TRY(contextVk->flushImpl(nullptr));
}
// Undefined behaviour. Early exit.
if (usedInRecordedCommands())
{
WARN() << "Waiting on a sync that is not flushed";
*outResult = VK_TIMEOUT;
return angle::Result::Continue;
}
ASSERT(mUse.getSerial().valid());
VkResult status = VK_SUCCESS;
......@@ -401,7 +402,7 @@ angle::Result SyncVk::set(const gl::Context *context, GLenum condition, GLbitfie
ASSERT(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
ASSERT(flags == 0);
return mSyncHelper.initialize(vk::GetImpl(context));
return mSyncHelper.initialize(vk::GetImpl(context), false);
}
angle::Result SyncVk::clientWait(const gl::Context *context,
......@@ -484,7 +485,7 @@ egl::Error EGLSyncVk::initialize(const egl::Display *display,
case EGL_SYNC_FENCE_KHR:
ASSERT(mAttribs.isEmpty());
mSyncHelper = new vk::SyncHelper();
if (mSyncHelper->initialize(vk::GetImpl(context)) == angle::Result::Stop)
if (mSyncHelper->initialize(vk::GetImpl(context), true) == angle::Result::Stop)
{
return egl::Error(EGL_BAD_ALLOC, "eglCreateSyncKHR failed to create sync object");
}
......
......@@ -38,7 +38,7 @@ class SyncHelper : public vk::Resource
virtual void releaseToRenderer(RendererVk *renderer);
virtual angle::Result initialize(ContextVk *contextVk);
virtual angle::Result initialize(ContextVk *contextVk, bool isEglSyncObject);
virtual angle::Result clientWait(Context *context,
ContextVk *contextVk,
bool flushCommands,
......
......@@ -13,6 +13,7 @@
#include "test_utils/gl_raii.h"
#include "util/EGLWindow.h"
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
......@@ -360,6 +361,8 @@ TEST_P(EGLContextSharingTest, SamplerLifetime)
TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
{
ANGLE_SKIP_TEST_IF(!platformSupportsMultithreading());
// GL Fences require GLES 3.0+
ANGLE_SKIP_TEST_IF(getClientMajorVersion() < 3);
// Initialize contexts
EGLWindow *window = getEGLWindow();
......@@ -423,6 +426,8 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
// Synchronization tools to ensure the two threads are interleaved as designed by this test.
std::mutex mutex;
std::condition_variable condVar;
std::atomic<GLsync> deletingThreadSyncObj;
std::atomic<GLsync> continuingThreadSyncObj;
enum class Step
{
......@@ -498,16 +503,25 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
// Draw using the shared texture.
drawQuad(program[0].get(), essl1_shaders::PositionAttrib(), 0.5f);
deletingThreadSyncObj = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
ASSERT_GL_NO_ERROR();
// Force the fence to be created
glFlush();
// Wait for the other thread to also draw using the shared texture.
nextStep(Step::Thread0Draw);
ASSERT_TRUE(waitForStep(Step::Thread1Draw));
ASSERT_TRUE(continuingThreadSyncObj != nullptr);
glWaitSync(continuingThreadSyncObj, 0, GL_TIMEOUT_IGNORED);
ASSERT_GL_NO_ERROR();
glDeleteSync(continuingThreadSyncObj);
ASSERT_GL_NO_ERROR();
continuingThreadSyncObj = nullptr;
// Delete this thread's framebuffer (reader of the shared texture).
fbo[0].reset();
// Flush to make sure the graph nodes associated with this context are deleted.
glFlush();
// Wait for the other thread to use the shared texture again before unbinding the
// context (so no implicit flush happens).
nextStep(Step::Thread0Delete);
......@@ -527,9 +541,21 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
// Wait for first thread to draw using the shared texture.
ASSERT_TRUE(waitForStep(Step::Thread0Draw));
ASSERT_TRUE(deletingThreadSyncObj != nullptr);
glWaitSync(deletingThreadSyncObj, 0, GL_TIMEOUT_IGNORED);
ASSERT_GL_NO_ERROR();
glDeleteSync(deletingThreadSyncObj);
ASSERT_GL_NO_ERROR();
deletingThreadSyncObj = nullptr;
// Draw using the shared texture.
drawQuad(program[0].get(), essl1_shaders::PositionAttrib(), 0.5f);
continuingThreadSyncObj = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
ASSERT_GL_NO_ERROR();
// Force the fence to be created
glFlush();
// Wait for the other thread to delete its framebuffer.
nextStep(Step::Thread1Draw);
ASSERT_TRUE(waitForStep(Step::Thread0Delete));
......
......@@ -214,17 +214,13 @@ TEST_P(EGLSyncTest, BasicOperations)
glFlush();
EGLint value = 0;
unsigned int loopCount = 0;
// Use 'loopCount' to make sure the test doesn't get stuck in an infinite loop
while (value != EGL_SIGNALED_KHR && loopCount <= 1000000)
{
loopCount++;
EXPECT_EGL_TRUE(eglGetSyncAttribKHR(display, sync, EGL_SYNC_STATUS_KHR, &value));
}
glClear(GL_COLOR_BUFFER_BIT);
ASSERT_EQ(value, EGL_SIGNALED_KHR);
// Don't wait forever to make sure the test terminates
constexpr GLuint64 kTimeout = 1'000'000'000; // 1 second
EGLint value = 0;
ASSERT_EQ(EGL_CONDITION_SATISFIED_KHR,
eglClientWaitSyncKHR(display, sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR, kTimeout));
for (size_t i = 0; i < 20; i++)
{
......
......@@ -230,6 +230,9 @@ TEST_P(FenceSyncTest, BasicQueries)
// Test that basic usage works and doesn't generate errors or crash
TEST_P(FenceSyncTest, BasicOperations)
{
// TODO(http://anglebug.com/5425): glClientWaitSync() returns GL_TIMEOUT_EXPIRED
ANGLE_SKIP_TEST_IF(IsAMD() && IsD3D11());
glClearColor(1.0f, 0.0f, 1.0f, 1.0f);
GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
......@@ -238,28 +241,17 @@ TEST_P(FenceSyncTest, BasicOperations)
glWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
EXPECT_GL_NO_ERROR();
GLsizei length = 0;
GLint value = 0;
unsigned int loopCount = 0;
glFlush();
// Use 'loopCount' to make sure the test doesn't get stuck in an infinite loop
while (value != GL_SIGNALED && loopCount <= 1000000)
{
loopCount++;
glGetSynciv(sync, GL_SYNC_STATUS, 1, &length, &value);
ASSERT_GL_NO_ERROR();
}
ASSERT_GLENUM_EQ(GL_SIGNALED, value);
// Don't wait forever to make sure the test terminates
constexpr GLuint64 kTimeout = 1'000'000'000; // 1 second
GLint value = 0;
for (size_t i = 0; i < 20; i++)
{
glClear(GL_COLOR_BUFFER_BIT);
glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
value = glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, kTimeout);
EXPECT_GL_NO_ERROR();
ASSERT_TRUE(value == GL_CONDITION_SATISFIED || value == GL_ALREADY_SIGNALED);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment