Commit a19bd601 by Corentin Wallez Committed by Commit Bot

Revert "Vulkan: Ignore glFlush to reduce vkQueueSubmits in Asphalt 9"

This reverts commit 5cf7472d. Reason for revert: causes timeouts, see anglebug.com/5470 Original change's description: > Vulkan: Ignore glFlush to reduce vkQueueSubmits in Asphalt 9 > > Multithreaded apps can use the following pattern: > > glDrawElements() > glFenceSync() > glFlush() > glWaitSync() > > This currently results in a vkQueueSubmit for every glFlush() to ensure > that the work has landed in the command queue in the correct order. > However, ANGLE can instead avoid the vkQueueSubmit during the glFlush() > in this situation by instead flushing the ContextVk's commands and > ending the render pass to ensure the commands are submitted in the > correct order to the renderer. This improves performance for Asphalt 9 > by reducing frame times from 150-200msec to 35-55msec. > > Specifically, ANGLE will call flushCommandsAndEndRenderPass() when > there is a sync object pending a flush or if the ContextVk is currently > shared. > > Additionally, on all devices except Qualcomm, ANGLE can ignore all other > glFlush() calls entirely and return immediately. For Qualcomm devices, > ANGLE is still required to perform a full flush (resulting in a > vkQueueSubmit), since ignoring the glFlush() reduces the Manhattan 3.0 > offscreen score by ~3%. > > Bug: angleproject:5306 > Bug: angleproject:5425 > Change-Id: I9d747caf5bf306166be0fec630a78caf41208c27 > Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2552718 > Commit-Queue: Tim Van Patten <timvp@google.com> > Reviewed-by: Charlie Lao <cclao@google.com> > Reviewed-by: Jamie Madill <jmadill@chromium.org> TBR=timvp@google.com,jmadill@chromium.org,cclao@google.com Change-Id: I9886bf901a835d408b6a4b8be7ea408fa2121be0 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: angleproject:5306 Bug: angleproject:5425 Bug: angleproject:5470 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2595032Reviewed-by: 's avatarCorentin Wallez <cwallez@chromium.org> Commit-Queue: Corentin Wallez <cwallez@chromium.org>
parent 0c87891f
......@@ -397,8 +397,7 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mRenderPassCommands(nullptr),
mGpuEventsEnabled(false),
mSyncObjectPendingFlush(false),
mEGLSyncObjectPendingFlush(false),
mHasDeferredFlush(false),
mDeferredFlushCount(0),
mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
mGpuEventTimestampOrigin(0),
mPerfCounters{},
......@@ -702,29 +701,14 @@ angle::Result ContextVk::initialize()
angle::Result ContextVk::flush(const gl::Context *context)
{
// If a sync object has been used or this is a shared context, then we need to flush the
// commands and end the render pass to make sure the sync object (and any preceding commands)
// lands in the correct place within the command stream.
// EGL sync objects can span across context share groups, so don't defer flushes if there's one
// pending a flush.
if (mSyncObjectPendingFlush && context->isShared() && !mEGLSyncObjectPendingFlush)
// If we are in middle of renderpass and it is not a shared context, then we will defer the
// glFlush call here until the renderpass ends. If sync object has been used, we must respect
// glFlush call, otherwise we a wait for sync object without GL_SYNC_FLUSH_COMMANDS_BIT may
// never come back.
if (mRenderer->getFeatures().deferFlushUntilEndRenderPass.enabled && !context->isShared() &&
!mSyncObjectPendingFlush && hasStartedRenderPass())
{
// Flush the commands to create a sync point in the command stream.
ANGLE_TRY(flushCommandsAndEndRenderPass());
// Move the resources to the share group, so they are released during the next vkQueueSubmit
// performed by any context in the share group. Note that this relies heavily on the global
// mutex to guarantee that no two contexts are modifying the lists at the same time.
getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
mHasDeferredFlush = true;
return angle::Result::Continue;
}
// EGL sync objects can span across context share groups, so don't defer flushes if there's one
// pending a flush.
if (!mEGLSyncObjectPendingFlush &&
mRenderer->getFeatures().deferFlushUntilEndRenderPass.enabled && hasStartedRenderPass())
{
mHasDeferredFlush = true;
mDeferredFlushCount++;
return angle::Result::Continue;
}
......@@ -1586,11 +1570,9 @@ angle::Result ContextVk::submitFrame(const vk::Semaphore *signalSemaphore)
dumpCommandStreamDiagnostics();
}
getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
ANGLE_TRY(mRenderer->submitFrame(this, mContextPriority, std::move(mWaitSemaphores),
std::move(mWaitSemaphoreStageMasks), signalSemaphore,
getShareGroupVk()->releaseResourceUseLists(),
std::move(mCurrentGarbage), &mCommandPool));
ANGLE_TRY(mRenderer->submitFrame(
this, mContextPriority, std::move(mWaitSemaphores), std::move(mWaitSemaphoreStageMasks),
signalSemaphore, std::move(mResourceUseList), std::move(mCurrentGarbage), &mCommandPool));
onRenderPassFinished();
mComputeDirtyBits |= mNewComputeCommandBufferDirtyBits;
......@@ -4275,9 +4257,9 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
{
ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushImpl");
// We must set this to false before calling flushCommandsAndEndRenderPass to prevent it from
// We must set this to zero before calling flushCommandsAndEndRenderPass to prevent it from
// calling back to flushImpl.
mHasDeferredFlush = false;
mDeferredFlushCount = 0;
mSyncObjectPendingFlush = false;
ANGLE_TRY(flushCommandsAndEndRenderPass());
......@@ -4670,6 +4652,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
if (!mRenderPassCommands->started())
{
ASSERT(!mDeferredFlushCount);
onRenderPassFinished();
return angle::Result::Continue;
}
......@@ -4725,7 +4708,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
ANGLE_TRY(flushOutsideRenderPassCommands());
}
if (mHasDeferredFlush)
if (mDeferredFlushCount > 0)
{
// If we have deferred glFlush call in the middle of renderpass, flush them now.
ANGLE_TRY(flushImpl(nullptr));
......
......@@ -569,7 +569,6 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
vk::PerfCounters &getPerfCounters() { return mPerfCounters; }
void onSyncHelperInitialize() { mSyncObjectPendingFlush = true; }
void onEGLSyncHelperInitialize() { mEGLSyncObjectPendingFlush = true; }
// When UtilsVk issues a draw call on the currently running render pass, the pipelines and
// descriptor sets it binds need to be undone.
......@@ -1001,8 +1000,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
// Track SyncHelper object been added into secondary command buffer that has not been flushed to
// vulkan.
bool mSyncObjectPendingFlush;
bool mEGLSyncObjectPendingFlush;
bool mHasDeferredFlush;
uint32_t mDeferredFlushCount;
// Semaphores that must be waited on in the next submission.
std::vector<VkSemaphore> mWaitSemaphores;
......
......@@ -12,7 +12,6 @@
#include "common/MemoryBuffer.h"
#include "libANGLE/renderer/DisplayImpl.h"
#include "libANGLE/renderer/vulkan/ResourceVk.h"
#include "libANGLE/renderer/vulkan/vk_cache_utils.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
......@@ -35,15 +34,6 @@ class ShareGroupVk : public ShareGroupImpl
DescriptorSetLayoutCache &getDescriptorSetLayoutCache() { return mDescriptorSetLayoutCache; }
ShareContextSet *getShareContextSet() { return &mShareContextSet; }
std::vector<vk::ResourceUseList> &&releaseResourceUseLists()
{
return std::move(mResourceUseLists);
}
void acquireResourceUseList(vk::ResourceUseList &&resourceUseList)
{
mResourceUseLists.emplace_back(std::move(resourceUseList));
}
private:
// ANGLE uses a PipelineLayout cache to store compatible pipeline layouts.
PipelineLayoutCache mPipelineLayoutCache;
......@@ -53,10 +43,6 @@ class ShareGroupVk : public ShareGroupImpl
// The list of contexts within the share group
ShareContextSet mShareContextSet;
// List of resources currently used that need to be freed when any ContextVk in this
// ShareGroupVk submits the next command.
std::vector<vk::ResourceUseList> mResourceUseLists;
};
class DisplayVk : public DisplayImpl, public vk::Context
......
......@@ -29,7 +29,7 @@ void FenceNVVk::onDestroy(const gl::Context *context)
angle::Result FenceNVVk::set(const gl::Context *context, GLenum condition)
{
ASSERT(condition == GL_ALL_COMPLETED_NV);
return mFenceSync.initialize(vk::GetImpl(context), false);
return mFenceSync.initialize(vk::GetImpl(context));
}
angle::Result FenceNVVk::test(const gl::Context *context, GLboolean *outFinished)
......
......@@ -2513,7 +2513,7 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
const vk::Semaphore *signalSemaphore,
std::vector<vk::ResourceUseList> &&resourceUseLists,
vk::ResourceUseList &&resourceUseList,
vk::GarbageList &&currentGarbage,
vk::CommandPool *commandPool)
{
......@@ -2540,10 +2540,7 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
waitSemaphores.clear();
waitSemaphoreStageMasks.clear();
for (vk::ResourceUseList &it : resourceUseLists)
{
it.releaseResourceUsesAndUpdateSerials(submitQueueSerial);
}
resourceUseList.releaseResourceUsesAndUpdateSerials(submitQueueSerial);
return angle::Result::Continue;
}
......
......@@ -332,7 +332,7 @@ class RendererVk : angle::NonCopyable
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
const vk::Semaphore *signalSemaphore,
std::vector<vk::ResourceUseList> &&resourceUseLists,
vk::ResourceUseList &&resourceUseList,
vk::GarbageList &&currentGarbage,
vk::CommandPool *commandPool);
......
......@@ -83,7 +83,7 @@ void SyncHelper::releaseToRenderer(RendererVk *renderer)
renderer->collectGarbageAndReinit(&mUse, &mEvent);
}
angle::Result SyncHelper::initialize(ContextVk *contextVk, bool isEglSyncObject)
angle::Result SyncHelper::initialize(ContextVk *contextVk)
{
ASSERT(!mEvent.valid());
......@@ -108,14 +108,7 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk, bool isEglSyncObject)
commandBuffer->setEvent(mEvent.getHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
retain(&contextVk->getResourceUseList());
if (isEglSyncObject)
{
contextVk->onEGLSyncHelperInitialize();
}
else
{
contextVk->onSyncHelperInitialize();
}
contextVk->onSyncHelperInitialize();
return angle::Result::Continue;
}
......@@ -144,13 +137,19 @@ angle::Result SyncHelper::clientWait(Context *context,
return angle::Result::Continue;
}
// We defer (ignore) flushes, so it's possible that the glFence's signal operation is pending
// submission.
if ((flushCommands && contextVk) || usedInRecordedCommands())
if (flushCommands && contextVk)
{
ANGLE_TRY(contextVk->flushImpl(nullptr));
}
// Undefined behaviour. Early exit.
if (usedInRecordedCommands())
{
WARN() << "Waiting on a sync that is not flushed";
*outResult = VK_TIMEOUT;
return angle::Result::Continue;
}
ASSERT(mUse.getSerial().valid());
VkResult status = VK_SUCCESS;
......@@ -402,7 +401,7 @@ angle::Result SyncVk::set(const gl::Context *context, GLenum condition, GLbitfie
ASSERT(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
ASSERT(flags == 0);
return mSyncHelper.initialize(vk::GetImpl(context), false);
return mSyncHelper.initialize(vk::GetImpl(context));
}
angle::Result SyncVk::clientWait(const gl::Context *context,
......@@ -485,7 +484,7 @@ egl::Error EGLSyncVk::initialize(const egl::Display *display,
case EGL_SYNC_FENCE_KHR:
ASSERT(mAttribs.isEmpty());
mSyncHelper = new vk::SyncHelper();
if (mSyncHelper->initialize(vk::GetImpl(context), true) == angle::Result::Stop)
if (mSyncHelper->initialize(vk::GetImpl(context)) == angle::Result::Stop)
{
return egl::Error(EGL_BAD_ALLOC, "eglCreateSyncKHR failed to create sync object");
}
......
......@@ -38,7 +38,7 @@ class SyncHelper : public vk::Resource
virtual void releaseToRenderer(RendererVk *renderer);
virtual angle::Result initialize(ContextVk *contextVk, bool isEglSyncObject);
virtual angle::Result initialize(ContextVk *contextVk);
virtual angle::Result clientWait(Context *context,
ContextVk *contextVk,
bool flushCommands,
......
......@@ -13,7 +13,6 @@
#include "test_utils/gl_raii.h"
#include "util/EGLWindow.h"
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
......@@ -361,8 +360,6 @@ TEST_P(EGLContextSharingTest, SamplerLifetime)
TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
{
ANGLE_SKIP_TEST_IF(!platformSupportsMultithreading());
// GL Fences require GLES 3.0+
ANGLE_SKIP_TEST_IF(getClientMajorVersion() < 3);
// Initialize contexts
EGLWindow *window = getEGLWindow();
......@@ -426,8 +423,6 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
// Synchronization tools to ensure the two threads are interleaved as designed by this test.
std::mutex mutex;
std::condition_variable condVar;
std::atomic<GLsync> deletingThreadSyncObj;
std::atomic<GLsync> continuingThreadSyncObj;
enum class Step
{
......@@ -503,25 +498,16 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
// Draw using the shared texture.
drawQuad(program[0].get(), essl1_shaders::PositionAttrib(), 0.5f);
deletingThreadSyncObj = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
ASSERT_GL_NO_ERROR();
// Force the fence to be created
glFlush();
// Wait for the other thread to also draw using the shared texture.
nextStep(Step::Thread0Draw);
ASSERT_TRUE(waitForStep(Step::Thread1Draw));
ASSERT_TRUE(continuingThreadSyncObj != nullptr);
glWaitSync(continuingThreadSyncObj, 0, GL_TIMEOUT_IGNORED);
ASSERT_GL_NO_ERROR();
glDeleteSync(continuingThreadSyncObj);
ASSERT_GL_NO_ERROR();
continuingThreadSyncObj = nullptr;
// Delete this thread's framebuffer (reader of the shared texture).
fbo[0].reset();
// Flush to make sure the graph nodes associated with this context are deleted.
glFlush();
// Wait for the other thread to use the shared texture again before unbinding the
// context (so no implicit flush happens).
nextStep(Step::Thread0Delete);
......@@ -541,21 +527,9 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
// Wait for first thread to draw using the shared texture.
ASSERT_TRUE(waitForStep(Step::Thread0Draw));
ASSERT_TRUE(deletingThreadSyncObj != nullptr);
glWaitSync(deletingThreadSyncObj, 0, GL_TIMEOUT_IGNORED);
ASSERT_GL_NO_ERROR();
glDeleteSync(deletingThreadSyncObj);
ASSERT_GL_NO_ERROR();
deletingThreadSyncObj = nullptr;
// Draw using the shared texture.
drawQuad(program[0].get(), essl1_shaders::PositionAttrib(), 0.5f);
continuingThreadSyncObj = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
ASSERT_GL_NO_ERROR();
// Force the fence to be created
glFlush();
// Wait for the other thread to delete its framebuffer.
nextStep(Step::Thread1Draw);
ASSERT_TRUE(waitForStep(Step::Thread0Delete));
......
......@@ -214,13 +214,17 @@ TEST_P(EGLSyncTest, BasicOperations)
glFlush();
glClear(GL_COLOR_BUFFER_BIT);
EGLint value = 0;
unsigned int loopCount = 0;
// Use 'loopCount' to make sure the test doesn't get stuck in an infinite loop
while (value != EGL_SIGNALED_KHR && loopCount <= 1000000)
{
loopCount++;
EXPECT_EGL_TRUE(eglGetSyncAttribKHR(display, sync, EGL_SYNC_STATUS_KHR, &value));
}
// Don't wait forever to make sure the test terminates
constexpr GLuint64 kTimeout = 1'000'000'000; // 1 second
EGLint value = 0;
ASSERT_EQ(EGL_CONDITION_SATISFIED_KHR,
eglClientWaitSyncKHR(display, sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR, kTimeout));
ASSERT_EQ(value, EGL_SIGNALED_KHR);
for (size_t i = 0; i < 20; i++)
{
......
......@@ -230,9 +230,6 @@ TEST_P(FenceSyncTest, BasicQueries)
// Test that basic usage works and doesn't generate errors or crash
TEST_P(FenceSyncTest, BasicOperations)
{
// TODO(http://anglebug.com/5425): glClientWaitSync() returns GL_TIMEOUT_EXPIRED
ANGLE_SKIP_TEST_IF(IsAMD() && IsD3D11());
glClearColor(1.0f, 0.0f, 1.0f, 1.0f);
GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
......@@ -241,17 +238,28 @@ TEST_P(FenceSyncTest, BasicOperations)
glWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
EXPECT_GL_NO_ERROR();
GLsizei length = 0;
GLint value = 0;
unsigned int loopCount = 0;
glFlush();
// Don't wait forever to make sure the test terminates
constexpr GLuint64 kTimeout = 1'000'000'000; // 1 second
GLint value = 0;
// Use 'loopCount' to make sure the test doesn't get stuck in an infinite loop
while (value != GL_SIGNALED && loopCount <= 1000000)
{
loopCount++;
glGetSynciv(sync, GL_SYNC_STATUS, 1, &length, &value);
ASSERT_GL_NO_ERROR();
}
ASSERT_GLENUM_EQ(GL_SIGNALED, value);
for (size_t i = 0; i < 20; i++)
{
glClear(GL_COLOR_BUFFER_BIT);
value = glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, kTimeout);
glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
EXPECT_GL_NO_ERROR();
ASSERT_TRUE(value == GL_CONDITION_SATISFIED || value == GL_ALREADY_SIGNALED);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment