Revert "Vulkan: Ignore glFlush to reduce vkQueueSubmits in Asphalt 9"

This reverts commit 5cf7472d. Reason for revert: causes timeouts, see anglebug.com/5470 Original change's description: > Vulkan: Ignore glFlush to reduce vkQueueSubmits in Asphalt 9 > > Multithreaded apps can use the following pattern: > > glDrawElements() > glFenceSync() > glFlush() > glWaitSync() > > This currently results in a vkQueueSubmit for every glFlush() to ensure > that the work has landed in the command queue in the correct order. > However, ANGLE can instead avoid the vkQueueSubmit during the glFlush() > in this situation by instead flushing the ContextVk's commands and > ending the render pass to ensure the commands are submitted in the > correct order to the renderer. This improves performance for Asphalt 9 > by reducing frame times from 150-200msec to 35-55msec. > > Specifically, ANGLE will call flushCommandsAndEndRenderPass() when > there is a sync object pending a flush or if the ContextVk is currently > shared. > > Additionally, on all devices except Qualcomm, ANGLE can ignore all other > glFlush() calls entirely and return immediately. For Qualcomm devices, > ANGLE is still required to perform a full flush (resulting in a > vkQueueSubmit), since ignoring the glFlush() reduces the Manhattan 3.0 > offscreen score by ~3%. > > Bug: angleproject:5306 > Bug: angleproject:5425 > Change-Id: I9d747caf5bf306166be0fec630a78caf41208c27 > Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2552718 > Commit-Queue: Tim Van Patten <timvp@google.com> > Reviewed-by: Charlie Lao <cclao@google.com> > Reviewed-by: Jamie Madill <jmadill@chromium.org> TBR=timvp@google.com,jmadill@chromium.org,cclao@google.com Change-Id: I9886bf901a835d408b6a4b8be7ea408fa2121be0 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: angleproject:5306 Bug: angleproject:5425 Bug: angleproject:5470 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2595032Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Corentin Wallez <cwallez@chromium.org>

Revert "Vulkan: Ignore glFlush to reduce vkQueueSubmits in Asphalt 9"
a19bd601 · Corentin Wallez · Commit Bot · 0c87891f · a19bd601 · a19bd601
Commit a19bd601 authored Dec 16, 2020 by Corentin Wallez Committed by Commit Bot Dec 16, 2020
11 changed files
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -397,8 +397,7 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk 
      mRenderPassCommands(nullptr),
      mGpuEventsEnabled(false),
      mSyncObjectPendingFlush(false),
-      mEGLSyncObjectPendingFlush(false),
-      mHasDeferredFlush(false),
+      mDeferredFlushCount(0),
      mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
      mGpuEventTimestampOrigin(0),
      mPerfCounters{},
@@ -702,29 +701,14 @@ angle::Result ContextVk::initialize()

 angle::Result ContextVk::flush(const gl::Context *context)
 {
-    // If a sync object has been used or this is a shared context, then we need to flush the
-    // commands and end the render pass to make sure the sync object (and any preceding commands)
-    // lands in the correct place within the command stream.
-    // EGL sync objects can span across context share groups, so don't defer flushes if there's one
-    // pending a flush.
-    if (mSyncObjectPendingFlush && context->isShared() && !mEGLSyncObjectPendingFlush)
+    // If we are in middle of renderpass and it is not a shared context, then we will defer the
+    // glFlush call here until the renderpass ends. If sync object has been used, we must respect
+    // glFlush call, otherwise we a wait for sync object without GL_SYNC_FLUSH_COMMANDS_BIT may
+    // never come back.
+    if (mRenderer->getFeatures().deferFlushUntilEndRenderPass.enabled && !context->isShared() &&
+        !mSyncObjectPendingFlush && hasStartedRenderPass())
    {
-        // Flush the commands to create a sync point in the command stream.
-        ANGLE_TRY(flushCommandsAndEndRenderPass());
-        // Move the resources to the share group, so they are released during the next vkQueueSubmit
-        // performed by any context in the share group. Note that this relies heavily on the global
-        // mutex to guarantee that no two contexts are modifying the lists at the same time.
-        getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
-        mHasDeferredFlush = true;
-        return angle::Result::Continue;
-    }
-
-    // EGL sync objects can span across context share groups, so don't defer flushes if there's one
-    // pending a flush.
-    if (!mEGLSyncObjectPendingFlush &&
-        mRenderer->getFeatures().deferFlushUntilEndRenderPass.enabled && hasStartedRenderPass())
-    {
-        mHasDeferredFlush = true;
+        mDeferredFlushCount++;
        return angle::Result::Continue;
    }

@@ -1586,11 +1570,9 @@ angle::Result ContextVk::submitFrame(const vk::Semaphore *signalSemaphore)
        dumpCommandStreamDiagnostics();
    }

-    getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
-    ANGLE_TRY(mRenderer->submitFrame(this, mContextPriority, std::move(mWaitSemaphores),
-                                     std::move(mWaitSemaphoreStageMasks), signalSemaphore,
-                                     getShareGroupVk()->releaseResourceUseLists(),
-                                     std::move(mCurrentGarbage), &mCommandPool));
+    ANGLE_TRY(mRenderer->submitFrame(
+        this, mContextPriority, std::move(mWaitSemaphores), std::move(mWaitSemaphoreStageMasks),
+        signalSemaphore, std::move(mResourceUseList), std::move(mCurrentGarbage), &mCommandPool));

    onRenderPassFinished();
    mComputeDirtyBits |= mNewComputeCommandBufferDirtyBits;
@@ -4275,9 +4257,9 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
 {
    ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::flushImpl");

-    // We must set this to false before calling flushCommandsAndEndRenderPass to prevent it from
+    // We must set this to zero before calling flushCommandsAndEndRenderPass to prevent it from
    // calling back to flushImpl.
-    mHasDeferredFlush       = false;
+    mDeferredFlushCount     = 0;
    mSyncObjectPendingFlush = false;

    ANGLE_TRY(flushCommandsAndEndRenderPass());
@@ -4670,6 +4652,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()

    if (!mRenderPassCommands->started())
    {
+        ASSERT(!mDeferredFlushCount);
        onRenderPassFinished();
        return angle::Result::Continue;
    }
@@ -4725,7 +4708,7 @@ angle::Result ContextVk::flushCommandsAndEndRenderPass()
        ANGLE_TRY(flushOutsideRenderPassCommands());
    }

-    if (mHasDeferredFlush)
+    if (mDeferredFlushCount > 0)
    {
        // If we have deferred glFlush call in the middle of renderpass, flush them now.
        ANGLE_TRY(flushImpl(nullptr));

--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -569,7 +569,6 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
    vk::PerfCounters &getPerfCounters() { return mPerfCounters; }

    void onSyncHelperInitialize() { mSyncObjectPendingFlush = true; }
-    void onEGLSyncHelperInitialize() { mEGLSyncObjectPendingFlush = true; }

    // When UtilsVk issues a draw call on the currently running render pass, the pipelines and
    // descriptor sets it binds need to be undone.
@@ -1001,8 +1000,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
    // Track SyncHelper object been added into secondary command buffer that has not been flushed to
    // vulkan.
    bool mSyncObjectPendingFlush;
-    bool mEGLSyncObjectPendingFlush;
-    bool mHasDeferredFlush;
+    uint32_t mDeferredFlushCount;

    // Semaphores that must be waited on in the next submission.
    std::vector<VkSemaphore> mWaitSemaphores;

--- a/src/libANGLE/renderer/vulkan/DisplayVk.h
+++ b/src/libANGLE/renderer/vulkan/DisplayVk.h
@@ -12,7 +12,6 @@

 #include "common/MemoryBuffer.h"
 #include "libANGLE/renderer/DisplayImpl.h"
-#include "libANGLE/renderer/vulkan/ResourceVk.h"
 #include "libANGLE/renderer/vulkan/vk_cache_utils.h"
 #include "libANGLE/renderer/vulkan/vk_utils.h"

@@ -35,15 +34,6 @@ class ShareGroupVk : public ShareGroupImpl
    DescriptorSetLayoutCache &getDescriptorSetLayoutCache() { return mDescriptorSetLayoutCache; }
    ShareContextSet *getShareContextSet() { return &mShareContextSet; }

-    std::vector<vk::ResourceUseList> &&releaseResourceUseLists()
-    {
-        return std::move(mResourceUseLists);
-    }
-    void acquireResourceUseList(vk::ResourceUseList &&resourceUseList)
-    {
-        mResourceUseLists.emplace_back(std::move(resourceUseList));
-    }
-
  private:
    // ANGLE uses a PipelineLayout cache to store compatible pipeline layouts.
    PipelineLayoutCache mPipelineLayoutCache;
@@ -53,10 +43,6 @@ class ShareGroupVk : public ShareGroupImpl

    // The list of contexts within the share group
    ShareContextSet mShareContextSet;
-
-    // List of resources currently used that need to be freed when any ContextVk in this
-    // ShareGroupVk submits the next command.
-    std::vector<vk::ResourceUseList> mResourceUseLists;
 };

 class DisplayVk : public DisplayImpl, public vk::Context

--- a/src/libANGLE/renderer/vulkan/FenceNVVk.cpp
+++ b/src/libANGLE/renderer/vulkan/FenceNVVk.cpp
@@ -29,7 +29,7 @@ void FenceNVVk::onDestroy(const gl::Context *context)
 angle::Result FenceNVVk::set(const gl::Context *context, GLenum condition)
 {
    ASSERT(condition == GL_ALL_COMPLETED_NV);
-    return mFenceSync.initialize(vk::GetImpl(context), false);
+    return mFenceSync.initialize(vk::GetImpl(context));
 }

 angle::Result FenceNVVk::test(const gl::Context *context, GLboolean *outFinished)

--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -2513,7 +2513,7 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
                                      std::vector<VkSemaphore> &&waitSemaphores,
                                      std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
                                      const vk::Semaphore *signalSemaphore,
-                                      std::vector<vk::ResourceUseList> &&resourceUseLists,
+                                      vk::ResourceUseList &&resourceUseList,
                                      vk::GarbageList &&currentGarbage,
                                      vk::CommandPool *commandPool)
 {
@@ -2540,10 +2540,7 @@ angle::Result RendererVk::submitFrame(vk::Context *context,

    waitSemaphores.clear();
    waitSemaphoreStageMasks.clear();
-    for (vk::ResourceUseList &it : resourceUseLists)
-    {
-        it.releaseResourceUsesAndUpdateSerials(submitQueueSerial);
-    }
+    resourceUseList.releaseResourceUsesAndUpdateSerials(submitQueueSerial);

    return angle::Result::Continue;
 }

--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@@ -332,7 +332,7 @@ class RendererVk : angle::NonCopyable
                              std::vector<VkSemaphore> &&waitSemaphores,
                              std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks,
                              const vk::Semaphore *signalSemaphore,
-                              std::vector<vk::ResourceUseList> &&resourceUseLists,
+                              vk::ResourceUseList &&resourceUseList,
                              vk::GarbageList &&currentGarbage,
                              vk::CommandPool *commandPool);


--- a/src/libANGLE/renderer/vulkan/SyncVk.cpp
+++ b/src/libANGLE/renderer/vulkan/SyncVk.cpp
@@ -83,7 +83,7 @@ void SyncHelper::releaseToRenderer(RendererVk *renderer)
    renderer->collectGarbageAndReinit(&mUse, &mEvent);
 }

-angle::Result SyncHelper::initialize(ContextVk *contextVk, bool isEglSyncObject)
+angle::Result SyncHelper::initialize(ContextVk *contextVk)
 {
    ASSERT(!mEvent.valid());

@@ -108,14 +108,7 @@ angle::Result SyncHelper::initialize(ContextVk *contextVk, bool isEglSyncObject)
    commandBuffer->setEvent(mEvent.getHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
    retain(&contextVk->getResourceUseList());

-    if (isEglSyncObject)
-    {
-        contextVk->onEGLSyncHelperInitialize();
-    }
-    else
-    {
-        contextVk->onSyncHelperInitialize();
-    }
+    contextVk->onSyncHelperInitialize();

    return angle::Result::Continue;
 }
@@ -144,13 +137,19 @@ angle::Result SyncHelper::clientWait(Context *context,
        return angle::Result::Continue;
    }

-    // We defer (ignore) flushes, so it's possible that the glFence's signal operation is pending
-    // submission.
-    if ((flushCommands && contextVk) || usedInRecordedCommands())
+    if (flushCommands && contextVk)
    {
        ANGLE_TRY(contextVk->flushImpl(nullptr));
    }

+    // Undefined behaviour. Early exit.
+    if (usedInRecordedCommands())
+    {
+        WARN() << "Waiting on a sync that is not flushed";
+        *outResult = VK_TIMEOUT;
+        return angle::Result::Continue;
+    }
+
    ASSERT(mUse.getSerial().valid());

    VkResult status = VK_SUCCESS;
@@ -402,7 +401,7 @@ angle::Result SyncVk::set(const gl::Context *context, GLenum condition, GLbitfie
    ASSERT(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
    ASSERT(flags == 0);

-    return mSyncHelper.initialize(vk::GetImpl(context), false);
+    return mSyncHelper.initialize(vk::GetImpl(context));
 }

 angle::Result SyncVk::clientWait(const gl::Context *context,
@@ -485,7 +484,7 @@ egl::Error EGLSyncVk::initialize(const egl::Display *display,
        case EGL_SYNC_FENCE_KHR:
            ASSERT(mAttribs.isEmpty());
            mSyncHelper = new vk::SyncHelper();
-            if (mSyncHelper->initialize(vk::GetImpl(context), true) == angle::Result::Stop)
+            if (mSyncHelper->initialize(vk::GetImpl(context)) == angle::Result::Stop)
            {
                return egl::Error(EGL_BAD_ALLOC, "eglCreateSyncKHR failed to create sync object");
            }

--- a/src/libANGLE/renderer/vulkan/SyncVk.h
+++ b/src/libANGLE/renderer/vulkan/SyncVk.h
@@ -38,7 +38,7 @@ class SyncHelper : public vk::Resource

    virtual void releaseToRenderer(RendererVk *renderer);

-    virtual angle::Result initialize(ContextVk *contextVk, bool isEglSyncObject);
+    virtual angle::Result initialize(ContextVk *contextVk);
    virtual angle::Result clientWait(Context *context,
                                     ContextVk *contextVk,
                                     bool flushCommands,

--- a/src/tests/egl_tests/EGLContextSharingTest.cpp
+++ b/src/tests/egl_tests/EGLContextSharingTest.cpp
@@ -13,7 +13,6 @@
 #include "test_utils/gl_raii.h"
 #include "util/EGLWindow.h"

-#include <atomic>
 #include <condition_variable>
 #include <mutex>
 #include <thread>
@@ -361,8 +360,6 @@ TEST_P(EGLContextSharingTest, SamplerLifetime)
 TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
 {
    ANGLE_SKIP_TEST_IF(!platformSupportsMultithreading());
-    // GL Fences require GLES 3.0+
-    ANGLE_SKIP_TEST_IF(getClientMajorVersion() < 3);

    // Initialize contexts
    EGLWindow *window = getEGLWindow();
@@ -426,8 +423,6 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
    // Synchronization tools to ensure the two threads are interleaved as designed by this test.
    std::mutex mutex;
    std::condition_variable condVar;
-    std::atomic<GLsync> deletingThreadSyncObj;
-    std::atomic<GLsync> continuingThreadSyncObj;

    enum class Step
    {
@@ -503,25 +498,16 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
        // Draw using the shared texture.
        drawQuad(program[0].get(), essl1_shaders::PositionAttrib(), 0.5f);

-        deletingThreadSyncObj = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-        ASSERT_GL_NO_ERROR();
-        // Force the fence to be created
-        glFlush();
-
        // Wait for the other thread to also draw using the shared texture.
        nextStep(Step::Thread0Draw);
        ASSERT_TRUE(waitForStep(Step::Thread1Draw));

-        ASSERT_TRUE(continuingThreadSyncObj != nullptr);
-        glWaitSync(continuingThreadSyncObj, 0, GL_TIMEOUT_IGNORED);
-        ASSERT_GL_NO_ERROR();
-        glDeleteSync(continuingThreadSyncObj);
-        ASSERT_GL_NO_ERROR();
-        continuingThreadSyncObj = nullptr;
-
        // Delete this thread's framebuffer (reader of the shared texture).
        fbo[0].reset();

+        // Flush to make sure the graph nodes associated with this context are deleted.
+        glFlush();
+
        // Wait for the other thread to use the shared texture again before unbinding the
        // context (so no implicit flush happens).
        nextStep(Step::Thread0Delete);
@@ -541,21 +527,9 @@ TEST_P(EGLContextSharingTest, DeleteReaderOfSharedTexture)
        // Wait for first thread to draw using the shared texture.
        ASSERT_TRUE(waitForStep(Step::Thread0Draw));

-        ASSERT_TRUE(deletingThreadSyncObj != nullptr);
-        glWaitSync(deletingThreadSyncObj, 0, GL_TIMEOUT_IGNORED);
-        ASSERT_GL_NO_ERROR();
-        glDeleteSync(deletingThreadSyncObj);
-        ASSERT_GL_NO_ERROR();
-        deletingThreadSyncObj = nullptr;
-
        // Draw using the shared texture.
        drawQuad(program[0].get(), essl1_shaders::PositionAttrib(), 0.5f);

-        continuingThreadSyncObj = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-        ASSERT_GL_NO_ERROR();
-        // Force the fence to be created
-        glFlush();
-
        // Wait for the other thread to delete its framebuffer.
        nextStep(Step::Thread1Draw);
        ASSERT_TRUE(waitForStep(Step::Thread0Delete));

--- a/src/tests/egl_tests/EGLSyncTest.cpp
+++ b/src/tests/egl_tests/EGLSyncTest.cpp
@@ -214,13 +214,17 @@ TEST_P(EGLSyncTest, BasicOperations)

    glFlush();

-    glClear(GL_COLOR_BUFFER_BIT);
+    EGLint value           = 0;
+    unsigned int loopCount = 0;
+
+    // Use 'loopCount' to make sure the test doesn't get stuck in an infinite loop
+    while (value != EGL_SIGNALED_KHR && loopCount <= 1000000)
+    {
+        loopCount++;
+        EXPECT_EGL_TRUE(eglGetSyncAttribKHR(display, sync, EGL_SYNC_STATUS_KHR, &value));
+    }

-    // Don't wait forever to make sure the test terminates
-    constexpr GLuint64 kTimeout = 1'000'000'000;  // 1 second
-    EGLint value                = 0;
-    ASSERT_EQ(EGL_CONDITION_SATISFIED_KHR,
-              eglClientWaitSyncKHR(display, sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR, kTimeout));
+    ASSERT_EQ(value, EGL_SIGNALED_KHR);

    for (size_t i = 0; i < 20; i++)
    {

--- a/src/tests/gl_tests/FenceSyncTests.cpp
+++ b/src/tests/gl_tests/FenceSyncTests.cpp
@@ -230,9 +230,6 @@ TEST_P(FenceSyncTest, BasicQueries)
 // Test that basic usage works and doesn't generate errors or crash
 TEST_P(FenceSyncTest, BasicOperations)
 {
-    // TODO(http://anglebug.com/5425): glClientWaitSync() returns GL_TIMEOUT_EXPIRED
-    ANGLE_SKIP_TEST_IF(IsAMD() && IsD3D11());
-
    glClearColor(1.0f, 0.0f, 1.0f, 1.0f);

    GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
@@ -241,17 +238,28 @@ TEST_P(FenceSyncTest, BasicOperations)
    glWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
    EXPECT_GL_NO_ERROR();

+    GLsizei length         = 0;
+    GLint value            = 0;
+    unsigned int loopCount = 0;
+
    glFlush();

-    // Don't wait forever to make sure the test terminates
-    constexpr GLuint64 kTimeout = 1'000'000'000;  // 1 second
-    GLint value                 = 0;
+    // Use 'loopCount' to make sure the test doesn't get stuck in an infinite loop
+    while (value != GL_SIGNALED && loopCount <= 1000000)
+    {
+        loopCount++;
+
+        glGetSynciv(sync, GL_SYNC_STATUS, 1, &length, &value);
+        ASSERT_GL_NO_ERROR();
+    }
+
+    ASSERT_GLENUM_EQ(GL_SIGNALED, value);
+
    for (size_t i = 0; i < 20; i++)
    {
        glClear(GL_COLOR_BUFFER_BIT);
-        value = glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, kTimeout);
+        glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
        EXPECT_GL_NO_ERROR();
-        ASSERT_TRUE(value == GL_CONDITION_SATISFIED || value == GL_ALREADY_SIGNALED);
    }
 }