Commit d3e800e9 by Jamie Madill Committed by Commit Bot

Vulkan: Restart RenderPasses in DS read-only mode.

We can combine an initial RenderPass with a read-only RP if the first RP never writes to depth. We can check the depth-write tracking in CommandBufferHelper and substitute in a new Framebuffer/RP Desc in this case as well as issue new layout barriers. We need to disable barrier merging in this special case. This reduces the RenderPass count in the Manhattan trace from 15->13. The performance on the Pixel 4 benchmark goes to ~82% of native for the on-screen version and ~88% for off-screen. There's also a ~5% bump in speed for the desktop trace. Bug: angleproject:4959 Change-Id: I70241824f75eaa1e11b50370f826abc36e91686e Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2358772 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Tim Van Patten <timvp@google.com> Reviewed-by: Charlie Lao <cclao@google.com>
parent 552f0f76
......@@ -2999,6 +2999,7 @@ angle::Result ContextVk::syncState(const gl::Context *context,
gl::Framebuffer *drawFramebuffer = glState.getDrawFramebuffer();
mDrawFramebuffer = vk::GetImpl(drawFramebuffer);
mDrawFramebuffer->setReadOnlyDepthMode(false);
updateFlipViewportDrawFramebuffer(glState);
updateSurfaceRotationDrawFramebuffer(glState);
updateViewport(mDrawFramebuffer, glState.getViewport(), glState.getNearPlane(),
......@@ -3896,6 +3897,11 @@ angle::Result ContextVk::updateActiveTextures(const gl::Context *context)
}
else if (shouldSwitchToDepthReadOnlyMode(context, texture))
{
// The "readOnlyDepthMode" feature enables read-only depth-stencil feedback loops. We
// only switch to "read-only" mode when there's a loop. We track the depth-stencil access
// mode in the RenderPass. The tracking tells us when we can retroactively go back and
// change the RenderPass to read-only. If there are any writes we need to break and
// finish the current RP before starting the read-only one.
ASSERT(!mState.isDepthWriteEnabled());
// Special handling for deferred clears.
......@@ -3906,11 +3912,19 @@ angle::Result ContextVk::updateActiveTextures(const gl::Context *context)
ANGLE_TRY(mDrawFramebuffer->flushDeferredClears(this, scissoredRenderArea));
}
// TODO(jmadill): Don't end RenderPass. http://anglebug.com/4959
if (hasStartedRenderPass())
{
ANGLE_TRY(flushCommandsAndEndRenderPass());
if (mRenderPassCommands->getDepthStartAccess() == vk::ResourceAccess::Write)
{
ANGLE_TRY(flushCommandsAndEndRenderPass());
}
else
{
ANGLE_TRY(mDrawFramebuffer->restartRenderPassInReadOnlyDepthMode(
this, mRenderPassCommands));
}
}
mDrawFramebuffer->setReadOnlyDepthMode(true);
}
......@@ -4846,7 +4860,7 @@ void ContextVk::setDefaultUniformBlocksMinSizeForTesting(size_t minSize)
angle::Result ContextVk::updateRenderPassDepthAccess()
{
if (mState.isDepthTestEnabled() && mRenderPassCommands->started())
if (mState.isDepthTestEnabled() && hasStartedRenderPass())
{
vk::ResourceAccess access = GetDepthAccess(mState.getDepthStencilState());
......
......@@ -2503,4 +2503,19 @@ void FramebufferVk::setReadOnlyDepthMode(bool readOnlyDepthEnabled)
updateRenderPassDesc();
}
}
// Puts this framebuffer into read-only depth mode and re-targets the RenderPass tracked by
// |renderPass| at the framebuffer returned by getFramebuffer() together with mRenderPassDesc.
//
// Precondition: the framebuffer is not already in read-only depth mode (asserted).
// Returns angle::Result::Continue on success; a getFramebuffer() failure is propagated to the
// caller by ANGLE_TRY.
angle::Result FramebufferVk::restartRenderPassInReadOnlyDepthMode(
    ContextVk *contextVk,
    vk::CommandBufferHelper *renderPass)
{
    ASSERT(!isReadOnlyDepthMode());
    setReadOnlyDepthMode(true);

    // The framebuffer is queried only after the mode switch above, so that it corresponds to
    // the read-only configuration.
    vk::Framebuffer *readOnlyFramebuffer = nullptr;
    ANGLE_TRY(getFramebuffer(contextVk, &readOnlyFramebuffer, nullptr));

    const vk::Framebuffer &restartTarget = *readOnlyFramebuffer;
    renderPass->restartRenderPassWithReadOnlyDepth(restartTarget, mRenderPassDesc);
    return angle::Result::Continue;
}
} // namespace rx
......@@ -139,6 +139,8 @@ class FramebufferVk : public FramebufferImpl
bool hasDeferredClears() const { return !mDeferredClears.empty(); }
angle::Result flushDeferredClears(ContextVk *contextVk, const gl::Rectangle &renderArea);
void setReadOnlyDepthMode(bool readOnlyDepthEnabled);
angle::Result restartRenderPassInReadOnlyDepthMode(ContextVk *contextVk,
vk::CommandBufferHelper *renderPass);
private:
FramebufferVk(RendererVk *renderer,
......
......@@ -582,6 +582,7 @@ CommandBufferHelper::CommandBufferHelper()
mCounter(0),
mClearValues{},
mRenderPassStarted(false),
mForceIndividualBarriers(false),
mTransformFeedbackCounterBuffers{},
mValidTransformFeedbackBufferCount(0),
mRebindTransformFeedbackBuffers(false),
......@@ -734,7 +735,20 @@ void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBu
return;
}
if (contextVk->getFeatures().preferAggregateBarrierCalls.enabled)
if (mForceIndividualBarriers)
{
// Note: ideally we could merge double barriers into a single barrier (or even completely
// eliminate them in some cases). This is a bit trickier to manage than splitting barriers
// into single calls. It should only affect Framebuffer transitions.
// TODO: Investigate merging barriers. http://anglebug.com/4976
for (PipelineStage pipelineStage : mask)
{
PipelineBarrier &barrier = mPipelineBarriers[pipelineStage];
barrier.executeIndividually(primary);
}
mForceIndividualBarriers = false;
}
else if (contextVk->getFeatures().preferAggregateBarrierCalls.enabled)
{
PipelineStagesMask::Iterator iter = mask.begin();
PipelineBarrier &barrier = mPipelineBarriers[*iter];
......@@ -770,14 +784,45 @@ void CommandBufferHelper::beginRenderPass(const Framebuffer &framebuffer,
mAttachmentOps = renderPassAttachmentOps;
mDepthStencilAttachmentIndex = depthStencilAttachmentIndex;
mFramebuffer.setHandle(framebuffer.getHandle());
mRenderArea = renderArea;
mClearValues = clearValues;
*commandBufferOut = &mCommandBuffer;
mRenderArea = renderArea;
mClearValues = clearValues;
*commandBufferOut = &mCommandBuffer;
mForceIndividualBarriers = false;
if (mDepthStencilAttachmentIndex != vk::kInvalidAttachmentIndex)
{
if (renderPassAttachmentOps[mDepthStencilAttachmentIndex].loadOp ==
VK_ATTACHMENT_LOAD_OP_CLEAR)
{
mDepthStartAccess = ResourceAccess::Write;
}
if (renderPassAttachmentOps[mDepthStencilAttachmentIndex].stencilLoadOp ==
VK_ATTACHMENT_LOAD_OP_CLEAR)
{
mStencilStartAccess = ResourceAccess::Write;
}
}
mRenderPassStarted = true;
mCounter++;
}
void CommandBufferHelper::restartRenderPassWithReadOnlyDepth(const Framebuffer &framebuffer,
const RenderPassDesc &renderPassDesc)
{
ASSERT(mIsRenderPassCommandBuffer);
ASSERT(mRenderPassStarted);
mRenderPassDesc = renderPassDesc;
mAttachmentOps.setLayouts(mDepthStencilAttachmentIndex, ImageLayout::DepthStencilReadOnly,
ImageLayout::DepthStencilReadOnly);
mFramebuffer.setHandle(framebuffer.getHandle());
// Barrier aggregation messes up with RenderPass restarting.
mForceIndividualBarriers = true;
}
void CommandBufferHelper::endRenderPass()
{
pauseTransformFeedbackIfStarted();
......
......@@ -636,6 +636,33 @@ class PipelineBarrier : angle::NonCopyable
reset();
}
// Records the pending barriers into |primary| using one pipelineBarrier call per image memory
// barrier instead of a single aggregated call, then resets this object.
//
// If a global memory barrier is pending (mMemoryBarrierSrcAccess != 0) it accompanies every
// per-image call. When there are no image barriers at all, the memory barrier is recorded with a
// single call of its own so that it is not silently discarded by the reset() below.
//
// Does nothing when isEmpty() reports there is nothing to record.
void executeIndividually(PrimaryCommandBuffer *primary)
{
    if (isEmpty())
    {
        return;
    }

    // Describe the optional global memory barrier.
    VkMemoryBarrier memoryBarrier = {};
    uint32_t memoryBarrierCount   = 0;
    if (mMemoryBarrierSrcAccess != 0)
    {
        memoryBarrier.sType         = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
        memoryBarrier.srcAccessMask = mMemoryBarrierSrcAccess;
        memoryBarrier.dstAccessMask = mMemoryBarrierDstAccess;
        memoryBarrierCount++;
    }

    // Issue one vkCmdPipelineBarrier call per image barrier.
    bool issuedAnyBarrier = false;
    for (const VkImageMemoryBarrier &imageBarrier : mImageMemoryBarriers)
    {
        primary->pipelineBarrier(mSrcStageMask, mDstStageMask, 0, memoryBarrierCount,
                                 &memoryBarrier, 0, nullptr, 1, &imageBarrier);
        issuedAnyBarrier = true;
    }

    // A memory-only barrier has no image barrier to ride along with; record it by itself.
    if (!issuedAnyBarrier && memoryBarrierCount != 0)
    {
        primary->pipelineBarrier(mSrcStageMask, mDstStageMask, 0, memoryBarrierCount,
                                 &memoryBarrier, 0, nullptr, 0, nullptr);
    }

    reset();
}
// merge two barriers into one
void merge(PipelineBarrier *other)
{
......@@ -915,6 +942,9 @@ class CommandBufferHelper : angle::NonCopyable
void endRenderPass();
void restartRenderPassWithReadOnlyDepth(const Framebuffer &framebuffer,
const RenderPassDesc &renderPassDesc);
void beginTransformFeedback(size_t validBufferCount,
const VkBuffer *counterBuffers,
bool rebindBuffers);
......@@ -992,6 +1022,7 @@ class CommandBufferHelper : angle::NonCopyable
void updateRenderPassForResolve(vk::Framebuffer *newFramebuffer,
const vk::RenderPassDesc &renderPassDesc);
ResourceAccess getDepthStartAccess() const { return mDepthStartAccess; }
private:
void addCommandDiagnostics(ContextVk *contextVk);
......@@ -1012,6 +1043,7 @@ class CommandBufferHelper : angle::NonCopyable
gl::Rectangle mRenderArea;
ClearValuesArray mClearValues;
bool mRenderPassStarted;
bool mForceIndividualBarriers;
// Transform feedback state
gl::TransformFeedbackBuffersArray<VkBuffer> mTransformFeedbackCounterBuffers;
......
......@@ -1722,6 +1722,94 @@ void main()
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
// Tests corner cases with read-only depth-stencil feedback loops.
// The same depth texture is attached to two framebuffers and sampled by the fragment shader.
// The test draws with the loop, then without it on the second framebuffer, then with it again.
// Only the absence of GL errors is verified; no pixel values are read back.
TEST_P(FramebufferTest_ES3, ReadOnlyDepthFeedbackLoopStateChanges)
{
// Feedback loops not supported on D3D11 and may not ever be.
ANGLE_SKIP_TEST_IF(IsD3D11());
// Also this particular test doesn't work on Android despite similar support in Manhattan.
ANGLE_SKIP_TEST_IF(IsAndroid() && IsOpenGLES());
constexpr GLuint kSize = 2;
glViewport(0, 0, kSize, kSize);
// Fragment shader: outputs green when the sampled depth value is within 0.1 of 0.5, and red
// otherwise.
constexpr char kFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D depth;
void main()
{
if (abs(texture2D(depth, v_texCoord).x - 0.5) < 0.1)
{
gl_FragColor = vec4(0, 1, 0, 1);
}
else
{
gl_FragColor = vec4(1, 0, 0, 1);
}
})";
ANGLE_GL_PROGRAM(program, essl1_shaders::vs::Texture2D(), kFS);
glUseProgram(program);
// Quad geometry is supplied through vertex attribute 0 as three floats per vertex.
setupQuadVertexBuffer(0.5f, 1.0f);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
// First framebuffer: RGBA8 color texture plus the shared depth texture.
GLFramebuffer framebuffer1;
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer1);
GLTexture colorTexture;
glBindTexture(GL_TEXTURE_2D, colorTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, kSize, kSize, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorTexture, 0);
// The depth texture stays bound to GL_TEXTURE_2D after this setup, so the first draw below
// samples it while it is also attached as the depth buffer.
GLTexture depthTexture;
glBindTexture(GL_TEXTURE_2D, depthTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, kSize, kSize, 0, GL_DEPTH_COMPONENT,
GL_UNSIGNED_INT, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTexture, 0);
ASSERT_GL_FRAMEBUFFER_COMPLETE(GL_FRAMEBUFFER);
// Second framebuffer: depth-only, sharing the same depth texture as framebuffer1.
GLFramebuffer framebuffer2;
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer2);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTexture, 0);
ASSERT_GL_FRAMEBUFFER_COMPLETE(GL_FRAMEBUFFER);
ASSERT_GL_NO_ERROR();
// Clear depth to 0.5.
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer1);
glClearDepthf(0.5f);
glClear(GL_DEPTH_BUFFER_BIT);
glFlush();
// Disable depth. Although this does not remove the feedback loop as defined by the
// spec it mimics what gfxbench does in its rendering tests.
glDepthMask(false);
glDisable(GL_DEPTH_TEST);
// Draw with loop.
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Draw with no loop and second FBO. Starts RP in writable mode.
// Unbinding the texture from GL_TEXTURE_2D is what removes the loop for this draw.
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer2);
glBindTexture(GL_TEXTURE_2D, 0);
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Draw with loop, restarts RP.
// Re-binding the depth texture re-creates the loop while framebuffer2 is still bound.
glBindTexture(GL_TEXTURE_2D, depthTexture);
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
}
// Covers a bug in ANGLE's Vulkan back-end. Our VkFramebuffer cache would in some cases forget to
// check the draw states when computing a cache key.
TEST_P(FramebufferTest_ES3, DisabledAttachmentRedefinition)
......
......@@ -361,8 +361,7 @@ TEST_P(VulkanPerformanceCounterTest, ReadOnlyDepthStencilFeedbackLoopUsesSingleR
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// TODO(jmadill): Remove extra RenderPass. http://anglebug.com/4959
uint32_t expectedRenderPassCount = counters.renderPasses + 2;
uint32_t expectedRenderPassCount = counters.renderPasses + 1;
// Start new RenderPass with depth write disabled and no loop.
glBindFramebuffer(GL_FRAMEBUFFER, depthAndColorFBO);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment