Commit dff47d5f by Tim Van Patten Committed by Commit Bot

Vulkan: Optimize MSAA using subpass resolve attachments

If a user is performing a blit to resolve a multisample color buffer into a single attachment, ANGLE can use subpass resolve attachments to resolve directly into the destination buffer as part of the render pass. This allows the data to remain in tiler memory and reduces the extra bandwidth required to write the multisampled data back to perform the copy.

This work also requires restoring/reopening a render pass if it has already been finished, assuming the finished render pass was started for the framebuffer that is the source of the blit command. Other objects that were created when the render pass was started need to be updated as well, such as the source FramebufferVk's resolve attachment, the CommandBufferHelper's vk::Framebuffer and vk::RenderPassDesc, etc.

While this is better than performing vkCmdResolveImage(), there is still another major part of optimizing MSAA using resolve attachments that is not implemented here: discarding the multisampled image rather than writing it to GMEM, which requires the user to invalidate the read FBO after the blit.

This CL was verified with AGI to make sure there are no explicit blits to resolve the multisampled image.

Bug: b/159903491
Test: FramebufferTest_ES31.*Blit*
Test: VulkanPerformanceCounterTest_ES31.MultisampleResolveWithBlit
Change-Id: I320a26088d8f614a295e7feec275d71310391806
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2298663
Commit-Queue: Tim Van Patten <timvp@google.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Charlie Lao <cclao@google.com>
parent 5dff6075
......@@ -4071,6 +4071,7 @@ angle::Result ContextVk::flushImpl(const vk::Semaphore *signalSemaphore)
mPerfCounters.renderPasses = 0;
mPerfCounters.writeDescriptorSets = 0;
mPerfCounters.flushedOutsideRenderPassCommandBuffers = 0;
mPerfCounters.resolveImageCommands = 0;
mWaitSemaphores.clear();
mWaitSemaphoreStageMasks.clear();
......@@ -4484,6 +4485,27 @@ angle::Result ContextVk::startRenderPass(gl::Rectangle renderArea,
return angle::Result::Continue;
}
// Re-attaches mRenderPassCommandBuffer to the command buffer held by mRenderPassCommands when
// that render pass has been started for |framebuffer| but the pointer has since been cleared.
// Does nothing if the pointer is already set, or if the started render pass (if any) targets a
// different framebuffer handle.
void ContextVk::restoreFinishedRenderPass(vk::Framebuffer *framebuffer)
{
    // A non-null command buffer pointer means the render pass has not been finished, so there
    // is nothing to restore.
    if (mRenderPassCommandBuffer != nullptr)
    {
        return;
    }

    const bool startedForThisFramebuffer =
        mRenderPassCommands->started() &&
        mRenderPassCommands->getFramebufferHandle() == framebuffer->getHandle();
    if (!startedForThisFramebuffer)
    {
        return;
    }

    // A render pass for this framebuffer is already recorded, so only the pointer needs to be
    // restored instead of starting a whole new render pass.  One way to get here: the draw
    // framebuffer binding changed from FBO A -> B -> A and nothing started a new render pass
    // for FBO B in between (for example, a clear was issued but deferred).
    mRenderPassCommandBuffer = &mRenderPassCommands->getCommandBuffer();
    ASSERT(hasStartedRenderPass());
}
angle::Result ContextVk::flushCommandsAndEndRenderPass()
{
// Ensure we flush the RenderPass *after* the prior commands.
......
......@@ -574,6 +574,9 @@ class ContextVk : public ContextImpl, public vk::Context
return *mRenderPassCommands;
}
// TODO(https://anglebug.com/4968): Support multiple open render passes.
void restoreFinishedRenderPass(vk::Framebuffer *framebuffer);
egl::ContextPriority getContextPriority() const override { return mContextPriority; }
angle::Result startRenderPass(gl::Rectangle renderArea, vk::CommandBuffer **commandBufferOut);
angle::Result flushCommandsAndEndRenderPass();
......@@ -628,6 +631,7 @@ class ContextVk : public ContextImpl, public vk::Context
vk::DynamicBuffer *getStagingBufferStorage() { return &mStagingBufferStorage; }
const vk::PerfCounters &getPerfCounters() const { return mPerfCounters; }
vk::PerfCounters &getPerfCounters() { return mPerfCounters; }
private:
// Dirty bits.
......
......@@ -122,7 +122,7 @@ class FramebufferVk : public FramebufferImpl
GLint getSamples() const;
const vk::RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; }
const vk::FramebufferDesc &getFramebufferDesc() const { return mCurrentFramebufferDesc; }
// We only support depth/stencil packed format and depthstencil attachment always follow all
// color attachments
size_t getDepthStencilAttachmentIndexVk() const
......@@ -130,6 +130,10 @@ class FramebufferVk : public FramebufferImpl
return getState().getEnabledDrawBuffers().count();
}
angle::Result getFramebuffer(ContextVk *contextVk,
vk::Framebuffer **framebufferOut,
const vk::ImageView *resolveImageViewIn);
private:
FramebufferVk(RendererVk *renderer,
const gl::FramebufferState &state,
......@@ -148,6 +152,10 @@ class FramebufferVk : public FramebufferImpl
bool flipX,
bool flipY);
// Resolve color with subpass attachment
angle::Result resolveColorWithSubpass(ContextVk *contextVk,
const UtilsVk::BlitResolveParameters &params);
// Resolve color with vkCmdResolveImage
angle::Result resolveColorWithCommand(ContextVk *contextVk,
const UtilsVk::BlitResolveParameters &params,
......@@ -158,8 +166,6 @@ class FramebufferVk : public FramebufferImpl
angle::Result copyResolveToMultisampedAttachment(ContextVk *contextVk,
RenderTargetVk *colorRenderTarget);
angle::Result getFramebuffer(ContextVk *contextVk, vk::Framebuffer **framebufferOut);
angle::Result clearImpl(const gl::Context *context,
gl::DrawBufferMask clearColorBuffers,
bool clearDepth,
......@@ -219,6 +225,9 @@ class FramebufferVk : public FramebufferImpl
VkClearValue getCorrectedColorClearValue(size_t colorIndexGL,
const VkClearColorValue &clearColor) const;
void updateColorResolveAttachment(uint32_t colorIndexGL,
vk::ImageViewSubresourceSerial resolveImageViewSerial);
WindowSurfaceVk *mBackbuffer;
vk::RenderPassDesc mRenderPassDesc;
......
......@@ -148,18 +148,15 @@ class RenderTargetVk final : public FramebufferAttachmentRenderTarget
// extension, even though a resolve attachment is not even provided.
// - Multisampled swapchain: TODO(syoussefi) this is true for the multisampled color attachment.
// http://anglebug.com/4836
// - glBlitFramebuffer optimization: TODO(timvp) this is **false** in this case, as the
// multisampled attachment and the resolve attachments belong to independent framebuffers.
// http://anglebug.com/4753
//
// Based on the above, we have:
//
// mResolveImage == nullptr | mResolveImage != nullptr
// |
// Normal rendering | Blit optimization
// !IsTransient No resolve | Resolve
// storeOp = STORE | storeOp = STORE
// Owner of data: mImage | Owner of data: mImage
// Normal rendering | Invalid
// !IsTransient No resolve |
// storeOp = STORE |
// Owner of data: mImage |
// |
// ---------------------------------------------+---------------------------------------
// |
......
......@@ -32,15 +32,6 @@ namespace vk
namespace
{
// Layout of the serials stored in a FramebufferDesc object (index ranges are inclusive):
// - Depth/stencil serial is at index 0
// - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
// - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1,
//   gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2]
// Index of the depth/stencil serial.
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
// Added to a color attachment index to obtain the position of its serial.
constexpr size_t kFramebufferDescColorIndexOffset = 1;
// First index of the resolve attachment range described above.
constexpr size_t kFramebufferDescResolveIndexOffset = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
uint8_t PackGLBlendOp(GLenum blendOp)
{
switch (blendOp)
......
......@@ -911,6 +911,15 @@ constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFER
// Fixed-size std::array of T holding kMaxFramebufferAttachments elements.
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
// Layout of the serials stored in a FramebufferDesc object (index ranges are inclusive):
// - Depth/stencil serial is at index 0
// - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
// - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1,
//   gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2]
// Index of the depth/stencil serial.
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
// Added to a color attachment index to obtain the position of its serial.
constexpr size_t kFramebufferDescColorIndexOffset = 1;
// First index of the resolve attachment range described above.
constexpr size_t kFramebufferDescResolveIndexOffset = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
class FramebufferDesc
{
public:
......@@ -931,6 +940,12 @@ class FramebufferDesc
uint32_t attachmentCount() const;
// Returns the image view serial stored for color attachment |index|.  Color serials are stored
// in mSerials starting at kFramebufferDescColorIndexOffset.
ImageViewSubresourceSerial getColorImageViewSerial(uint32_t index)
{
    const size_t serialIndex = kFramebufferDescColorIndexOffset + index;
    ASSERT(serialIndex < mSerials.size());
    return mSerials[serialIndex];
}
private:
void update(uint32_t index, ImageViewSubresourceSerial serial);
......
......@@ -899,6 +899,14 @@ angle::Result CommandBufferHelper::flushToPrimary(ContextVk *contextVk,
return angle::Result::Continue;
}
// Points this command buffer helper at |newFramebuffer| and replaces its render pass
// description with |renderPassDesc|.  |newFramebuffer| must not be null.
void CommandBufferHelper::updateRenderPassForResolve(vk::Framebuffer *newFramebuffer,
                                                     const vk::RenderPassDesc &renderPassDesc)
{
    ASSERT(newFramebuffer);

    // Only the handle is copied into mFramebuffer; the render pass description is copied by
    // value.
    mRenderPassDesc = renderPassDesc;
    mFramebuffer.setHandle(newFramebuffer->getHandle());
}
// Helper functions used below
char GetLoadOpShorthand(uint32_t loadOp)
{
......
......@@ -984,6 +984,9 @@ class CommandBufferHelper : angle::NonCopyable
void onDepthAccess(ResourceAccess access) { UpdateAccess(&mDepthStartAccess, access); }
void onStencilAccess(ResourceAccess access) { UpdateAccess(&mStencilStartAccess, access); }
void updateRenderPassForResolve(vk::Framebuffer *newFramebuffer,
const vk::RenderPassDesc &renderPassDesc);
private:
void addCommandDiagnostics(ContextVk *contextVk);
// Allocator used by this class. Using a pool allocator per CBH to avoid threading issues
......
......@@ -753,6 +753,7 @@ struct PerfCounters
uint32_t renderPasses;
uint32_t writeDescriptorSets;
uint32_t flushedOutsideRenderPassCommandBuffers;
uint32_t resolveImageCommands;
};
} // namespace vk
......
......@@ -31,11 +31,14 @@ class VulkanPerformanceCounterTest : public ANGLETest
// Returns a read-only reference to the performance counters of the ContextVk that backs the
// test window's current context.  The EGL context is cast to a gl::Context and its back-end
// implementation is fetched with rx::GetImplAs.
const rx::vk::PerfCounters &hackANGLE() const
{
    // Reach through the public context into the back-end implementation.  Both casts stay
    // const because the counters are only read.
    const gl::Context *context = static_cast<const gl::Context *>(getEGLWindow()->getContext());
    return rx::GetImplAs<const rx::ContextVk>(context)->getPerfCounters();
}
};
// Fixture with no additions of its own over VulkanPerformanceCounterTest; it exists so tests
// can be registered and instantiated under a separate name.
class VulkanPerformanceCounterTest_ES31 : public VulkanPerformanceCounterTest
{};
// Tests that texture updates to unused textures don't break the RP.
TEST_P(VulkanPerformanceCounterTest, NewTextureDoesNotBreakRenderPass)
{
......@@ -263,6 +266,58 @@ TEST_P(VulkanPerformanceCounterTest, IndependentBufferCopiesShareSingleBarrier)
EXPECT_EQ(expectedFlushCount, actualFlushCount);
}
// Tests that resolving a multisampled texture with glBlitFramebuffer does not break the render
// pass, so that a subpass resolve can be used.  Checks that the resolveImageCommands counter is
// zero after the blit and that the resolved image has the expected corner colors.
TEST_P(VulkanPerformanceCounterTest_ES31, MultisampleResolveWithBlit)
{
constexpr int kSize = 16;
glViewport(0, 0, kSize, kSize);
// Create a framebuffer whose color attachment is a 4-sample RGBA8 multisample texture.
GLFramebuffer msaaFBO;
glBindFramebuffer(GL_FRAMEBUFFER, msaaFBO.get());
GLTexture texture;
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, texture.get());
glTexStorage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, 4, GL_RGBA8, kSize, kSize, false);
ASSERT_GL_NO_ERROR();
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE,
texture.get(), 0);
ASSERT_GL_FRAMEBUFFER_COMPLETE(GL_FRAMEBUFFER);
// Draw a quad into the multisampled framebuffer with the RedGreenGradient fragment shader.
ANGLE_GL_PROGRAM(gradientProgram, essl31_shaders::vs::Passthrough(),
essl31_shaders::fs::RedGreenGradient());
drawQuad(gradientProgram, essl31_shaders::PositionAttrib(), 0.5f, 1.0f, true);
ASSERT_GL_NO_ERROR();
// Create another FBO to resolve the multisample buffer into.
GLTexture resolveTexture;
GLFramebuffer resolveFBO;
glBindTexture(GL_TEXTURE_2D, resolveTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, kSize, kSize, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindFramebuffer(GL_FRAMEBUFFER, resolveFBO);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, resolveTexture, 0);
EXPECT_GLENUM_EQ(GL_FRAMEBUFFER_COMPLETE, glCheckFramebufferStatus(GL_FRAMEBUFFER));
// Blit the full color buffer from the multisampled FBO (read) to the single-sampled FBO (draw).
glBindFramebuffer(GL_READ_FRAMEBUFFER, msaaFBO);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, resolveFBO);
glBlitFramebuffer(0, 0, kSize, kSize, 0, 0, kSize, kSize, GL_COLOR_BUFFER_BIT, GL_NEAREST);
ASSERT_GL_NO_ERROR();
// The blit must not have incremented the resolveImageCommands counter.
// NOTE(review): presumably this counter tracks vkCmdResolveImage calls - confirm.
const rx::vk::PerfCounters &counters = hackANGLE();
EXPECT_EQ(counters.resolveImageCommands, 0u);
// Read back from the resolve target and check the gradient near its four corners.
glBindFramebuffer(GL_READ_FRAMEBUFFER, resolveFBO);
EXPECT_PIXEL_NEAR(0, 0, 0, 0, 0, 255, 1.0); // Black
EXPECT_PIXEL_NEAR(kSize - 1, 1, 239, 0, 0, 255, 1.0); // Red
EXPECT_PIXEL_NEAR(0, kSize - 1, 0, 239, 0, 255, 1.0); // Green
EXPECT_PIXEL_NEAR(kSize - 1, kSize - 1, 239, 239, 0, 255, 1.0); // Yellow
}
ANGLE_INSTANTIATE_TEST(VulkanPerformanceCounterTest, ES3_VULKAN());
ANGLE_INSTANTIATE_TEST(VulkanPerformanceCounterTest_ES31, ES31_VULKAN());
} // anonymous namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment