Commit 39d7fc18 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Don't break the render pass on dispatch calls

The only reason a dispatch call may need to break the render pass implicitly is for read-after-writes where the write originates from the render pass but is not through a storage buffer/image. There are only two such scenrios possible: - Framebuffer attachment write -> texture sample - Transform feedback write -> ubo read All other uses of the buffers and textures that require breaking the render pass are handled by `glMemoryBarrier`. Bug: angleproject:5070 Change-Id: I92b50d69d8782097ee8ff477ac57da6209c326a1 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2698998 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org> Reviewed-by: 's avatarTim Van Patten <timvp@google.com>
parent 5d7f4e80
......@@ -1033,10 +1033,10 @@ angle::Result ContextVk::setupLineLoopDraw(const gl::Context *context,
angle::Result ContextVk::setupDispatch(const gl::Context *context)
{
// |setupDispatch| and |setupDraw| are special in that they flush dirty bits. Therefore they
// don't use the same APIs to record commands as the functions outside ContextVk.
// The following ensures prior commands are flushed before we start processing dirty bits.
ANGLE_TRY(flushCommandsAndEndRenderPass());
// Note: numerous tests miss a glMemoryBarrier call between the initial texture data upload and
// the dispatch call. Flush the outside render pass command buffer as a workaround.
// TODO: Remove this and fix tests. http://anglebug.com/5070
ANGLE_TRY(flushOutsideRenderPassCommands());
// Create a local object to ensure we flush the descriptor updates to device when we leave this
// function
......@@ -3265,7 +3265,7 @@ angle::Result ContextVk::syncState(const gl::Context *context,
ASSERT(gl::State::DIRTY_BIT_TEXTURE_BINDINGS >
gl::State::DIRTY_BIT_PROGRAM_EXECUTABLE);
iter.setLaterBit(gl::State::DIRTY_BIT_TEXTURE_BINDINGS);
invalidateCurrentShaderResources();
ANGLE_TRY(invalidateCurrentShaderResources());
ANGLE_TRY(invalidateProgramExecutableHelper(context));
break;
}
......@@ -3283,17 +3283,17 @@ angle::Result ContextVk::syncState(const gl::Context *context,
// Nothing to do.
break;
case gl::State::DIRTY_BIT_SHADER_STORAGE_BUFFER_BINDING:
invalidateCurrentShaderResources();
ANGLE_TRY(invalidateCurrentShaderResources());
break;
case gl::State::DIRTY_BIT_UNIFORM_BUFFER_BINDINGS:
invalidateCurrentShaderResources();
ANGLE_TRY(invalidateCurrentShaderResources());
break;
case gl::State::DIRTY_BIT_ATOMIC_COUNTER_BUFFER_BINDING:
invalidateCurrentShaderResources();
ANGLE_TRY(invalidateCurrentShaderResources());
invalidateDriverUniforms();
break;
case gl::State::DIRTY_BIT_IMAGE_BINDINGS:
invalidateCurrentShaderResources();
ANGLE_TRY(invalidateCurrentShaderResources());
break;
case gl::State::DIRTY_BIT_MULTISAMPLING:
// TODO(syoussefi): this should configure the pipeline to render as if
......@@ -3685,12 +3685,18 @@ angle::Result ContextVk::invalidateCurrentTextures(const gl::Context *context)
mComputeDirtyBits |= kTexturesAndDescSetDirtyBits;
ANGLE_TRY(updateActiveTextures(context));
// Take care of read-after-write hazards that require implicit synchronization.
if (executable->isCompute())
{
ANGLE_TRY(endRenderPassIfComputeReadAfterAttachmentWrite());
}
}
return angle::Result::Continue;
}
void ContextVk::invalidateCurrentShaderResources()
angle::Result ContextVk::invalidateCurrentShaderResources()
{
const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable);
......@@ -3706,6 +3712,12 @@ void ContextVk::invalidateCurrentShaderResources()
mComputeDirtyBits |= kResourcesAndDescSetDirtyBits;
}
// Take care of read-after-write hazards that require implicit synchronization.
if (hasUniformBuffers && executable->isCompute())
{
ANGLE_TRY(endRenderPassIfComputeReadAfterTransformFeedbackWrite());
}
// If memory barrier has been issued but the command buffers haven't been flushed, make sure
// they get a chance to do so if necessary on program and storage buffer/image binding change.
const bool hasGLMemoryBarrierIssuedInCommandBuffers =
......@@ -3717,6 +3729,8 @@ void ContextVk::invalidateCurrentShaderResources()
mGraphicsDirtyBits.set(DIRTY_BIT_MEMORY_BARRIER);
mComputeDirtyBits.set(DIRTY_BIT_MEMORY_BARRIER);
}
return angle::Result::Continue;
}
void ContextVk::invalidateGraphicsDriverUniforms()
......@@ -3730,13 +3744,13 @@ void ContextVk::invalidateDriverUniforms()
mComputeDirtyBits |= kDriverUniformsAndBindingDirtyBits;
}
void ContextVk::onFramebufferChange(FramebufferVk *framebufferVk)
angle::Result ContextVk::onFramebufferChange(FramebufferVk *framebufferVk)
{
// This is called from FramebufferVk::syncState. Skip these updates if the framebuffer being
// synced is the read framebuffer (which is not equal the draw framebuffer).
if (framebufferVk != vk::GetImpl(mState.getDrawFramebuffer()))
{
return;
return angle::Result::Continue;
}
// Ensure that the pipeline description is updated.
......@@ -3751,10 +3765,12 @@ void ContextVk::onFramebufferChange(FramebufferVk *framebufferVk)
if (mState.getProgramExecutable())
{
invalidateCurrentShaderResources();
ANGLE_TRY(invalidateCurrentShaderResources());
}
onDrawFramebufferRenderPassDescChange(framebufferVk, nullptr);
return angle::Result::Continue;
}
void ContextVk::onDrawFramebufferRenderPassDescChange(FramebufferVk *framebufferVk,
......@@ -5579,6 +5595,86 @@ angle::Result ContextVk::flushCommandBuffersIfNecessary(const vk::CommandBufferA
return angle::Result::Continue;
}
angle::Result ContextVk::endRenderPassIfComputeReadAfterTransformFeedbackWrite()
{
// Similar to flushCommandBuffersIfNecessary(), but using uniform buffers currently bound and
// used by the current (compute) program. This is to handle read-after-write hazards where the
// write originates from transform feedback.
if (mCurrentTransformFeedbackBuffers.empty())
{
return angle::Result::Continue;
}
const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable && executable->isCompute());
gl::ShaderMap<const gl::ProgramState *> programStates;
mExecutable->fillProgramStateMap(this, &programStates);
for (const gl::ShaderType shaderType : executable->getLinkedShaderStages())
{
const gl::ProgramState *programState = programStates[shaderType];
ASSERT(programState);
// Uniform buffers:
const std::vector<gl::InterfaceBlock> &blocks = programState->getUniformBlocks();
for (uint32_t bufferIndex = 0; bufferIndex < blocks.size(); ++bufferIndex)
{
const gl::InterfaceBlock &block = blocks[bufferIndex];
const gl::OffsetBindingPointer<gl::Buffer> &bufferBinding =
mState.getIndexedUniformBuffer(block.binding);
if (!block.isActive(shaderType) || bufferBinding.get() == nullptr)
{
continue;
}
vk::BufferHelper &buffer = vk::GetImpl(bufferBinding.get())->getBuffer();
if (mCurrentTransformFeedbackBuffers.contains(&buffer))
{
return flushCommandsAndEndRenderPass();
}
}
}
return angle::Result::Continue;
}
angle::Result ContextVk::endRenderPassIfComputeReadAfterAttachmentWrite()
{
// Similar to flushCommandBuffersIfNecessary(), but using textures currently bound and used by
// the current (compute) program. This is to handle read-after-write hazards where the write
// originates from a framebuffer attachment.
const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable && executable->isCompute() && executable->hasTextures());
const gl::ActiveTexturesCache &textures = mState.getActiveTexturesCache();
const gl::ActiveTextureTypeArray &textureTypes = executable->getActiveSamplerTypes();
for (size_t textureUnit : executable->getActiveSamplersMask())
{
gl::Texture *texture = textures[textureUnit];
gl::TextureType textureType = textureTypes[textureUnit];
if (texture == nullptr || textureType == gl::TextureType::Buffer)
{
continue;
}
TextureVk *textureVk = vk::GetImpl(texture);
ASSERT(textureVk != nullptr);
vk::ImageHelper &image = textureVk->getImage();
if (IsRenderPassStartedAndUsesImage(*mRenderPassCommands, image))
{
return flushCommandsAndEndRenderPass();
}
}
return angle::Result::Continue;
}
// Requires that trace is enabled to see the output, which is supported with is_debug=true
void ContextVk::outputCumulativePerfCounters()
{
......
......@@ -319,7 +319,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void invalidateDefaultAttribute(size_t attribIndex);
void invalidateDefaultAttributes(const gl::AttributesMask &dirtyMask);
void onFramebufferChange(FramebufferVk *framebufferVk);
angle::Result onFramebufferChange(FramebufferVk *framebufferVk);
void onDrawFramebufferRenderPassDescChange(FramebufferVk *framebufferVk,
bool *renderPassDescChangedOut);
void onHostVisibleBufferWrite() { mIsAnyHostVisibleBufferWritten = true; }
......@@ -778,7 +778,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void invalidateCurrentDefaultUniforms();
angle::Result invalidateCurrentTextures(const gl::Context *context);
void invalidateCurrentShaderResources();
angle::Result invalidateCurrentShaderResources();
void invalidateGraphicsDriverUniforms();
void invalidateDriverUniforms();
......@@ -879,7 +879,22 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void initIndexTypeMap();
// Read-after-write hazards are generally handled with |glMemoryBarrier| when the source of
// write is storage output. When the write is outside render pass, the natural placement of the
// render pass after the current outside render pass commands ensures that the memory barriers
// and image layout transitions automatically take care of such synchronizations.
//
// There are a number of read-after-write cases that require breaking the render pass however to
// preserve the order of operations:
//
// - Transform feedback write (in render pass), then vertex/index read (in render pass)
// - Transform feedback write (in render pass), then ubo read (outside render pass)
// - Framebuffer attachment write (in render pass), then texture sample (outside render pass)
// * Note that texture sampling inside render pass would cause a feedback loop
//
angle::Result endRenderPassIfTransformFeedbackBuffer(const vk::BufferHelper *buffer);
angle::Result endRenderPassIfComputeReadAfterTransformFeedbackWrite();
angle::Result endRenderPassIfComputeReadAfterAttachmentWrite();
void populateTransformFeedbackBufferSet(
size_t bufferCount,
......
......@@ -1876,9 +1876,7 @@ angle::Result FramebufferVk::syncState(const gl::Context *context,
mFramebuffer = nullptr;
// Notify the ContextVk to update the pipeline desc.
contextVk->onFramebufferChange(this);
return angle::Result::Continue;
return contextVk->onFramebufferChange(this);
}
void FramebufferVk::updateRenderPassDesc()
......
......@@ -22,6 +22,9 @@
//// General Vulkan expectations
////
// Test bug with missing glMemoryBarrier call
5673 VULKAN : KHR-GLES31.core.texture_buffer.texture_buffer_operations_transform_feedback = FAIL
// Framebuffer without attachments:
3579 VULKAN : KHR-GLES31.core.framebuffer_no_attachments.api = FAIL
......
......@@ -6,6 +6,10 @@
// For now we only log Vulkan test expectations. More back-ends can follow as we need them.
// Test bug with missing glMemoryBarrier call
5673 VULKAN LINUX : KHR-GLES32.core.texture_buffer.texture_buffer_operations_transform_feedback = FAIL
5673 VULKAN WIN : KHR-GLES32.core.texture_buffer.texture_buffer_operations_transform_feedback = FAIL
// Geometry shader support (conditioned to windows as that's our only bot capable of running these
// tests)
// Translator's lack of support for redefining gl_PerVertex
......
......@@ -4439,6 +4439,70 @@ void main()
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
// Tests that writing to a buffer with transform feedback in one draw call followed by reading from
// it in a dispatch call works correctly. This requires an implicit barrier in between the calls.
TEST_P(SimpleStateChangeTestES31, TransformFeedbackThenReadWithCompute)
{
constexpr GLsizei kBufferSize = sizeof(float) * 4 * 6;
GLBuffer buffer;
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, buffer);
glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, kBufferSize, nullptr, GL_STATIC_DRAW);
glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, buffer);
std::vector<std::string> tfVaryings = {"gl_Position"};
ANGLE_GL_PROGRAM_TRANSFORM_FEEDBACK(program, essl3_shaders::vs::Simple(),
essl3_shaders::fs::Green(), tfVaryings,
GL_INTERLEAVED_ATTRIBS);
glUseProgram(program);
glBeginTransformFeedback(GL_TRIANGLES);
drawQuad(program, essl3_shaders::PositionAttrib(), 0.0f);
glEndTransformFeedback();
constexpr char kCS[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1) in;
layout(binding = 0) uniform Input
{
vec4 data[3];
};
layout(binding = 0, std430) buffer Output {
bool pass;
};
void main()
{
pass = data[0] == vec4(-1, 1, 0, 1) &&
data[1] == vec4(-1, -1, 0, 1) &&
data[2] == vec4(1, -1, 0, 1);
})";
ANGLE_GL_COMPUTE_PROGRAM(readProgram, kCS);
glUseProgram(readProgram);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, buffer);
constexpr GLsizei kResultSize = sizeof(uint32_t);
GLBuffer resultBuffer;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, resultBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, kResultSize, nullptr, GL_STATIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, resultBuffer);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
EXPECT_GL_NO_ERROR();
// Verify the output of rendering
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
// Verify the output from the compute shader
const uint32_t *ptr = reinterpret_cast<const uint32_t *>(
glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, kResultSize, GL_MAP_READ_BIT));
EXPECT_EQ(ptr[0], 1u);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
// Tests that deleting an in-flight image texture does not immediately delete the resource.
TEST_P(SimpleStateChangeTestComputeES31, DeleteImageTextureInUse)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment