Commit 39d7fc18 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Don't break the render pass on dispatch calls

The only reason a dispatch call may need to break the render pass implicitly is for read-after-writes where the write originates from the render pass but is not through a storage buffer/image. There are only two such scenrios possible: - Framebuffer attachment write -> texture sample - Transform feedback write -> ubo read All other uses of the buffers and textures that require breaking the render pass are handled by `glMemoryBarrier`. Bug: angleproject:5070 Change-Id: I92b50d69d8782097ee8ff477ac57da6209c326a1 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2698998 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org> Reviewed-by: 's avatarTim Van Patten <timvp@google.com>
parent 5d7f4e80
...@@ -1033,10 +1033,10 @@ angle::Result ContextVk::setupLineLoopDraw(const gl::Context *context, ...@@ -1033,10 +1033,10 @@ angle::Result ContextVk::setupLineLoopDraw(const gl::Context *context,
angle::Result ContextVk::setupDispatch(const gl::Context *context) angle::Result ContextVk::setupDispatch(const gl::Context *context)
{ {
// |setupDispatch| and |setupDraw| are special in that they flush dirty bits. Therefore they // Note: numerous tests miss a glMemoryBarrier call between the initial texture data upload and
// don't use the same APIs to record commands as the functions outside ContextVk. // the dispatch call. Flush the outside render pass command buffer as a workaround.
// The following ensures prior commands are flushed before we start processing dirty bits. // TODO: Remove this and fix tests. http://anglebug.com/5070
ANGLE_TRY(flushCommandsAndEndRenderPass()); ANGLE_TRY(flushOutsideRenderPassCommands());
// Create a local object to ensure we flush the descriptor updates to device when we leave this // Create a local object to ensure we flush the descriptor updates to device when we leave this
// function // function
...@@ -3265,7 +3265,7 @@ angle::Result ContextVk::syncState(const gl::Context *context, ...@@ -3265,7 +3265,7 @@ angle::Result ContextVk::syncState(const gl::Context *context,
ASSERT(gl::State::DIRTY_BIT_TEXTURE_BINDINGS > ASSERT(gl::State::DIRTY_BIT_TEXTURE_BINDINGS >
gl::State::DIRTY_BIT_PROGRAM_EXECUTABLE); gl::State::DIRTY_BIT_PROGRAM_EXECUTABLE);
iter.setLaterBit(gl::State::DIRTY_BIT_TEXTURE_BINDINGS); iter.setLaterBit(gl::State::DIRTY_BIT_TEXTURE_BINDINGS);
invalidateCurrentShaderResources(); ANGLE_TRY(invalidateCurrentShaderResources());
ANGLE_TRY(invalidateProgramExecutableHelper(context)); ANGLE_TRY(invalidateProgramExecutableHelper(context));
break; break;
} }
...@@ -3283,17 +3283,17 @@ angle::Result ContextVk::syncState(const gl::Context *context, ...@@ -3283,17 +3283,17 @@ angle::Result ContextVk::syncState(const gl::Context *context,
// Nothing to do. // Nothing to do.
break; break;
case gl::State::DIRTY_BIT_SHADER_STORAGE_BUFFER_BINDING: case gl::State::DIRTY_BIT_SHADER_STORAGE_BUFFER_BINDING:
invalidateCurrentShaderResources(); ANGLE_TRY(invalidateCurrentShaderResources());
break; break;
case gl::State::DIRTY_BIT_UNIFORM_BUFFER_BINDINGS: case gl::State::DIRTY_BIT_UNIFORM_BUFFER_BINDINGS:
invalidateCurrentShaderResources(); ANGLE_TRY(invalidateCurrentShaderResources());
break; break;
case gl::State::DIRTY_BIT_ATOMIC_COUNTER_BUFFER_BINDING: case gl::State::DIRTY_BIT_ATOMIC_COUNTER_BUFFER_BINDING:
invalidateCurrentShaderResources(); ANGLE_TRY(invalidateCurrentShaderResources());
invalidateDriverUniforms(); invalidateDriverUniforms();
break; break;
case gl::State::DIRTY_BIT_IMAGE_BINDINGS: case gl::State::DIRTY_BIT_IMAGE_BINDINGS:
invalidateCurrentShaderResources(); ANGLE_TRY(invalidateCurrentShaderResources());
break; break;
case gl::State::DIRTY_BIT_MULTISAMPLING: case gl::State::DIRTY_BIT_MULTISAMPLING:
// TODO(syoussefi): this should configure the pipeline to render as if // TODO(syoussefi): this should configure the pipeline to render as if
...@@ -3685,12 +3685,18 @@ angle::Result ContextVk::invalidateCurrentTextures(const gl::Context *context) ...@@ -3685,12 +3685,18 @@ angle::Result ContextVk::invalidateCurrentTextures(const gl::Context *context)
mComputeDirtyBits |= kTexturesAndDescSetDirtyBits; mComputeDirtyBits |= kTexturesAndDescSetDirtyBits;
ANGLE_TRY(updateActiveTextures(context)); ANGLE_TRY(updateActiveTextures(context));
// Take care of read-after-write hazards that require implicit synchronization.
if (executable->isCompute())
{
ANGLE_TRY(endRenderPassIfComputeReadAfterAttachmentWrite());
}
} }
return angle::Result::Continue; return angle::Result::Continue;
} }
void ContextVk::invalidateCurrentShaderResources() angle::Result ContextVk::invalidateCurrentShaderResources()
{ {
const gl::ProgramExecutable *executable = mState.getProgramExecutable(); const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable); ASSERT(executable);
...@@ -3706,6 +3712,12 @@ void ContextVk::invalidateCurrentShaderResources() ...@@ -3706,6 +3712,12 @@ void ContextVk::invalidateCurrentShaderResources()
mComputeDirtyBits |= kResourcesAndDescSetDirtyBits; mComputeDirtyBits |= kResourcesAndDescSetDirtyBits;
} }
// Take care of read-after-write hazards that require implicit synchronization.
if (hasUniformBuffers && executable->isCompute())
{
ANGLE_TRY(endRenderPassIfComputeReadAfterTransformFeedbackWrite());
}
// If memory barrier has been issued but the command buffers haven't been flushed, make sure // If memory barrier has been issued but the command buffers haven't been flushed, make sure
// they get a chance to do so if necessary on program and storage buffer/image binding change. // they get a chance to do so if necessary on program and storage buffer/image binding change.
const bool hasGLMemoryBarrierIssuedInCommandBuffers = const bool hasGLMemoryBarrierIssuedInCommandBuffers =
...@@ -3717,6 +3729,8 @@ void ContextVk::invalidateCurrentShaderResources() ...@@ -3717,6 +3729,8 @@ void ContextVk::invalidateCurrentShaderResources()
mGraphicsDirtyBits.set(DIRTY_BIT_MEMORY_BARRIER); mGraphicsDirtyBits.set(DIRTY_BIT_MEMORY_BARRIER);
mComputeDirtyBits.set(DIRTY_BIT_MEMORY_BARRIER); mComputeDirtyBits.set(DIRTY_BIT_MEMORY_BARRIER);
} }
return angle::Result::Continue;
} }
void ContextVk::invalidateGraphicsDriverUniforms() void ContextVk::invalidateGraphicsDriverUniforms()
...@@ -3730,13 +3744,13 @@ void ContextVk::invalidateDriverUniforms() ...@@ -3730,13 +3744,13 @@ void ContextVk::invalidateDriverUniforms()
mComputeDirtyBits |= kDriverUniformsAndBindingDirtyBits; mComputeDirtyBits |= kDriverUniformsAndBindingDirtyBits;
} }
void ContextVk::onFramebufferChange(FramebufferVk *framebufferVk) angle::Result ContextVk::onFramebufferChange(FramebufferVk *framebufferVk)
{ {
// This is called from FramebufferVk::syncState. Skip these updates if the framebuffer being // This is called from FramebufferVk::syncState. Skip these updates if the framebuffer being
// synced is the read framebuffer (which is not equal the draw framebuffer). // synced is the read framebuffer (which is not equal the draw framebuffer).
if (framebufferVk != vk::GetImpl(mState.getDrawFramebuffer())) if (framebufferVk != vk::GetImpl(mState.getDrawFramebuffer()))
{ {
return; return angle::Result::Continue;
} }
// Ensure that the pipeline description is updated. // Ensure that the pipeline description is updated.
...@@ -3751,10 +3765,12 @@ void ContextVk::onFramebufferChange(FramebufferVk *framebufferVk) ...@@ -3751,10 +3765,12 @@ void ContextVk::onFramebufferChange(FramebufferVk *framebufferVk)
if (mState.getProgramExecutable()) if (mState.getProgramExecutable())
{ {
invalidateCurrentShaderResources(); ANGLE_TRY(invalidateCurrentShaderResources());
} }
onDrawFramebufferRenderPassDescChange(framebufferVk, nullptr); onDrawFramebufferRenderPassDescChange(framebufferVk, nullptr);
return angle::Result::Continue;
} }
void ContextVk::onDrawFramebufferRenderPassDescChange(FramebufferVk *framebufferVk, void ContextVk::onDrawFramebufferRenderPassDescChange(FramebufferVk *framebufferVk,
...@@ -5579,6 +5595,86 @@ angle::Result ContextVk::flushCommandBuffersIfNecessary(const vk::CommandBufferA ...@@ -5579,6 +5595,86 @@ angle::Result ContextVk::flushCommandBuffersIfNecessary(const vk::CommandBufferA
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::Result ContextVk::endRenderPassIfComputeReadAfterTransformFeedbackWrite()
{
// Similar to flushCommandBuffersIfNecessary(), but using uniform buffers currently bound and
// used by the current (compute) program. This is to handle read-after-write hazards where the
// write originates from transform feedback.
if (mCurrentTransformFeedbackBuffers.empty())
{
return angle::Result::Continue;
}
const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable && executable->isCompute());
gl::ShaderMap<const gl::ProgramState *> programStates;
mExecutable->fillProgramStateMap(this, &programStates);
for (const gl::ShaderType shaderType : executable->getLinkedShaderStages())
{
const gl::ProgramState *programState = programStates[shaderType];
ASSERT(programState);
// Uniform buffers:
const std::vector<gl::InterfaceBlock> &blocks = programState->getUniformBlocks();
for (uint32_t bufferIndex = 0; bufferIndex < blocks.size(); ++bufferIndex)
{
const gl::InterfaceBlock &block = blocks[bufferIndex];
const gl::OffsetBindingPointer<gl::Buffer> &bufferBinding =
mState.getIndexedUniformBuffer(block.binding);
if (!block.isActive(shaderType) || bufferBinding.get() == nullptr)
{
continue;
}
vk::BufferHelper &buffer = vk::GetImpl(bufferBinding.get())->getBuffer();
if (mCurrentTransformFeedbackBuffers.contains(&buffer))
{
return flushCommandsAndEndRenderPass();
}
}
}
return angle::Result::Continue;
}
angle::Result ContextVk::endRenderPassIfComputeReadAfterAttachmentWrite()
{
// Similar to flushCommandBuffersIfNecessary(), but using textures currently bound and used by
// the current (compute) program. This is to handle read-after-write hazards where the write
// originates from a framebuffer attachment.
const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable && executable->isCompute() && executable->hasTextures());
const gl::ActiveTexturesCache &textures = mState.getActiveTexturesCache();
const gl::ActiveTextureTypeArray &textureTypes = executable->getActiveSamplerTypes();
for (size_t textureUnit : executable->getActiveSamplersMask())
{
gl::Texture *texture = textures[textureUnit];
gl::TextureType textureType = textureTypes[textureUnit];
if (texture == nullptr || textureType == gl::TextureType::Buffer)
{
continue;
}
TextureVk *textureVk = vk::GetImpl(texture);
ASSERT(textureVk != nullptr);
vk::ImageHelper &image = textureVk->getImage();
if (IsRenderPassStartedAndUsesImage(*mRenderPassCommands, image))
{
return flushCommandsAndEndRenderPass();
}
}
return angle::Result::Continue;
}
// Requires that trace is enabled to see the output, which is supported with is_debug=true // Requires that trace is enabled to see the output, which is supported with is_debug=true
void ContextVk::outputCumulativePerfCounters() void ContextVk::outputCumulativePerfCounters()
{ {
......
...@@ -319,7 +319,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText ...@@ -319,7 +319,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void invalidateDefaultAttribute(size_t attribIndex); void invalidateDefaultAttribute(size_t attribIndex);
void invalidateDefaultAttributes(const gl::AttributesMask &dirtyMask); void invalidateDefaultAttributes(const gl::AttributesMask &dirtyMask);
void onFramebufferChange(FramebufferVk *framebufferVk); angle::Result onFramebufferChange(FramebufferVk *framebufferVk);
void onDrawFramebufferRenderPassDescChange(FramebufferVk *framebufferVk, void onDrawFramebufferRenderPassDescChange(FramebufferVk *framebufferVk,
bool *renderPassDescChangedOut); bool *renderPassDescChangedOut);
void onHostVisibleBufferWrite() { mIsAnyHostVisibleBufferWritten = true; } void onHostVisibleBufferWrite() { mIsAnyHostVisibleBufferWritten = true; }
...@@ -778,7 +778,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText ...@@ -778,7 +778,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void invalidateCurrentDefaultUniforms(); void invalidateCurrentDefaultUniforms();
angle::Result invalidateCurrentTextures(const gl::Context *context); angle::Result invalidateCurrentTextures(const gl::Context *context);
void invalidateCurrentShaderResources(); angle::Result invalidateCurrentShaderResources();
void invalidateGraphicsDriverUniforms(); void invalidateGraphicsDriverUniforms();
void invalidateDriverUniforms(); void invalidateDriverUniforms();
...@@ -879,7 +879,22 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText ...@@ -879,7 +879,22 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void initIndexTypeMap(); void initIndexTypeMap();
// Read-after-write hazards are generally handled with |glMemoryBarrier| when the source of
// write is storage output. When the write is outside render pass, the natural placement of the
// render pass after the current outside render pass commands ensures that the memory barriers
// and image layout transitions automatically take care of such synchronizations.
//
// There are a number of read-after-write cases that require breaking the render pass however to
// preserve the order of operations:
//
// - Transform feedback write (in render pass), then vertex/index read (in render pass)
// - Transform feedback write (in render pass), then ubo read (outside render pass)
// - Framebuffer attachment write (in render pass), then texture sample (outside render pass)
// * Note that texture sampling inside render pass would cause a feedback loop
//
angle::Result endRenderPassIfTransformFeedbackBuffer(const vk::BufferHelper *buffer); angle::Result endRenderPassIfTransformFeedbackBuffer(const vk::BufferHelper *buffer);
angle::Result endRenderPassIfComputeReadAfterTransformFeedbackWrite();
angle::Result endRenderPassIfComputeReadAfterAttachmentWrite();
void populateTransformFeedbackBufferSet( void populateTransformFeedbackBufferSet(
size_t bufferCount, size_t bufferCount,
......
...@@ -1876,9 +1876,7 @@ angle::Result FramebufferVk::syncState(const gl::Context *context, ...@@ -1876,9 +1876,7 @@ angle::Result FramebufferVk::syncState(const gl::Context *context,
mFramebuffer = nullptr; mFramebuffer = nullptr;
// Notify the ContextVk to update the pipeline desc. // Notify the ContextVk to update the pipeline desc.
contextVk->onFramebufferChange(this); return contextVk->onFramebufferChange(this);
return angle::Result::Continue;
} }
void FramebufferVk::updateRenderPassDesc() void FramebufferVk::updateRenderPassDesc()
......
...@@ -22,6 +22,9 @@ ...@@ -22,6 +22,9 @@
//// General Vulkan expectations //// General Vulkan expectations
//// ////
// Test bug with missing glMemoryBarrier call
5673 VULKAN : KHR-GLES31.core.texture_buffer.texture_buffer_operations_transform_feedback = FAIL
// Framebuffer without attachments: // Framebuffer without attachments:
3579 VULKAN : KHR-GLES31.core.framebuffer_no_attachments.api = FAIL 3579 VULKAN : KHR-GLES31.core.framebuffer_no_attachments.api = FAIL
......
...@@ -6,6 +6,10 @@ ...@@ -6,6 +6,10 @@
// For now we only log Vulkan test expectations. More back-ends can follow as we need them. // For now we only log Vulkan test expectations. More back-ends can follow as we need them.
// Test bug with missing glMemoryBarrier call
5673 VULKAN LINUX : KHR-GLES32.core.texture_buffer.texture_buffer_operations_transform_feedback = FAIL
5673 VULKAN WIN : KHR-GLES32.core.texture_buffer.texture_buffer_operations_transform_feedback = FAIL
// Geometry shader support (conditioned to windows as that's our only bot capable of running these // Geometry shader support (conditioned to windows as that's our only bot capable of running these
// tests) // tests)
// Translator's lack of support for redefining gl_PerVertex // Translator's lack of support for redefining gl_PerVertex
......
...@@ -4439,6 +4439,70 @@ void main() ...@@ -4439,6 +4439,70 @@ void main()
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
} }
// Tests that writing to a buffer with transform feedback in one draw call followed by reading from
// it in a dispatch call works correctly. This requires an implicit barrier in between the calls.
TEST_P(SimpleStateChangeTestES31, TransformFeedbackThenReadWithCompute)
{
constexpr GLsizei kBufferSize = sizeof(float) * 4 * 6;
GLBuffer buffer;
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, buffer);
glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, kBufferSize, nullptr, GL_STATIC_DRAW);
glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, buffer);
std::vector<std::string> tfVaryings = {"gl_Position"};
ANGLE_GL_PROGRAM_TRANSFORM_FEEDBACK(program, essl3_shaders::vs::Simple(),
essl3_shaders::fs::Green(), tfVaryings,
GL_INTERLEAVED_ATTRIBS);
glUseProgram(program);
glBeginTransformFeedback(GL_TRIANGLES);
drawQuad(program, essl3_shaders::PositionAttrib(), 0.0f);
glEndTransformFeedback();
constexpr char kCS[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1) in;
layout(binding = 0) uniform Input
{
vec4 data[3];
};
layout(binding = 0, std430) buffer Output {
bool pass;
};
void main()
{
pass = data[0] == vec4(-1, 1, 0, 1) &&
data[1] == vec4(-1, -1, 0, 1) &&
data[2] == vec4(1, -1, 0, 1);
})";
ANGLE_GL_COMPUTE_PROGRAM(readProgram, kCS);
glUseProgram(readProgram);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, buffer);
constexpr GLsizei kResultSize = sizeof(uint32_t);
GLBuffer resultBuffer;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, resultBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, kResultSize, nullptr, GL_STATIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, resultBuffer);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
EXPECT_GL_NO_ERROR();
// Verify the output of rendering
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
// Verify the output from the compute shader
const uint32_t *ptr = reinterpret_cast<const uint32_t *>(
glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, kResultSize, GL_MAP_READ_BIT));
EXPECT_EQ(ptr[0], 1u);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
// Tests that deleting an in-flight image texture does not immediately delete the resource. // Tests that deleting an in-flight image texture does not immediately delete the resource.
TEST_P(SimpleStateChangeTestComputeES31, DeleteImageTextureInUse) TEST_P(SimpleStateChangeTestComputeES31, DeleteImageTextureInUse)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment