Commit 0fd917a2 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Remove unnecessary same-layout transitions

Previously, only read-only layouts skipped same-layout transition. This is extended to color attachment and depth/stencil attachment layouts as well. On Nvidia+Linux, this does reduce the number of barriers in the Draw* benchmarks, though there's no visible CPU-side performance difference. GPU time is not measured in these tests, but it's possible that the driver already skips such transitions (i.e. this only saves us from recording an ineffective transition). Additionally, transfer dst and shader write same-layout transitions are turned into an execution barrier instead of a memory barrier to avoid an unnecessary flush. Currently, this particularly affects texture upload, and shows a 10% improvement in TextureUploadSubImageBenchmark. Note that this optimization is temporarily disabled due to a bug in the windows AMD driver. Bug: angleproject:3347 Change-Id: I7dc9d0b5dd2ad87ec19ae13277b330438038519f Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1659149Reviewed-by: 's avatarTobin Ehlis <tobine@google.com> Reviewed-by: 's avatarJamie Madill <jmadill@chromium.org> Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
parent b937093e
......@@ -204,6 +204,14 @@ void SecondaryCommandBuffer::executeCommands(VkCommandBuffer cmdBuffer)
vkCmdEndQuery(cmdBuffer, params->queryPool, params->query);
break;
}
case CommandID::ExecutionBarrier:
{
const ExecutionBarrierParams *params =
getParamPtr<ExecutionBarrierParams>(currentCommand);
vkCmdPipelineBarrier(cmdBuffer, params->stageMask, params->stageMask, 0, 0,
nullptr, 0, nullptr, 0, nullptr);
break;
}
case CommandID::FillBuffer:
{
const FillBufferParams *params = getParamPtr<FillBufferParams>(currentCommand);
......@@ -395,6 +403,9 @@ std::string SecondaryCommandBuffer::dumpCommands(const char *separator) const
case CommandID::EndQuery:
result += "EndQuery";
break;
case CommandID::ExecutionBarrier:
result += "ExecutionBarrier";
break;
case CommandID::FillBuffer:
result += "FillBuffer";
break;
......
......@@ -50,6 +50,7 @@ enum class CommandID : uint16_t
DrawIndexedInstanced,
DrawInstanced,
EndQuery,
ExecutionBarrier,
FillBuffer,
ImageBarrier,
MemoryBarrier,
......@@ -249,6 +250,12 @@ struct PipelineBarrierParams
};
VERIFY_4_BYTE_ALIGNMENT(PipelineBarrierParams)
struct ExecutionBarrierParams
{
VkPipelineStageFlags stageMask;
};
VERIFY_4_BYTE_ALIGNMENT(ExecutionBarrierParams)
struct ImageBarrierParams
{
VkPipelineStageFlags srcStageMask;
......@@ -439,6 +446,8 @@ class SecondaryCommandBuffer final : angle::NonCopyable
void endQuery(VkQueryPool queryPool, uint32_t query);
void executionBarrier(VkPipelineStageFlags stageMask);
void fillBuffer(const Buffer &dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize size,
......@@ -880,6 +889,13 @@ ANGLE_INLINE void SecondaryCommandBuffer::endQuery(VkQueryPool queryPool, uint32
paramStruct->query = query;
}
ANGLE_INLINE void SecondaryCommandBuffer::executionBarrier(VkPipelineStageFlags stageMask)
{
ExecutionBarrierParams *paramStruct =
initCommand<ExecutionBarrierParams>(CommandID::ExecutionBarrier);
paramStruct->stageMask = stageMask;
}
ANGLE_INLINE void SecondaryCommandBuffer::fillBuffer(const Buffer &dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize size,
......
......@@ -57,9 +57,14 @@ struct ImageMemoryBarrierData
VkAccessFlags srcAccessMask;
// If access is read-only, the execution barrier can be skipped altogether if retransitioning to
// the same layout. This is because read-after-read does not need an execution or memory
// barrier.
bool isReadOnlyAccess;
// the same layout. This is because read-after-read does not need an execution or memory
// barrier. Vulkan additionally guarantees color attachment and depth/stencil attachment
// read/writes to be in execution order, so they too won't need a barrier if transitioning to
// the same layout.
//
// Otherwise, same-layout transitions only require an execution barrier (and not a memory
// barrier).
bool sameLayoutTransitionRequiresBarrier;
};
// clang-format off
......@@ -74,7 +79,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
0,
// Transition from: there's no data in the image to care about.
0,
true,
false,
},
},
{
......@@ -100,7 +105,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
VK_ACCESS_TRANSFER_READ_BIT,
// Transition from: RAR and WAR don't need memory barrier.
0,
true,
false,
},
},
{
......@@ -113,7 +118,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
VK_ACCESS_TRANSFER_WRITE_BIT,
// Transition from: all writes must finish before barrier.
VK_ACCESS_TRANSFER_WRITE_BIT,
false,
true,
},
},
{
......@@ -126,7 +131,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
VK_ACCESS_SHADER_READ_BIT,
// Transition from: RAR and WAR don't need memory barrier.
0,
true,
false,
},
},
{
......@@ -139,7 +144,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
// Transition from: all writes must finish before barrier.
VK_ACCESS_SHADER_WRITE_BIT,
false,
true,
},
},
{
......@@ -152,7 +157,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
VK_ACCESS_SHADER_READ_BIT,
// Transition from: RAR and WAR don't need memory barrier.
0,
true,
false,
},
},
{
......@@ -196,7 +201,7 @@ constexpr angle::PackedEnumMap<ImageLayout, ImageMemoryBarrierData> kImageMemory
0,
// Transition from: RAR and WAR don't need memory barrier.
0,
true,
false,
},
},
};
......@@ -1662,10 +1667,14 @@ bool ImageHelper::isLayoutChangeNecessary(ImageLayout newLayout) const
{
const ImageMemoryBarrierData &layoutData = kImageMemoryBarrierData[mCurrentLayout];
// If transitioning to the same read-only layout (RAR), don't generate a barrier.
bool sameLayoutReadAfterRead = mCurrentLayout == newLayout && layoutData.isReadOnlyAccess;
// If transitioning to the same layout, we rarely need a barrier. RAR (read-after-read)
// doesn't need a barrier, and WAW (write-after-write) is guaranteed to not require a barrier
// for color attachment and depth/stencil attachment writes. Transfer dst and shader writes
// are basically the only cases where an execution barrier is still necessary.
bool sameLayoutAndNoNeedForBarrier =
mCurrentLayout == newLayout && !layoutData.sameLayoutTransitionRequiresBarrier;
return !sameLayoutReadAfterRead;
return !sameLayoutAndNoNeedForBarrier;
}
void ImageHelper::changeLayout(VkImageAspectFlags aspectMask,
......@@ -1694,6 +1703,24 @@ void ImageHelper::forceChangeLayoutAndQueue(VkImageAspectFlags aspectMask,
uint32_t newQueueFamilyIndex,
vk::CommandBuffer *commandBuffer)
{
// If transitioning to the same layout (and there is no queue transfer), an execution barrier
// suffices.
//
// TODO(syoussefi): AMD driver on windows has a bug where an execution barrier is not sufficient
// between transfer dst operations (even if the transfer is not to the same subresource!). A
// workaround may be necessary. http://anglebug.com/3554
if (mCurrentLayout == newLayout && mCurrentQueueFamilyIndex == newQueueFamilyIndex &&
mCurrentLayout != ImageLayout::TransferDst)
{
const ImageMemoryBarrierData &transition = kImageMemoryBarrierData[mCurrentLayout];
// In this case, the image is going to be used in the same way, so the src and dst stage
// masks must be necessarily equal.
ASSERT(transition.srcStageMask == transition.dstStageMask);
commandBuffer->executionBarrier(transition.dstStageMask);
return;
}
const ImageMemoryBarrierData &transitionFrom = kImageMemoryBarrierData[mCurrentLayout];
const ImageMemoryBarrierData &transitionTo = kImageMemoryBarrierData[newLayout];
......
......@@ -287,6 +287,8 @@ class CommandBuffer : public WrappedObject<CommandBuffer, VkCommandBuffer>
void endRenderPass();
void executeCommands(uint32_t commandBufferCount, const CommandBuffer *commandBuffers);
void executionBarrier(VkPipelineStageFlags stageMask);
void fillBuffer(const Buffer &dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize size,
......@@ -649,6 +651,12 @@ ANGLE_INLINE void CommandBuffer::pipelineBarrier(VkPipelineStageFlags srcStageMa
imageMemoryBarrierCount, imageMemoryBarriers);
}
ANGLE_INLINE void CommandBuffer::executionBarrier(VkPipelineStageFlags stageMask)
{
ASSERT(valid());
vkCmdPipelineBarrier(mHandle, stageMask, stageMask, 0, 0, nullptr, 0, nullptr, 0, nullptr);
}
ANGLE_INLINE void CommandBuffer::imageBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
const VkImageMemoryBarrier *imageMemoryBarrier)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment