Commit e9111885 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Optimize texture upload barriers

When flushing staged uploads to an image, a 64-wide bitfield is used to track subresources that are uploaded since the last barrier. If a collision is detected, a barrier is inserted and the bitfield is reset. If the image has more than 64 subresources, some subresources would map to the same bit and cause a few unnecessary barriers. Texture upload benchmarks show 5% to 10% improvement both in CPU and GPU time. Bug: angleproject:3347 Change-Id: I21dd23c78879bb01d58bc869ffc8bee06c79e6c1 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1700147 Reviewed-by: Tobin Ehlis <tobine@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org> Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
parent 0efe516e
......@@ -1257,8 +1257,16 @@ inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned in
#if defined(_MSC_VER)
# define ANGLE_ROTL(x, y) _rotl(x, y)
# define ANGLE_ROTL64(x, y) _rotl64(x, y)
# define ANGLE_ROTR16(x, y) _rotr16(x, y)
#elif defined(__clang__) && __has_builtin(__builtin_rotateleft32) && \
__has_builtin(__builtin_rotateleft64) && __has_builtin(__builtin_rotateright16)
# define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
# define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
# define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)
#else
// Portable fallback left-rotate for 32-bit values.
// `r` is expected to be in [0, 31]. The `& 31` masks keep both shift counts
// strictly below the operand width: the naive `(x << r) | (x >> (32 - r))`
// form shifts by 32 when r == 0, which is undefined behavior in C++. With the
// masks, r == 0 yields `x | x == x`, and all other rotations are unchanged.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    return (x << (r & 31)) | (x >> ((32 - r) & 31));
}
// Portable fallback left-rotate for 64-bit values.
// `r` is expected to be in [0, 63]. The `& 63` masks keep both shift counts
// strictly below the operand width: the naive `(x << r) | (x >> (64 - r))`
// form shifts by 64 when r == 0, which is undefined behavior in C++. r == 0
// does occur in practice (e.g. subresourceHashOffset == 0 for mip 0, layer 0
// in ImageHelper::flushStagedUpdates), so the masked form is required.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    return (x << (r & 63)) | (x >> ((64 - r) & 63));
}
// Portable fallback right-rotate for 16-bit values.
// `r` is expected to be in [0, 15]. The `& 15` masks keep both shift counts
// strictly below the operand width; the naive `x << (16 - r)` form shifts by
// 16 when r == 0 and only avoids undefined behavior by accident of integer
// promotion. The explicit cast documents the intentional narrowing of the
// promoted-int shift result back to uint16_t (and silences conversion
// warnings).
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    return static_cast<uint16_t>((x >> (r & 15)) | (x << ((16 - r) & 15)));
}
# define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
# define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
# define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)
#endif // namespace rx
......
......@@ -1847,7 +1847,7 @@ void ImageHelper::clearColor(const VkClearColorValue &color,
{
ASSERT(valid());
changeLayout(VK_IMAGE_ASPECT_COLOR_BIT, ImageLayout::TransferDst, commandBuffer);
ASSERT(mCurrentLayout == ImageLayout::TransferDst);
VkImageSubresourceRange range = {};
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
......@@ -1870,7 +1870,7 @@ void ImageHelper::clearDepthStencil(VkImageAspectFlags imageAspectFlags,
{
ASSERT(valid());
changeLayout(imageAspectFlags, ImageLayout::TransferDst, commandBuffer);
ASSERT(mCurrentLayout == ImageLayout::TransferDst);
VkImageSubresourceRange clearRange = {
/*aspectMask*/ clearAspectFlags,
......@@ -2444,6 +2444,16 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk,
std::vector<SubresourceUpdate> updatesToKeep;
const VkImageAspectFlags aspectFlags = GetFormatAspectFlags(mFormat->imageFormat());
// Upload levels and layers that don't conflict in parallel. The (level, layer) pair is hashed
// to `(level * mLayerCount + layer) % 64` and used to track whether that subresource is
// currently in transfer. If so, a barrier is inserted. If mLayerCount * mLevelCount > 64,
// there will be a few unnecessary barriers.
constexpr uint32_t kMaxParallelSubresourceUpload = 64;
uint64_t subresourceUploadsInProgress = 0;
// Start in TransferDst.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
for (SubresourceUpdate &update : mSubresourceUpdates)
{
ASSERT(update.updateSource == SubresourceUpdate::UpdateSource::Clear ||
......@@ -2471,6 +2481,7 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk,
updateMipLevel = dstSubresource.mipLevel;
updateBaseLayer = dstSubresource.baseArrayLayer;
updateLayerCount = dstSubresource.layerCount;
ASSERT(updateLayerCount != static_cast<uint32_t>(gl::ImageIndex::kEntireLevel));
}
// If the update level is not within the requested range, skip the update.
......@@ -2486,10 +2497,28 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk,
continue;
}
// Conservatively add a barrier between every update. This is to avoid races when updating
// the same subresource. A possible optimization could be to only issue this barrier when
// an overlap in updates is observed.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
if (updateLayerCount >= kMaxParallelSubresourceUpload)
{
// If there are more subresources than bits we can track, always insert a barrier.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
subresourceUploadsInProgress = std::numeric_limits<uint64_t>::max();
}
else
{
const uint64_t subresourceHashRange = angle::Bit<uint64_t>(updateLayerCount) - 1;
const uint32_t subresourceHashOffset =
(updateMipLevel * mLayerCount + updateBaseLayer) % kMaxParallelSubresourceUpload;
const uint64_t subresourceHash =
ANGLE_ROTL64(subresourceHashRange, subresourceHashOffset);
if ((subresourceUploadsInProgress & subresourceHash) != 0)
{
// If there's overlap in subresource upload, issue a barrier.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
subresourceUploadsInProgress = 0;
}
subresourceUploadsInProgress |= subresourceHash;
}
if (update.updateSource == SubresourceUpdate::UpdateSource::Clear)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment