Commit ee4e0866 by Tim Van Patten Committed by Commit Bot

Vulkan: Add descriptor set allocation counters

Add descriptor set allocation counters for the following:
- ContextVk
  - Driver uniform allocations for graphics and compute pipelines.
- ProgramExecutableVk
  - ANGLE driver uniforms
  - Uniforms
  - Textures
  - Other shader resources
- UtilsVk
  - All of the UtilsVk::Function types increment the same counter

Each object's counters live within the object itself, and the cumulative total is output as part of that object's destruction.

On Present, all of the descriptor set counts are collected into a single total, which is used to update the overlay each frame.

In order to see the cumulative total output for each object, the following GN args must be enabled:
  is_debug = true
  angle_enable_perf_counter_output = true

To see the descriptor set allocation overlay:
  ANGLE_OVERLAY=VulkanDescriptorSetAllocations

Bug: angleproject:5067
Test: Manual verification with angle_perftests
Change-Id: Ie45fda56ade3e68bfba7bf6da9554eb05a02c6b6
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2429487
Commit-Queue: Tim Van Patten <timvp@google.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Charlie Lao <cclao@google.com>
parent 626a4185
......@@ -606,6 +606,9 @@ config("libANGLE_config") {
if (angle_enable_overlay) {
defines += [ "ANGLE_ENABLE_OVERLAY=1" ]
}
if (angle_enable_perf_counter_output) {
defines += [ "ANGLE_ENABLE_PERF_COUNTER_OUTPUT=1" ]
}
}
angle_source_set("libANGLE_headers") {
......
......@@ -131,6 +131,9 @@ declare_args() {
# Disable overlay by default
angle_enable_overlay = false
# Disable performance counter output by default
angle_enable_perf_counter_output = false
}
if (!defined(angle_zlib_compression_utils_dir)) {
......
{
"src/libANGLE/Overlay_autogen.cpp":
"6c9c7df59562390505222145c1f32277",
"849f447a220cb0ce00a41f99db179a6b",
"src/libANGLE/Overlay_autogen.h":
"03ac72d8286f1f933696fa3dabb75eb1",
"4f29dd0e9c2030b98b396fdf03eaeb29",
"src/libANGLE/gen_overlay_widgets.py":
"f4395481db010c82af2e2981353e8592",
"src/libANGLE/overlay_widgets.json":
"dd9d2a72035e754bbc5f614410e76df1"
"93205f3d9585228428bc62463f478dc6"
}
\ No newline at end of file
......@@ -2798,7 +2798,7 @@ void CaptureMidExecutionSetup(const gl::Context *context,
const gl::ResourceMap<gl::Shader, gl::ShaderProgramID> &shaders =
shadersAndPrograms.getShadersForCapture();
const gl::ResourceMap<gl::Program, gl::ShaderProgramID> &programs =
shadersAndPrograms.getProgramsForCapture();
shadersAndPrograms.getProgramsForCaptureAndPerf();
// Capture Program binary state. Use max ID as a temporary shader ID.
gl::ShaderProgramID tempShaderID = {resourceTracker->getMaxShaderPrograms()};
......
......@@ -300,10 +300,10 @@ void AppendWidgetDataHelper::AppendRunningHistogramCommon(const overlay::Widget
OverlayWidgetCounts *widgetCounts,
FormatHistogramTitleFunc formatFunc)
{
const overlay::RunningHistogram *secondaryCommandBufferPoolWaste =
const overlay::RunningHistogram *runningHistogram =
static_cast<const overlay::RunningHistogram *>(widget);
std::vector<size_t> histogram = CreateHistogram(secondaryCommandBufferPoolWaste->runningValues);
std::vector<size_t> histogram = CreateHistogram(runningHistogram->runningValues);
auto peakRangeIt = std::max_element(histogram.rbegin(), histogram.rend());
const size_t peakRangeValue = *peakRangeIt;
const int32_t graphHeight = std::abs(widget->coords[3] - widget->coords[1]);
......@@ -320,8 +320,8 @@ void AppendWidgetDataHelper::AppendRunningHistogramCommon(const overlay::Widget
size_t maxValueRange = std::distance(maxValueIter, histogram.rend() - 1);
std::string text = formatFunc(peakRange, maxValueRange, histogram.size());
AppendTextCommon(&secondaryCommandBufferPoolWaste->description, imageExtent, text,
textWidget, widgetCounts);
AppendTextCommon(&runningHistogram->description, imageExtent, text, textWidget,
widgetCounts);
}
}
......@@ -431,6 +431,21 @@ void AppendWidgetDataHelper::AppendVulkanWriteDescriptorSetCount(const overlay::
AppendRunningGraphCommon(widget, imageExtent, textWidget, graphWidget, widgetCounts, format);
}
// Appends the data for the "Descriptor Set Allocations" running graph. All of the work is
// delegated to AppendRunningGraphCommon; this function only supplies the title formatter, which
// embeds the value it is handed (labelled "Max") into the graph's text.
void AppendWidgetDataHelper::AppendVulkanDescriptorSetAllocations(const overlay::Widget *widget,
                                                                  const gl::Extents &imageExtent,
                                                                  TextWidgetData *textWidget,
                                                                  GraphWidgetData *graphWidget,
                                                                  OverlayWidgetCounts *widgetCounts)
{
    // Captureless so it stays usable wherever the other graph formatters are accepted.
    auto formatTitle = [](size_t maxValue) -> std::string {
        std::ostringstream title;
        title << "Descriptor Set Allocations (Max: ";
        title << maxValue;
        title << ")";
        return title.str();
    };

    AppendRunningGraphCommon(widget, imageExtent, textWidget, graphWidget, widgetCounts,
                             formatTitle);
}
std::ostream &AppendWidgetDataHelper::OutputPerSecond(std::ostream &out,
const overlay::PerSecond *perSecond)
{
......
......@@ -274,6 +274,49 @@ void Overlay::initOverlayWidgets()
widget->description.color[3] = 1.0f;
}
}
// VulkanDescriptorSetAllocations widget: a RunningGraph constructed with 60 plus its text
// description. The literal values below match the "VulkanDescriptorSetAllocations" entry in
// overlay_widgets.json (coords [-50, 250], bar_width 6, height 100, description length 40).
{
RunningGraph *widget = new RunningGraph(60);
{
// Graph geometry and color.
const int32_t fontSize = GetFontSize(0, kLargeFont);
// NOTE(review): a negative X is presumably measured from the right edge - confirm.
const int32_t offsetX = -50;
const int32_t offsetY = 250;
// 6 units of width for every entry in runningValues.
const int32_t width = 6 * static_cast<uint32_t>(widget->runningValues.size());
const int32_t height = 100;
widget->type = WidgetType::RunningGraph;
widget->fontSize = fontSize;
// coords are {left, top, right, bottom}; offsetX is the right side of the graph.
widget->coords[0] = offsetX - width;
widget->coords[1] = offsetY;
widget->coords[2] = offsetX;
widget->coords[3] = offsetY + height;
// 255, 0, 75, 200 on a 0-255 scale, expressed as floats.
widget->color[0] = 1.0f;
widget->color[1] = 0.0f;
widget->color[2] = 0.294117647059f;
widget->color[3] = 0.78431372549f;
}
// Store the widget under its id. The raw pointer is still used below to fill in the description.
// NOTE(review): presumably the mOverlayWidgets slot owns the widget from here on - confirm.
mState.mOverlayWidgets[WidgetId::VulkanDescriptorSetAllocations].reset(widget);
{
// Description text, positioned relative to the graph stored above: it starts at the graph's
// coords[0] and ends at the graph's coords[1].
const int32_t fontSize = GetFontSize(kFontLayerSmall, kLargeFont);
const int32_t offsetX =
mState.mOverlayWidgets[WidgetId::VulkanDescriptorSetAllocations]->coords[0];
const int32_t offsetY =
mState.mOverlayWidgets[WidgetId::VulkanDescriptorSetAllocations]->coords[1];
// 40 glyphs wide and one glyph tall in the selected font.
const int32_t width = 40 * kFontGlyphWidths[fontSize];
const int32_t height = kFontGlyphHeights[fontSize];
widget->description.type = WidgetType::Text;
widget->description.fontSize = fontSize;
widget->description.coords[0] = offsetX;
// Clamp so coords[1] is never below 1 and coords[2] is never above -1.
widget->description.coords[1] = std::max(offsetY - height, 1);
widget->description.coords[2] = std::min(offsetX + width, -1);
widget->description.coords[3] = offsetY;
// Same hue as the graph, but with alpha 1.0.
widget->description.color[0] = 1.0f;
widget->description.color[1] = 0.0f;
widget->description.color[2] = 0.294117647059f;
widget->description.color[3] = 1.0f;
}
}
}
} // namespace gl
......@@ -26,6 +26,8 @@ enum class WidgetId
VulkanSecondaryCommandBufferPoolWaste,
// Number of Descriptor Set writes in a frame (Count).
VulkanWriteDescriptorSetCount,
// Descriptor Set Allocations.
VulkanDescriptorSetAllocations,
InvalidEnum,
EnumCount = InvalidEnum,
......@@ -39,6 +41,7 @@ enum class WidgetId
PROC(VulkanRenderPassCount) \
PROC(VulkanRenderPassBufferCount) \
PROC(VulkanSecondaryCommandBufferPoolWaste) \
PROC(VulkanWriteDescriptorSetCount)
PROC(VulkanWriteDescriptorSetCount) \
PROC(VulkanDescriptorSetAllocations)
} // namespace gl
......@@ -164,9 +164,12 @@ class ShaderProgramManager : public ResourceManagerBase
return mPrograms.query(handle);
}
// For capture only.
// For capture and performance counters only.
const ResourceMap<Shader, ShaderProgramID> &getShadersForCapture() const { return mShaders; }
const ResourceMap<Program, ShaderProgramID> &getProgramsForCapture() const { return mPrograms; }
const ResourceMap<Program, ShaderProgramID> &getProgramsForCaptureAndPerf() const
{
return mPrograms;
}
protected:
~ShaderProgramManager() override;
......
......@@ -991,7 +991,7 @@ Result SerializeContext(gl::BinaryOutputStream *bos, const gl::Context *context)
SerializeShader(bos, shaderPtr);
}
const gl::ResourceMap<gl::Program, gl::ShaderProgramID> &programManager =
shaderProgramManager.getProgramsForCapture();
shaderProgramManager.getProgramsForCaptureAndPerf();
for (const auto &program : programManager)
{
gl::Program *programPtr = program.second;
......
......@@ -116,6 +116,22 @@
"font": "small",
"length": 40
}
},
{
"name": "VulkanDescriptorSetAllocations",
"comment": "Descriptor Set Allocations.",
"type": "RunningGraph(60)",
"color": [255, 0, 75, 200],
"coords": [-50, 250],
"bar_width": 6,
"height": 100,
"description": {
"color": [255, 0, 75, 255],
"coords": ["VulkanDescriptorSetAllocations.left.align",
"VulkanDescriptorSetAllocations.top.adjacent"],
"font": "small",
"length": 40
}
}
]
}
......@@ -702,6 +702,7 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
mGpuEventTimestampOrigin(0),
mPerfCounters{},
mObjectPerfCounters{},
mContextPriority(renderer->getDriverPriority(GetContextPriority(state))),
mCurrentIndirectBuffer(nullptr),
mShareGroupVk(vk::GetImpl(state.getShareGroup()))
......@@ -784,12 +785,16 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
mObjectPerfCounters.descriptorSetsAllocated.fill(0);
}
ContextVk::~ContextVk() = default;
void ContextVk::onDestroy(const gl::Context *context)
{
outputCumulativePerfCounters();
// Remove context from the share group
mShareGroupVk->getShareContextSet()->erase(this);
......@@ -1778,6 +1783,42 @@ void ContextVk::updateOverlayOnPresent()
mPerfCounters.writeDescriptorSets = 0;
}
{
    // Descriptor set allocation overlay: add up the cumulative allocation counters kept by each
    // object, then graph how many allocations were added since the previous present.
    uint32_t descriptorSetAllocations = 0;

    // ContextVk's descriptor set allocations
    for (const uint32_t count : mObjectPerfCounters.descriptorSetsAllocated)
    {
        descriptorSetAllocations += count;
    }

    // UtilsVk's descriptor set allocations
    descriptorSetAllocations += mUtils.getObjectPerfCounters().descriptorSetsAllocated;

    // ProgramExecutableVk's descriptor set allocations
    const gl::State &state = getState();
    const gl::ShaderProgramManager &shadersAndPrograms =
        state.getShaderProgramManagerForCapture();
    const gl::ResourceMap<gl::Program, gl::ShaderProgramID> &programs =
        shadersAndPrograms.getProgramsForCaptureAndPerf();
    for (const std::pair<GLuint, gl::Program *> &resource : programs)
    {
        ProgramVk *programVk = vk::GetImpl(resource.second);
        ProgramExecutableVk::PerfCounters progPerfCounters =
            programVk->getExecutable().getObjectPerfCounters();

        for (const uint32_t count : progPerfCounters.descriptorSetsAllocated)
        {
            descriptorSetAllocations += count;
        }
    }

    // The total above only covers the programs currently in the resource map, so it can be
    // smaller than the total recorded at the previous present once a program (and the counters
    // stored in its executable) is no longer there. Clamp the per-frame delta at zero so the
    // unsigned subtraction cannot wrap around to a huge value in the graph.
    const uint32_t previousTotal = mPerfCounters.descriptorSetAllocations;
    const uint32_t newAllocations =
        descriptorSetAllocations > previousTotal ? descriptorSetAllocations - previousTotal : 0;

    gl::RunningGraphWidget *descriptorSetAllocationCount =
        overlay->getRunningGraphWidget(gl::WidgetId::VulkanDescriptorSetAllocations);
    descriptorSetAllocationCount->add(newAllocations);
    descriptorSetAllocationCount->next();

    // Remember this frame's total as the baseline for the next present.
    mPerfCounters.descriptorSetAllocations = descriptorSetAllocations;
}
}
void ContextVk::addOverlayUsedBuffersCount(vk::CommandBufferHelper *commandBuffer)
......@@ -4077,6 +4118,7 @@ angle::Result ContextVk::updateDriverUniformsDescriptorSet(
ANGLE_TRY(mDriverUniformsDescriptorPools[pipelineType].allocateSetsAndGetInfo(
this, driverUniforms->descriptorSetLayout.get().ptr(), 1,
&driverUniforms->descriptorPoolBinding, &driverUniforms->descriptorSet, &newPoolAllocated));
mObjectPerfCounters.descriptorSetsAllocated[ToUnderlying(pipelineType)]++;
// Clear descriptor set cache. It may no longer be valid.
if (newPoolAllocated)
......@@ -5340,4 +5382,27 @@ bool ContextVk::shouldSwitchToReadOnlyDepthFeedbackLoopMode(const gl::Context *c
texture->isBoundToFramebuffer(mDrawFramebuffer->getState().getFramebufferSerial()) &&
!mDrawFramebuffer->isReadOnlyDepthFeedbackLoopMode();
}
// Logs the number of descriptor sets this context allocated, one INFO() line per pipeline type
// that allocated at least one. Does nothing unless vk::kOutputCumulativePerfCounters is set.
// Requires that trace is enabled to see the output, which is supported with is_debug=true
void ContextVk::outputCumulativePerfCounters()
{
    if (vk::kOutputCumulativePerfCounters)
    {
        const auto &allocations = mObjectPerfCounters.descriptorSetsAllocated;

        // The header line is always written, even when every counter is zero.
        INFO() << "Context Descriptor Set Allocations: ";

        size_t typeIndex = 0;
        for (const uint32_t allocated : allocations)
        {
            if (allocated != 0)
            {
                INFO() << " PipelineType " << typeIndex << ": " << allocated;
            }
            ++typeIndex;
        }
    }
}
} // namespace rx
......@@ -768,6 +768,14 @@ class ContextVk : public ContextImpl, public vk::Context
double cpuTimestampS;
};
// Performance Counters specific to this object type
// DescriptorSetList holds one uint32_t slot per ContextVk::PipelineType value.
using DescriptorSetList =
std::array<uint32_t, ToUnderlying(ContextVk::PipelineType::EnumCount)>;
struct PerfCounters
{
// Cumulative count of driver-uniform descriptor set allocations, indexed by pipeline type.
// Incremented in updateDriverUniformsDescriptorSet().
DescriptorSetList descriptorSetsAllocated;
};
class ScopedDescriptorSetUpdates;
angle::Result setupDraw(const gl::Context *context,
......@@ -989,6 +997,8 @@ class ContextVk : public ContextImpl, public vk::Context
bool shouldSwitchToReadOnlyDepthFeedbackLoopMode(const gl::Context *context,
gl::Texture *texture) const;
void outputCumulativePerfCounters();
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers;
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers;
......@@ -1157,6 +1167,7 @@ class ContextVk : public ContextImpl, public vk::Context
// A mix of per-frame and per-run counters.
vk::PerfCounters mPerfCounters;
PerfCounters mObjectPerfCounters;
gl::State::DirtyBits mPipelineDirtyBitsMask;
......
......@@ -182,10 +182,14 @@ ProgramExecutableVk::ProgramExecutableVk()
mNumDefaultUniformDescriptors(0),
mDynamicBufferOffsets{},
mProgram(nullptr),
mProgramPipeline(nullptr)
mProgramPipeline(nullptr),
mObjectPerfCounters{}
{}
ProgramExecutableVk::~ProgramExecutableVk() = default;
ProgramExecutableVk::~ProgramExecutableVk()
{
outputCumulativePerfCounters();
}
void ProgramExecutableVk::reset(ContextVk *contextVk)
{
......@@ -424,6 +428,8 @@ angle::Result ProgramExecutableVk::allocateDescriptorSetAndGetInfo(
&mDescriptorSets[ToUnderlying(descriptorSetIndex)], newPoolAllocatedOut));
mEmptyDescriptorSets[ToUnderlying(descriptorSetIndex)] = VK_NULL_HANDLE;
++mObjectPerfCounters.descriptorSetsAllocated[ToUnderlying(descriptorSetIndex)];
return angle::Result::Continue;
}
......@@ -1561,6 +1567,8 @@ angle::Result ProgramExecutableVk::updateDescriptorSets(ContextVk *contextVk,
contextVk, descriptorSetLayout.ptr(), 1,
&mDescriptorPoolBindings[descriptorSetIndex],
&mEmptyDescriptorSets[descriptorSetIndex]));
++mObjectPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
}
descSet = mEmptyDescriptorSets[descriptorSetIndex];
}
......@@ -1582,4 +1590,44 @@ angle::Result ProgramExecutableVk::updateDescriptorSets(ContextVk *contextVk,
return angle::Result::Continue;
}
// Logs how many descriptor sets this executable allocated, broken down by descriptor set index.
// Does nothing unless vk::kOutputCumulativePerfCounters is set.
// Requires that trace is enabled to see the output, which is supported with is_debug=true
void ProgramExecutableVk::outputCumulativePerfCounters()
{
    if (!vk::kOutputCumulativePerfCounters)
    {
        return;
    }

    // Build one newline-terminated entry for every index with at least one allocation.
    std::ostringstream report;
    size_t setIndex = 0;
    for (const uint32_t allocated : mObjectPerfCounters.descriptorSetsAllocated)
    {
        if (allocated != 0)
        {
            report << " DescriptorSetIndex " << setIndex << ": " << allocated << "\n";
        }
        ++setIndex;
    }

    // Executables that never allocated a descriptor set produce no output at all.
    const std::string reportStr = report.str();
    if (reportStr.empty())
    {
        return;
    }

    INFO() << "ProgramExecutable: " << this << ":";

    // Send every entry through its own INFO() so that each line carries the INFO prefix (file,
    // line number, etc.) instead of only the first one.
    // https://stackoverflow.com/a/12514641
    std::istringstream entries(reportStr);
    std::string entry;
    while (std::getline(entries, entry))
    {
        INFO() << entry;
    }
}
} // namespace rx
......@@ -168,6 +168,15 @@ class ProgramExecutableVk
mProgramPipeline = pipeline;
}
// One uint32_t slot per DescriptorSetIndex value.
using DescriptorSetCountList = std::array<uint32_t, DescriptorSetIndex::EnumCount>;
// Performance and resource counters.
struct PerfCounters
{
// Cumulative descriptor set allocations made by this executable, indexed by descriptor set
// index. Incremented in allocateDescriptorSetAndGetInfo() and in updateDescriptorSets().
DescriptorSetCountList descriptorSetsAllocated;
};
// Read-only access to this executable's cumulative counters. Returned by const reference: a
// top-level const on a by-value return adds no protection and only forces a copy per call.
// Callers that want a snapshot can still copy-initialize a PerfCounters from the result.
const PerfCounters &getObjectPerfCounters() const { return mObjectPerfCounters; }
private:
friend class ProgramVk;
friend class ProgramPipelineVk;
......@@ -224,6 +233,8 @@ class ProgramExecutableVk
DescriptorSetIndex descriptorSetIndex,
VkDescriptorSetLayout descriptorSetLayout);
void outputCumulativePerfCounters();
// Descriptor sets for uniform blocks and textures for this program.
vk::DescriptorSetLayoutArray<VkDescriptorSet> mDescriptorSets;
vk::DescriptorSetLayoutArray<VkDescriptorSet> mEmptyDescriptorSets;
......@@ -260,6 +271,8 @@ class ProgramExecutableVk
ProgramVk *mProgram;
ProgramPipelineVk *mProgramPipeline;
PerfCounters mObjectPerfCounters;
};
} // namespace rx
......
......@@ -15,6 +15,7 @@
#include "libANGLE/renderer/vulkan/GlslangWrapperVk.h"
#include "libANGLE/renderer/vulkan/RenderTargetVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
namespace rx
{
......@@ -530,7 +531,7 @@ uint32_t UtilsVk::GetGenerateMipmapMaxLevels(ContextVk *contextVk)
: kGenerateMipmapMaxLevels;
}
UtilsVk::UtilsVk() = default;
UtilsVk::UtilsVk() : mObjectPerfCounters{} {}
UtilsVk::~UtilsVk() = default;
......@@ -538,6 +539,8 @@ void UtilsVk::destroy(RendererVk *renderer)
{
VkDevice device = renderer->getDevice();
outputCumulativePerfCounters();
for (Function f : angle::AllEnums<Function>())
{
for (auto &descriptorSetLayout : mDescriptorSetLayouts[f])
......@@ -2632,6 +2635,9 @@ angle::Result UtilsVk::allocateDescriptorSet(ContextVk *contextVk,
.ptr(),
1, bindingOut, descriptorSetOut));
bindingOut->get().updateSerial(contextVk->getCurrentQueueSerial());
mObjectPerfCounters.descriptorSetsAllocated++;
return angle::Result::Continue;
}
......@@ -2647,4 +2653,15 @@ UtilsVk::ClearFramebufferParameters::ClearFramebufferParameters()
depthStencilClearValue{}
{}
// Logs the total number of descriptor sets allocated through this UtilsVk. Does nothing unless
// vk::kOutputCumulativePerfCounters is set.
// Requires that trace is enabled to see the output, which is supported with is_debug=true
void UtilsVk::outputCumulativePerfCounters()
{
    if (vk::kOutputCumulativePerfCounters)
    {
        const uint32_t totalAllocated = mObjectPerfCounters.descriptorSetsAllocated;
        INFO() << "Utils Descriptor Set Allocations: " << totalAllocated;
    }
}
} // namespace rx
......@@ -166,6 +166,12 @@ class UtilsVk : angle::NonCopyable
bool unresolveStencil;
};
// Performance counters specific to UtilsVk.
struct PerfCounters
{
// Total descriptor set allocations for all UtilsVk::Functions
// Incremented in UtilsVk::allocateDescriptorSet(); a single counter is shared by every
// function type rather than one counter per function.
uint32_t descriptorSetsAllocated;
};
// Based on the maximum number of levels in GenerateMipmap.comp.
static constexpr uint32_t kGenerateMipmapMaxLevels = 6;
static uint32_t GetGenerateMipmapMaxLevels(ContextVk *contextVk);
......@@ -262,6 +268,8 @@ class UtilsVk : angle::NonCopyable
const vk::ImageView *destView,
const OverlayDrawParameters &params);
// Read-only access to the cumulative UtilsVk counters. Returned by const reference: a top-level
// const on a by-value return adds no protection and only forces a copy per call.
const PerfCounters &getObjectPerfCounters() const { return mObjectPerfCounters; }
private:
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
......@@ -502,6 +510,8 @@ class UtilsVk : angle::NonCopyable
vk::RefCountedDescriptorPoolBinding *bindingOut,
VkDescriptorSet *descriptorSetOut);
void outputCumulativePerfCounters();
angle::PackedEnumMap<Function, vk::DescriptorSetLayoutPointerArray> mDescriptorSetLayouts;
angle::PackedEnumMap<Function, vk::BindingPointer<vk::PipelineLayout>> mPipelineLayouts;
angle::PackedEnumMap<Function, vk::DynamicDescriptorPool> mDescriptorPools;
......@@ -531,6 +541,8 @@ class UtilsVk : angle::NonCopyable
vk::Sampler mPointSampler;
vk::Sampler mLinearSampler;
PerfCounters mObjectPerfCounters;
};
} // namespace rx
......
......@@ -70,6 +70,7 @@ namespace rx
{
class DisplayVk;
class ImageVk;
class ProgramExecutableVk;
class RenderTargetVk;
class RendererVk;
class RenderPassCache;
......@@ -781,6 +782,12 @@ class ResourceSerialFactory final : angle::NonCopyable
std::atomic<uint32_t> mCurrentUniqueSerial;
};
// Compile-time switch checked by the outputCumulativePerfCounters() helpers before they log.
// It takes its value from the ANGLE_ENABLE_PERF_COUNTER_OUTPUT define, which the
// angle_enable_perf_counter_output GN arg sets to 1, and is false when the define is absent.
#if defined(ANGLE_ENABLE_PERF_COUNTER_OUTPUT)
constexpr bool kOutputCumulativePerfCounters = ANGLE_ENABLE_PERF_COUNTER_OUTPUT;
#else
constexpr bool kOutputCumulativePerfCounters = false;
#endif
// Performance and resource counters.
struct RenderPassPerfCounters
{
......@@ -823,6 +830,7 @@ struct PerfCounters
uint32_t depthAttachmentResolves;
uint32_t stencilAttachmentResolves;
uint32_t readOnlyDepthStencilRenderPasses;
uint32_t descriptorSetAllocations;
};
// A Vulkan image level index.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment