Commit 02acc5ee, authored by Jamie Madill, committed by Commit Bot

Vulkan: More cleanups to perf counters.

This refactors the pipeline type in the ContextVk class to also use a packed enum map. It also expands the object perf counters to store both a specific and a cumulative version for use in different cases.

Bug: angleproject:5736
Change-Id: I6ff78e38065eb577f2b95b1d9c4f9cc31d7f325f
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2774184
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Tim Van Patten <timvp@google.com>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
parent ecbd870f
......@@ -58,11 +58,11 @@ class EnumIterator final
UnderlyingType mValue;
};
template <typename E>
template <typename E, size_t MaxSize = EnumSize<E>()>
struct AllEnums
{
EnumIterator<E> begin() const { return {static_cast<E>(0)}; }
EnumIterator<E> end() const { return {E::InvalidEnum}; }
EnumIterator<E> end() const { return {static_cast<E>(MaxSize)}; }
};
// PackedEnumMap<E, T> is like an std::array<T, E::EnumCount> but is indexed with enum values. It
......@@ -156,6 +156,16 @@ class PackedEnumMap
bool operator==(const PackedEnumMap &rhs) const { return mPrivateData == rhs.mPrivateData; }
bool operator!=(const PackedEnumMap &rhs) const { return mPrivateData != rhs.mPrivateData; }
// Element-wise accumulation: for every enum value covered by AllEnums<E, MaxSize>, adds the
// entry of |rhs| to the entry stored under the same enum value in this map. The overload is
// only enabled when SubT is an integral type, and it returns void.
template <typename SubT = T>
typename std::enable_if<std::is_integral<SubT>::value>::type operator+=(
    const PackedEnumMap<E, SubT, MaxSize> &rhs)
{
    const AllEnums<E, MaxSize> everyKey{};
    auto last = everyKey.end();
    for (auto it = everyKey.begin(); it != last; ++it)
    {
        const E key = *it;
        at(key) += rhs[key];
    }
}
private:
Storage mPrivateData;
};
......
......@@ -397,7 +397,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
mGpuEventTimestampOrigin(0),
mPerfCounters{},
mObjectPerfCounters{},
mContextPerfCounters{},
mCumulativeContextPerfCounters{},
mContextPriority(renderer->getDriverPriority(GetContextPriority(state))),
mShareGroupVk(vk::GetImpl(state.getShareGroup()))
{
......@@ -521,8 +522,6 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
mObjectPerfCounters.descriptorSetsAllocated.fill(0);
}
ContextVk::~ContextVk() = default;
......@@ -1790,12 +1789,13 @@ void ContextVk::syncObjectPerfCounters()
uint32_t descriptorSetAllocations = 0;
// ContextVk's descriptor set allocations
for (const uint32_t count : mObjectPerfCounters.descriptorSetsAllocated)
ContextVkPerfCounters contextCounters = getAndResetObjectPerfCounters();
for (uint32_t count : contextCounters.descriptorSetsAllocated)
{
descriptorSetAllocations += count;
}
// UtilsVk's descriptor set allocations
descriptorSetAllocations += mUtils.getObjectPerfCounters().descriptorSetsAllocated;
descriptorSetAllocations += mUtils.getAndResetObjectPerfCounters().descriptorSetsAllocated;
// ProgramExecutableVk's descriptor set allocations
const gl::State &state = getState();
const gl::ShaderProgramManager &shadersAndPrograms = state.getShaderProgramManagerForCapture();
......@@ -4429,7 +4429,7 @@ angle::Result ContextVk::updateDriverUniformsDescriptorSet(
ANGLE_TRY(mDriverUniformsDescriptorPools[pipelineType].allocateSetsAndGetInfo(
this, driverUniforms->descriptorSetLayout.get().ptr(), 1,
&driverUniforms->descriptorPoolBinding, &driverUniforms->descriptorSet, &newPoolAllocated));
mObjectPerfCounters.descriptorSetsAllocated[ToUnderlying(pipelineType)]++;
mContextPerfCounters.descriptorSetsAllocated[pipelineType]++;
// Clear descriptor set cache. It may no longer be valid.
if (newPoolAllocated)
......@@ -5710,18 +5710,25 @@ void ContextVk::outputCumulativePerfCounters()
return;
}
{
INFO() << "Context Descriptor Set Allocations: ";
INFO() << "Context Descriptor Set Allocations: ";
for (size_t pipelineType = 0;
pipelineType < mObjectPerfCounters.descriptorSetsAllocated.size(); ++pipelineType)
for (PipelineType pipelineType : angle::AllEnums<PipelineType>())
{
uint32_t count = mCumulativeContextPerfCounters.descriptorSetsAllocated[pipelineType];
if (count > 0)
{
uint32_t count = mObjectPerfCounters.descriptorSetsAllocated[pipelineType];
if (count > 0)
{
INFO() << " PipelineType " << pipelineType << ": " << count;
}
INFO() << " PipelineType " << ToUnderlying(pipelineType) << ": " << count;
}
}
}
// Hands back the counters gathered since the previous call and clears the per-interval
// descriptor set counts. The counts being cleared are first added to the cumulative
// counters, which are not reset here.
ContextVkPerfCounters ContextVk::getAndResetObjectPerfCounters()
{
    // Snapshot the current interval before touching anything.
    ContextVkPerfCounters snapshot = mContextPerfCounters;

    // Fold the interval into the running total, then start a fresh interval.
    mCumulativeContextPerfCounters.descriptorSetsAllocated += snapshot.descriptorSetsAllocated;
    mContextPerfCounters.descriptorSetsAllocated = {};

    return snapshot;
}
} // namespace rx
......@@ -37,6 +37,22 @@ class ShareGroupVk;
static constexpr uint32_t kMaxGpuEventNameLen = 32;
using EventName = std::array<char, kMaxGpuEventNameLen>;
// Distinguishes the graphics pipeline from the compute pipeline. The values are used as keys
// of packed enum maps, so they start at zero and are contiguous; InvalidEnum and EnumCount
// share the one-past-the-last value.
enum class PipelineType
{
    Graphics,
    Compute,
    InvalidEnum,
    EnumCount = InvalidEnum,
};
// One 32-bit unsigned count per PipelineType value.
using ContextVkDescriptorSetList = angle::PackedEnumMap<PipelineType, uint32_t>;
// Performance counters kept by ContextVk.
struct ContextVkPerfCounters
{
// Number of descriptor sets allocated, counted separately for each PipelineType.
ContextVkDescriptorSetList descriptorSetsAllocated;
};
class ContextVk : public ContextImpl, public vk::Context, public MultisampleTextureInitializer
{
public:
......@@ -658,15 +674,6 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
void destroy(RendererVk *rendererVk);
};
enum class PipelineType
{
Graphics = 0,
Compute = 1,
InvalidEnum = 2,
EnumCount = 2,
};
// The GpuEventQuery struct holds together a timestamp query and enough data to create a
// trace event based on that. Use traceGpuEvent to insert such queries. They will be readback
// when the results are available, without inserting a GPU bubble.
......@@ -700,14 +707,6 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
double cpuTimestampS;
};
// Performance Counters specific to this object type
using DescriptorSetList =
std::array<uint32_t, ToUnderlying(ContextVk::PipelineType::EnumCount)>;
struct PerfCounters
{
DescriptorSetList descriptorSetsAllocated;
};
class ScopedDescriptorSetUpdates;
angle::Result setupDraw(const gl::Context *context,
......@@ -932,6 +931,8 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
SpecConstUsageBits getCurrentProgramSpecConstUsageBits() const;
void updateGraphicsPipelineDescWithSpecConstUsageBits(SpecConstUsageBits usageBits);
ContextVkPerfCounters getAndResetObjectPerfCounters();
std::array<GraphicsDirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers;
std::array<ComputeDirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers;
......@@ -1087,7 +1088,8 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
// A mix of per-frame and per-run counters.
vk::PerfCounters mPerfCounters;
PerfCounters mObjectPerfCounters;
ContextVkPerfCounters mContextPerfCounters;
ContextVkPerfCounters mCumulativeContextPerfCounters;
gl::State::DirtyBits mPipelineDirtyBitsMask;
......
......@@ -218,7 +218,8 @@ ProgramExecutableVk::ProgramExecutableVk()
mDynamicBufferOffsets{},
mProgram(nullptr),
mProgramPipeline(nullptr),
mObjectPerfCounters{}
mPerfCounters{},
mCumulativePerfCounters{}
{}
ProgramExecutableVk::~ProgramExecutableVk()
......@@ -474,7 +475,7 @@ angle::Result ProgramExecutableVk::allocateDescriptorSetAndGetInfo(
&mDescriptorSets[descriptorSetIndex], newPoolAllocatedOut));
mEmptyDescriptorSets[descriptorSetIndex] = VK_NULL_HANDLE;
++mObjectPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
++mPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
return angle::Result::Continue;
}
......@@ -1744,7 +1745,7 @@ angle::Result ProgramExecutableVk::updateDescriptorSets(ContextVk *contextVk,
&mDescriptorPoolBindings[descriptorSetIndex],
&mEmptyDescriptorSets[descriptorSetIndex]));
++mObjectPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
++mPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
}
descSet = mEmptyDescriptorSets[descriptorSetIndex];
}
......@@ -1778,7 +1779,7 @@ void ProgramExecutableVk::outputCumulativePerfCounters()
for (DescriptorSetIndex descriptorSetIndex : angle::AllEnums<DescriptorSetIndex>())
{
uint32_t count = mObjectPerfCounters.descriptorSetsAllocated[descriptorSetIndex];
uint32_t count = mCumulativePerfCounters.descriptorSetsAllocated[descriptorSetIndex];
if (count > 0)
{
text << " DescriptorSetIndex " << ToUnderlying(descriptorSetIndex) << ": " << count
......@@ -1805,8 +1806,10 @@ void ProgramExecutableVk::outputCumulativePerfCounters()
ProgramExecutablePerfCounters ProgramExecutableVk::getAndResetObjectPerfCounters()
{
ProgramExecutablePerfCounters counters = mObjectPerfCounters;
mObjectPerfCounters.descriptorSetsAllocated = {};
mCumulativePerfCounters.descriptorSetsAllocated += mPerfCounters.descriptorSetsAllocated;
ProgramExecutablePerfCounters counters = mPerfCounters;
mPerfCounters.descriptorSetsAllocated = {};
return counters;
}
......
......@@ -285,7 +285,8 @@ class ProgramExecutableVk
ProgramVk *mProgram;
ProgramPipelineVk *mProgramPipeline;
ProgramExecutablePerfCounters mObjectPerfCounters;
ProgramExecutablePerfCounters mPerfCounters;
ProgramExecutablePerfCounters mCumulativePerfCounters;
};
} // namespace rx
......
......@@ -1024,7 +1024,7 @@ uint32_t UtilsVk::GetGenerateMipmapMaxLevels(ContextVk *contextVk)
: kGenerateMipmapMaxLevels;
}
UtilsVk::UtilsVk() : mObjectPerfCounters{} {}
UtilsVk::UtilsVk() : mPerfCounters{}, mCumulativePerfCounters{} {}
UtilsVk::~UtilsVk() = default;
......@@ -3440,7 +3440,7 @@ angle::Result UtilsVk::allocateDescriptorSet(ContextVk *contextVk,
contextVk, mDescriptorSetLayouts[function][DescriptorSetIndex::Internal].get().ptr(), 1,
bindingOut, descriptorSetOut));
mObjectPerfCounters.descriptorSetsAllocated++;
mPerfCounters.descriptorSetsAllocated++;
return angle::Result::Continue;
}
......@@ -3465,7 +3465,16 @@ void UtilsVk::outputCumulativePerfCounters()
return;
}
INFO() << "Utils Descriptor Set Allocations: " << mObjectPerfCounters.descriptorSetsAllocated;
INFO() << "Utils Descriptor Set Allocations: "
<< mCumulativePerfCounters.descriptorSetsAllocated;
}
// Hands back the counters gathered since the previous call and zeroes the per-interval
// descriptor set count. The count being cleared is first added to the cumulative counters,
// which are not reset here.
InternalShaderPerfCounters UtilsVk::getAndResetObjectPerfCounters()
{
    // Snapshot the current interval before touching anything.
    InternalShaderPerfCounters snapshot = mPerfCounters;

    // Fold the interval into the running total, then start a fresh interval.
    mCumulativePerfCounters.descriptorSetsAllocated += snapshot.descriptorSetsAllocated;
    mPerfCounters.descriptorSetsAllocated = 0;

    return snapshot;
}
} // namespace rx
......@@ -34,6 +34,12 @@
namespace rx
{
// Performance counters kept by UtilsVk.
struct InternalShaderPerfCounters
{
// Total descriptor set allocations for all UtilsVk::Functions
uint32_t descriptorSetsAllocated;
};
class UtilsVk : angle::NonCopyable
{
public:
......@@ -178,12 +184,6 @@ class UtilsVk : angle::NonCopyable
bool unresolveStencil;
};
struct PerfCounters
{
// Total descriptor set allocations for all UtilsVk::Functions
uint32_t descriptorSetsAllocated;
};
// Based on the maximum number of levels in GenerateMipmap.comp.
static constexpr uint32_t kGenerateMipmapMaxLevels = 6;
static uint32_t GetGenerateMipmapMaxLevels(ContextVk *contextVk);
......@@ -285,7 +285,7 @@ class UtilsVk : angle::NonCopyable
const vk::ImageView *destView,
const OverlayDrawParameters &params);
const PerfCounters getObjectPerfCounters() const { return mObjectPerfCounters; }
InternalShaderPerfCounters getAndResetObjectPerfCounters();
private:
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
......@@ -569,7 +569,8 @@ class UtilsVk : angle::NonCopyable
vk::Sampler mPointSampler;
vk::Sampler mLinearSampler;
PerfCounters mObjectPerfCounters;
InternalShaderPerfCounters mPerfCounters;
InternalShaderPerfCounters mCumulativePerfCounters;
};
} // namespace rx
......
......@@ -26,6 +26,7 @@ enum class StateChange
VertexBufferCycle,
Scissor,
InvalidEnum,
EnumCount = InvalidEnum,
};
constexpr size_t kCycleVBOPoolSize = 200;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment