Commit 25224e78 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: add GPU trace events

RendererVk now tries, as best as it can, to match the CPU and GPU timers on init as well as on every finish(). A clock-sync event is generated for each such synchronization point. RendererVk::traceGpuEvent() is a new function that, given a command buffer, performs timestamp queries corresponding to GPU events. These queries are read back when done, without incurring GPU bubbles, at which point an event is generated with that timestamp. Bug: angleproject:2908 Change-Id: I08d7d11ff9f8ad6c9f9a9899767c9cd746d0623e Reviewed-on: https://chromium-review.googlesource.com/c/1296954 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
parent 4a22f4b0
...@@ -80,6 +80,12 @@ config("internal_config") { ...@@ -80,6 +80,12 @@ config("internal_config") {
if (angle_force_thread_safety) { if (angle_force_thread_safety) {
defines += [ "ANGLE_FORCE_THREAD_SAFETY=1" ] defines += [ "ANGLE_FORCE_THREAD_SAFETY=1" ]
} }
if (angle_enable_vulkan) {
if (angle_enable_vulkan_gpu_trace_events) {
defines += [ "ANGLE_ENABLE_VULKAN_GPU_TRACE_EVENTS=1" ]
}
}
} }
config("extra_warnings") { config("extra_warnings") {
......
...@@ -75,6 +75,11 @@ declare_args() { ...@@ -75,6 +75,11 @@ declare_args() {
# Disable the layers in ubsan builds because of really slow builds. # Disable the layers in ubsan builds because of really slow builds.
angle_enable_vulkan_validation_layers = angle_enable_vulkan_validation_layers =
angle_enable_vulkan && !is_ubsan && !is_tsan && !is_asan angle_enable_vulkan && !is_ubsan && !is_tsan && !is_asan
if (angle_enable_vulkan) {
# Enable Vulkan GPU trace event capability
angle_enable_vulkan_gpu_trace_events = false
}
} }
if (is_win) { if (is_win) {
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#include "libANGLE/renderer/vulkan/vk_format_utils.h" #include "libANGLE/renderer/vulkan/vk_format_utils.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h" #include "libANGLE/renderer/vulkan/vk_helpers.h"
#include "third_party/trace_event/trace_event.h"
namespace rx namespace rx
{ {
namespace vk namespace vk
...@@ -626,11 +628,14 @@ angle::Result CommandGraph::submitCommands(Context *context, ...@@ -626,11 +628,14 @@ angle::Result CommandGraph::submitCommands(Context *context,
VkCommandBufferBeginInfo beginInfo = {}; VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = 0; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr; beginInfo.pInheritanceInfo = nullptr;
ANGLE_TRY(primaryCommandBufferOut->begin(context, beginInfo)); ANGLE_TRY(primaryCommandBufferOut->begin(context, beginInfo));
ANGLE_TRY(context->getRenderer()->traceGpuEvent(
context, primaryCommandBufferOut, TRACE_EVENT_PHASE_BEGIN, "Primary Command Buffer"));
for (CommandGraphNode *topLevelNode : mNodes) for (CommandGraphNode *topLevelNode : mNodes)
{ {
// Only process commands that don't have child commands. The others will be pulled in // Only process commands that don't have child commands. The others will be pulled in
...@@ -664,6 +669,9 @@ angle::Result CommandGraph::submitCommands(Context *context, ...@@ -664,6 +669,9 @@ angle::Result CommandGraph::submitCommands(Context *context,
} }
} }
ANGLE_TRY(context->getRenderer()->traceGpuEvent(
context, primaryCommandBufferOut, TRACE_EVENT_PHASE_END, "Primary Command Buffer"));
ANGLE_TRY(primaryCommandBufferOut->end(context)); ANGLE_TRY(primaryCommandBufferOut->end(context));
// TODO(jmadill): Use pool allocation so we don't need to deallocate command graph. // TODO(jmadill): Use pool allocation so we don't need to deallocate command graph.
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "libANGLE/Caps.h" #include "libANGLE/Caps.h"
#include "libANGLE/renderer/vulkan/CommandGraph.h" #include "libANGLE/renderer/vulkan/CommandGraph.h"
#include "libANGLE/renderer/vulkan/FeaturesVk.h" #include "libANGLE/renderer/vulkan/FeaturesVk.h"
#include "libANGLE/renderer/vulkan/QueryVk.h"
#include "libANGLE/renderer/vulkan/vk_format_utils.h" #include "libANGLE/renderer/vulkan/vk_format_utils.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h" #include "libANGLE/renderer/vulkan/vk_helpers.h"
#include "libANGLE/renderer/vulkan/vk_internal_shaders.h" #include "libANGLE/renderer/vulkan/vk_internal_shaders.h"
...@@ -172,6 +173,19 @@ class RendererVk : angle::NonCopyable ...@@ -172,6 +173,19 @@ class RendererVk : angle::NonCopyable
vk::ShaderLibrary *getShaderLibrary(); vk::ShaderLibrary *getShaderLibrary();
const FeaturesVk &getFeatures() const { return mFeatures; } const FeaturesVk &getFeatures() const { return mFeatures; }
// Records a Begin/End/Instant GPU trace event whose timestamp is taken from a GPU query.
// Events stay queued until the query results are available. `phase` is one of the
// TRACE_EVENT_PHASE_* values and `name` is the name of the event.
//
// This wrapper only checks whether GPU events are enabled; the actual work is done by
// traceGpuEventImpl().
ANGLE_INLINE angle::Result traceGpuEvent(vk::Context *context,
                                         vk::CommandBuffer *commandBuffer,
                                         char phase,
                                         const char *name)
{
    // GPU events are off: succeed without calling into the implementation.
    if (!mGpuEventsEnabled)
    {
        return angle::Result::Continue();
    }

    return traceGpuEventImpl(context, commandBuffer, phase, name);
}
private: private:
// Number of semaphores for external entities to renderer to issue a wait, such as surface's // Number of semaphores for external entities to renderer to issue a wait, such as surface's
// image acquire. // image acquire.
...@@ -184,7 +198,8 @@ class RendererVk : angle::NonCopyable ...@@ -184,7 +198,8 @@ class RendererVk : angle::NonCopyable
void ensureCapsInitialized() const; void ensureCapsInitialized() const;
void getSubmitWaitSemaphores( void getSubmitWaitSemaphores(
vk::Context *context, vk::Context *context,
angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> *waitSemaphores); angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> *waitSemaphores,
angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> *waitStageMasks);
angle::Result submitFrame(vk::Context *context, angle::Result submitFrame(vk::Context *context,
const VkSubmitInfo &submitInfo, const VkSubmitInfo &submitInfo,
vk::CommandBuffer &&commandBuffer); vk::CommandBuffer &&commandBuffer);
...@@ -194,6 +209,14 @@ class RendererVk : angle::NonCopyable ...@@ -194,6 +209,14 @@ class RendererVk : angle::NonCopyable
void initPipelineCacheVkKey(); void initPipelineCacheVkKey();
angle::Result initPipelineCacheVk(DisplayVk *display); angle::Result initPipelineCacheVk(DisplayVk *display);
// GPU trace event helpers. Their implementations are not visible from this header; the notes
// below are inferred from names and call sites -- confirm against the definitions.

// Presumably matches the CPU and GPU timers at a clock-sync point -- TODO confirm.
angle::Result synchronizeCpuGpuTime(vk::Context *context);
// Called by traceGpuEvent() only when mGpuEventsEnabled is set; takes the same arguments.
angle::Result traceGpuEventImpl(vk::Context *context,
vk::CommandBuffer *commandBuffer,
char phase,
const char *name);
// Presumably turns in-flight queries whose results are ready into GpuEvent entries -- TODO
// confirm.
angle::Result checkCompletedGpuEvents(vk::Context *context);
// Takes the GPU and CPU timestamps of the next clock sync. The S suffix presumably denotes
// seconds -- TODO confirm.
void flushGpuEvents(double nextSyncGpuTimestampS, double nextSyncCpuTimestampS);
mutable bool mCapsInitialized; mutable bool mCapsInitialized;
mutable gl::Caps mNativeCaps; mutable gl::Caps mNativeCaps;
mutable gl::TextureCapsMap mNativeTextureCaps; mutable gl::TextureCapsMap mNativeTextureCaps;
...@@ -277,6 +300,58 @@ class RendererVk : angle::NonCopyable ...@@ -277,6 +300,58 @@ class RendererVk : angle::NonCopyable
// Internal shader library. // Internal shader library.
vk::ShaderLibrary mShaderLibrary; vk::ShaderLibrary mShaderLibrary;
// The GpuEventQuery struct holds together a timestamp query and enough data to create a
// trace event based on that. Use traceGpuEvent to insert such queries. They will be read back
// when the results are available, without inserting a GPU bubble.
//
// - name will be the reported name of the event
// - phase is either 'B' (duration begin), 'E' (duration end) or 'i' (instant event).
//   See Google's "Trace Event Format":
//   https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU
// - queryIndex and queryPoolIndex identify the query; presumably they are the values handed
//   out by DynamicQueryPool::allocateQuery -- TODO confirm.
// - serial is the serial of the batch the query was submitted on. Until the batch is
//   submitted, the query is not checked, to avoid incurring a flush.
struct GpuEventQuery final
{
const char *name;
char phase;
uint32_t queryIndex;
size_t queryPoolIndex;
Serial serial;
};
// Once a query result is available, the timestamp is read and a GpuEvent object is kept until
// the next clock sync, at which point the clock drift is compensated in the results before
// handing them off to the application.
struct GpuEvent final
{
// Timestamp read back from the query. The name suggests raw GPU cycles rather than
// seconds -- TODO confirm.
uint64_t gpuTimestampCycles;
// Event name and phase; presumably carried over from the originating GpuEventQuery.
const char *name;
char phase;
};
// Gates traceGpuEvent(): when false, it returns Continue without calling traceGpuEventImpl().
bool mGpuEventsEnabled;
// Query pool for GPU event queries (presumably the timestamp queries described by
// GpuEventQuery -- TODO confirm).
vk::DynamicQueryPool mGpuEventQueryPool;
// A list of queries that have yet to be turned into an event (their result is not yet
// available).
std::vector<GpuEventQuery> mInFlightGpuEventQueries;
// A list of gpu events since the last clock sync.
std::vector<GpuEvent> mGpuEvents;
// Hold information from the last gpu clock sync for future gpu-to-cpu timestamp conversions.
struct GpuClockSyncInfo
{
// GPU and CPU timestamps of the sync point. The S suffix presumably denotes seconds --
// TODO confirm.
double gpuTimestampS;
double cpuTimestampS;
};
GpuClockSyncInfo mGpuClockSync;
// The very first timestamp queried for a GPU event is used as origin, so event timestamps would
// have a value close to zero, to avoid losing 12 bits when converting these 64 bit values to
// double.
uint64_t mGpuEventTimestampOrigin;
}; };
uint32_t GetUniformBufferDescriptorCount(); uint32_t GetUniformBufferDescriptorCount();
......
...@@ -518,13 +518,11 @@ angle::Result DynamicQueryPool::allocateQuery(Context *context, QueryHelper *que ...@@ -518,13 +518,11 @@ angle::Result DynamicQueryPool::allocateQuery(Context *context, QueryHelper *que
{ {
ASSERT(!queryOut->getQueryPool()); ASSERT(!queryOut->getQueryPool());
if (mCurrentFreeEntry >= mPoolSize) size_t poolIndex = 0;
{ uint32_t queryIndex = 0;
// No more queries left in this pool, create another one. ANGLE_TRY(allocateQuery(context, &poolIndex, &queryIndex));
ANGLE_TRY(allocateNewPool(context));
}
queryOut->init(this, mCurrentPool, mCurrentFreeEntry++); queryOut->init(this, poolIndex, queryIndex);
return angle::Result::Continue(); return angle::Result::Continue();
} }
...@@ -536,11 +534,34 @@ void DynamicQueryPool::freeQuery(Context *context, QueryHelper *query) ...@@ -536,11 +534,34 @@ void DynamicQueryPool::freeQuery(Context *context, QueryHelper *query)
size_t poolIndex = query->getQueryPoolIndex(); size_t poolIndex = query->getQueryPoolIndex();
ASSERT(query->getQueryPool()->valid()); ASSERT(query->getQueryPool()->valid());
onEntryFreed(context, poolIndex); freeQuery(context, poolIndex, query->getQuery());
query->deinit(); query->deinit();
} }
} }
// Hands out the next free query slot as a raw (pool index, query index) pair rather than
// through a QueryHelper. When the current pool has no entries left, allocateNewPool() is
// asked for one with room before the slot is reported.
angle::Result DynamicQueryPool::allocateQuery(Context *context,
                                              size_t *poolIndex,
                                              uint32_t *queryIndex)
{
    const bool currentPoolExhausted = mCurrentFreeEntry >= mPoolSize;
    if (currentPoolExhausted)
    {
        // Every entry of the current pool is taken; move on to another pool.
        ANGLE_TRY(allocateNewPool(context));
    }

    // Report the slot being handed out, then step past it.
    *poolIndex  = mCurrentPool;
    *queryIndex = mCurrentFreeEntry;
    ++mCurrentFreeEntry;

    return angle::Result::Continue();
}
// Releases a query obtained from the index-based allocateQuery() overload. Only the pool
// index is forwarded to onEntryFreed(); the query index is accepted but not used.
void DynamicQueryPool::freeQuery(Context *context, size_t poolIndex, uint32_t queryIndex)
{
    onEntryFreed(context, poolIndex);

    // Silence the unused-parameter warning.
    ANGLE_UNUSED_VARIABLE(queryIndex);
}
angle::Result DynamicQueryPool::allocateNewPool(Context *context) angle::Result DynamicQueryPool::allocateNewPool(Context *context)
{ {
if (findFreeEntryPool(context)) if (findFreeEntryPool(context))
......
...@@ -229,6 +229,11 @@ class DynamicQueryPool final : public DynamicallyGrowingPool<QueryPool> ...@@ -229,6 +229,11 @@ class DynamicQueryPool final : public DynamicallyGrowingPool<QueryPool>
angle::Result allocateQuery(Context *context, QueryHelper *queryOut); angle::Result allocateQuery(Context *context, QueryHelper *queryOut);
void freeQuery(Context *context, QueryHelper *query); void freeQuery(Context *context, QueryHelper *query);
// Special allocator that doesn't work with QueryHelper, which is a CommandGraphResource.
// Currently only used with RendererVk::GpuEventQuery.
angle::Result allocateQuery(Context *context, size_t *poolIndex, uint32_t *queryIndex);
void freeQuery(Context *context, size_t poolIndex, uint32_t queryIndex);
const QueryPool *getQueryPool(size_t index) const { return &mPools[index]; } const QueryPool *getQueryPool(size_t index) const { return &mPools[index]; }
private: private:
......
...@@ -27,8 +27,7 @@ int main(int argc, char **argv) ...@@ -27,8 +27,7 @@ int main(int argc, char **argv)
} }
if (strcmp("--trace-file", argv[i]) == 0 && i < argc - 1) if (strcmp("--trace-file", argv[i]) == 0 && i < argc - 1)
{ {
gTraceFile = argv[i + 1]; gTraceFile = argv[++i];
argc++;
} }
} }
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "ANGLEPerfTest.h" #include "ANGLEPerfTest.h"
#include "third_party/perf/perf_test.h" #include "third_party/perf/perf_test.h"
#include "third_party/trace_event/trace_event.h"
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
...@@ -21,6 +22,17 @@ namespace ...@@ -21,6 +22,17 @@ namespace
{ {
constexpr size_t kInitialTraceEventBufferSize = 50000; constexpr size_t kInitialTraceEventBufferSize = 50000;
// A trace category known to the perf tests. |enabled| must remain the first member:
// GetTraceCategoryEnabledFlag hands out its address, and AddTraceEvent casts that address
// back to a TraceCategory to recover the category name.
struct TraceCategory
{
    unsigned char enabled;
    const char *name;
};

// The categories that are recorded. Events in "gpu.angle.gpu" are written to the trace file
// with tid "GPU"; everything else is tagged "CPU".
constexpr TraceCategory gTraceCategories[] = {
    TraceCategory{1, "gpu.angle"},
    TraceCategory{1, "gpu.angle.gpu"},
};
void EmptyPlatformMethod(angle::PlatformMethods *, const char *) void EmptyPlatformMethod(angle::PlatformMethods *, const char *)
{ {
} }
...@@ -43,17 +55,33 @@ angle::TraceEventHandle AddTraceEvent(angle::PlatformMethods *platform, ...@@ -43,17 +55,33 @@ angle::TraceEventHandle AddTraceEvent(angle::PlatformMethods *platform,
const unsigned long long *argValues, const unsigned long long *argValues,
unsigned char flags) unsigned char flags)
{ {
// Discover the category name based on categoryEnabledFlag. This flag comes from the first
// parameter of TraceCategory, and corresponds to one of the entries in gTraceCategories.
static_assert(offsetof(TraceCategory, enabled) == 0,
"|enabled| must be the first field of the TraceCategory class.");
const TraceCategory *category = reinterpret_cast<const TraceCategory *>(categoryEnabledFlag);
ptrdiff_t categoryIndex = category - gTraceCategories;
ASSERT(categoryIndex >= 0 && static_cast<size_t>(categoryIndex) < ArraySize(gTraceCategories));
ANGLERenderTest *renderTest = static_cast<ANGLERenderTest *>(platform->context); ANGLERenderTest *renderTest = static_cast<ANGLERenderTest *>(platform->context);
std::vector<TraceEvent> &buffer = renderTest->getTraceEventBuffer(); std::vector<TraceEvent> &buffer = renderTest->getTraceEventBuffer();
buffer.emplace_back(phase, name, timestamp); buffer.emplace_back(phase, category->name, name, timestamp);
return buffer.size(); return buffer.size();
} }
const unsigned char *GetTraceCategoryEnabledFlag(angle::PlatformMethods *platform, const unsigned char *GetTraceCategoryEnabledFlag(angle::PlatformMethods *platform,
const char *categoryName) const char *categoryName)
{ {
constexpr static unsigned char kNonZero = 1; for (const TraceCategory &category : gTraceCategories)
return &kNonZero; {
if (strcmp(category.name, categoryName) == 0)
{
return &category.enabled;
}
}
constexpr static unsigned char kZero = 0;
return &kZero;
} }
void UpdateTraceEventDuration(angle::PlatformMethods *platform, void UpdateTraceEventDuration(angle::PlatformMethods *platform,
...@@ -67,7 +95,10 @@ void UpdateTraceEventDuration(angle::PlatformMethods *platform, ...@@ -67,7 +95,10 @@ void UpdateTraceEventDuration(angle::PlatformMethods *platform,
double MonotonicallyIncreasingTime(angle::PlatformMethods *platform) double MonotonicallyIncreasingTime(angle::PlatformMethods *platform)
{ {
ANGLERenderTest *renderTest = static_cast<ANGLERenderTest *>(platform->context); ANGLERenderTest *renderTest = static_cast<ANGLERenderTest *>(platform->context);
return renderTest->getTimer()->getElapsedTime(); // Move the time origin to the first call to this function, to avoid generating unnecessarily
// large timestamps.
static double origin = renderTest->getTimer()->getAbsoluteTime();
return renderTest->getTimer()->getAbsoluteTime() - origin;
} }
void DumpTraceEventsToJSONFile(const std::vector<TraceEvent> &traceEvents, void DumpTraceEventsToJSONFile(const std::vector<TraceEvent> &traceEvents,
...@@ -86,11 +117,11 @@ void DumpTraceEventsToJSONFile(const std::vector<TraceEvent> &traceEvents, ...@@ -86,11 +117,11 @@ void DumpTraceEventsToJSONFile(const std::vector<TraceEvent> &traceEvents,
static_cast<unsigned long long>(traceEvent.timestamp * 1000.0 * 1000.0); static_cast<unsigned long long>(traceEvent.timestamp * 1000.0 * 1000.0);
value["name"] = traceEvent.name; value["name"] = traceEvent.name;
value["cat"] = "gpu.angle"; value["cat"] = traceEvent.categoryName;
value["ph"] = phaseName.str(); value["ph"] = phaseName.str();
value["ts"] = microseconds; value["ts"] = microseconds;
value["pid"] = "ANGLE"; value["pid"] = "ANGLE";
value["tid"] = "CPU"; value["tid"] = strcmp(traceEvent.categoryName, "gpu.angle.gpu") == 0 ? "GPU" : "CPU";
eventsValue.append(value); eventsValue.append(value);
} }
......
...@@ -44,12 +44,13 @@ struct TraceEvent final ...@@ -44,12 +44,13 @@ struct TraceEvent final
{ {
TraceEvent() {} TraceEvent() {}
TraceEvent(char phaseIn, const char *nameIn, double timestampIn) TraceEvent(char phaseIn, const char *categoryNameIn, const char *nameIn, double timestampIn)
: phase(phaseIn), name(nameIn), timestamp(timestampIn) : phase(phaseIn), categoryName(categoryNameIn), name(nameIn), timestamp(timestampIn)
{ {
} }
char phase = 0; char phase = 0;
const char *categoryName = nullptr;
const char *name = nullptr; const char *name = nullptr;
double timestamp = 0; double timestamp = 0;
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment