Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
benchmark
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chen Yisong
benchmark
Commits
e539e807
Unverified
Commit
e539e807
authored
May 19, 2021
by
Mircea Trofin
Committed by
GitHub
May 19, 2021
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PFM] Extend perf counter support to multi-threaded cases. (#1153)
* Extend perf counter support to multi-threaded cases. * Docs update * const-ed Snapshot
parent
7d0d9061
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
56 additions
and
5 deletions
+56
-5
perf_counters.md
docs/perf_counters.md
+1
-2
benchmark_runner.cc
src/benchmark_runner.cc
+0
-2
perf_counters.cc
src/perf_counters.cc
+4
-0
perf_counters.h
src/perf_counters.h
+1
-1
perf_counters_gtest.cc
test/perf_counters_gtest.cc
+50
-0
No files found.
docs/perf_counters.md
View file @
e539e807
...
@@ -13,8 +13,7 @@ This feature is available if:
...
@@ -13,8 +13,7 @@ This feature is available if:
Unit (PMU),
Unit (PMU),
*
The benchmark is compiled with support for collecting counters. Currently,
*
The benchmark is compiled with support for collecting counters. Currently,
this requires
[
libpfm
](
http://perfmon2.sourceforge.net/
)
be available at build
this requires
[
libpfm
](
http://perfmon2.sourceforge.net/
)
be available at build
time, and
time
*
Currently, there is a limitation that the benchmark be run on one thread.
The feature does not require modifying benchmark code. Counter collection is
The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.
handled at the boundaries where timer collection is also handled.
...
...
src/benchmark_runner.cc
View file @
e539e807
...
@@ -163,8 +163,6 @@ class BenchmarkRunner {
...
@@ -163,8 +163,6 @@ class BenchmarkRunner {
internal
::
ARM_DisplayReportAggregatesOnly
);
internal
::
ARM_DisplayReportAggregatesOnly
);
run_results
.
file_report_aggregates_only
=
run_results
.
file_report_aggregates_only
=
(
b
.
aggregation_report_mode
()
&
internal
::
ARM_FileReportAggregatesOnly
);
(
b
.
aggregation_report_mode
()
&
internal
::
ARM_FileReportAggregatesOnly
);
CHECK
(
b
.
threads
()
==
1
||
!
perf_counters_measurement
.
IsValid
())
<<
"Perf counters are not supported in multi-threaded cases.
\n
"
;
CHECK
(
FLAGS_benchmark_perf_counters
.
empty
()
||
CHECK
(
FLAGS_benchmark_perf_counters
.
empty
()
||
perf_counters_measurement
.
IsValid
())
perf_counters_measurement
.
IsValid
())
<<
"Perf counters were requested but could not be set up."
;
<<
"Perf counters were requested but could not be set up."
;
...
...
src/perf_counters.cc
View file @
e539e807
...
@@ -67,6 +67,10 @@ PerfCounters PerfCounters::Create(
...
@@ -67,6 +67,10 @@ PerfCounters PerfCounters::Create(
return
NoCounters
();
return
NoCounters
();
}
}
attr
.
disabled
=
is_first
;
attr
.
disabled
=
is_first
;
// Note: the man page for perf_event_open suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
attr
.
inherit
=
true
;
attr
.
pinned
=
is_first
;
attr
.
pinned
=
is_first
;
attr
.
exclude_kernel
=
true
;
attr
.
exclude_kernel
=
true
;
attr
.
exclude_user
=
false
;
attr
.
exclude_user
=
false
;
...
...
src/perf_counters.h
View file @
e539e807
...
@@ -92,7 +92,7 @@ class PerfCounters final {
...
@@ -92,7 +92,7 @@ class PerfCounters final {
// Take a snapshot of the current value of the counters into the provided
// Take a snapshot of the current value of the counters into the provided
// valid PerfCounterValues storage. The values are populated such that:
// valid PerfCounterValues storage. The values are populated such that:
// names()[i]'s value is (*values)[i]
// names()[i]'s value is (*values)[i]
BENCHMARK_ALWAYS_INLINE
bool
Snapshot
(
PerfCounterValues
*
values
)
{
BENCHMARK_ALWAYS_INLINE
bool
Snapshot
(
PerfCounterValues
*
values
)
const
{
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_WINDOWS
assert
(
values
!=
nullptr
);
assert
(
values
!=
nullptr
);
assert
(
IsValid
());
assert
(
IsValid
());
...
...
test/perf_counters_gtest.cc
View file @
e539e807
#include <thread>
#include "../src/perf_counters.h"
#include "../src/perf_counters.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
...
@@ -92,4 +94,52 @@ TEST(PerfCountersTest, Read2Counters) {
...
@@ -92,4 +94,52 @@ TEST(PerfCountersTest, Read2Counters) {
EXPECT_GT
(
values2
[
0
],
0
);
EXPECT_GT
(
values2
[
0
],
0
);
EXPECT_GT
(
values2
[
1
],
0
);
EXPECT_GT
(
values2
[
1
],
0
);
}
}
// Synthetic CPU-bound workload: returns the sum of i * i for every i in
// [0, iterations).
//
// The accumulation is done in size_t, which is unsigned, so for large
// iteration counts the sum wraps around instead of overflowing into undefined
// behavior. `iterations` defaults to the count this helper originally had
// hard-coded, so a plain `do_work()` call behaves exactly as before; a smaller
// count can be passed when only a cheap run is needed.
//
// NOTE(review): the loop has no side effects besides the returned sum; confirm
// that the optimization level used for the tests keeps it as real work.
size_t do_work(size_t iterations = 100000000) {
  size_t res = 0;
  for (size_t i = 0; i < iterations; ++i) res += i * i;
  return res;
}
void
measure
(
size_t
threadcount
,
PerfCounterValues
*
values1
,
PerfCounterValues
*
values2
)
{
CHECK_NE
(
values1
,
nullptr
);
CHECK_NE
(
values2
,
nullptr
);
std
::
vector
<
std
::
thread
>
threads
(
threadcount
);
auto
work
=
[
&
]()
{
CHECK
(
do_work
()
>
1000
);
};
// We need to first set up the counters, then start the threads, so the
// threads would inherit the counters. But later, we need to first destroy the
// thread pool (so all the work finishes), then measure the counters. So the
// scopes overlap, and we need to explicitly control the scope of the
// threadpool.
auto
counters
=
PerfCounters
::
Create
({
kGenericPerfEvent1
,
kGenericPerfEvent3
});
for
(
auto
&
t
:
threads
)
t
=
std
::
thread
(
work
);
counters
.
Snapshot
(
values1
);
for
(
auto
&
t
:
threads
)
t
.
join
();
counters
.
Snapshot
(
values2
);
}
// Checks that perf counters account for work done on spawned threads: running
// the same per-thread workload on 4 threads must grow each counter by at least
// 1.9x what 2 threads produce.
TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues start_values(2);
  PerfCounterValues end_values(2);

  // Growth of each of the two counters between the most recent pair of
  // snapshots, converted to double for the ratio comparison below.
  auto counter_deltas = [&]() {
    return std::vector<double>{
        static_cast<double>(end_values[0] - start_values[0]),
        static_cast<double>(end_values[1] - start_values[1])};
  };

  measure(2, &start_values, &end_values);
  std::vector<double> D1 = counter_deltas();

  measure(4, &start_values, &end_values);
  std::vector<double> D2 = counter_deltas();

  // The main thread also does a little counted work of its own (joining the
  // threads, for instance), so the ratio is not exactly 2.0, only close to it.
  EXPECT_GE(D2[0], 1.9 * D1[0]);
  EXPECT_GE(D2[1], 1.9 * D1[1]);
}
}
// namespace
}
// namespace
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment