Fix the perf test runner calibration.

The calibration path was broken for the white-box unit tests. This change also adds argparse handling to the runner so that command-line arguments can be overridden more flexibly. Previously, the broken calibration caused some of the tests to run only a single test iteration when measuring, which could lead to low-quality measurements.

Bug: angleproject:5573
Change-Id: Ic1cb2b2553774a361325f290440c40b2ff90db5e
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2672702
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Mohan Maiya <m.maiya@samsung.com>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>

Fix the perf test runner calibration.
2c685a41 · Jamie Madill · Commit Bot · 89f50584 · 2c685a41 · 2c685a41
Commit 2c685a41 authored Feb 03, 2021 by Jamie Madill Committed by Commit Bot Feb 03, 2021
4 changed files
--- a/scripts/perf_test_runner.py
+++ b/scripts/perf_test_runner.py
@@ -10,22 +10,30 @@
 #   variation of the population continuously.
 #

+import argparse
 import glob
-import subprocess
-import sys
+import logging
 import os
 import re
+import subprocess
+import sys

 base_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))

 # Look for a [Rr]elease build.
-perftests_paths = glob.glob('out/*elease*')
-metric = 'wall_time'
-max_experiments = 10
+TEST_SUITE_SEARCH_PATH = glob.glob('out/*elease*')
+DEFAULT_METRIC = 'wall_time'
+DEFAULT_EXPERIMENTS = 10
+
+DEFAULT_TEST_SUITE = 'angle_perftests'

-binary_name = 'angle_perftests'
 if sys.platform == 'win32':
-    binary_name += '.exe'
+    DEFAULT_TEST_NAME = 'DrawCallPerfBenchmark.Run/d3d11_null'
+else:
+    DEFAULT_TEST_NAME = 'DrawCallPerfBenchmark.Run/gl'
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1

 scores = []

@@ -74,80 +82,102 @@ def truncated_cov(data, n):
    return coefficient_of_variation(truncated_list(data, n))


-# Find most recent binary
-newest_binary = None
-newest_mtime = None
-
-for path in perftests_paths:
-    binary_path = os.path.join(base_path, path, binary_name)
-    if os.path.exists(binary_path):
-        binary_mtime = os.path.getmtime(binary_path)
-        if (newest_binary is None) or (binary_mtime > newest_mtime):
-            newest_binary = binary_path
-            newest_mtime = binary_mtime
-
-perftests_path = newest_binary
-
-if perftests_path == None or not os.path.exists(perftests_path):
-    print('Cannot find Release %s!' % binary_name)
-    sys.exit(1)
-
-if sys.platform == 'win32':
-    test_name = 'DrawCallPerfBenchmark.Run/d3d11_null'
-else:
-    test_name = 'DrawCallPerfBenchmark.Run/gl'
-
-if len(sys.argv) >= 2:
-    test_name = sys.argv[1]
-
-print('Using test executable: ' + perftests_path)
-print('Test name: ' + test_name)
-
-
-def get_results(metric, extra_args=[]):
-    process = subprocess.Popen(
-        [perftests_path, '--gtest_filter=' + test_name] + extra_args,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
-    output, err = process.communicate()
-
-    m = re.search(r'Running (\d+) tests', output)
-    if m and int(m.group(1)) > 1:
-        print("Found more than one test result in output:")
-        print(output)
-        sys.exit(3)
-
-    # Results are reported in the format:
-    # name_backend.metric: story= value units.
-    pattern = r'\.' + metric + r':.*= ([0-9.]+)'
-    m = re.findall(pattern, output)
-    if not m:
-        print("Did not find the metric '%s' in the test output:" % metric)
-        print(output)
-        sys.exit(1)
-
-    return [float(value) for value in m]
-
-
-# Calibrate the number of steps
-steps = get_results("steps", ["--calibration"])[0]
-print("running with %d steps." % steps)
-
-# Loop 'max_experiments' times, running the tests.
-for experiment in range(max_experiments):
-    experiment_scores = get_results(metric, ["--steps-per-trial", str(steps)])
-
-    for score in experiment_scores:
-        sys.stdout.write("%s: %.2f" % (metric, score))
-        scores.append(score)
-
-        if (len(scores) > 1):
-            sys.stdout.write(", mean: %.2f" % mean(scores))
-            sys.stdout.write(", variation: %.2f%%" % (coefficient_of_variation(scores) * 100.0))
-
-        if (len(scores) > 7):
-            truncation_n = len(scores) >> 3
-            sys.stdout.write(", truncated mean: %.2f" % truncated_mean(scores, truncation_n))
-            sys.stdout.write(", variation: %.2f%%" % (truncated_cov(scores, truncation_n) * 100.0))
-
-        print("")
+def main(raw_args):
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--suite',
+        help='Test suite binary. Default is "%s".' % DEFAULT_TEST_SUITE,
+        default=DEFAULT_TEST_SUITE)
+    parser.add_argument(
+        '-m',
+        '--metric',
+        help='Test metric. Default is "%s".' % DEFAULT_METRIC,
+        default=DEFAULT_METRIC)
+    parser.add_argument(
+        '--experiments',
+        help='Number of experiments to run. Default is %d.' % DEFAULT_EXPERIMENTS,
+        default=DEFAULT_EXPERIMENTS,
+        type=int)
+    parser.add_argument('-v', '--verbose', help='Extra verbose logging.', action='store_true')
+    parser.add_argument('test_name', help='Test to run', default=DEFAULT_TEST_NAME)
+    args = parser.parse_args(raw_args)
+
+    if args.verbose:
+        logging.basicConfig(level='DEBUG')
+
+    if sys.platform == 'win32':
+        args.suite += '.exe'
+
+    # Find most recent binary
+    newest_binary = None
+    newest_mtime = None
+
+    for path in TEST_SUITE_SEARCH_PATH:
+        binary_path = os.path.join(base_path, path, args.suite)
+        if os.path.exists(binary_path):
+            binary_mtime = os.path.getmtime(binary_path)
+            if (newest_binary is None) or (binary_mtime > newest_mtime):
+                newest_binary = binary_path
+                newest_mtime = binary_mtime
+
+    perftests_path = newest_binary
+
+    if perftests_path == None or not os.path.exists(perftests_path):
+        print('Cannot find Release %s!' % args.test_suite)
+        return EXIT_FAILURE
+
+    print('Using test executable: %s' % perftests_path)
+    print('Test name: %s' % args.test_name)
+
+    def get_results(metric, extra_args=[]):
+        run = [perftests_path, '--gtest_filter=%s' % args.test_name] + extra_args
+        logging.info('running %s' % str(run))
+        process = subprocess.Popen(run, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        output, err = process.communicate()
+
+        m = re.search(r'Running (\d+) tests', output)
+        if m and int(m.group(1)) > 1:
+            print(output)
+            raise Exception('Found more than one test result in output')
+
+        # Results are reported in the format:
+        # name_backend.metric: story= value units.
+        pattern = r'\.' + metric + r':.*= ([0-9.]+)'
+        logging.debug('searching for %s in output' % pattern)
+        m = re.findall(pattern, output)
+        if not m:
+            print(output)
+            raise Exception('Did not find the metric "%s" in the test output' % metric)
+
+        return [float(value) for value in m]
+
+    # Calibrate the number of steps
+    steps = get_results("steps_to_run", ["--calibration"])[0]
+    print("running with %d steps." % steps)
+
+    # Loop 'args.experiments' times, running the tests.
+    for experiment in range(args.experiments):
+        experiment_scores = get_results(args.metric, ["--steps-per-trial", str(steps)])
+
+        for score in experiment_scores:
+            sys.stdout.write("%s: %.2f" % (args.metric, score))
+            scores.append(score)
+
+            if (len(scores) > 1):
+                sys.stdout.write(", mean: %.2f" % mean(scores))
+                sys.stdout.write(", variation: %.2f%%" %
+                                 (coefficient_of_variation(scores) * 100.0))
+
+            if (len(scores) > 7):
+                truncation_n = len(scores) >> 3
+                sys.stdout.write(", truncated mean: %.2f" % truncated_mean(scores, truncation_n))
+                sys.stdout.write(", variation: %.2f%%" %
+                                 (truncated_cov(scores, truncation_n) * 100.0))
+
+            print("")
+
+    return EXIT_SUCCESS
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
--- a/src/tests/perf_tests/ANGLEPerfTest.cpp
+++ b/src/tests/perf_tests/ANGLEPerfTest.cpp
@@ -237,6 +237,7 @@ ANGLEPerfTest::ANGLEPerfTest(const std::string &name,
    mReporter->RegisterImportantMetric(".gpu_time", units);
    mReporter->RegisterFyiMetric(".trial_steps", "count");
    mReporter->RegisterFyiMetric(".total_steps", "count");
+    mReporter->RegisterFyiMetric(".steps_to_run", "count");
 }

 ANGLEPerfTest::~ANGLEPerfTest() {}
@@ -248,6 +249,14 @@ void ANGLEPerfTest::run()
        return;
    }

+    if (mStepsToRun <= 0)
+    {
+        // We don't call finish between calibration steps when calibrating non-Render tests. The
+        // Render tests will have already calibrated when this code is run.
+        calibrateStepsToRun(RunLoopPolicy::RunContinuously);
+        ASSERT(mStepsToRun > 0);
+    }
+
    uint32_t numTrials = OneFrame() ? 1 : gTestTrials;
    if (gVerboseLogging)
    {
@@ -290,7 +299,14 @@ void ANGLEPerfTest::run()
        double standardDeviation      = std::sqrt(variance);
        double coefficientOfVariation = standardDeviation / mean;

-        printf("Mean result time: %.4lf ms.\n", mean);
+        if (mean < 0.001)
+        {
+            printf("Mean result time: %.4lf ns.\n", mean * 1000.0);
+        }
+        else
+        {
+            printf("Mean result time: %.4lf ms.\n", mean);
+        }
        printf("Coefficient of variation: %.2lf%%\n", coefficientOfVariation * 100.0);
    }
 }
@@ -402,8 +418,15 @@ double ANGLEPerfTest::printResults()
        printf("Ran %0.2lf iterations per second\n", fps);
    }

-    mReporter->AddResult(".trial_steps", static_cast<size_t>(mTrialNumStepsPerformed));
-    mReporter->AddResult(".total_steps", static_cast<size_t>(mTotalNumStepsPerformed));
+    if (gCalibration)
+    {
+        mReporter->AddResult(".steps_to_run", static_cast<size_t>(mStepsToRun));
+    }
+    else
+    {
+        mReporter->AddResult(".trial_steps", static_cast<size_t>(mTrialNumStepsPerformed));
+        mReporter->AddResult(".total_steps", static_cast<size_t>(mTotalNumStepsPerformed));
+    }

    // Output histogram JSON set format if enabled.
    double secondsPerStep = elapsedTimeSeconds[0] / static_cast<double>(mTrialNumStepsPerformed);
@@ -418,10 +441,9 @@ double ANGLEPerfTest::normalizedTime(size_t value) const
    return static_cast<double>(value) / static_cast<double>(mTrialNumStepsPerformed);
 }

-void ANGLEPerfTest::calibrateStepsToRun()
+void ANGLEPerfTest::calibrateStepsToRun(RunLoopPolicy policy)
 {
-    doRunLoop(gCalibrationTimeSeconds, std::numeric_limits<int>::max(),
-              RunLoopPolicy::FinishEveryStep);
+    doRunLoop(gCalibrationTimeSeconds, std::numeric_limits<int>::max(), policy);

    double elapsedTime = mTimer.getElapsedTime();

@@ -447,7 +469,7 @@ void ANGLEPerfTest::calibrateStepsToRun()
    // Calibration allows the perf test runner script to save some time.
    if (gCalibration)
    {
-        mReporter->AddResult(".steps", static_cast<size_t>(mStepsToRun));
+        printResults();
        return;
    }
 }
@@ -719,7 +741,9 @@ void ANGLERenderTest::SetUp()

    if (mStepsToRun <= 0)
    {
-        calibrateStepsToRun();
+        // Ensure we always call Finish when calibrating Render tests. This completes our work
+        // between calibration measurements.
+        calibrateStepsToRun(RunLoopPolicy::FinishEveryStep);
    }
 }


--- a/src/tests/perf_tests/ANGLEPerfTest.h
+++ b/src/tests/perf_tests/ANGLEPerfTest.h
@@ -107,7 +107,7 @@ class ANGLEPerfTest : public testing::Test, angle::NonCopyable
    virtual void computeGPUTime() {}

    double printResults();
-    void calibrateStepsToRun();
+    void calibrateStepsToRun(RunLoopPolicy policy);

    std::string mName;
    std::string mBackend;

--- a/src/tests/perf_tests/ANGLEPerfTestArgs.cpp
+++ b/src/tests/perf_tests/ANGLEPerfTestArgs.cpp
@@ -74,6 +74,7 @@ void ANGLEProcessPerfTestArgs(int *argc, char **argv)
        else if (strcmp("--calibration", argv[argIndex]) == 0)
        {
            gCalibration = true;
+            gTestTrials  = 0;
        }
        else if (strcmp("--steps-per-trial", argv[argIndex]) == 0 && argIndex < *argc - 1)
        {