Commit 59fbb989 by Jamie Madill Committed by Commit Bot

Test Runner: Add ability to retry flaky tests.

Bug: angleproject:5273 Change-Id: Ie89559bb0897a04213981aa8fe4e2f2bfe78959a Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2513287 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org> Reviewed-by: Tim Van Patten <timvp@google.com>
parent 8e878d95
...@@ -24,6 +24,7 @@ following additional command-line arguments: ...@@ -24,6 +24,7 @@ following additional command-line arguments:
* `--results-file` specifies a location for the JSON test result output * `--results-file` specifies a location for the JSON test result output
* `--shard-count` and `--shard-index` control the test sharding * `--shard-count` and `--shard-index` control the test sharding
* `--test-timeout` limits the amount of time spent in each test * `--test-timeout` limits the amount of time spent in each test
* `--flaky-retries` allows for tests to fail a fixed number of times and still pass
`--isolated-script-test-output` and `--isolated-script-perf-test-output` mirror `--results-file` `--isolated-script-test-output` and `--isolated-script-perf-test-output` mirror `--results-file`
and `--histogram-json-file` respectively. and `--histogram-json-file` respectively.
......
...@@ -36,14 +36,15 @@ namespace angle ...@@ -36,14 +36,15 @@ namespace angle
{ {
namespace namespace
{ {
constexpr char kTestTimeoutArg[] = "--test-timeout="; constexpr char kBatchId[] = "--batch-id=";
constexpr char kFilterFileArg[] = "--filter-file="; constexpr char kFilterFileArg[] = "--filter-file=";
constexpr char kResultFileArg[] = "--results-file="; constexpr char kFlakyRetries[] = "--flaky-retries=";
constexpr char kHistogramJsonFileArg[] = "--histogram-json-file=";
constexpr char kGTestListTests[] = "--gtest_list_tests"; constexpr char kGTestListTests[] = "--gtest_list_tests";
constexpr char kHistogramJsonFileArg[] = "--histogram-json-file=";
constexpr char kListTests[] = "--list-tests"; constexpr char kListTests[] = "--list-tests";
constexpr char kPrintTestStdout[] = "--print-test-stdout"; constexpr char kPrintTestStdout[] = "--print-test-stdout";
constexpr char kBatchId[] = "--batch-id="; constexpr char kResultFileArg[] = "--results-file=";
constexpr char kTestTimeoutArg[] = "--test-timeout=";
#if defined(NDEBUG) #if defined(NDEBUG)
constexpr int kDefaultTestTimeout = 20; constexpr int kDefaultTestTimeout = 20;
#else #else
...@@ -152,7 +153,7 @@ const char *ResultTypeToString(TestResultType type) ...@@ -152,7 +153,7 @@ const char *ResultTypeToString(TestResultType type)
return "FAIL"; return "FAIL";
case TestResultType::Pass: case TestResultType::Pass:
return "PASS"; return "PASS";
case TestResultType::Skip: case TestResultType::NoResult:
return "SKIP"; return "SKIP";
case TestResultType::Timeout: case TestResultType::Timeout:
return "TIMEOUT"; return "TIMEOUT";
...@@ -170,7 +171,7 @@ TestResultType GetResultTypeFromString(const std::string &str) ...@@ -170,7 +171,7 @@ TestResultType GetResultTypeFromString(const std::string &str)
if (str == "PASS") if (str == "PASS")
return TestResultType::Pass; return TestResultType::Pass;
if (str == "SKIP") if (str == "SKIP")
return TestResultType::Skip; return TestResultType::NoResult;
if (str == "TIMEOUT") if (str == "TIMEOUT")
return TestResultType::Timeout; return TestResultType::Timeout;
return TestResultType::Unknown; return TestResultType::Unknown;
...@@ -243,8 +244,27 @@ void WriteResultsFile(bool interrupted, ...@@ -243,8 +244,27 @@ void WriteResultsFile(bool interrupted,
counts[result.type]++; counts[result.type]++;
jsResult.AddMember("expected", "PASS", allocator); std::string actualResult;
jsResult.AddMember("actual", ResultTypeToJSString(result.type, &allocator), allocator); for (uint32_t fail = 0; fail < result.flakyFailures; ++fail)
{
actualResult += "FAIL ";
}
actualResult += ResultTypeToString(result.type);
std::string expectedResult;
if (result.flakyFailures > 0)
{
expectedResult = "FAIL PASS";
jsResult.AddMember("is_flaky", true, allocator);
}
else
{
expectedResult = "PASS";
}
jsResult.AddMember("actual", actualResult, allocator);
jsResult.AddMember("expected", expectedResult, allocator);
if (result.type != TestResultType::Pass) if (result.type != TestResultType::Pass)
{ {
...@@ -330,7 +350,7 @@ void UpdateCurrentTestResult(const testing::TestResult &resultIn, TestResults *r ...@@ -330,7 +350,7 @@ void UpdateCurrentTestResult(const testing::TestResult &resultIn, TestResults *r
// Note: Crashes and Timeouts are detected by the crash handler and a watchdog thread. // Note: Crashes and Timeouts are detected by the crash handler and a watchdog thread.
if (resultIn.Skipped()) if (resultIn.Skipped())
{ {
resultOut.type = TestResultType::Skip; resultOut.type = TestResultType::NoResult;
} }
else if (resultIn.Failed()) else if (resultIn.Failed())
{ {
...@@ -546,19 +566,37 @@ bool GetTestResultsFromJSON(const js::Document &document, TestResults *resultsOu ...@@ -546,19 +566,37 @@ bool GetTestResultsFromJSON(const js::Document &document, TestResults *resultsOu
return false; return false;
} }
const std::string expectedStr = expected.GetString();
const std::string actualStr = actual.GetString(); const std::string actualStr = actual.GetString();
if (expectedStr != "PASS") TestResultType resultType = TestResultType::Unknown;
int flakyFailures = 0;
if (actualStr.find(' '))
{
std::istringstream strstr(actualStr);
std::string token;
while (std::getline(strstr, token, ' '))
{ {
resultType = GetResultTypeFromString(token);
if (resultType == TestResultType::Unknown)
{
printf("Failed to parse result type.\n");
return false; return false;
} }
if (resultType != TestResultType::Pass)
TestResultType resultType = GetResultTypeFromString(actualStr); {
flakyFailures++;
}
}
}
else
{
resultType = GetResultTypeFromString(actualStr);
if (resultType == TestResultType::Unknown) if (resultType == TestResultType::Unknown)
{ {
printf("Failed to parse result type.\n");
return false; return false;
} }
}
double elapsedTimeSeconds = 0.0; double elapsedTimeSeconds = 0.0;
if (obj.HasMember("times")) if (obj.HasMember("times"))
...@@ -581,33 +619,45 @@ bool GetTestResultsFromJSON(const js::Document &document, TestResults *resultsOu ...@@ -581,33 +619,45 @@ bool GetTestResultsFromJSON(const js::Document &document, TestResults *resultsOu
TestResult &result = resultsOut->results[id]; TestResult &result = resultsOut->results[id];
result.elapsedTimeSeconds = elapsedTimeSeconds; result.elapsedTimeSeconds = elapsedTimeSeconds;
result.type = resultType; result.type = resultType;
result.flakyFailures = flakyFailures;
} }
return true; return true;
} }
bool MergeTestResults(const TestResults &input, TestResults *output) bool MergeTestResults(TestResults *input, TestResults *output, int flakyRetries)
{ {
for (const auto &resultsIter : input.results) for (auto &resultsIter : input->results)
{ {
const TestIdentifier &id = resultsIter.first; const TestIdentifier &id = resultsIter.first;
const TestResult &inputResult = resultsIter.second; TestResult &inputResult = resultsIter.second;
TestResult &outputResult = output->results[id]; TestResult &outputResult = output->results[id];
// This should probably handle situations where a test is run more than once. if (inputResult.type != TestResultType::NoResult)
if (inputResult.type != TestResultType::Skip)
{ {
if (outputResult.type != TestResultType::Skip) if (outputResult.type != TestResultType::NoResult)
{ {
printf("Warning: duplicate entry for %s.%s.\n", id.testSuiteName.c_str(), printf("Warning: duplicate entry for %s.%s.\n", id.testSuiteName.c_str(),
id.testName.c_str()); id.testName.c_str());
return false; return false;
} }
// Mark the tests that haven't exhausted their retries as 'SKIP'. This makes ANGLE
// attempt the test again.
uint32_t runCount = outputResult.flakyFailures + 1;
if (inputResult.type != TestResultType::Pass &&
runCount < static_cast<uint32_t>(flakyRetries))
{
inputResult.type = TestResultType::NoResult;
outputResult.flakyFailures++;
}
else
{
outputResult.elapsedTimeSeconds = inputResult.elapsedTimeSeconds; outputResult.elapsedTimeSeconds = inputResult.elapsedTimeSeconds;
outputResult.type = inputResult.type; outputResult.type = inputResult.type;
} }
} }
}
return true; return true;
} }
...@@ -829,7 +879,8 @@ TestSuite::TestSuite(int *argc, char **argv) ...@@ -829,7 +879,8 @@ TestSuite::TestSuite(int *argc, char **argv)
mMaxProcesses(std::min(NumberOfProcessors(), kDefaultMaxProcesses)), mMaxProcesses(std::min(NumberOfProcessors(), kDefaultMaxProcesses)),
mTestTimeout(kDefaultTestTimeout), mTestTimeout(kDefaultTestTimeout),
mBatchTimeout(kDefaultBatchTimeout), mBatchTimeout(kDefaultBatchTimeout),
mBatchId(-1) mBatchId(-1),
mFlakyRetries(0)
{ {
Optional<int> filterArgIndex; Optional<int> filterArgIndex;
bool alsoRunDisabledTests = false; bool alsoRunDisabledTests = false;
...@@ -1034,7 +1085,7 @@ TestSuite::TestSuite(int *argc, char **argv) ...@@ -1034,7 +1085,7 @@ TestSuite::TestSuite(int *argc, char **argv)
for (const TestIdentifier &id : testSet) for (const TestIdentifier &id : testSet)
{ {
mTestResults.results[id].type = TestResultType::Skip; mTestResults.results[id].type = TestResultType::NoResult;
} }
} }
} }
...@@ -1057,6 +1108,7 @@ bool TestSuite::parseSingleArg(const char *argument) ...@@ -1057,6 +1108,7 @@ bool TestSuite::parseSingleArg(const char *argument)
ParseIntArg("--max-processes=", argument, &mMaxProcesses) || ParseIntArg("--max-processes=", argument, &mMaxProcesses) ||
ParseIntArg(kTestTimeoutArg, argument, &mTestTimeout) || ParseIntArg(kTestTimeoutArg, argument, &mTestTimeout) ||
ParseIntArg("--batch-timeout=", argument, &mBatchTimeout) || ParseIntArg("--batch-timeout=", argument, &mBatchTimeout) ||
ParseIntArg(kFlakyRetries, argument, &mFlakyRetries) ||
// Other test functions consume the batch ID, so keep it in the list. // Other test functions consume the batch ID, so keep it in the list.
ParseIntArgNoDelete(kBatchId, argument, &mBatchId) || ParseIntArgNoDelete(kBatchId, argument, &mBatchId) ||
ParseStringArg("--results-directory=", argument, &mResultsDirectory) || ParseStringArg("--results-directory=", argument, &mResultsDirectory) ||
...@@ -1191,7 +1243,7 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo) ...@@ -1191,7 +1243,7 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo)
return false; return false;
} }
if (!MergeTestResults(batchResults, &mTestResults)) if (!MergeTestResults(&batchResults, &mTestResults, mFlakyRetries))
{ {
std::cerr << "Error merging batch test results.\n"; std::cerr << "Error merging batch test results.\n";
return false; return false;
...@@ -1206,7 +1258,7 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo) ...@@ -1206,7 +1258,7 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo)
for (const auto &resultIter : batchResults.results) for (const auto &resultIter : batchResults.results)
{ {
const TestResult &result = resultIter.second; const TestResult &result = resultIter.second;
if (result.type != TestResultType::Skip && result.type != TestResultType::Pass) if (result.type != TestResultType::NoResult && result.type != TestResultType::Pass)
{ {
printf("To reproduce the batch, use filter:\n%s\n", printf("To reproduce the batch, use filter:\n%s\n",
processInfo->filterString.c_str()); processInfo->filterString.c_str());
...@@ -1222,7 +1274,7 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo) ...@@ -1222,7 +1274,7 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo)
const TestResult &result = resultIter.second; const TestResult &result = resultIter.second;
// Skip results aren't processed since they're added back to the test queue below. // Skip results aren't processed since they're added back to the test queue below.
if (result.type == TestResultType::Skip) if (result.type == TestResultType::NoResult)
{ {
continue; continue;
} }
...@@ -1255,21 +1307,20 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo) ...@@ -1255,21 +1307,20 @@ bool TestSuite::finishProcess(ProcessInfo *processInfo)
} }
// On unexpected exit, re-queue any unfinished tests. // On unexpected exit, re-queue any unfinished tests.
if (processInfo->process->getExitCode() != 0)
{
std::vector<TestIdentifier> unfinishedTests; std::vector<TestIdentifier> unfinishedTests;
for (const auto &resultIter : batchResults.results) for (const auto &resultIter : batchResults.results)
{ {
const TestIdentifier &id = resultIter.first; const TestIdentifier &id = resultIter.first;
const TestResult &result = resultIter.second; const TestResult &result = resultIter.second;
if (result.type == TestResultType::Skip) if (result.type == TestResultType::NoResult)
{ {
unfinishedTests.push_back(id); unfinishedTests.push_back(id);
} }
} }
if (!unfinishedTests.empty())
{
mTestQueue.emplace(std::move(unfinishedTests)); mTestQueue.emplace(std::move(unfinishedTests));
} }
...@@ -1509,8 +1560,8 @@ const char *TestResultTypeToString(TestResultType type) ...@@ -1509,8 +1560,8 @@ const char *TestResultTypeToString(TestResultType type)
return "Crash"; return "Crash";
case TestResultType::Fail: case TestResultType::Fail:
return "Fail"; return "Fail";
case TestResultType::Skip: case TestResultType::NoResult:
return "Skip"; return "NoResult";
case TestResultType::Pass: case TestResultType::Pass:
return "Pass"; return "Pass";
case TestResultType::Timeout: case TestResultType::Timeout:
......
...@@ -54,7 +54,7 @@ enum class TestResultType ...@@ -54,7 +54,7 @@ enum class TestResultType
{ {
Crash, Crash,
Fail, Fail,
Skip, NoResult,
Pass, Pass,
Timeout, Timeout,
Unknown, Unknown,
...@@ -64,8 +64,9 @@ const char *TestResultTypeToString(TestResultType type); ...@@ -64,8 +64,9 @@ const char *TestResultTypeToString(TestResultType type);
struct TestResult struct TestResult
{ {
TestResultType type = TestResultType::Skip; TestResultType type = TestResultType::NoResult;
double elapsedTimeSeconds = 0.0; double elapsedTimeSeconds = 0.0;
uint32_t flakyFailures = 0;
}; };
inline bool operator==(const TestResult &a, const TestResult &b) inline bool operator==(const TestResult &a, const TestResult &b)
...@@ -153,6 +154,7 @@ class TestSuite ...@@ -153,6 +154,7 @@ class TestSuite
int mTestTimeout; int mTestTimeout;
int mBatchTimeout; int mBatchTimeout;
int mBatchId; int mBatchId;
int mFlakyRetries;
std::vector<std::string> mChildProcessArgs; std::vector<std::string> mChildProcessArgs;
std::map<TestIdentifier, FileLine> mTestFileLines; std::map<TestIdentifier, FileLine> mTestFileLines;
std::vector<ProcessInfo> mCurrentProcesses; std::vector<ProcessInfo> mCurrentProcesses;
......
...@@ -24,6 +24,10 @@ namespace js = rapidjson; ...@@ -24,6 +24,10 @@ namespace js = rapidjson;
namespace namespace
{ {
constexpr char kTestHelperExecutable[] = "test_utils_unittest_helper"; constexpr char kTestHelperExecutable[] = "test_utils_unittest_helper";
constexpr int kFlakyRetries = 3;
// Enable this for debugging.
constexpr bool kDebugOutput = false;
class TestSuiteTest : public testing::Test class TestSuiteTest : public testing::Test
{ {
...@@ -36,20 +40,19 @@ class TestSuiteTest : public testing::Test ...@@ -36,20 +40,19 @@ class TestSuiteTest : public testing::Test
} }
} }
std::string mTempFileName; bool runTestSuite(const std::vector<std::string> &extraArgs, TestResults *actualResults)
}; {
// Tests the ANGLE standalone testing harness. Runs four tests with different ending conditions.
// Verifies that Pass, Fail, Crash and Timeout are all handled correctly.
TEST_F(TestSuiteTest, RunMockTests)
{
std::string executablePath = GetExecutableDirectory(); std::string executablePath = GetExecutableDirectory();
EXPECT_NE(executablePath, ""); EXPECT_NE(executablePath, "");
executablePath += std::string("/") + kTestHelperExecutable + GetExecutableExtension(); executablePath += std::string("/") + kTestHelperExecutable + GetExecutableExtension();
constexpr uint32_t kMaxTempDirLen = 100; constexpr uint32_t kMaxTempDirLen = 100;
char tempDirName[kMaxTempDirLen * 2]; char tempDirName[kMaxTempDirLen * 2];
ASSERT_TRUE(GetTempDir(tempDirName, kMaxTempDirLen));
if (!GetTempDir(tempDirName, kMaxTempDirLen))
{
return false;
}
std::stringstream tempFNameStream; std::stringstream tempFNameStream;
tempFNameStream << tempDirName << GetPathSeparator() << "test_temp_" << rand() << ".json"; tempFNameStream << tempDirName << GetPathSeparator() << "test_temp_" << rand() << ".json";
...@@ -57,13 +60,24 @@ TEST_F(TestSuiteTest, RunMockTests) ...@@ -57,13 +60,24 @@ TEST_F(TestSuiteTest, RunMockTests)
std::string resultsFileName = "--results-file=" + mTempFileName; std::string resultsFileName = "--results-file=" + mTempFileName;
std::vector<const char *> args = {executablePath.c_str(), std::vector<const char *> args = {
kRunTestSuite, executablePath.c_str(), kRunTestSuite, "--gtest_also_run_disabled_tests",
"--gtest_filter=MockTestSuiteTest.DISABLED_*", "--bot-mode", "--test-timeout=5", resultsFileName.c_str()};
"--gtest_also_run_disabled_tests",
"--bot-mode", for (const std::string &arg : extraArgs)
"--test-timeout=5", {
resultsFileName.c_str()}; args.push_back(arg.c_str());
}
if (kDebugOutput)
{
printf("Test arguments:\n");
for (const char *arg : args)
{
printf("%s ", arg);
}
printf("\n");
}
ProcessHandle process(args, true, true); ProcessHandle process(args, true, true);
EXPECT_TRUE(process->started()); EXPECT_TRUE(process->started());
...@@ -71,13 +85,25 @@ TEST_F(TestSuiteTest, RunMockTests) ...@@ -71,13 +85,25 @@ TEST_F(TestSuiteTest, RunMockTests)
EXPECT_TRUE(process->finished()); EXPECT_TRUE(process->finished());
EXPECT_EQ(process->getStderr(), ""); EXPECT_EQ(process->getStderr(), "");
// Uncomment this for debugging. if (kDebugOutput)
// printf("stdout:\n%s\n", process->getStdout().c_str()); {
printf("stdout:\n%s\n", process->getStdout().c_str());
}
return GetTestResultsFromFile(mTempFileName.c_str(), actualResults);
}
std::string mTempFileName;
};
// Tests the ANGLE standalone testing harness. Runs four tests with different ending conditions.
// Verifies that Pass, Fail, Crash and Timeout are all handled correctly.
TEST_F(TestSuiteTest, RunMockTests)
{
std::vector<std::string> extraArgs = {"--gtest_filter=MockTestSuiteTest.DISABLED_*"};
TestResults actual; TestResults actual;
ASSERT_TRUE(GetTestResultsFromFile(mTempFileName.c_str(), &actual)); ASSERT_TRUE(runTestSuite(extraArgs, &actual));
EXPECT_TRUE(DeleteFile(mTempFileName.c_str()));
mTempFileName.clear();
std::map<TestIdentifier, TestResult> expectedResults = { std::map<TestIdentifier, TestResult> expectedResults = {
{{"MockTestSuiteTest", "DISABLED_Pass"}, {TestResultType::Pass, 0.0}}, {{"MockTestSuiteTest", "DISABLED_Pass"}, {TestResultType::Pass, 0.0}},
...@@ -89,6 +115,22 @@ TEST_F(TestSuiteTest, RunMockTests) ...@@ -89,6 +115,22 @@ TEST_F(TestSuiteTest, RunMockTests)
EXPECT_EQ(expectedResults, actual.results); EXPECT_EQ(expectedResults, actual.results);
} }
// Verifies the flaky retry feature works as expected.
TEST_F(TestSuiteTest, RunFlakyTests)
{
    // Run only the mock flaky test, granting it kFlakyRetries attempts.
    std::vector<std::string> extraArgs;
    extraArgs.push_back("--gtest_filter=MockFlakyTestSuiteTest.DISABLED_Flaky");
    extraArgs.push_back("--flaky-retries=" + std::to_string(kFlakyRetries));

    TestResults actual;
    ASSERT_TRUE(runTestSuite(extraArgs, &actual));

    // The mock test fails (kFlakyRetries - 1) times before finally passing,
    // so the merged result is a Pass that records those flaky failures.
    std::map<TestIdentifier, TestResult> expectedResults;
    expectedResults.insert({{"MockFlakyTestSuiteTest", "DISABLED_Flaky"},
                            {TestResultType::Pass, 0.0, kFlakyRetries - 1}});
    EXPECT_EQ(expectedResults, actual.results);
}
// Normal passing test. // Normal passing test.
TEST(MockTestSuiteTest, DISABLED_Pass) TEST(MockTestSuiteTest, DISABLED_Pass)
{ {
...@@ -107,6 +149,43 @@ TEST(MockTestSuiteTest, DISABLED_Timeout) ...@@ -107,6 +149,43 @@ TEST(MockTestSuiteTest, DISABLED_Timeout)
angle::Sleep(20000); angle::Sleep(20000);
} }
// Trigger a flaky test failure.
TEST(MockFlakyTestSuiteTest, DISABLED_Flaky)
{
    // Each retry runs in a fresh child process, so the failure count must be
    // persisted across attempts in a temp file.
    constexpr uint32_t kMaxTempDirLen = 100;
    char tmpDir[kMaxTempDirLen * 2];
    ASSERT_TRUE(GetTempDir(tmpDir, kMaxTempDirLen));

    std::stringstream pathStream;
    pathStream << tmpDir << GetPathSeparator() << "flaky_temp.txt";
    const std::string counterFile = pathStream.str();

    // Read how many times this test has already failed (zero on first run,
    // when the counter file does not exist yet).
    int failCount = 0;
    if (FILE *readFp = fopen(counterFile.c_str(), "r"))
    {
        ASSERT_EQ(fscanf(readFp, "%d", &failCount), 1);
        fclose(readFp);
    }

    if (failCount < kFlakyRetries - 1)
    {
        // Retries remain: fail deliberately and bump the persisted counter.
        EXPECT_TRUE(false);
        FILE *writeFp = fopen(counterFile.c_str(), "w");
        ASSERT_NE(writeFp, nullptr);
        fprintf(writeFp, "%d", failCount + 1);
        fclose(writeFp);
    }
    else
    {
        // Final attempt: clean up the counter file and pass.
        angle::DeleteFile(counterFile.c_str());
    }
}
// Trigger a test crash. // Trigger a test crash.
// TEST(MockTestSuiteTest, DISABLED_Crash) // TEST(MockTestSuiteTest, DISABLED_Crash)
// { // {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment