Merge #11789: [tests] [travis-ci] Combine logs on failure

ff8a9b0 [tests] Add combinedlogslen argument to test_runner.py (John Newbery) dba94ea [tests] [travis-ci] Move Travis functional test log post processing to test_runner (John Newbery) bba1c54 [tests] Improve logging shutdown and add hint for combine_logs (John Newbery) Pull request description: Replaces #11779 . Notes from that PR: > Currently, when a functional test fails, the debug logs are printed sequentially to the travis log. This makes debugging race conditions based on the travis log hard. Instead, all logs events should be combined and sorted by their timestamp, then appended to the travis log. @MarcoFalke Tree-SHA512: 56c80067d6a2c92f7e6a35e3ae5160637a0de052d9da593c7be6e02233544a93c66d62456f903f85e2edc09e31ab4bdafd1aed1d9897ae48c634f82631f856f7
2017-11-30 13:40:46 +01:00 · 2017-11-30 13:40:46 +01:00 · 60d739eb49
commit 60d739eb49
parent dd49862a70 ff8a9b068a
3 changed files with 26 additions and 29 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -18,7 +18,6 @@ env:
    - CCACHE_COMPRESS=1
    - BASE_OUTDIR=$TRAVIS_BUILD_DIR/out
    - SDK_URL=https://bitcoincore.org/depends-sources/sdks
-    - PYTHON_DEBUG=1
    - WINEDEBUG=fixme-all
  matrix:
 # ARM
@ -79,7 +78,7 @@ script:
    - export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/depends/$HOST/lib
    - if [ "$RUN_TESTS" = "true" ]; then travis_wait 30 make $MAKEJOBS check VERBOSE=1; fi
    - if [ "$TRAVIS_EVENT_TYPE" = "cron" ]; then extended="--extended --exclude pruning,dbcrash"; fi
-    - if [ "$RUN_TESTS" = "true" ]; then test/functional/test_runner.py --coverage --quiet ${extended}; fi
+    - if [ "$RUN_TESTS" = "true" ]; then test/functional/test_runner.py --combinedlogslen=4000 --coverage --quiet ${extended}; fi
 after_script:
    - echo $TRAVIS_COMMIT_RANGE
    - echo $TRAVIS_COMMIT_LOG
--- a/test/functional/test_framework/test_framework.py
+++ b/test/functional/test_framework/test_framework.py
@ -4,7 +4,6 @@
 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
 """Base class for RPC testing."""

-from collections import deque
 from enum import Enum
 import logging
 import optparse
@ -149,32 +148,19 @@ class BitcoinTestFramework():
            shutil.rmtree(self.options.tmpdir)
        else:
            self.log.warning("Not cleaning up dir %s" % self.options.tmpdir)
-            if os.getenv("PYTHON_DEBUG", ""):
-                # Dump the end of the debug logs, to aid in debugging rare
-                # travis failures.
-                import glob
-                filenames = [self.options.tmpdir + "/test_framework.log"]
-                filenames += glob.glob(self.options.tmpdir + "/node*/regtest/debug.log")
-                MAX_LINES_TO_PRINT = 1000
-                for fn in filenames:
-                    try:
-                        with open(fn, 'r') as f:
-                            print("From", fn, ":")
-                            print("".join(deque(f, MAX_LINES_TO_PRINT)))
-                    except OSError:
-                        print("Opening file %s failed." % fn)
-                        traceback.print_exc()

        if success == TestStatus.PASSED:
            self.log.info("Tests successful")
-            sys.exit(TEST_EXIT_PASSED)
+            exit_code = TEST_EXIT_PASSED
        elif success == TestStatus.SKIPPED:
            self.log.info("Test skipped")
-            sys.exit(TEST_EXIT_SKIPPED)
+            exit_code = TEST_EXIT_SKIPPED
        else:
            self.log.error("Test failed. Test logging available at %s/test_framework.log", self.options.tmpdir)
+            self.log.error("Hint: Call {} '{}' to consolidate all logs".format(os.path.normpath(os.path.dirname(os.path.realpath(__file__)) + "/../combine_logs.py"), self.options.tmpdir))
+            exit_code = TEST_EXIT_FAILED
        logging.shutdown()
-            sys.exit(TEST_EXIT_FAILED)
+        sys.exit(exit_code)

    # Methods to override in subclass test scripts.
    def set_test_params(self):
--- a/test/functional/test_runner.py
+++ b/test/functional/test_runner.py
@ -15,6 +15,7 @@ For a description of arguments recognized by test scripts, see
 """

 import argparse
+from collections import deque
 import configparser
 import datetime
 import os
@ -174,6 +175,7 @@ def main():
                                     epilog='''
    Help text and arguments for individual test script:''',
                                     formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('--combinedlogslen', '-c', type=int, default=0, help='print a combined log (of length n lines) from all test nodes and test framework to the console on failure.')
    parser.add_argument('--coverage', action='store_true', help='generate a basic coverage report for the RPC interface')
    parser.add_argument('--exclude', '-x', help='specify a comma-separated-list of scripts to exclude.')
    parser.add_argument('--extended', action='store_true', help='run the extended test suite in addition to the basic tests')
@ -266,9 +268,9 @@ def main():
    if not args.keepcache:
        shutil.rmtree("%s/test/cache" % config["environment"]["BUILDDIR"], ignore_errors=True)

-    run_tests(test_list, config["environment"]["SRCDIR"], config["environment"]["BUILDDIR"], config["environment"]["EXEEXT"], tmpdir, args.jobs, args.coverage, passon_args)
+    run_tests(test_list, config["environment"]["SRCDIR"], config["environment"]["BUILDDIR"], config["environment"]["EXEEXT"], tmpdir, args.jobs, args.coverage, passon_args, args.combinedlogslen)

-def run_tests(test_list, src_dir, build_dir, exeext, tmpdir, jobs=1, enable_coverage=False, args=[]):
+def run_tests(test_list, src_dir, build_dir, exeext, tmpdir, jobs=1, enable_coverage=False, args=[], combined_logs_len=0):
    # Warn if bitcoind is already running (unix only)
    try:
        if subprocess.check_output(["pidof", "bitcoind"]) is not None:
@ -314,7 +316,7 @@ def run_tests(test_list, src_dir, build_dir, exeext, tmpdir, jobs=1, enable_cove
    max_len_name = len(max(test_list, key=len))

    for _ in range(len(test_list)):
-        test_result, stdout, stderr = job_queue.get_next()
+        test_result, testdir, stdout, stderr = job_queue.get_next()
        test_results.append(test_result)

        if test_result.status == "Passed":
@ -325,6 +327,14 @@ def run_tests(test_list, src_dir, build_dir, exeext, tmpdir, jobs=1, enable_cove
            print("\n%s%s%s failed, Duration: %s s\n" % (BOLD[1], test_result.name, BOLD[0], test_result.time))
            print(BOLD[1] + 'stdout:\n' + BOLD[0] + stdout + '\n')
            print(BOLD[1] + 'stderr:\n' + BOLD[0] + stderr + '\n')
+            if combined_logs_len and os.path.isdir(testdir):
+                # Print the final `combinedlogslen` lines of the combined logs
+                print('{}Combine the logs and print the last {} lines ...{}'.format(BOLD[1], combined_logs_len, BOLD[0]))
+                print('\n============')
+                print('{}Combined log for {}:{}'.format(BOLD[1], testdir, BOLD[0]))
+                print('============\n')
+                combined_logs, _ = subprocess.Popen([os.path.join(tests_dir, 'combine_logs.py'), '-c', testdir], universal_newlines=True, stdout=subprocess.PIPE).communicate()
+                print("\n".join(deque(combined_logs.splitlines(), combined_logs_len)))

    print_results(test_results, max_len_name, (int(time.time() - time0)))

@ -389,13 +399,15 @@ class TestHandler:
            log_stdout = tempfile.SpooledTemporaryFile(max_size=2**16)
            log_stderr = tempfile.SpooledTemporaryFile(max_size=2**16)
            test_argv = t.split()
-            tmpdir = ["--tmpdir=%s/%s_%s" % (self.tmpdir, re.sub(".py$", "", test_argv[0]), portseed)]
+            testdir = "{}/{}_{}".format(self.tmpdir, re.sub(".py$", "", test_argv[0]), portseed)
+            tmpdir_arg = ["--tmpdir={}".format(testdir)]
            self.jobs.append((t,
                              time.time(),
-                              subprocess.Popen([self.tests_dir + test_argv[0]] + test_argv[1:] + self.flags + portseed_arg + tmpdir,
+                              subprocess.Popen([self.tests_dir + test_argv[0]] + test_argv[1:] + self.flags + portseed_arg + tmpdir_arg,
                                               universal_newlines=True,
                                               stdout=log_stdout,
                                               stderr=log_stderr),
+                              testdir,
                              log_stdout,
                              log_stderr))
        if not self.jobs:
@ -404,7 +416,7 @@ class TestHandler:
            # Return first proc that finishes
            time.sleep(.5)
            for j in self.jobs:
-                (name, time0, proc, log_out, log_err) = j
+                (name, time0, proc, testdir, log_out, log_err) = j
                if os.getenv('TRAVIS') == 'true' and int(time.time() - time0) > 20 * 60:
                    # In travis, timeout individual tests after 20 minutes (to stop tests hanging and not
                    # providing useful output.
@ -422,7 +434,7 @@ class TestHandler:
                    self.num_running -= 1
                    self.jobs.remove(j)

-                    return TestResult(name, status, int(time.time() - time0)), stdout, stderr
+                    return TestResult(name, status, int(time.time() - time0)), testdir, stdout, stderr
            print('.', end='', flush=True)

 class TestResult():