tests: add utility to easily profile node performance with perf

Introduces `TestNode.profile_with_perf()` context manager which
samples node execution to produce profiling data.

Also introduces a test framework flag, `--perf`, which will run
perf on all nodes for the duration of a given test.
This commit is contained in:
James O'Beirne 2018-10-19 12:28:47 -04:00
parent df894fa69a
commit 58180b5fd4
5 changed files with 165 additions and 4 deletions

View file

@ -176,6 +176,26 @@ gdb /home/example/bitcoind <pid>
Note: gdb attach step may require `sudo` Note: gdb attach step may require `sudo`
##### Profiling
An easy way to profile node performance during functional tests is provided
for Linux platforms using `perf`.
Perf will sample the running node and will generate profile data in the node's
datadir. The profile data can then be presented using `perf report` or a graphical
tool like [hotspot](https://github.com/KDAB/hotspot).
To generate a profile during test suite runs, use the `--perf` flag.
To see render the output to text, run
```sh
perf report -i /path/to/datadir/send-big-msgs.perf.data.xxxx --stdio | c++filt | less
```
For ways to generate more granular profiles, see the README in
[test/functional](/test/functional).
### Util tests ### Util tests
Util tests can be run locally by running `test/util/bitcoin-util-test.py`. Util tests can be run locally by running `test/util/bitcoin-util-test.py`.

View file

@ -118,3 +118,36 @@ Helpers for script.py
#### [test_framework/blocktools.py](test_framework/blocktools.py) #### [test_framework/blocktools.py](test_framework/blocktools.py)
Helper functions for creating blocks and transactions. Helper functions for creating blocks and transactions.
### Benchmarking with perf
An easy way to profile node performance during functional tests is provided
for Linux platforms using `perf`.
Perf will sample the running node and will generate profile data in the node's
datadir. The profile data can then be presented using `perf report` or a graphical
tool like [hotspot](https://github.com/KDAB/hotspot).
There are two ways of invoking perf: one is to use the `--perf` flag when
running tests, which will profile each node during the entire test run: perf
begins to profile when the node starts and ends when it shuts down. The other
way is the use the `profile_with_perf` context manager, e.g.
```python
with node.profile_with_perf("send-big-msgs"):
# Perform activity on the node you're interested in profiling, e.g.:
for _ in range(10000):
node.p2p.send_message(some_large_message)
```
To see useful textual output, run
```sh
perf report -i /path/to/datadir/send-big-msgs.perf.data.xxxx --stdio | c++filt | less
```
#### See also:
- [Installing perf](https://askubuntu.com/q/50145)
- [Perf examples](http://www.brendangregg.com/perf.html)
- [Hotspot](https://github.com/KDAB/hotspot): a GUI for perf output analysis

View file

@ -128,6 +128,8 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
help="Attach a python debugger if test fails") help="Attach a python debugger if test fails")
parser.add_argument("--usecli", dest="usecli", default=False, action="store_true", parser.add_argument("--usecli", dest="usecli", default=False, action="store_true",
help="use bitcoin-cli instead of RPC for all commands") help="use bitcoin-cli instead of RPC for all commands")
parser.add_argument("--perf", dest="perf", default=False, action="store_true",
help="profile running nodes with perf for the duration of the test")
self.add_options(parser) self.add_options(parser)
self.options = parser.parse_args() self.options = parser.parse_args()
@ -201,11 +203,20 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
node.cleanup_on_exit = False node.cleanup_on_exit = False
self.log.info("Note: bitcoinds were not stopped and may still be running") self.log.info("Note: bitcoinds were not stopped and may still be running")
if not self.options.nocleanup and not self.options.noshutdown and success != TestStatus.FAILED: should_clean_up = (
not self.options.nocleanup and
not self.options.noshutdown and
success != TestStatus.FAILED and
not self.options.perf
)
if should_clean_up:
self.log.info("Cleaning up {} on exit".format(self.options.tmpdir)) self.log.info("Cleaning up {} on exit".format(self.options.tmpdir))
cleanup_tree_on_exit = True cleanup_tree_on_exit = True
elif self.options.perf:
self.log.warning("Not cleaning up dir {} due to perf data".format(self.options.tmpdir))
cleanup_tree_on_exit = False
else: else:
self.log.warning("Not cleaning up dir %s" % self.options.tmpdir) self.log.warning("Not cleaning up dir {}".format(self.options.tmpdir))
cleanup_tree_on_exit = False cleanup_tree_on_exit = False
if success == TestStatus.PASSED: if success == TestStatus.PASSED:
@ -309,6 +320,7 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
extra_conf=extra_confs[i], extra_conf=extra_confs[i],
extra_args=extra_args[i], extra_args=extra_args[i],
use_cli=self.options.usecli, use_cli=self.options.usecli,
start_perf=self.options.perf,
)) ))
def start_node(self, i, *args, **kwargs): def start_node(self, i, *args, **kwargs):

View file

@ -18,6 +18,8 @@ import tempfile
import time import time
import urllib.parse import urllib.parse
import collections import collections
import shlex
import sys
from .authproxy import JSONRPCException from .authproxy import JSONRPCException
from .util import ( from .util import (
@ -59,7 +61,13 @@ class TestNode():
To make things easier for the test writer, any unrecognised messages will To make things easier for the test writer, any unrecognised messages will
be dispatched to the RPC connection.""" be dispatched to the RPC connection."""
def __init__(self, i, datadir, *, rpchost, timewait, bitcoind, bitcoin_cli, mocktime, coverage_dir, extra_conf=None, extra_args=None, use_cli=False): def __init__(self, i, datadir, *, rpchost, timewait, bitcoind, bitcoin_cli, mocktime, coverage_dir, extra_conf=None, extra_args=None, use_cli=False, start_perf=False):
"""
Kwargs:
start_perf (bool): If True, begin profiling the node with `perf` as soon as
the node starts.
"""
self.index = i self.index = i
self.datadir = datadir self.datadir = datadir
self.stdout_dir = os.path.join(self.datadir, "stdout") self.stdout_dir = os.path.join(self.datadir, "stdout")
@ -87,6 +95,7 @@ class TestNode():
self.cli = TestNodeCLI(bitcoin_cli, self.datadir) self.cli = TestNodeCLI(bitcoin_cli, self.datadir)
self.use_cli = use_cli self.use_cli = use_cli
self.start_perf = start_perf
self.running = False self.running = False
self.process = None self.process = None
@ -95,6 +104,8 @@ class TestNode():
self.url = None self.url = None
self.log = logging.getLogger('TestFramework.node%d' % i) self.log = logging.getLogger('TestFramework.node%d' % i)
self.cleanup_on_exit = True # Whether to kill the node when this object goes away self.cleanup_on_exit = True # Whether to kill the node when this object goes away
# Cache perf subprocesses here by their data output filename.
self.perf_subprocesses = {}
self.p2ps = [] self.p2ps = []
@ -186,6 +197,9 @@ class TestNode():
self.running = True self.running = True
self.log.debug("bitcoind started, waiting for RPC to come up") self.log.debug("bitcoind started, waiting for RPC to come up")
if self.start_perf:
self._start_perf()
def wait_for_rpc_connection(self): def wait_for_rpc_connection(self):
"""Sets up an RPC connection to the bitcoind process. Returns False if unable to connect.""" """Sets up an RPC connection to the bitcoind process. Returns False if unable to connect."""
# Poll at a rate of four times per second # Poll at a rate of four times per second
@ -238,6 +252,10 @@ class TestNode():
except http.client.CannotSendRequest: except http.client.CannotSendRequest:
self.log.exception("Unable to stop node.") self.log.exception("Unable to stop node.")
# If there are any running perf processes, stop them.
for profile_name in tuple(self.perf_subprocesses.keys()):
self._stop_perf(profile_name)
# Check that stderr is as expected # Check that stderr is as expected
self.stderr.seek(0) self.stderr.seek(0)
stderr = self.stderr.read().decode('utf-8').strip() stderr = self.stderr.read().decode('utf-8').strip()
@ -317,6 +335,84 @@ class TestNode():
increase_allowed * 100, before_memory_usage, after_memory_usage, increase_allowed * 100, before_memory_usage, after_memory_usage,
perc_increase_memory_usage * 100)) perc_increase_memory_usage * 100))
@contextlib.contextmanager
def profile_with_perf(self, profile_name):
"""
Context manager that allows easy profiling of node activity using `perf`.
See `test/functional/README.md` for details on perf usage.
Args:
profile_name (str): This string will be appended to the
profile data filename generated by perf.
"""
subp = self._start_perf(profile_name)
yield
if subp:
self._stop_perf(profile_name)
def _start_perf(self, profile_name=None):
"""Start a perf process to profile this node.
Returns the subprocess running perf."""
subp = None
def test_success(cmd):
return subprocess.call(
# shell=True required for pipe use below
cmd, shell=True,
stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) == 0
if not sys.platform.startswith('linux'):
self.log.warning("Can't profile with perf; only availabe on Linux platforms")
return None
if not test_success('which perf'):
self.log.warning("Can't profile with perf; must install perf-tools")
return None
if not test_success('readelf -S {} | grep .debug_str'.format(shlex.quote(self.binary))):
self.log.warning(
"perf output won't be very useful without debug symbols compiled into bitcoind")
output_path = tempfile.NamedTemporaryFile(
dir=self.datadir,
prefix="{}.perf.data.".format(profile_name or 'test'),
delete=False,
).name
cmd = [
'perf', 'record',
'-g', # Record the callgraph.
'--call-graph', 'dwarf', # Compatibility for gcc's --fomit-frame-pointer.
'-F', '101', # Sampling frequency in Hz.
'-p', str(self.process.pid),
'-o', output_path,
]
subp = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
self.perf_subprocesses[profile_name] = subp
return subp
def _stop_perf(self, profile_name):
"""Stop (and pop) a perf subprocess."""
subp = self.perf_subprocesses.pop(profile_name)
output_path = subp.args[subp.args.index('-o') + 1]
subp.terminate()
subp.wait(timeout=10)
stderr = subp.stderr.read().decode()
if 'Consider tweaking /proc/sys/kernel/perf_event_paranoid' in stderr:
self.log.warning(
"perf couldn't collect data! Try "
"'sudo sysctl -w kernel.perf_event_paranoid=-1'")
else:
report_cmd = "perf report -i {}".format(output_path)
self.log.info("See perf output by running '{}'".format(report_cmd))
def assert_start_raises_init_error(self, extra_args=None, expected_msg=None, match=ErrorMatch.FULL_TEXT, *args, **kwargs): def assert_start_raises_init_error(self, extra_args=None, expected_msg=None, match=ErrorMatch.FULL_TEXT, *args, **kwargs):
"""Attempt to start the node and expect it to raise an error. """Attempt to start the node and expect it to raise an error.

View file

@ -15,5 +15,5 @@ fi
vulture \ vulture \
--min-confidence 60 \ --min-confidence 60 \
--ignore-names "argtypes,connection_lost,connection_made,converter,data_received,daemon,errcheck,get_ecdh_key,get_privkey,is_compressed,is_fullyvalid,msg_generic,on_*,optionxform,restype,set_privkey" \ --ignore-names "argtypes,connection_lost,connection_made,converter,data_received,daemon,errcheck,get_ecdh_key,get_privkey,is_compressed,is_fullyvalid,msg_generic,on_*,optionxform,restype,set_privkey,profile_with_perf" \
$(git ls-files -- "*.py" ":(exclude)contrib/" ":(exclude)test/functional/data/invalid_txs.py") $(git ls-files -- "*.py" ":(exclude)contrib/" ":(exclude)test/functional/data/invalid_txs.py")