bench: Add support for measuring CPU cycles
This adds cycle min/max/avg to the statistics. Supported on x86 and x86_64 (natively through rdtsc), as well as Linux (perf syscall).
This commit is contained in:
parent
55b2eddcc8
commit
3532818746
5 changed files with 121 additions and 5 deletions
|
@ -22,7 +22,9 @@ bench_bench_bitcoin_SOURCES = \
|
||||||
bench/mempool_eviction.cpp \
|
bench/mempool_eviction.cpp \
|
||||||
bench/verify_script.cpp \
|
bench/verify_script.cpp \
|
||||||
bench/base58.cpp \
|
bench/base58.cpp \
|
||||||
bench/lockedpool.cpp
|
bench/lockedpool.cpp \
|
||||||
|
bench/perf.cpp \
|
||||||
|
bench/perf.h
|
||||||
|
|
||||||
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_TEST_FILES)
|
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_TEST_FILES)
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||||
|
|
||||||
#include "bench.h"
|
#include "bench.h"
|
||||||
|
#include "perf.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
@ -26,7 +27,9 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func)
|
||||||
void
|
void
|
||||||
BenchRunner::RunAll(double elapsedTimeForOne)
|
BenchRunner::RunAll(double elapsedTimeForOne)
|
||||||
{
|
{
|
||||||
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n";
|
perf_init();
|
||||||
|
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << ","
|
||||||
|
<< "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";
|
||||||
|
|
||||||
for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin();
|
for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin();
|
||||||
it != benchmarks.end(); ++it) {
|
it != benchmarks.end(); ++it) {
|
||||||
|
@ -35,6 +38,7 @@ BenchRunner::RunAll(double elapsedTimeForOne)
|
||||||
BenchFunction& func = it->second;
|
BenchFunction& func = it->second;
|
||||||
func(state);
|
func(state);
|
||||||
}
|
}
|
||||||
|
perf_fini();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool State::KeepRunning()
|
bool State::KeepRunning()
|
||||||
|
@ -44,8 +48,10 @@ bool State::KeepRunning()
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
double now;
|
double now;
|
||||||
|
uint64_t nowCycles;
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
lastTime = beginTime = now = gettimedouble();
|
lastTime = beginTime = now = gettimedouble();
|
||||||
|
lastCycles = beginCycles = nowCycles = perf_cpucycles();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
now = gettimedouble();
|
now = gettimedouble();
|
||||||
|
@ -53,6 +59,13 @@ bool State::KeepRunning()
|
||||||
double elapsedOne = elapsed * countMaskInv;
|
double elapsedOne = elapsed * countMaskInv;
|
||||||
if (elapsedOne < minTime) minTime = elapsedOne;
|
if (elapsedOne < minTime) minTime = elapsedOne;
|
||||||
if (elapsedOne > maxTime) maxTime = elapsedOne;
|
if (elapsedOne > maxTime) maxTime = elapsedOne;
|
||||||
|
|
||||||
|
// We only use relative values, so don't have to handle 64-bit wrap-around specially
|
||||||
|
nowCycles = perf_cpucycles();
|
||||||
|
uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv;
|
||||||
|
if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles;
|
||||||
|
if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles;
|
||||||
|
|
||||||
if (elapsed*128 < maxElapsed) {
|
if (elapsed*128 < maxElapsed) {
|
||||||
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
|
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
|
||||||
// The restart avoids including the overhead of this code in the measurement.
|
// The restart avoids including the overhead of this code in the measurement.
|
||||||
|
@ -61,6 +74,8 @@ bool State::KeepRunning()
|
||||||
count = 0;
|
count = 0;
|
||||||
minTime = std::numeric_limits<double>::max();
|
minTime = std::numeric_limits<double>::max();
|
||||||
maxTime = std::numeric_limits<double>::min();
|
maxTime = std::numeric_limits<double>::min();
|
||||||
|
minCycles = std::numeric_limits<uint64_t>::max();
|
||||||
|
maxCycles = std::numeric_limits<uint64_t>::min();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (elapsed*16 < maxElapsed) {
|
if (elapsed*16 < maxElapsed) {
|
||||||
|
@ -72,6 +87,7 @@ bool State::KeepRunning()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lastTime = now;
|
lastTime = now;
|
||||||
|
lastCycles = nowCycles;
|
||||||
++count;
|
++count;
|
||||||
|
|
||||||
if (now - beginTime < maxElapsed) return true; // Keep going
|
if (now - beginTime < maxElapsed) return true; // Keep going
|
||||||
|
@ -80,7 +96,9 @@ bool State::KeepRunning()
|
||||||
|
|
||||||
// Output results
|
// Output results
|
||||||
double average = (now-beginTime)/count;
|
double average = (now-beginTime)/count;
|
||||||
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n";
|
int64_t averageCycles = (nowCycles-beginCycles)/count;
|
||||||
|
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << ","
|
||||||
|
<< minCycles << "," << maxCycles << "," << averageCycles << "\n";
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,12 +41,18 @@ namespace benchmark {
|
||||||
double maxElapsed;
|
double maxElapsed;
|
||||||
double beginTime;
|
double beginTime;
|
||||||
double lastTime, minTime, maxTime, countMaskInv;
|
double lastTime, minTime, maxTime, countMaskInv;
|
||||||
int64_t count;
|
uint64_t count;
|
||||||
int64_t countMask;
|
uint64_t countMask;
|
||||||
|
uint64_t beginCycles;
|
||||||
|
uint64_t lastCycles;
|
||||||
|
uint64_t minCycles;
|
||||||
|
uint64_t maxCycles;
|
||||||
public:
|
public:
|
||||||
State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) {
|
State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) {
|
||||||
minTime = std::numeric_limits<double>::max();
|
minTime = std::numeric_limits<double>::max();
|
||||||
maxTime = std::numeric_limits<double>::min();
|
maxTime = std::numeric_limits<double>::min();
|
||||||
|
minCycles = std::numeric_limits<uint64_t>::max();
|
||||||
|
maxCycles = std::numeric_limits<uint64_t>::min();
|
||||||
countMask = 1;
|
countMask = 1;
|
||||||
countMaskInv = 1./(countMask + 1);
|
countMaskInv = 1./(countMask + 1);
|
||||||
}
|
}
|
||||||
|
|
53
src/bench/perf.cpp
Normal file
53
src/bench/perf.cpp
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
// Copyright (c) 2016 The Bitcoin Core developers
|
||||||
|
// Distributed under the MIT software license, see the accompanying
|
||||||
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||||
|
|
||||||
|
#include "perf.h"
|
||||||
|
|
||||||
|
#if defined(__i386__) || defined(__x86_64__)
|
||||||
|
|
||||||
|
/* These architectures support querying the cycle counter
|
||||||
|
* from user space, no need for any syscall overhead.
|
||||||
|
*/
|
||||||
|
/* Nothing to set up: on these architectures the counter is read directly from user space. */
void perf_init(void) { }
|
||||||
|
/* Nothing to tear down: perf_init() acquires no resources on these architectures. */
void perf_fini(void) { }
|
||||||
|
|
||||||
|
#elif defined(__linux__)
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <linux/perf_event.h>
|
||||||
|
|
||||||
|
static int fd = -1;
|
||||||
|
static struct perf_event_attr attr;
|
||||||
|
|
||||||
|
void perf_init(void)
|
||||||
|
{
|
||||||
|
attr.type = PERF_TYPE_HARDWARE;
|
||||||
|
attr.config = PERF_COUNT_HW_CPU_CYCLES;
|
||||||
|
fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void perf_fini(void)
|
||||||
|
{
|
||||||
|
if (fd != -1) {
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t perf_cpucycles(void)
|
||||||
|
{
|
||||||
|
uint64_t result = 0;
|
||||||
|
if (fd == -1 || read(fd, &result, sizeof(result)) < (ssize_t)sizeof(result)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* Unhandled platform */
|
||||||
|
|
||||||
|
/* No cycle-counter backend on this platform: initialisation is a no-op. */
void perf_init(void) { }
|
||||||
|
/* No cycle-counter backend on this platform: nothing to release. */
void perf_fini(void) { }
|
||||||
|
/* No cycle-counter backend on this platform: always reports 0 cycles. */
uint64_t perf_cpucycles(void) { return 0; }
|
||||||
|
|
||||||
|
#endif
|
37
src/bench/perf.h
Normal file
37
src/bench/perf.h
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
// Copyright (c) 2016 The Bitcoin Core developers
|
||||||
|
// Distributed under the MIT software license, see the accompanying
|
||||||
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||||
|
|
||||||
|
/** Functions for measurement of CPU cycles */
|
||||||
|
#ifndef H_PERF
|
||||||
|
#define H_PERF
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if defined(__i386__)
|
||||||
|
|
||||||
|
/**
 * Read the CPU cycle counter directly from user space (32-bit x86).
 *
 * The instruction is emitted as raw opcode bytes (0x0f 0x31, i.e. RDTSC);
 * the "=A" constraint binds the 64-bit result to the EDX:EAX register pair.
 */
static inline uint64_t perf_cpucycles(void)
{
    uint64_t tsc;
    __asm__ __volatile__ (".byte 0x0f, 0x31" : "=A" (tsc));
    return tsc;
}
|
||||||
|
|
||||||
|
#elif defined(__x86_64__)
|
||||||
|
|
||||||
|
/**
 * Read the CPU cycle counter directly from user space (x86_64).
 *
 * RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX; the two
 * halves are combined into a single 64-bit value.
 */
static inline uint64_t perf_cpucycles(void)
{
    uint32_t lowHalf, highHalf;
    __asm__ __volatile__ ("rdtsc" : "=a"(lowHalf), "=d"(highHalf));
    const uint64_t upperBits = ((uint64_t)highHalf) << 32;
    return upperBits | ((uint64_t)lowHalf);
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
uint64_t perf_cpucycles(void);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void perf_init(void);
|
||||||
|
void perf_fini(void);
|
||||||
|
|
||||||
|
#endif // H_PERF
|
Loading…
Add table
Reference in a new issue