move wallet server prometheus

-only run wallet server metrics for the wallet server
This commit is contained in:
Jack Robison 2020-04-23 20:21:27 -04:00
parent 79624febc0
commit 36c05fc4b9
No known key found for this signature in database
GPG key ID: DF25C68FE0239BB2
8 changed files with 181 additions and 111 deletions

32
lbry/prometheus.py Normal file
View file

@ -0,0 +1,32 @@
import logging
from aiohttp import web
from prometheus_client import generate_latest as prom_generate_latest
class PrometheusServer:
def __init__(self, logger=None):
self.runner = None
self.logger = logger or logging.getLogger(__name__)
async def start(self, interface: str, port: int):
prom_app = web.Application()
prom_app.router.add_get('/metrics', self.handle_metrics_get_request)
self.runner = web.AppRunner(prom_app)
await self.runner.setup()
metrics_site = web.TCPSite(self.runner, interface, port, shutdown_timeout=.5)
await metrics_site.start()
self.logger.info('metrics server listening on %s:%i', *metrics_site._server.sockets[0].getsockname()[:2])
async def handle_metrics_get_request(self, request: web.Request):
try:
return web.Response(
text=prom_generate_latest().decode(),
content_type='text/plain; version=0.0.4'
)
except Exception:
self.logger.exception('could not generate prometheus data')
raise
async def stop(self):
await self.runner.cleanup()

View file

@ -39,7 +39,7 @@ from lbry.wallet.tasks import TaskGroup
from .jsonrpc import Request, JSONRPCConnection, JSONRPCv2, JSONRPC, Batch, Notification from .jsonrpc import Request, JSONRPCConnection, JSONRPCv2, JSONRPC, Batch, Notification
from .jsonrpc import RPCError, ProtocolError from .jsonrpc import RPCError, ProtocolError
from .framing import BadMagicError, BadChecksumError, OversizedPayloadError, BitcoinFramer, NewlineFramer from .framing import BadMagicError, BadChecksumError, OversizedPayloadError, BitcoinFramer, NewlineFramer
from lbry.wallet.server.prometheus import NOTIFICATION_COUNT, RESPONSE_TIMES, REQUEST_ERRORS_COUNT, RESET_CONNECTIONS from lbry.wallet.server import prometheus
class Connector: class Connector:
@ -388,7 +388,7 @@ class RPCSession(SessionBase):
except MemoryError: except MemoryError:
self.logger.warning('received oversized message from %s:%s, dropping connection', self.logger.warning('received oversized message from %s:%s, dropping connection',
self._address[0], self._address[1]) self._address[0], self._address[1])
RESET_CONNECTIONS.labels(version=self.client_version).inc() prometheus.METRICS.RESET_CONNECTIONS.labels(version=self.client_version).inc()
self._close() self._close()
return return
@ -422,7 +422,7 @@ class RPCSession(SessionBase):
'internal server error') 'internal server error')
if isinstance(request, Request): if isinstance(request, Request):
message = request.send_result(result) message = request.send_result(result)
RESPONSE_TIMES.labels( prometheus.METRICS.RESPONSE_TIMES.labels(
method=request.method, method=request.method,
version=self.client_version version=self.client_version
).observe(time.perf_counter() - start) ).observe(time.perf_counter() - start)
@ -430,7 +430,7 @@ class RPCSession(SessionBase):
await self._send_message(message) await self._send_message(message)
if isinstance(result, Exception): if isinstance(result, Exception):
self._bump_errors() self._bump_errors()
REQUEST_ERRORS_COUNT.labels( prometheus.METRICS.REQUEST_ERRORS_COUNT.labels(
method=request.method, method=request.method,
version=self.client_version version=self.client_version
).inc() ).inc()
@ -467,7 +467,7 @@ class RPCSession(SessionBase):
async def send_notification(self, method, args=()): async def send_notification(self, method, args=()):
"""Send an RPC notification over the network.""" """Send an RPC notification over the network."""
message = self.connection.send_notification(Notification(method, args)) message = self.connection.send_notification(Notification(method, args))
NOTIFICATION_COUNT.labels(method=method, version=self.client_version).inc() prometheus.METRICS.NOTIFICATION_COUNT.labels(method=method, version=self.client_version).inc()
await self._send_message(message) await self._send_message(message)
def send_batch(self, raise_errors=False): def send_batch(self, raise_errors=False):

View file

@ -10,7 +10,7 @@ from lbry.wallet.server.daemon import DaemonError
from lbry.wallet.server.hash import hash_to_hex_str, HASHX_LEN from lbry.wallet.server.hash import hash_to_hex_str, HASHX_LEN
from lbry.wallet.server.util import chunks, class_logger from lbry.wallet.server.util import chunks, class_logger
from lbry.wallet.server.leveldb import FlushData from lbry.wallet.server.leveldb import FlushData
from lbry.wallet.server.prometheus import BLOCK_COUNT, BLOCK_UPDATE_TIMES, REORG_COUNT from lbry.wallet.server import prometheus
class Prefetcher: class Prefetcher:
@ -199,8 +199,8 @@ class BlockProcessor:
cache.clear() cache.clear()
await self._maybe_flush() await self._maybe_flush()
processed_time = time.perf_counter() - start processed_time = time.perf_counter() - start
BLOCK_COUNT.set(self.height) prometheus.METRICS.BLOCK_COUNT.set(self.height)
BLOCK_UPDATE_TIMES.observe(processed_time) prometheus.METRICS.BLOCK_UPDATE_TIMES.observe(processed_time)
if not self.db.first_sync: if not self.db.first_sync:
s = '' if len(blocks) == 1 else 's' s = '' if len(blocks) == 1 else 's'
self.logger.info('processed {:,d} block{} in {:.1f}s'.format(len(blocks), s, processed_time)) self.logger.info('processed {:,d} block{} in {:.1f}s'.format(len(blocks), s, processed_time))
@ -255,7 +255,7 @@ class BlockProcessor:
last -= len(raw_blocks) last -= len(raw_blocks)
await self.run_in_thread_with_lock(self.db.sql.delete_claims_above_height, self.height) await self.run_in_thread_with_lock(self.db.sql.delete_claims_above_height, self.height)
await self.prefetcher.reset_height(self.height) await self.prefetcher.reset_height(self.height)
REORG_COUNT.inc() prometheus.METRICS.REORG_COUNT.inc()
async def reorg_hashes(self, count): async def reorg_hashes(self, count):
"""Return a pair (start, last, hashes) of blocks to back up during a """Return a pair (start, last, hashes) of blocks to back up during a

View file

@ -10,7 +10,8 @@ import aiohttp
from lbry.wallet.rpc.jsonrpc import RPCError from lbry.wallet.rpc.jsonrpc import RPCError
from lbry.wallet.server.util import hex_to_bytes, class_logger from lbry.wallet.server.util import hex_to_bytes, class_logger
from lbry.wallet.rpc import JSONRPC from lbry.wallet.rpc import JSONRPC
from lbry.wallet.server.prometheus import LBRYCRD_REQUEST_TIMES, LBRYCRD_PENDING_COUNT from lbry.wallet.server import prometheus
class DaemonError(Exception): class DaemonError(Exception):
"""Raised when the daemon returns an error in its results.""" """Raised when the daemon returns an error in its results."""
@ -129,7 +130,7 @@ class Daemon:
while True: while True:
try: try:
for method in methods: for method in methods:
LBRYCRD_PENDING_COUNT.labels(method=method).inc() prometheus.METRICS.LBRYCRD_PENDING_COUNT.labels(method=method).inc()
result = await self._send_data(data) result = await self._send_data(data)
result = processor(result) result = processor(result)
if on_good_message: if on_good_message:
@ -154,7 +155,7 @@ class Daemon:
on_good_message = 'running normally' on_good_message = 'running normally'
finally: finally:
for method in methods: for method in methods:
LBRYCRD_PENDING_COUNT.labels(method=method).dec() prometheus.METRICS.LBRYCRD_PENDING_COUNT.labels(method=method).dec()
await asyncio.sleep(retry) await asyncio.sleep(retry)
retry = max(min(self.max_retry, retry * 2), self.init_retry) retry = max(min(self.max_retry, retry * 2), self.init_retry)
@ -175,7 +176,7 @@ class Daemon:
if params: if params:
payload['params'] = params payload['params'] = params
result = await self._send(payload, processor) result = await self._send(payload, processor)
LBRYCRD_REQUEST_TIMES.labels(method=method).observe(time.perf_counter() - start) prometheus.METRICS.LBRYCRD_REQUEST_TIMES.labels(method=method).observe(time.perf_counter() - start)
return result return result
async def _send_vector(self, method, params_iterable, replace_errs=False): async def _send_vector(self, method, params_iterable, replace_errs=False):
@ -200,7 +201,7 @@ class Daemon:
result = [] result = []
if payload: if payload:
result = await self._send(payload, processor) result = await self._send(payload, processor)
LBRYCRD_REQUEST_TIMES.labels(method=method).observe(time.perf_counter()-start) prometheus.METRICS.LBRYCRD_REQUEST_TIMES.labels(method=method).observe(time.perf_counter()-start)
return result return result
async def _is_rpc_available(self, method): async def _is_rpc_available(self, method):

View file

@ -1,89 +1,125 @@
import os import os
from aiohttp import web from prometheus_client import Counter, Info, Histogram, Gauge
from prometheus_client import Counter, Info, generate_latest as prom_generate_latest, Histogram, Gauge
from lbry import __version__ as version from lbry import __version__ as version
from lbry.build_info import BUILD, COMMIT_HASH, DOCKER_TAG from lbry.build_info import BUILD, COMMIT_HASH, DOCKER_TAG
from lbry.wallet.server import util from lbry.wallet.server import util
import lbry.wallet.server.version as wallet_server_version import lbry.wallet.server.version as wallet_server_version
NAMESPACE = "wallet_server"
CPU_COUNT = f"{os.cpu_count()}" class PrometheusMetrics:
VERSION_INFO = Info('build', 'Wallet server build info (e.g. version, commit hash)', namespace=NAMESPACE) VERSION_INFO: Info
VERSION_INFO.info({ SESSIONS_COUNT: Gauge
REQUESTS_COUNT: Counter
RESPONSE_TIMES: Histogram
NOTIFICATION_COUNT: Counter
REQUEST_ERRORS_COUNT: Counter
SQLITE_INTERRUPT_COUNT: Counter
SQLITE_OPERATIONAL_ERROR_COUNT: Counter
SQLITE_INTERNAL_ERROR_COUNT: Counter
SQLITE_EXECUTOR_TIMES: Histogram
SQLITE_PENDING_COUNT: Gauge
LBRYCRD_REQUEST_TIMES: Histogram
LBRYCRD_PENDING_COUNT: Gauge
CLIENT_VERSIONS: Counter
BLOCK_COUNT: Gauge
BLOCK_UPDATE_TIMES: Histogram
REORG_COUNT: Gauge
RESET_CONNECTIONS: Counter
__slots__ = [
'VERSION_INFO',
'SESSIONS_COUNT',
'REQUESTS_COUNT',
'RESPONSE_TIMES',
'NOTIFICATION_COUNT',
'REQUEST_ERRORS_COUNT',
'SQLITE_INTERRUPT_COUNT',
'SQLITE_OPERATIONAL_ERROR_COUNT',
'SQLITE_INTERNAL_ERROR_COUNT',
'SQLITE_EXECUTOR_TIMES',
'SQLITE_PENDING_COUNT',
'LBRYCRD_REQUEST_TIMES',
'LBRYCRD_PENDING_COUNT',
'CLIENT_VERSIONS',
'BLOCK_COUNT',
'BLOCK_UPDATE_TIMES',
'REORG_COUNT',
'RESET_CONNECTIONS',
'_installed',
'namespace',
'cpu_count'
]
def __init__(self):
self._installed = False
self.namespace = "wallet_server"
self.cpu_count = f"{os.cpu_count()}"
def uninstall(self):
self._installed = False
for item in self.__slots__:
if not item.startswith('_') and item not in ('namespace', 'cpu_count'):
current = getattr(self, item, None)
if current:
setattr(self, item, None)
del current
def install(self):
if self._installed:
return
self._installed = True
self.VERSION_INFO = Info('build', 'Wallet server build info (e.g. version, commit hash)', namespace=self.namespace)
self.VERSION_INFO.info({
'build': BUILD, 'build': BUILD,
"commit": COMMIT_HASH, "commit": COMMIT_HASH,
"docker_tag": DOCKER_TAG, "docker_tag": DOCKER_TAG,
'version': version, 'version': version,
"min_version": util.version_string(wallet_server_version.PROTOCOL_MIN), "min_version": util.version_string(wallet_server_version.PROTOCOL_MIN),
"cpu_count": CPU_COUNT "cpu_count": self.cpu_count
}) })
SESSIONS_COUNT = Gauge("session_count", "Number of connected client sessions", namespace=NAMESPACE, self.SESSIONS_COUNT = Gauge("session_count", "Number of connected client sessions", namespace=self.namespace,
labelnames=("version",)) labelnames=("version",))
REQUESTS_COUNT = Counter("requests_count", "Number of requests received", namespace=NAMESPACE, self.REQUESTS_COUNT = Counter("requests_count", "Number of requests received", namespace=self.namespace,
labelnames=("method", "version")) labelnames=("method", "version"))
RESPONSE_TIMES = Histogram("response_time", "Response times", namespace=NAMESPACE, labelnames=("method", "version")) self.RESPONSE_TIMES = Histogram("response_time", "Response times", namespace=self.namespace,
NOTIFICATION_COUNT = Counter("notification", "Number of notifications sent (for subscriptions)",
namespace=NAMESPACE, labelnames=("method", "version"))
REQUEST_ERRORS_COUNT = Counter("request_error", "Number of requests that returned errors", namespace=NAMESPACE,
labelnames=("method", "version")) labelnames=("method", "version"))
SQLITE_INTERRUPT_COUNT = Counter("interrupt", "Number of interrupted queries", namespace=NAMESPACE) self.NOTIFICATION_COUNT = Counter("notification", "Number of notifications sent (for subscriptions)",
SQLITE_OPERATIONAL_ERROR_COUNT = Counter( namespace=self.namespace, labelnames=("method", "version"))
"operational_error", "Number of queries that raised operational errors", namespace=NAMESPACE self.REQUEST_ERRORS_COUNT = Counter("request_error", "Number of requests that returned errors", namespace=self.namespace,
labelnames=("method", "version"))
self.SQLITE_INTERRUPT_COUNT = Counter("interrupt", "Number of interrupted queries", namespace=self.namespace)
self.SQLITE_OPERATIONAL_ERROR_COUNT = Counter(
"operational_error", "Number of queries that raised operational errors", namespace=self.namespace
) )
SQLITE_INTERNAL_ERROR_COUNT = Counter( self.SQLITE_INTERNAL_ERROR_COUNT = Counter(
"internal_error", "Number of queries raising unexpected errors", namespace=NAMESPACE "internal_error", "Number of queries raising unexpected errors", namespace=self.namespace
) )
SQLITE_EXECUTOR_TIMES = Histogram("executor_time", "SQLite executor times", namespace=NAMESPACE) self.SQLITE_EXECUTOR_TIMES = Histogram("executor_time", "SQLite executor times", namespace=self.namespace)
SQLITE_PENDING_COUNT = Gauge( self.SQLITE_PENDING_COUNT = Gauge(
"pending_queries_count", "Number of pending and running sqlite queries", namespace=NAMESPACE "pending_queries_count", "Number of pending and running sqlite queries", namespace=self.namespace
) )
LBRYCRD_REQUEST_TIMES = Histogram( self.LBRYCRD_REQUEST_TIMES = Histogram(
"lbrycrd_request", "lbrycrd requests count", namespace=NAMESPACE, labelnames=("method",) "lbrycrd_request", "lbrycrd requests count", namespace=self.namespace, labelnames=("method",)
) )
LBRYCRD_PENDING_COUNT = Gauge( self.LBRYCRD_PENDING_COUNT = Gauge(
"lbrycrd_pending_count", "Number of lbrycrd rpcs that are in flight", namespace=NAMESPACE, labelnames=("method",) "lbrycrd_pending_count", "Number of lbrycrd rpcs that are in flight", namespace=self.namespace,
labelnames=("method",)
) )
CLIENT_VERSIONS = Counter( self.CLIENT_VERSIONS = Counter(
"clients", "Number of connections received per client version", "clients", "Number of connections received per client version",
namespace=NAMESPACE, labelnames=("version",) namespace=self.namespace, labelnames=("version",)
) )
BLOCK_COUNT = Gauge( self.BLOCK_COUNT = Gauge(
"block_count", "Number of processed blocks", namespace=NAMESPACE "block_count", "Number of processed blocks", namespace=self.namespace
) )
BLOCK_UPDATE_TIMES = Histogram("block_time", "Block update times", namespace=NAMESPACE) self.BLOCK_UPDATE_TIMES = Histogram("block_time", "Block update times", namespace=self.namespace)
REORG_COUNT = Gauge( self.REORG_COUNT = Gauge(
"reorg_count", "Number of reorgs", namespace=NAMESPACE "reorg_count", "Number of reorgs", namespace=self.namespace
) )
RESET_CONNECTIONS = Counter( self.RESET_CONNECTIONS = Counter(
"reset_clients", "Number of reset connections by client version", "reset_clients", "Number of reset connections by client version",
namespace=NAMESPACE, labelnames=("version",) namespace=self.namespace, labelnames=("version",)
) )
class PrometheusServer: METRICS = PrometheusMetrics()
def __init__(self):
self.logger = util.class_logger(__name__, self.__class__.__name__)
self.runner = None
async def start(self, port: int):
prom_app = web.Application()
prom_app.router.add_get('/metrics', self.handle_metrics_get_request)
self.runner = web.AppRunner(prom_app)
await self.runner.setup()
metrics_site = web.TCPSite(self.runner, "0.0.0.0", port, shutdown_timeout=.5)
await metrics_site.start()
self.logger.info('metrics server listening on %s:%i', *metrics_site._server.sockets[0].getsockname()[:2])
async def handle_metrics_get_request(self, request: web.Request):
try:
return web.Response(
text=prom_generate_latest().decode(),
content_type='text/plain; version=0.0.4'
)
except Exception:
self.logger.exception('could not generate prometheus data')
raise
async def stop(self):
await self.runner.cleanup()

View file

@ -6,7 +6,8 @@ import typing
import lbry import lbry
from lbry.wallet.server.mempool import MemPool, MemPoolAPI from lbry.wallet.server.mempool import MemPool, MemPoolAPI
from lbry.wallet.server.prometheus import PrometheusServer from lbry.prometheus import PrometheusServer
from lbry.wallet.server.prometheus import METRICS
class Notifications: class Notifications:
@ -92,6 +93,7 @@ class Server:
) )
async def start(self): async def start(self):
METRICS.install()
env = self.env env = self.env
min_str, max_str = env.coin.SESSIONCLS.protocol_min_max_strings() min_str, max_str = env.coin.SESSIONCLS.protocol_min_max_strings()
self.log.info(f'software version: {lbry.__version__}') self.log.info(f'software version: {lbry.__version__}')
@ -121,6 +123,7 @@ class Server:
self.prometheus_server = None self.prometheus_server = None
self.shutdown_event.set() self.shutdown_event.set()
await self.daemon.close() await self.daemon.close()
METRICS.uninstall()
def run(self): def run(self):
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
@ -143,4 +146,4 @@ class Server:
async def start_prometheus(self): async def start_prometheus(self):
if not self.prometheus_server and self.env.prometheus_port: if not self.prometheus_server and self.env.prometheus_port:
self.prometheus_server = PrometheusServer() self.prometheus_server = PrometheusServer()
await self.prometheus_server.start(self.env.prometheus_port) await self.prometheus_server.start("0.0.0.0", self.env.prometheus_port)

View file

@ -27,9 +27,7 @@ from lbry.wallet.server.db.writer import LBRYLevelDB
from lbry.wallet.server.db import reader from lbry.wallet.server.db import reader
from lbry.wallet.server.websocket import AdminWebSocket from lbry.wallet.server.websocket import AdminWebSocket
from lbry.wallet.server.metrics import ServerLoadData, APICallMetrics from lbry.wallet.server.metrics import ServerLoadData, APICallMetrics
from lbry.wallet.server.prometheus import REQUESTS_COUNT, SQLITE_INTERRUPT_COUNT, SQLITE_INTERNAL_ERROR_COUNT from lbry.wallet.server import prometheus
from lbry.wallet.server.prometheus import SQLITE_OPERATIONAL_ERROR_COUNT, SQLITE_EXECUTOR_TIMES, SESSIONS_COUNT
from lbry.wallet.server.prometheus import SQLITE_PENDING_COUNT, CLIENT_VERSIONS
from lbry.wallet.rpc.framing import NewlineFramer from lbry.wallet.rpc.framing import NewlineFramer
import lbry.wallet.server.version as VERSION import lbry.wallet.server.version as VERSION
@ -677,7 +675,7 @@ class SessionBase(RPCSession):
context = {'conn_id': f'{self.session_id}'} context = {'conn_id': f'{self.session_id}'}
self.logger = util.ConnectionLogger(self.logger, context) self.logger = util.ConnectionLogger(self.logger, context)
self.group = self.session_mgr.add_session(self) self.group = self.session_mgr.add_session(self)
SESSIONS_COUNT.labels(version=self.client_version).inc() prometheus.METRICS.SESSIONS_COUNT.labels(version=self.client_version).inc()
peer_addr_str = self.peer_address_str() peer_addr_str = self.peer_address_str()
self.logger.info(f'{self.kind} {peer_addr_str}, ' self.logger.info(f'{self.kind} {peer_addr_str}, '
f'{self.session_mgr.session_count():,d} total') f'{self.session_mgr.session_count():,d} total')
@ -686,7 +684,7 @@ class SessionBase(RPCSession):
"""Handle client disconnection.""" """Handle client disconnection."""
super().connection_lost(exc) super().connection_lost(exc)
self.session_mgr.remove_session(self) self.session_mgr.remove_session(self)
SESSIONS_COUNT.labels(version=self.client_version).dec() prometheus.METRICS.SESSIONS_COUNT.labels(version=self.client_version).dec()
msg = '' msg = ''
if not self._can_send.is_set(): if not self._can_send.is_set():
msg += ' whilst paused' msg += ' whilst paused'
@ -710,7 +708,7 @@ class SessionBase(RPCSession):
"""Handle an incoming request. ElectrumX doesn't receive """Handle an incoming request. ElectrumX doesn't receive
notifications from client sessions. notifications from client sessions.
""" """
REQUESTS_COUNT.labels(method=request.method, version=self.client_version).inc() prometheus.METRICS.REQUESTS_COUNT.labels(method=request.method, version=self.client_version).inc()
if isinstance(request, Request): if isinstance(request, Request):
handler = self.request_handlers.get(request.method) handler = self.request_handlers.get(request.method)
handler = partial(handler, self) handler = partial(handler, self)
@ -946,7 +944,7 @@ class LBRYElectrumX(SessionBase):
async def run_in_executor(self, query_name, func, kwargs): async def run_in_executor(self, query_name, func, kwargs):
start = time.perf_counter() start = time.perf_counter()
try: try:
SQLITE_PENDING_COUNT.inc() prometheus.METRICS.SQLITE_PENDING_COUNT.inc()
result = await asyncio.get_running_loop().run_in_executor( result = await asyncio.get_running_loop().run_in_executor(
self.session_mgr.query_executor, func, kwargs self.session_mgr.query_executor, func, kwargs
) )
@ -955,18 +953,18 @@ class LBRYElectrumX(SessionBase):
except reader.SQLiteInterruptedError as error: except reader.SQLiteInterruptedError as error:
metrics = self.get_metrics_or_placeholder_for_api(query_name) metrics = self.get_metrics_or_placeholder_for_api(query_name)
metrics.query_interrupt(start, error.metrics) metrics.query_interrupt(start, error.metrics)
SQLITE_INTERRUPT_COUNT.inc() prometheus.METRICS.prometheus.METRICS.SQLITE_INTERRUPT_COUNT.inc()
raise RPCError(JSONRPC.QUERY_TIMEOUT, 'sqlite query timed out') raise RPCError(JSONRPC.QUERY_TIMEOUT, 'sqlite query timed out')
except reader.SQLiteOperationalError as error: except reader.SQLiteOperationalError as error:
metrics = self.get_metrics_or_placeholder_for_api(query_name) metrics = self.get_metrics_or_placeholder_for_api(query_name)
metrics.query_error(start, error.metrics) metrics.query_error(start, error.metrics)
SQLITE_OPERATIONAL_ERROR_COUNT.inc() prometheus.METRICS.SQLITE_OPERATIONAL_ERROR_COUNT.inc()
raise RPCError(JSONRPC.INTERNAL_ERROR, 'query failed to execute') raise RPCError(JSONRPC.INTERNAL_ERROR, 'query failed to execute')
except Exception: except Exception:
log.exception("dear devs, please handle this exception better") log.exception("dear devs, please handle this exception better")
metrics = self.get_metrics_or_placeholder_for_api(query_name) metrics = self.get_metrics_or_placeholder_for_api(query_name)
metrics.query_error(start, {}) metrics.query_error(start, {})
SQLITE_INTERNAL_ERROR_COUNT.inc() prometheus.METRICS.SQLITE_INTERNAL_ERROR_COUNT.inc()
raise RPCError(JSONRPC.INTERNAL_ERROR, 'unknown server error') raise RPCError(JSONRPC.INTERNAL_ERROR, 'unknown server error')
else: else:
if self.env.track_metrics: if self.env.track_metrics:
@ -975,8 +973,8 @@ class LBRYElectrumX(SessionBase):
metrics.query_response(start, metrics_data) metrics.query_response(start, metrics_data)
return base64.b64encode(result).decode() return base64.b64encode(result).decode()
finally: finally:
SQLITE_PENDING_COUNT.dec() prometheus.METRICS.SQLITE_PENDING_COUNT.dec()
SQLITE_EXECUTOR_TIMES.observe(time.perf_counter() - start) prometheus.METRICS.SQLITE_EXECUTOR_TIMES.observe(time.perf_counter() - start)
async def run_and_cache_query(self, query_name, function, kwargs): async def run_and_cache_query(self, query_name, function, kwargs):
metrics = self.get_metrics_or_placeholder_for_api(query_name) metrics = self.get_metrics_or_placeholder_for_api(query_name)
@ -1443,10 +1441,10 @@ class LBRYElectrumX(SessionBase):
raise RPCError(BAD_REQUEST, raise RPCError(BAD_REQUEST,
f'unsupported client: {client_name}') f'unsupported client: {client_name}')
if self.client_version != client_name[:17]: if self.client_version != client_name[:17]:
SESSIONS_COUNT.labels(version=self.client_version).dec() prometheus.METRICS.SESSIONS_COUNT.labels(version=self.client_version).dec()
self.client_version = client_name[:17] self.client_version = client_name[:17]
SESSIONS_COUNT.labels(version=self.client_version).inc() prometheus.METRICS.SESSIONS_COUNT.labels(version=self.client_version).inc()
CLIENT_VERSIONS.labels(version=self.client_version).inc() prometheus.METRICS.CLIENT_VERSIONS.labels(version=self.client_version).inc()
# Find the highest common protocol version. Disconnect if # Find the highest common protocol version. Disconnect if
# that protocol version in unsupported. # that protocol version in unsupported.

View file

@ -2,7 +2,7 @@ import logging
import asyncio import asyncio
from binascii import hexlify from binascii import hexlify
from lbry.testcase import CommandTestCase from lbry.testcase import CommandTestCase
from lbry.wallet.server.prometheus import REORG_COUNT from lbry.wallet.server import prometheus
class BlockchainReorganizationTests(CommandTestCase): class BlockchainReorganizationTests(CommandTestCase):
@ -16,7 +16,7 @@ class BlockchainReorganizationTests(CommandTestCase):
) )
async def test_reorg(self): async def test_reorg(self):
REORG_COUNT.set(0) prometheus.METRICS.REORG_COUNT.set(0)
# invalidate current block, move forward 2 # invalidate current block, move forward 2
self.assertEqual(self.ledger.headers.height, 206) self.assertEqual(self.ledger.headers.height, 206)
await self.assertBlockHash(206) await self.assertBlockHash(206)
@ -26,7 +26,7 @@ class BlockchainReorganizationTests(CommandTestCase):
self.assertEqual(self.ledger.headers.height, 207) self.assertEqual(self.ledger.headers.height, 207)
await self.assertBlockHash(206) await self.assertBlockHash(206)
await self.assertBlockHash(207) await self.assertBlockHash(207)
self.assertEqual(1, REORG_COUNT._samples()[0][2]) self.assertEqual(1, prometheus.METRICS.REORG_COUNT._samples()[0][2])
# invalidate current block, move forward 3 # invalidate current block, move forward 3
await self.blockchain.invalidate_block((await self.ledger.headers.hash(206)).decode()) await self.blockchain.invalidate_block((await self.ledger.headers.hash(206)).decode())
@ -36,7 +36,7 @@ class BlockchainReorganizationTests(CommandTestCase):
await self.assertBlockHash(206) await self.assertBlockHash(206)
await self.assertBlockHash(207) await self.assertBlockHash(207)
await self.assertBlockHash(208) await self.assertBlockHash(208)
self.assertEqual(2, REORG_COUNT._samples()[0][2]) self.assertEqual(2, prometheus.METRICS.REORG_COUNT._samples()[0][2])
async def test_reorg_change_claim_height(self): async def test_reorg_change_claim_height(self):
# sanity check # sanity check