improve error analytics

- move analytics creation earlier in the startup process
- add several events to track startup, startup failure, other errors
- abstract platform detection into separate file
- add "build" type to platform so we can differentiate devs vs live users
This commit is contained in:
Alex Grintsvayg 2016-11-10 15:49:51 -05:00
parent 6b72b4de11
commit 7ecc8103d8
11 changed files with 167 additions and 83 deletions

View file

@ -56,11 +56,10 @@ class Api(object):
def track(self, event):
"""Send a single tracking event"""
log.debug('Sending track event: %s', event)
import base64
return self.session.post(self.url + '/track', json=event, auth=self.auth)
@classmethod
def load(cls, session=None):
def new_instance(cls, session=None):
"""Initialize an instance using values from the configuration"""
if not session:
session = sessions.FuturesSession()

View file

@ -22,6 +22,20 @@ class Events(object):
self.lbry_id = lbry_id
self.session_id = session_id
def update_context(self, context):
self.context = context
def server_startup(self):
return self._event('Server Startup')
def server_startup_success(self):
return self._event('Server Startup Success')
def server_startup_error(self, message):
return self._event('Server Startup Error', {
'message': message,
})
def heartbeat(self):
return self._event('Heartbeat')
@ -32,6 +46,13 @@ class Events(object):
}
return self._event('Download Started', properties)
def error(self, message, sd_hash=None):
properties = {
'message': message,
'stream_info': sd_hash
}
return self._event('Error', properties)
def metric_observed(self, metric_name, value):
properties = {
'value': value,
@ -57,15 +78,14 @@ class Events(object):
return properties
def make_context(platform, wallet, is_dev=False):
# TODO: distinguish between developer and release instances
def make_context(platform, wallet):
return {
'is_dev': is_dev,
'app': {
'name': 'lbrynet',
'version': platform['lbrynet_version'],
'ui_version': platform['ui_version'],
'python_version': platform['python_version'],
'build': platform['build'],
'wallet': {
'name': wallet,
# TODO: add in version info for lbrycrdd

View file

@ -3,7 +3,13 @@ from lbrynet.core import looping_call_manager
from twisted.internet import defer
from twisted.internet import task
from lbrynet.core.Platform import get_platform
from lbrynet.conf import settings
import constants
from api import Api
from events import Events, make_context
from track import Track
class Manager(object):
@ -12,28 +18,63 @@ class Manager(object):
self.events_generator = events_generator
self.track = track
self.looping_call_manager = self.setup_looping_calls()
self.is_started = False
@classmethod
def new_instance(cls, api=None, events=None):
if api is None:
api = Api.new_instance()
if events is None:
events = Events(
make_context(get_platform(), settings.wallet),
'not loaded', 'not loaded'
)
return cls(api, events, Track())
def update_events_generator(self, events_generator):
self.events_generator = events_generator
def _get_looping_calls(self):
return [
('send_heartbeat', self._send_heartbeat, 60),
('update_tracked_metrics', self._update_tracked_metrics, 300),
]
def setup_looping_calls(self):
call_manager = looping_call_manager.LoopingCallManager()
looping_calls = [
('send_heartbeat', self._send_heartbeat),
('update_tracked_metrics', self._update_tracked_metrics),
]
for name, fn in looping_calls:
for name, fn, _ in self._get_looping_calls():
call_manager.register_looping_call(name, task.LoopingCall(fn))
return call_manager
def start(self):
self.looping_call_manager.start('send_heartbeat', 60)
self.looping_call_manager.start('update_tracked_metrics', 300)
if not self.is_started:
for name, _, interval in self._get_looping_calls():
self.looping_call_manager.start(name, interval)
self.is_started = True
def shutdown(self):
self.looping_call_manager.shutdown()
def send_server_startup(self):
event = self.events_generator.server_startup()
self.analytics_api.track(event)
def send_server_startup_success(self):
event = self.events_generator.server_startup_success()
self.analytics_api.track(event)
def send_server_startup_error(self, message):
event = self.events_generator.server_startup_error(message)
self.analytics_api.track(event)
def send_download_started(self, name, stream_info=None):
event = self.events_generator.download_started(name, stream_info)
self.analytics_api.track(event)
def send_error(self, message, sd_hash=None):
event = self.events_generator.error(message, sd_hash)
self.analytics_api.track(event)
def register_repeating_metric(self, event_name, value_generator, frequency=300):
lcall = task.LoopingCall(self._send_repeating_metric, event_name, value_generator)
self.looping_call_manager.register_looping_call(event_name, lcall)

2
lbrynet/build_type.py Normal file
View file

@ -0,0 +1,2 @@
# dont touch this. Travis changes this during build/deployment
BUILD = "dev"

29
lbrynet/core/Platform.py Normal file
View file

@ -0,0 +1,29 @@
import platform
import simplejson as json
from urllib2 import urlopen
from lbrynet import __version__ as lbrynet_version
from lbrynet import build_type
from lbryum.version import LBRYUM_VERSION as lbryum_version
def get_platform():
p = {
"processor": platform.processor(),
"python_version": platform.python_version(),
"platform": platform.platform(),
"os_release": platform.release(),
"os_system": platform.system(),
"lbrynet_version": lbrynet_version,
"lbryum_version": lbryum_version,
"ui_version": "not loaded yet",
"build": build_type.BUILD, # travis sets this during build step
}
try:
p['ip'] = json.load(urlopen('http://jsonip.com'))['ip']
except:
p['ip'] = "Could not determine IP"
return p

View file

@ -2,7 +2,6 @@ import binascii
import logging.handlers
import mimetypes
import os
import platform
import random
import re
import subprocess
@ -21,7 +20,6 @@ from twisted.internet.task import LoopingCall
from txjsonrpc import jsonrpclib
from jsonschema import ValidationError
from lbrynet import __version__ as lbrynet_version
# TODO: importing this when internet is disabled raises a socket.gaierror
from lbryum.version import LBRYUM_VERSION as lbryum_version
@ -30,27 +28,15 @@ from lbrynet import conf
from lbrynet.conf import settings as lbrynet_settings
from lbrynet.conf import LBRYCRD_WALLET, LBRYUM_WALLET, PTC_WALLET
from lbrynet import analytics
from lbrynet import reflector
from lbrynet.metadata.Metadata import Metadata, verify_name_characters
from lbrynet.metadata.Fee import FeeValidator
from lbrynet.core import log_support
from lbrynet.core import utils
from lbrynet.core.utils import generate_id
from lbrynet.core.StreamDescriptor import StreamDescriptorIdentifier, download_sd_blob, BlobStreamDescriptorReader
from lbrynet.core.Session import Session
from lbrynet.core import Platform
from lbrynet.core.looping_call_manager import LoopingCallManager
from lbrynet.core.server.BlobRequestHandler import BlobRequestHandlerFactory
from lbrynet.core.server.ServerProtocol import ServerProtocolFactory
from lbrynet.core.Error import InsufficientFundsError, InvalidNameError
from lbrynet.core.PTCWallet import PTCWallet
from lbrynet.core.Wallet import LBRYcrdWallet, LBRYumWallet
from lbrynet.lbrynet_console.Settings import Settings
from lbrynet.lbryfilemanager.EncryptedFileManager import EncryptedFileManager
from lbrynet.lbryfile.StreamDescriptor import EncryptedFileStreamType
from lbrynet.lbryfile.client.EncryptedFileDownloader import EncryptedFileSaverFactory, EncryptedFileOpenerFactory
from lbrynet.lbryfile.client.EncryptedFileOptions import add_lbry_file_to_sd_identifier
from lbrynet.lbryfile.EncryptedFileMetadataManager import DBEncryptedFileMetadataManager
from lbrynet.lbryfile.EncryptedFileMetadataManager import TempEncryptedFileMetadataManager
from lbrynet.lbrynet_daemon.UIManager import UIManager
from lbrynet.lbrynet_daemon.Downloader import GetStream
from lbrynet.lbrynet_daemon.Publisher import Publisher
@ -222,7 +208,7 @@ class Daemon(AuthJSONRPCServer):
LBRYnet daemon, a jsonrpc interface to lbry functions
"""
def __init__(self, root):
def __init__(self, root, analytics_manager):
AuthJSONRPCServer.__init__(self, lbrynet_settings.use_auth_http)
reactor.addSystemEventTrigger('before', 'shutdown', self._shutdown)
@ -274,7 +260,7 @@ class Daemon(AuthJSONRPCServer):
self.git_lbrynet_version = None
self.git_lbryum_version = None
self.ui_version = None
self.ip = None
self.platform = None
self.first_run = None
self.log_file = lbrynet_settings.get_log_filename()
self.current_db_revision = 1
@ -314,6 +300,7 @@ class Daemon(AuthJSONRPCServer):
self.blob_request_payment_rate_manager = None
self.lbry_file_metadata_manager = None
self.lbry_file_manager = None
self.analytics_manager = analytics_manager
@AuthJSONRPCServer.subhandler
def _exclude_lbrycrd_only_commands_from_lbryum_session(self, request):
@ -412,24 +399,10 @@ class Daemon(AuthJSONRPCServer):
return d
def _get_platform(self):
r = {
"processor": platform.processor(),
"python_version": platform.python_version(),
"platform": platform.platform(),
"os_release": platform.release(),
"os_system": platform.system(),
"lbrynet_version": lbrynet_version,
"lbryum_version": lbryum_version,
"ui_version": self.lbry_ui_manager.loaded_git_version,
}
if not self.ip:
try:
r['ip'] = json.load(urlopen('http://jsonip.com'))['ip']
self.ip = r['ip']
except:
r['ip'] = "Could not determine"
return r
if self.platform is None:
self.platform = Platform.get_platform()
self.platform["ui_version"] = self.lbry_ui_manager.loaded_git_version
return self.platform
def _initial_setup(self):
def _log_platform():
@ -557,6 +530,11 @@ class Daemon(AuthJSONRPCServer):
return defer.succeed(True)
return defer.succeed(True)
def _stop_file_manager(self):
if self.lbry_file_manager:
self.lbry_file_manager.stop()
return defer.succeed(True)
def _stop_server(self):
try:
if self.lbry_server_port is not None:
@ -654,11 +632,12 @@ class Daemon(AuthJSONRPCServer):
except Exception:
log.warn('Failed to upload log', exc_info=True)
d = defer.succeed(None)
d.addCallback(lambda _: self._stop_server())
d.addErrback(log_support.failure, log, 'Failure while shutting down: %s')
d.addCallback(lambda _: self._stop_reflector())
d.addErrback(log_support.failure, log, 'Failure while shutting down: %s')
d.addCallback(lambda _: self.lbry_file_manager.stop())
d.addCallback(lambda _: self._stop_file_manager())
d.addErrback(log_support.failure, log, 'Failure while shutting down: %s')
if self.session is not None:
d.addCallback(lambda _: self.session.shut_down())
@ -784,12 +763,17 @@ class Daemon(AuthJSONRPCServer):
return d
def _get_analytics(self):
analytics_api = analytics.Api.load()
context = analytics.make_context(self._get_platform(), self.wallet_type)
events_generator = analytics.Events(
context, base58.b58encode(self.lbryid), self._session_id)
self.analytics_manager = analytics.Manager(
analytics_api, events_generator, analytics.Track())
if self.analytics_manager is None:
self.analytics_manager = analytics.Manager.new_instance(
events=events_generator
)
else:
self.analytics_manager.update_events_generator(events_generator)
if not self.analytics_manager.is_started:
self.analytics_manager.start()
self.analytics_manager.register_repeating_metric(
analytics.BLOB_BYTES_AVAILABLE,
@ -868,11 +852,14 @@ class Daemon(AuthJSONRPCServer):
def eb():
if not r.called:
self.analytics_manager.send_error("sd blob download timed out", sd_hash)
r.errback(Exception("sd timeout"))
r = defer.Deferred(None)
reactor.callLater(timeout, eb)
d = download_sd_blob(self.session, sd_hash, self.session.payment_rate_manager)
d.addErrback(lambda err: self.analytics_manager.send_error(
"error downloading sd blob: " + err, sd_hash))
d.addCallback(BlobStreamDescriptorReader)
d.addCallback(lambda blob: blob.get_info())
d.addCallback(cb)
@ -1047,12 +1034,6 @@ class Daemon(AuthJSONRPCServer):
)
return run_reflector_factory(factory)
def _log_to_slack(self, msg):
URL = "https://hooks.slack.com/services/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"
msg = platform.platform() + ": " + base58.b58encode(self.lbryid)[:20] + ", " + msg
requests.post(URL, json.dumps({"text": msg}))
return defer.succeed(None)
def _run_scripts(self):
if len([k for k in self.startup_scripts if 'run_once' in k.keys()]):
log.info("Removing one time startup scripts")
@ -2128,8 +2109,6 @@ class Daemon(AuthJSONRPCServer):
exclude_previous = True
d = self._upload_log(log_type=log_type, exclude_previous=exclude_previous, force=force)
if 'message' in p.keys():
d.addCallback(lambda _: self._log_to_slack(p['message']))
d.addCallback(lambda _: self._render_response(True, OK_CODE))
return d

View file

@ -9,6 +9,7 @@ from twisted.internet import defer, reactor, error
from twisted.cred import portal
from jsonrpc.proxy import JSONRPCProxy
from lbrynet import analytics
from lbrynet.lbrynet_daemon.auth.auth import PasswordChecker, HttpPasswordRealm
from lbrynet.lbrynet_daemon.auth.util import initialize_api_key_file
from lbrynet import conf
@ -98,7 +99,9 @@ def start():
print "To quit press ctrl-c or call 'stop' via the API"
if test_internet_connection():
start_server_and_listen(args.launchui, args.useauth)
analytics_manager = analytics.Manager.new_instance()
analytics_manager.send_server_startup()
start_server_and_listen(args.launchui, args.useauth, analytics_manager)
reactor.run()
if not args.logtoconsole and not args.quiet:
@ -121,29 +124,32 @@ def update_settings_from_args(args):
settings.update(to_pass)
def log_and_kill(failure):
def log_and_kill(failure, analytics_manager):
analytics_manager.send_server_startup_error(failure.getErrorMessage() + " " + str(failure))
log_support.failure(failure, log, 'Failed to startup: %s')
reactor.callFromThread(reactor.stop)
def start_server_and_listen(launchui, use_auth):
def start_server_and_listen(launchui, use_auth, analytics_manager):
"""The primary entry point for launching the daemon.
Args:
launchui: set to true to open a browser window
use_auth: set to true to enable http authentication
analytics_manager: to send analytics
kwargs: passed along to `DaemonServer().start()`
"""
lbry = DaemonServer()
d = lbry.start()
d.addCallback(lambda _: listen(lbry, use_auth))
daemon_server = DaemonServer(analytics_manager)
d = daemon_server.start()
d.addCallback(lambda _: listen(daemon_server, use_auth))
if launchui:
d.addCallback(lambda _: webbrowser.open(settings.UI_ADDRESS))
d.addErrback(log_and_kill)
d.addCallback(lambda _: analytics_manager.send_server_startup_success())
d.addErrback(log_and_kill, analytics_manager)
def listen(lbry, use_auth):
site_base = get_site_base(use_auth, lbry.root)
def listen(daemon_server, use_auth):
site_base = get_site_base(use_auth, daemon_server.root)
lbrynet_server = server.Site(site_base)
lbrynet_server.requestFactory = DaemonRequest
try:

View file

@ -3,7 +3,6 @@ import os
from twisted.internet import defer
from lbrynet.conf import settings
from lbrynet.lbrynet_daemon.Daemon import Daemon
from lbrynet.lbrynet_daemon.Resources import LBRYindex, HostedEncryptedFile, EncryptedFileUpload
from lbrynet.conf import settings
@ -13,10 +12,14 @@ log = logging.getLogger(__name__)
class DaemonServer(object):
def __init__(self, analytics_manager=None):
self.root = None
self.analytics_manager = analytics_manager
def _setup_server(self):
ui_path = os.path.join(settings.ensure_data_dir(), "lbry-ui", "active")
self.root = LBRYindex(ui_path)
self._api = Daemon(self.root)
self._api = Daemon(self.root, self.analytics_manager)
self.root.putChild("view", HostedEncryptedFile(self._api))
self.root.putChild("upload", EncryptedFileUpload(self._api))
self.root.putChild(settings.API_ADDRESS, self._api)

View file

@ -8,6 +8,7 @@ from txjsonrpc import jsonrpclib
from lbrynet.core.Error import InvalidAuthenticationToken, InvalidHeaderError, SubhandlerError
from lbrynet.conf import settings
from lbrynet.core import log_support
from lbrynet.lbrynet_daemon.auth.util import APIKey, get_auth_message
from lbrynet.lbrynet_daemon.auth.client import LBRY_SECRET
@ -262,10 +263,7 @@ class AuthJSONRPCServer(AuthorizedBase):
self._render_message(request, encoded_message)
def _errback_render(self, failure, id):
log.error("Request failed:")
log.error(failure)
log.error(failure.value)
log.error(id)
log_support.failure(failure, log, "Request failed. Id: %s, Failure: %s", id)
if isinstance(failure.value, jsonrpclib.Fault):
return failure.value
return server.failure

View file

@ -22,6 +22,7 @@ if not os.path.isfile(lbrycrdd_path_conf):
f.close()
from lbrynet.lbrynet_daemon import DaemonControl
from lbrynet import analytics
from lbrynet.conf import settings
from lbrynet.core import utils
@ -62,7 +63,10 @@ class LBRYDaemonApp(AppKit.NSApplication):
notify("LBRY needs an internet connection to start, try again when one is available")
sys.exit(0)
DaemonControl.start_server_and_listen(launchui=True, use_auth=False)
DaemonControl.start_server_and_listen(
launchui=True, use_auth=False,
analytics_manager=analytics.Manager.new_instance()
)
def openui_(self, sender):
webbrowser.open(settings.UI_ADDRESS)

View file

@ -15,7 +15,7 @@ try:
except ImportError:
import win32gui
from lbrynet import conf
from lbrynet import conf, analytics
from lbrynet.core import log_support
from lbrynet.core import utils
from lbrynet.lbrynet_daemon import DaemonControl
@ -267,7 +267,10 @@ def main(lbry_name=None):
systray_thread.daemon = True
systray_thread.start()
DaemonControl.start_server_and_listen(launchui=True, use_auth=False)
DaemonControl.start_server_and_listen(
launchui=True, use_auth=False,
analytics_manager=analytics.Manager.new_instance()
)
reactor.run()