Improve BlobAvailabilityTracker performance

For daemons with a lot of blobs, computing the mean availability
is slow. Sample the blobs instead of querying all of them to get
better performance.
This commit is contained in:
Job Evers-Meltzer 2016-12-14 16:14:37 -06:00
parent 56d394fb5f
commit 7af6e9e0dd
2 changed files with 25 additions and 7 deletions

View file

@ -1,4 +1,6 @@
import logging
import random
import time
from twisted.internet import defer
from twisted.internet.task import LoopingCall
@ -27,7 +29,7 @@ class BlobAvailabilityTracker(object):
def start(self):
log.info("Starting %s", self)
self._check_popular.start(30)
self._check_mine.start(120)
self._check_mine.start(600)
def stop(self):
log.info("Stopping %s", self)
@ -76,7 +78,8 @@ class BlobAvailabilityTracker(object):
def _update_most_popular(self):
d = self._get_most_popular()
d.addCallback(lambda _: self._get_mean_peers())
d.addCallback(lambda _: self._set_mean_peers())
def _update_mine(self):
def _get_peers(blobs):
@ -85,11 +88,26 @@ class BlobAvailabilityTracker(object):
dl.append(self._update_peers_for_blob(hash))
return defer.DeferredList(dl)
d = self._blob_manager.get_all_verified_blobs()
d.addCallback(_get_peers)
d.addCallback(lambda _: self._get_mean_peers())
def sample(blobs):
    """Return a random subset of at most 100 entries from ``blobs``.

    ``random.sample`` raises ``ValueError`` when asked for more items
    than the population holds, so the sample size is capped at
    ``len(blobs)``. With fewer than 100 blobs (including none at all)
    every blob is returned, in random order.
    """
    return random.sample(blobs, min(len(blobs), 100))
def _get_mean_peers(self):
start = time.time()
log.debug('==> Updating the peers for my blobs')
d = self._blob_manager.get_all_verified_blobs()
# as far as I can tell, this only is used to set _last_mean_availability
# which... seems like a very expensive operation for such little payoff.
# so taking a sample should get about the same effect as querying the entire
# list of blobs
d.addCallback(sample)
d.addCallback(_get_peers)
d.addCallback(lambda _: self._set_mean_peers())
d.addCallback(lambda _: log.debug('<== Done updating peers for my blobs. Took %s seconds',
time.time() - start))
# although unused, need to return or else the looping call
# could overrun on a previous call
return d
def _set_mean_peers(self):
num_peers = [len(self.availability[blob]) for blob in self.availability]
mean = Decimal(sum(num_peers)) / Decimal(max(1, len(num_peers)))
self._last_mean_availability = mean

View file

@ -167,7 +167,7 @@ class BlobAvailabilityTracker(BlobAvailability.BlobAvailabilityTracker):
self._dht_node = None
self._check_popular = None
self._check_mine = None
self._get_mean_peers()
self._set_mean_peers()
def start(self):
pass