2015-08-20 17:27:15 +02:00
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import time
|
2015-09-04 22:22:02 +02:00
|
|
|
import sqlite3
|
2016-09-27 19:52:44 +02:00
|
|
|
|
2017-08-10 19:49:43 +02:00
|
|
|
from twisted.internet import threads, defer, reactor
|
2015-09-04 22:22:02 +02:00
|
|
|
from twisted.enterprise import adbapi
|
2017-07-27 00:23:10 +02:00
|
|
|
from lbrynet.core.HashBlob import BlobFile, BlobFileCreator
|
2015-08-20 17:27:15 +02:00
|
|
|
from lbrynet.core.server.DHTHashAnnouncer import DHTHashSupplier
|
2015-09-04 22:22:02 +02:00
|
|
|
from lbrynet.core.sqlite_helpers import rerun_if_locked
|
2015-08-20 17:27:15 +02:00
|
|
|
|
2015-09-08 21:42:56 +02:00
|
|
|
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
|
|
|
|
|
2017-08-10 19:49:43 +02:00
|
|
|
|
2017-07-27 06:19:20 +02:00
|
|
|
class DiskBlobManager(DHTHashSupplier):
    """This class stores blobs on the hard disk

    Blob files are kept in ``blob_dir``. A sqlite database (``blobs.db`` in
    ``db_dir``) records completed blobs, their announce times and the
    upload/download history. Database access goes through a twisted
    ``adbapi.ConnectionPool``, so the database helpers return Deferreds.
    """

    def __init__(self, hash_announcer, blob_dir, db_dir):
        """Create the manager and its database connection pool.

        hash_announcer: passed to DHTHashSupplier.__init__; used by
            _immediate_announce through self.hash_announcer.
        blob_dir: directory in which blob files are stored.
        db_dir: directory that holds the ``blobs.db`` sqlite file.

        No tables are created here; call setup() for that.
        """
        DHTHashSupplier.__init__(self, hash_announcer)
        self.blob_dir = blob_dir
        self.db_file = os.path.join(db_dir, "blobs.db")
        # See the note in _open_db for why check_same_thread is disabled.
        self.db_conn = adbapi.ConnectionPool('sqlite3', self.db_file, check_same_thread=False)
        self.blob_type = BlobFile
        self.blob_creator_type = BlobFileCreator
        # TODO: consider using an LRU for blobs as there could potentially
        #       be thousands of blobs loaded up, many stale
        self.blobs = {}  # {blob_hash: blob object}
        self.blob_hashes_to_delete = {}  # {blob_hash: being_deleted (True/False)}

    @defer.inlineCallbacks
    def setup(self):
        """Log the configured paths and create the database tables if needed.

        Returns a Deferred that fires once _open_db has finished.
        """
        log.info("Starting disk blob manager. blob_dir: %s, db_file: %s", str(self.blob_dir),
                 str(self.db_file))
        yield self._open_db()

    def stop(self):
        """Close the database connection pool.

        Returns an already-fired Deferred with the value True.
        """
        log.info("Stopping disk blob manager.")
        self.db_conn.close()
        return defer.succeed(True)

    def get_blob(self, blob_hash, length=None):
        """Return a blob identified by blob_hash, which may be a new blob or a
        blob that is already on the hard disk

        blob_hash: key of the blob in the in-memory ``self.blobs`` cache.
        length: optional int, forwarded to the blob constructor when the
            blob is not cached yet.

        Returns a Deferred that fires with the blob object.
        """
        assert length is None or isinstance(length, int)
        if blob_hash in self.blobs:
            return defer.succeed(self.blobs[blob_hash])
        return self._make_new_blob(blob_hash, length)

    def get_blob_creator(self):
        """Return a new blob creator (``self.blob_creator_type``) for blob_dir."""
        return self.blob_creator_type(self.blob_dir)

    def _make_new_blob(self, blob_hash, length=None):
        """Instantiate a blob object, cache it in self.blobs and return it.

        Returns an already-fired Deferred carrying the new blob.
        """
        log.debug('Making a new blob for %s', blob_hash)
        blob = self.blob_type(self.blob_dir, blob_hash, length)
        self.blobs[blob_hash] = blob
        return defer.succeed(blob)

    def _immediate_announce(self, blob_hashes):
        """Ask the hash announcer to announce blob_hashes right away.

        Returns whatever hash_announcer.immediate_announce returns.
        Raises Exception when no hash announcer is set.
        """
        if self.hash_announcer:
            return self.hash_announcer.immediate_announce(blob_hashes)
        raise Exception("Hash announcer not set")

    @defer.inlineCallbacks
    def blob_completed(self, blob, next_announce_time=None):
        """Record a finished blob in the database and schedule its announcement.

        blob: object exposing ``blob_hash`` and ``length``.
        next_announce_time: value stored in the blobs table; when None it is
            obtained from self.get_next_announce_time() (not defined in this
            class - presumably provided by DHTHashSupplier).

        Returns a Deferred that fires after the database insert.
        """
        if next_announce_time is None:
            next_announce_time = self.get_next_announce_time()
        yield self._add_completed_blob(blob.blob_hash, blob.length, next_announce_time)
        # Announce on the next reactor iteration, after the row has been stored.
        reactor.callLater(0, self._immediate_announce, [blob.blob_hash])

    def completed_blobs(self, blobhashes_to_check):
        """Return a Deferred firing with the hashes in blobhashes_to_check whose
        blob objects report ``verified``."""
        return self._completed_blobs(blobhashes_to_check)

    def hashes_to_announce(self):
        """Return a Deferred firing with the blob hashes that are due to be
        announced; their next announce time is pushed forward as a side effect."""
        return self._get_blobs_to_announce()

    def creator_finished(self, blob_creator):
        """Register the blob produced by a finished blob creator.

        blob_creator: object exposing ``blob_hash`` and ``length``; both must
            be set and the hash must not already be in self.blobs.

        Returns the Deferred from blob_completed.
        """
        log.debug("blob_creator.blob_hash: %s", blob_creator.blob_hash)
        assert blob_creator.blob_hash is not None
        assert blob_creator.blob_hash not in self.blobs
        assert blob_creator.length is not None
        new_blob = self.blob_type(self.blob_dir, blob_creator.blob_hash, blob_creator.length)
        self.blobs[blob_creator.blob_hash] = new_blob
        # NOTE(review): blob_completed() below schedules another immediate
        # announce for the same hash, so the hash is announced twice. Kept
        # as-is because dropping this call would change when the
        # "Hash announcer not set" exception surfaces - confirm before removing.
        self._immediate_announce([blob_creator.blob_hash])
        next_announce_time = self.get_next_announce_time()
        d = self.blob_completed(new_blob, next_announce_time)
        return d

    def immediate_announce_all_blobs(self):
        """Announce every blob hash in the database whose file exists in blob_dir.

        Returns a Deferred firing with the result of _immediate_announce.
        """
        d = self._get_all_verified_blob_hashes()
        d.addCallback(self._immediate_announce)
        return d

    def get_all_verified_blobs(self):
        """Return a Deferred firing with the hashes of all blobs that are in the
        database, have a file in blob_dir and whose blob object is verified."""
        d = self._get_all_verified_blob_hashes()
        d.addCallback(self.completed_blobs)
        return d

    def add_blob_to_download_history(self, blob_hash, host, rate):
        """Insert a row into the download table; returns the query Deferred."""
        d = self._add_blob_to_download_history(blob_hash, host, rate)
        return d

    def add_blob_to_upload_history(self, blob_hash, host, rate):
        """Insert a row into the upload table; returns the query Deferred."""
        d = self._add_blob_to_upload_history(blob_hash, host, rate)
        return d

    @defer.inlineCallbacks
    def delete_blobs(self, blob_hashes):
        """Delete the given blobs and then remove their database rows.

        Deletion is best effort: a blob whose delete() fails is logged and
        skipped, and its database row is left in place.
        """
        bh_to_delete_from_db = []
        for blob_hash in blob_hashes:
            try:
                blob = yield self.get_blob(blob_hash)
                yield blob.delete()
                bh_to_delete_from_db.append(blob_hash)
            except Exception as e:
                log.warning("Failed to delete blob file. Reason: %s", e)
        yield self._delete_blobs_from_db(bh_to_delete_from_db)

    ######### database calls #########

    def _open_db(self):
        """Create the blobs, download and upload tables if they do not exist.

        Returns the Deferred of the database interaction.
        """
        # check_same_thread=False is solely to quiet a spurious error that appears to be due
        # to a bug in twisted, where the connection is closed by a different thread than the
        # one that opened it. The individual connections in the pool are not used in multiple
        # threads.

        def create_tables(transaction):
            transaction.execute("create table if not exists blobs (" +
                                " blob_hash text primary key, " +
                                " blob_length integer, " +
                                " last_verified_time real, " +
                                " next_announce_time real, " +
                                " last_announce_time real, " +
                                " should_announce integer)")

            transaction.execute("create table if not exists download (" +
                                " id integer primary key autoincrement, " +
                                " blob text, " +
                                " host text, " +
                                " rate float, " +
                                " ts integer)")

            transaction.execute("create table if not exists upload (" +
                                " id integer primary key autoincrement, " +
                                " blob text, " +
                                " host text, " +
                                " rate float, " +
                                " ts integer)")

        return self.db_conn.runInteraction(create_tables)

    @rerun_if_locked
    def _add_completed_blob(self, blob_hash, length, next_announce_time):
        """Insert a completed blob into the blobs table.

        A sqlite3.IntegrityError (for instance because blob_hash, the primary
        key, is already present) is trapped and not propagated; any other
        failure is passed on to the caller's errbacks.
        """
        log.debug("Adding a completed blob. blob_hash=%s, length=%s", blob_hash, str(length))
        d = self.db_conn.runQuery(
            "insert into blobs (blob_hash, blob_length, next_announce_time) values (?, ?, ?)",
            (blob_hash, length, next_announce_time)
        )
        d.addErrback(lambda err: err.trap(sqlite3.IntegrityError))
        return d

    @defer.inlineCallbacks
    def _completed_blobs(self, blobhashes_to_check):
        """Return, of the blobhashes_to_check, those which are valid.

        A hash is kept when its blob could be obtained and the blob object
        has a truthy ``verified`` attribute.
        """
        # DeferredList fires with a list of (success, result) pairs.
        blobs = yield defer.DeferredList([self.get_blob(b) for b in blobhashes_to_check])
        blob_hashes = [b.blob_hash for success, b in blobs if success and b.verified]
        defer.returnValue(blob_hashes)

    @rerun_if_locked
    def _update_blob_verified_timestamp(self, blob, timestamp):
        """Store timestamp as last_verified_time of one blobs row.

        blob: value compared against the blob_hash column (presumably the
            blob hash string - confirm against callers).
        timestamp: new value of last_verified_time.

        Returns the query Deferred.
        """
        # Bind order must follow the placeholders: the timestamp fills the
        # SET clause and the blob hash fills the WHERE clause. They used to
        # be swapped, so no row ever matched.
        return self.db_conn.runQuery("update blobs set last_verified_time = ? where blob_hash = ?",
                                     (timestamp, blob))

    @rerun_if_locked
    def _get_blobs_to_announce(self):
        """Fetch the hashes that are due for announcement and reschedule them.

        Within one database interaction: selects the hashes whose
        next_announce_time lies before now, then moves next_announce_time of
        those rows to self.get_next_announce_time(<number of hashes>).

        Returns a Deferred firing with the list of blob hashes.
        """

        def get_and_update(transaction):
            timestamp = time.time()
            r = transaction.execute("select blob_hash from blobs " +
                                    "where next_announce_time < ? and blob_hash is not null",
                                    (timestamp,))
            # Each row is a 1-tuple; unpack it to the bare hash.
            blobs = [b for b, in r.fetchall()]
            next_announce_time = self.get_next_announce_time(len(blobs))
            transaction.execute(
                "update blobs set next_announce_time = ? where next_announce_time < ?",
                (next_announce_time, timestamp))
            log.debug("Got %s blobs to announce, next announce time is in %s seconds",
                      len(blobs), next_announce_time-time.time())
            return blobs

        return self.db_conn.runInteraction(get_and_update)

    @rerun_if_locked
    def _delete_blobs_from_db(self, blob_hashes):
        """Delete the rows of the given blob hashes from the blobs table.

        All deletes run inside a single database interaction; returns its
        Deferred.
        """

        def delete_blobs(transaction):
            for b in blob_hashes:
                transaction.execute("delete from blobs where blob_hash = ?", (b,))

        return self.db_conn.runInteraction(delete_blobs)

    @rerun_if_locked
    def _get_all_blob_hashes(self):
        """Return a Deferred firing with all blob_hash rows (as 1-tuples)."""
        d = self.db_conn.runQuery("select blob_hash from blobs")
        return d

    @rerun_if_locked
    def _get_all_verified_blob_hashes(self):
        """Return a Deferred firing with the database blob hashes that have a
        regular file of the same name in blob_dir.

        The file-system checks are run in a thread via deferToThread.
        """
        d = self._get_all_blob_hashes()

        def get_verified_blobs(blobs):
            verified_blobs = []
            for blob_hash, in blobs:
                file_path = os.path.join(self.blob_dir, blob_hash)
                if os.path.isfile(file_path):
                    verified_blobs.append(blob_hash)
            return verified_blobs

        d.addCallback(lambda blobs: threads.deferToThread(get_verified_blobs, blobs))
        return d

    @rerun_if_locked
    def _add_blob_to_download_history(self, blob_hash, host, rate):
        """Insert (blob_hash, str(host), float(rate), current unix time as int)
        into the download table; returns the query Deferred."""
        ts = int(time.time())
        d = self.db_conn.runQuery(
            "insert into download values (null, ?, ?, ?, ?) ",
            (blob_hash, str(host), float(rate), ts))
        return d

    @rerun_if_locked
    def _add_blob_to_upload_history(self, blob_hash, host, rate):
        """Insert (blob_hash, str(host), float(rate), current unix time as int)
        into the upload table; returns the query Deferred."""
        ts = int(time.time())
        d = self.db_conn.runQuery(
            "insert into upload values (null, ?, ?, ?, ?) ",
            (blob_hash, str(host), float(rate), ts))
        return d
|
|
|
|
|
2015-08-20 17:27:15 +02:00
|
|
|
|