2019-02-08 17:37:35 -05:00
|
|
|
import os
|
2019-01-22 12:47:46 -05:00
|
|
|
import typing
|
|
|
|
import asyncio
|
|
|
|
import logging
|
2021-01-16 16:25:46 -05:00
|
|
|
from lbry.utils import LRUCacheWithMetrics
|
2019-06-20 20:55:47 -04:00
|
|
|
from lbry.blob.blob_file import is_valid_blobhash, BlobFile, BlobBuffer, AbstractBlob
|
|
|
|
from lbry.stream.descriptor import StreamDescriptor
|
|
|
|
from lbry.connection_manager import ConnectionManager
|
2019-01-22 12:47:46 -05:00
|
|
|
|
|
|
|
if typing.TYPE_CHECKING:
|
2019-06-20 20:55:47 -04:00
|
|
|
from lbry.conf import Config
|
|
|
|
from lbry.dht.protocol.data_store import DictDataStore
|
|
|
|
from lbry.extras.daemon.storage import SQLiteStorage
|
2019-01-22 12:47:46 -05:00
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2019-03-28 14:51:55 -04:00
|
|
|
class BlobManager:
    """
    Hands out blob objects and keeps track of which blobs have been completed.

    Blob objects are cached by hash in ``self.blobs``, the hashes of completed blobs are kept in
    ``self.completed_blob_hashes`` and blob status is recorded through ``storage``.
    """

    # ensure_completed_blobs_status() flushes to storage once more than this many rows are queued
    _STATUS_FLUSH_THRESHOLD = 500

    def __init__(self, loop: asyncio.AbstractEventLoop, blob_dir: str, storage: 'SQLiteStorage', config: 'Config',
                 node_data_store: typing.Optional['DictDataStore'] = None):
        """
        This class stores blobs on the hard disk

        loop - event loop used for executor jobs and for scheduling storage tasks
        blob_dir - directory where blobs are stored; disk lookups are skipped when it is falsy
        storage - SQLiteStorage object
        config - Config object; save_blobs, blob_lru_cache_size and track_bandwidth are read from it
        node_data_store - optional DictDataStore; when given, its completed_blobs set is used as
                          completed_blob_hashes so both objects share a single set
        """
        self.loop = loop
        self.blob_dir = blob_dir
        self.storage = storage
        self._node_data_store = node_data_store
        # Explicit None check: a data store object that happens to evaluate falsy must still be shared.
        # NOTE(review): whether DictDataStore can be falsy is not visible from this file - confirm.
        if node_data_store is None:
            self.completed_blob_hashes: typing.Set[str] = set()
        else:
            self.completed_blob_hashes = node_data_store.completed_blobs
        self.blobs: typing.Dict[str, AbstractBlob] = {}
        self.config = config
        # the cache is disabled (None) when the configured size is falsy
        cache_size = self.config.blob_lru_cache_size
        self.decrypted_blob_lru_cache = LRUCacheWithMetrics(cache_size) if cache_size else None
        self.connection_manager = ConnectionManager(loop)

    def _is_on_disk(self, blob_hash: str) -> bool:
        """Return True if a blob directory is configured and it contains a regular file named blob_hash."""
        if not self.blob_dir:
            return False
        return os.path.isfile(os.path.join(self.blob_dir, blob_hash))

    def _get_blob(self, blob_hash: str, length: typing.Optional[int] = None, is_mine: bool = False):
        """
        Create a new blob object for blob_hash without touching the ``self.blobs`` cache.

        A BlobFile is returned when config.save_blobs is set or when a file for a valid blob hash
        already exists in the blob directory, otherwise a BlobBuffer is returned.
        self.blob_completed is handed to the blob as its completion callback.
        """
        if self.config.save_blobs or (is_valid_blobhash(blob_hash) and self._is_on_disk(blob_hash)):
            blob_class = BlobFile
        else:
            blob_class = BlobBuffer
        return blob_class(
            self.loop, blob_hash, length, self.blob_completed, self.blob_dir, is_mine=is_mine
        )

    def get_blob(self, blob_hash, length: typing.Optional[int] = None, is_mine: bool = False):
        """
        Return the cached blob object for blob_hash, creating and caching one if needed.

        If blobs are now being saved but the cached object is a BlobBuffer, it is replaced by a
        fresh blob from _get_blob() and any readable buffered data is written into the replacement.
        When length is given and the blob does not know its length yet, the length is set.
        """
        if blob_hash not in self.blobs:
            created = self._get_blob(blob_hash, length, is_mine)
            self.blobs[blob_hash] = created
            return created

        blob = self.blobs[blob_hash]
        if self.config.save_blobs and isinstance(blob, BlobBuffer):
            buffer = self.blobs.pop(blob_hash)
            # the replacement has to be completed again before the hash counts as completed
            self.completed_blob_hashes.discard(blob_hash)
            blob = self._get_blob(blob_hash, length, is_mine)
            self.blobs[blob_hash] = blob
            if buffer.is_readable():
                with buffer.reader_context() as reader:
                    blob.write_blob(reader.read())
        if length and blob.length is None:
            blob.set_length(length)
        return blob

    def is_blob_verified(self, blob_hash: str, length: typing.Optional[int] = None) -> bool:
        """
        Return whether the blob exists in the blob directory and reports itself as verified.

        Raises ValueError if blob_hash is not a valid blob hash. Returns False when there is no
        file for the blob (or no blob directory at all). The cached blob object is consulted when
        there is one; otherwise a temporary blob object is created, which is not cached.
        """
        if not is_valid_blobhash(blob_hash):
            raise ValueError(blob_hash)
        if not self._is_on_disk(blob_hash):
            return False
        if blob_hash in self.blobs:
            return self.blobs[blob_hash].get_is_verified()
        return self._get_blob(blob_hash, length).get_is_verified()

    async def setup(self) -> bool:
        """
        Reconcile the blob directory with storage and optionally start the connection manager.

        The directory listing runs in the loop's default executor. Hashes returned by
        storage.sync_missing_blobs() are added to completed_blob_hashes; the remaining on-disk
        hashes are passed to ensure_completed_blobs_status(). Always returns True.
        """
        def list_blob_files() -> typing.Set[str]:
            if not self.blob_dir:
                return set()
            # the context manager closes the directory iterator even if the listing fails midway
            with os.scandir(self.blob_dir) as entries:
                return {entry.name for entry in entries if is_valid_blobhash(entry.name)}

        in_blobfiles_dir = await self.loop.run_in_executor(None, list_blob_files)
        to_add = await self.storage.sync_missing_blobs(in_blobfiles_dir)
        if to_add:
            self.completed_blob_hashes.update(to_add)
        # check blobs that aren't set as finished but were seen on disk
        await self.ensure_completed_blobs_status(in_blobfiles_dir - to_add)
        if self.config.track_bandwidth:
            self.connection_manager.start()
        return True

    def stop(self):
        """Stop the connection manager, close and forget every cached blob and clear the completed set."""
        self.connection_manager.stop()
        while self.blobs:
            _, blob = self.blobs.popitem()
            blob.close()
        self.completed_blob_hashes.clear()

    def get_stream_descriptor(self, sd_hash):
        """Delegate to StreamDescriptor.from_stream_descriptor_blob() with the blob for sd_hash."""
        sd_blob = self.get_blob(sd_hash)
        return StreamDescriptor.from_stream_descriptor_blob(self.loop, self.blob_dir, sd_blob)

    def blob_completed(self, blob: AbstractBlob) -> asyncio.Task:
        """
        Record a blob in storage; this method is handed to blobs as their completion callback.

        A BlobFile is added to completed_blob_hashes and stored with finished=True, any other
        blob type is stored with finished=False. Returns the task running storage.add_blobs().
        Raises Exception if the blob has no hash or a falsy length.
        """
        if blob.blob_hash is None:
            raise Exception("Blob hash is None")
        if not blob.length:
            raise Exception("Blob has a length of 0")
        finished = isinstance(blob, BlobFile)
        if finished:
            self.completed_blob_hashes.add(blob.blob_hash)
        return self.loop.create_task(self.storage.add_blobs(
            (blob.blob_hash, blob.length, blob.added_on, blob.is_mine), finished=finished)
        )

    async def ensure_completed_blobs_status(self, blob_hashes: typing.Iterable[str]):
        """
        Ensures that completed blobs from a given list of blob hashes are set as 'finished' in the database.

        Blobs that are not verified are skipped. Rows are sent to storage.add_blobs() in batches so
        a large blob directory does not result in one huge call. Note that get_blob() caches a blob
        object for every verified hash.
        """
        pending = []
        for blob_hash in blob_hashes:
            if not self.is_blob_verified(blob_hash):
                continue
            blob = self.get_blob(blob_hash)
            pending.append((blob.blob_hash, blob.length, blob.added_on, blob.is_mine))
            if len(pending) > self._STATUS_FLUSH_THRESHOLD:
                await self.storage.add_blobs(*pending, finished=True)
                pending.clear()
        if not pending:
            # nothing left to flush, so skip the storage call entirely
            return None
        return await self.storage.add_blobs(*pending, finished=True)

    def delete_blob(self, blob_hash: str):
        """
        Delete a blob and stop tracking it as completed.

        A cached blob object is removed from the cache and asked to delete itself; otherwise the
        blob's file, if any, is removed from the blob directory. The hash is dropped from
        completed_blob_hashes in both cases, so a blob deleted from disk is never left marked as
        completed. Raises Exception if blob_hash is not a valid blob hash.
        """
        if not is_valid_blobhash(blob_hash):
            raise Exception("invalid blob hash to delete")

        if blob_hash in self.blobs:
            self.blobs.pop(blob_hash).delete()
        elif self._is_on_disk(blob_hash):
            os.remove(os.path.join(self.blob_dir, blob_hash))
        self.completed_blob_hashes.discard(blob_hash)

    async def delete_blobs(self, blob_hashes: typing.List[str], delete_from_db: typing.Optional[bool] = True):
        """Delete every blob in blob_hashes and, unless delete_from_db is falsy, remove them from storage too."""
        for blob_hash in blob_hashes:
            self.delete_blob(blob_hash)

        if delete_from_db:
            await self.storage.delete_blobs_from_db(blob_hashes)