less concurrent repeated header checks

This commit is contained in:
Victor Shyba 2020-03-23 01:19:34 -03:00
parent 952fc01efd
commit 342cb00625
3 changed files with 42 additions and 14 deletions

View file

@ -47,6 +47,8 @@ class Headers:
self._size: Optional[int] = None self._size: Optional[int] = None
self.chunk_getter: Optional[Callable] = None self.chunk_getter: Optional[Callable] = None
self.executor = ThreadPoolExecutor(1) self.executor = ThreadPoolExecutor(1)
self.known_missing_checkpointed_chunks = set()
self.check_chunk_lock = asyncio.Lock()
async def open(self): async def open(self):
if not self.executor: if not self.executor:
@ -57,6 +59,8 @@ class Headers:
else: else:
self.io = open(self.path, 'r+b') self.io = open(self.path, 'r+b')
self._size = self.io.seek(0, os.SEEK_END) // self.header_size self._size = self.io.seek(0, os.SEEK_END) // self.header_size
await self.ensure_checkpointed_size()
await self.get_all_missing_headers()
async def close(self): async def close(self):
if self.executor: if self.executor:
@ -140,14 +144,19 @@ class Headers:
self.io.seek(start * self.header_size, os.SEEK_SET) self.io.seek(start * self.header_size, os.SEEK_SET)
return self.hash_header(self.io.read(count * self.header_size)).decode() return self.hash_header(self.io.read(count * self.header_size)).decode()
async def ensure_tip(self): async def ensure_checkpointed_size(self):
if self.checkpoints: max_checkpointed_height = max(self.checkpoints.keys() or [-1])
await self.ensure_chunk_at(max(self.checkpoints.keys())) if self.height < max_checkpointed_height:
self._write(max_checkpointed_height, bytes([0] * self.header_size * 1000))
async def ensure_chunk_at(self, height): async def ensure_chunk_at(self, height):
async with self.check_chunk_lock:
if await self.has_header(height): if await self.has_header(height):
log.debug("has header %s", height) log.debug("has header %s", height)
return return
return await self.fetch_chunk(height)
async def fetch_chunk(self, height):
log.info("on-demand fetching height %s", height) log.info("on-demand fetching height %s", height)
start = (height // 1000) * 1000 start = (height // 1000) * 1000
headers = await self.chunk_getter(start) # pylint: disable=not-callable headers = await self.chunk_getter(start) # pylint: disable=not-callable
@ -156,7 +165,10 @@ class Headers:
) )
chunk_hash = self.hash_header(chunk).decode() chunk_hash = self.hash_header(chunk).decode()
if self.checkpoints.get(start) == chunk_hash: if self.checkpoints.get(start) == chunk_hash:
return await asyncio.get_running_loop().run_in_executor(self.executor, self._write, start, chunk) await asyncio.get_running_loop().run_in_executor(self.executor, self._write, start, chunk)
if start in self.known_missing_checkpointed_chunks:
self.known_missing_checkpointed_chunks.remove(start)
return
elif start not in self.checkpoints: elif start not in self.checkpoints:
return # todo: fixme return # todo: fixme
raise Exception( raise Exception(
@ -164,12 +176,27 @@ class Headers:
) )
async def has_header(self, height): async def has_header(self, height):
normalized_height = (height // 1000) * 1000
if normalized_height in self.checkpoints:
return normalized_height not in self.known_missing_checkpointed_chunks
def _has_header(height): def _has_header(height):
empty = '56944c5d3f98413ef45cf54545538103cc9f298e0575820ad3591376e2e0f65d' empty = '56944c5d3f98413ef45cf54545538103cc9f298e0575820ad3591376e2e0f65d'
all_zeroes = '789d737d4f448e554b318c94063bbfa63e9ccda6e208f5648ca76ee68896557b' all_zeroes = '789d737d4f448e554b318c94063bbfa63e9ccda6e208f5648ca76ee68896557b'
return self.chunk_hash(height, 1) not in (empty, all_zeroes) return self.chunk_hash(height, 1) not in (empty, all_zeroes)
return await asyncio.get_running_loop().run_in_executor(self.executor, _has_header, height) return await asyncio.get_running_loop().run_in_executor(self.executor, _has_header, height)
async def get_all_missing_headers(self):
# Heavy operation done in one optimized shot
def _io_checkall():
for chunk_height, expected_hash in reversed(list(self.checkpoints.items())):
if chunk_height in self.known_missing_checkpointed_chunks:
continue
if self.chunk_hash(chunk_height, 1000) != expected_hash:
self.known_missing_checkpointed_chunks.add(chunk_height)
return self.known_missing_checkpointed_chunks
return await asyncio.get_running_loop().run_in_executor(self.executor, _io_checkall)
@property @property
def height(self) -> int: def height(self) -> int:
return len(self)-1 return len(self)-1

View file

@ -344,15 +344,13 @@ class Ledger(metaclass=LedgerRegistry):
return max(self.headers.height, self._download_height) return max(self.headers.height, self._download_height)
async def initial_headers_sync(self): async def initial_headers_sync(self):
target = self.network.remote_height + 1
get_chunk = partial(self.network.retriable_call, self.network.get_headers, count=1000, b64=True) get_chunk = partial(self.network.retriable_call, self.network.get_headers, count=1000, b64=True)
self.headers.chunk_getter = get_chunk self.headers.chunk_getter = get_chunk
async def doit(): async def doit():
for height in reversed(range(0, target)):
async with self._header_processing_lock: async with self._header_processing_lock:
for height in reversed(sorted(self.headers.known_missing_checkpointed_chunks)):
await self.headers.ensure_chunk_at(height) await self.headers.ensure_chunk_at(height)
await self.headers.ensure_tip()
self._update_tasks.add(doit()) self._update_tasks.add(doit())
await self.update_headers() await self.update_headers()

View file

@ -1,12 +1,15 @@
import os import os
import asyncio import asyncio
import tempfile import tempfile
from binascii import hexlify, unhexlify from binascii import unhexlify
from lbry.crypto.hash import sha256
from lbry.wallet.util import ArithUint256 from lbry.wallet.util import ArithUint256
from lbry.testcase import AsyncioTestCase from lbry.testcase import AsyncioTestCase
from lbry.wallet.ledger import Headers from lbry.wallet.ledger import Headers as _Headers
class Headers(_Headers):
checkpoints = {}
def block_bytes(blocks): def block_bytes(blocks):