lbry-sdk/lbry/wallet/header.py

358 lines
15 KiB
Python
Raw Normal View History

2020-03-18 04:10:55 -03:00
import base64
2020-01-02 22:18:49 -05:00
import os
2018-08-15 15:23:00 -04:00
import struct
2020-01-02 22:18:49 -05:00
import asyncio
import logging
2020-03-18 04:10:55 -03:00
import zlib
from datetime import date
from concurrent.futures.thread import ThreadPoolExecutor
2020-01-02 22:18:49 -05:00
from io import BytesIO
2020-03-18 04:10:55 -03:00
from typing import Optional, Iterator, Tuple, Callable
2018-08-15 15:23:00 -04:00
from binascii import hexlify, unhexlify
from lbry.crypto.hash import sha512, double_sha256, ripemd160
from lbry.wallet.util import ArithUint256, date_to_julian_day
2020-03-18 04:10:55 -03:00
from .checkpoints import HASHES
2020-01-02 22:18:49 -05:00
log = logging.getLogger(__name__)
class InvalidHeader(Exception):
    """Raised when a block header fails validation.

    Attributes:
        height: the height supplied by the code that raised the error.
        message: the human-readable reason, also used as the exception text.
    """

    def __init__(self, height, message):
        Exception.__init__(self, message)
        self.height = height
        self.message = message
2020-01-02 22:50:27 -05:00
class Headers:
    """Store of fixed-size serialized block headers plus chain parameters."""

    # Length in bytes of one serialized header record.
    header_size = 112
    # Number of headers grouped into one chunk when splitting incoming data.
    chunk_size = 10**16

    # Largest (easiest) proof-of-work target accepted.
    max_target = 0x0000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
    # Hex-encoded hash the header at height 0 must have.
    genesis_hash = b'9c89283ba0f3227f6c03b70216b9f665f0118d5e0fa729cedf4fb34d6a34f463'
    # Desired spacing between consecutive block timestamps
    # (presumably seconds, the unit of header timestamps -- confirm).
    target_timespan = 150
    # Mapping of chunk start height -> expected hash of that 1000-header chunk.
    checkpoints = HASHES
    first_block_timestamp = 1466646588  # block 1, as 0 is off by a lot
    timestamp_average_offset = 160.6855883050695  # calculated at 733447

    # When False, the bits and proof-of-work checks are skipped.
    validate_difficulty: bool = True
def __init__(self, path) -> None:
    """Set up an unopened header store.

    Args:
        path: file system path of the header file, or ``':memory:'`` to keep
            the headers in an in-memory buffer.

    No file is opened here; file-backed stores get their handle in ``open()``.
    """
    # Always define ``io`` so the attribute exists even before ``open()``;
    # previously it was only assigned for the in-memory case.
    self.io = BytesIO() if path == ':memory:' else None
    self.path = path
    # Number of stored headers; unknown until the store is opened.
    self._size: Optional[int] = None
    # Optional coroutine function used to download a chunk on demand.
    self.chunk_getter: Optional[Callable] = None
    # Single worker thread so blocking file operations run one at a time.
    self.executor = ThreadPoolExecutor(1)
    # Start heights of checkpointed chunks whose stored data did not match.
    self.known_missing_checkpointed_chunks = set()
    self.check_chunk_lock = asyncio.Lock()
2020-01-02 22:18:49 -05:00
async def open(self):
    """Open the backing storage and bring the store to a usable state.

    Recreates the worker executor if it was shut down, opens (or creates)
    the header file unless the store is in-memory, measures how many headers
    are stored, then runs the checkpoint size and missing-chunk checks.
    """
    if not self.executor:
        self.executor = ThreadPoolExecutor(1)
    if self.path != ':memory:':
        # Create the file when absent, otherwise open it for in-place updates.
        mode = 'r+b' if os.path.exists(self.path) else 'w+b'
        self.io = open(self.path, mode)
    end_offset = self.io.seek(0, os.SEEK_END)
    self._size = end_offset // self.header_size
    await self.ensure_checkpointed_size()
    await self.get_all_missing_headers()
2020-01-02 22:18:49 -05:00
async def close(self):
    """Shut down the worker executor and close the backing storage.

    Safe to call on a store that was never opened: a missing or ``None``
    ``io`` handle is skipped instead of raising ``AttributeError``.
    """
    if self.executor:
        self.executor.shutdown()
        self.executor = None
    handle = getattr(self, 'io', None)
    if handle is not None:
        handle.close()
@staticmethod
def serialize(header):
    """Pack a header dict into its raw byte representation.

    The three hex-encoded hash fields are decoded and byte-reversed; the
    integer fields are packed as little-endian unsigned 32-bit values.
    """
    version_part = struct.pack('<I', header['version'])
    hash_part = b''.join(
        unhexlify(header[key])[::-1]
        for key in ('prev_block_hash', 'merkle_root', 'claim_trie_root')
    )
    trailer_part = struct.pack('<III', header['timestamp'], header['bits'], header['nonce'])
    return version_part + hash_part + trailer_part
2020-01-02 22:18:49 -05:00
@staticmethod
def deserialize(height, header):
    """Unpack raw header bytes into a dict, tagging it with ``height``.

    Raises ``struct.error`` when ``header`` is too short to hold the
    integer fields.
    """
    (version,) = struct.unpack_from('<I', header, 0)
    timestamp, bits, nonce = struct.unpack_from('<III', header, 100)

    def reversed_hex(begin, stop):
        # Hash fields are stored byte-reversed relative to their hex form.
        return hexlify(header[begin:stop][::-1])

    return {
        'version': version,
        'prev_block_hash': reversed_hex(4, 36),
        'merkle_root': reversed_hex(36, 68),
        'claim_trie_root': reversed_hex(68, 100),
        'timestamp': timestamp,
        'bits': bits,
        'nonce': nonce,
        'block_height': height,
    }
2020-01-02 22:18:49 -05:00
def get_next_chunk_target(self, chunk: int) -> ArithUint256:
    """Return the target ceiling for a chunk.

    The ``chunk`` argument is not consulted here; the result is always the
    class-level ``max_target`` wrapped in ``ArithUint256``.
    """
    ceiling = ArithUint256(self.max_target)
    return ceiling
2020-01-02 22:50:27 -05:00
def get_next_block_target(self, max_target: ArithUint256, previous: Optional[dict],
                          current: Optional[dict]) -> ArithUint256:
    """Compute the target required of the block following ``current``.

    Args:
        max_target: upper bound on the returned target.
        previous: header before ``current``, or None when there is none.
        current: most recent header, or None at the very start of the chain.

    Returns:
        ``max_target`` when no headers are known; otherwise the target of
        ``current`` scaled by a damped and clamped block interval, capped at
        ``max_target``.
    """
    # https://github.com/lbryio/lbrycrd/blob/master/src/lbry.cpp
    if previous is None:
        if current is None:
            return max_target
        # Only one header known: measure the interval against itself (zero).
        previous = current
    span = self.target_timespan
    elapsed = current['timestamp'] - previous['timestamp']
    # Only one eighth of the deviation from the desired spacing is applied.
    damped = span + int((elapsed - span) / 8)
    lower_bound = span - int(span / 8)  # 150 - 18 = 132
    upper_bound = span + int(span / 2)  # 150 + 75 = 225
    bounded = max(lower_bound, min(damped, upper_bound))
    base_target = ArithUint256.from_compact(current['bits'])
    return min(max_target, (base_target * bounded) / span)
2020-01-02 22:18:49 -05:00
def __len__(self) -> int:
    """Return the number of stored headers as tracked in ``_size``."""
    stored = self._size
    return stored
def __bool__(self):
    """Always truthy, so an empty store is not mistaken for a missing one."""
    # Without this, truth testing would fall back to __len__.
    return True
2020-03-16 11:28:11 -03:00
async def get(self, height) -> dict:
    """Return the deserialized header stored at ``height``.

    Raises:
        NotImplementedError: when ``height`` is a slice.
        IndexError: when the stored bytes cannot be unpacked as a header.
    """
    if isinstance(height, slice):
        raise NotImplementedError("Slicing of header chain has not been implemented yet.")
    try:
        raw_header = await self.get_raw_header(height)
        return self.deserialize(height, raw_header)
    except struct.error:
        raise IndexError(f"failed to get {height}, at {len(self)}")
2020-03-16 11:28:11 -03:00
def estimated_timestamp(self, height):
    """Estimate a block's timestamp from its height.

    Linear extrapolation: the first-block timestamp plus ``height`` times
    the average spacing between blocks.
    """
    elapsed = height * self.timestamp_average_offset
    return self.first_block_timestamp + elapsed
2020-01-02 22:18:49 -05:00
def estimated_julian_day(self, height):
    """Estimate the Julian day of the block at ``height``.

    The estimated timestamp is converted to a calendar date with
    ``date.fromtimestamp`` (which uses the local time zone) and then passed
    to ``date_to_julian_day``.
    """
    approx_timestamp = self.estimated_timestamp(height)
    approx_date = date.fromtimestamp(approx_timestamp)
    return date_to_julian_day(approx_date)
2020-03-18 00:59:03 -03:00
async def get_raw_header(self, height) -> bytes:
    """Read the raw bytes of the header at ``height``.

    When a chunk getter is configured, the chunk containing ``height`` is
    ensured first. The read itself runs on the executor.

    Raises:
        IndexError: when ``height`` is negative or beyond the current height.
    """
    if self.chunk_getter:
        await self.ensure_chunk_at(height)
    if height < 0 or height > self.height:
        raise IndexError(f"{height} is out of bounds, current height: {self.height}")
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(self.executor, self._read, height)
def _read(self, height, count=1):
    """Read ``count`` consecutive raw headers starting at ``height``.

    Blocking; fewer bytes are returned when the storage ends early.
    """
    offset = height * self.header_size
    length = self.header_size * count
    self.io.seek(offset, os.SEEK_SET)
    return self.io.read(length)
2020-01-02 22:18:49 -05:00
def chunk_hash(self, start, count):
    """Return, as text, the hash of ``count`` stored headers from ``start``.

    Blocking: reads directly from the backing storage.
    """
    self.io.seek(start * self.header_size, os.SEEK_SET)
    raw_chunk = self.io.read(count * self.header_size)
    digest = self.hash_header(raw_chunk)
    return digest.decode()
2020-03-23 01:19:34 -03:00
async def ensure_checkpointed_size(self):
    """Grow the store so it spans the highest checkpointed chunk.

    When the current height is below the largest checkpoint height, 1000
    zero-filled headers are written starting at that height.
    """
    # Falls back to -1 when there are no checkpoints, so nothing is written.
    highest_checkpoint = max(self.checkpoints.keys() or [-1])
    if self.height >= highest_checkpoint:
        return
    placeholder = bytes(self.header_size * 1000)
    self._write(highest_checkpoint, placeholder)
2020-03-18 04:10:55 -03:00
async def ensure_chunk_at(self, height):
    """Fetch the chunk containing ``height`` unless it is already present.

    The check and the fetch happen under ``check_chunk_lock``.
    """
    async with self.check_chunk_lock:
        already_present = await self.has_header(height)
        if not already_present:
            return await self.fetch_chunk(height)
        log.debug("has header %s", height)
        return
async def fetch_chunk(self, height):
    """Download the 1000-header chunk containing ``height`` and store it.

    The chunk is requested through ``chunk_getter``, base64-decoded and
    inflated as a raw deflate stream, then hashed. It is written only when
    the hash equals the checkpoint for the chunk's start height.

    Raises:
        Exception: when a checkpoint exists for the chunk but does not match.
    """
    log.info("on-demand fetching height %s", height)
    start = height - height % 1000
    response = await self.chunk_getter(start)  # pylint: disable=not-callable
    compressed = base64.b64decode(response['base64'])
    chunk = zlib.decompress(compressed, wbits=-15, bufsize=600_000)
    chunk_hash = self.hash_header(chunk).decode()
    if start not in self.checkpoints:
        return  # todo: fixme
    if self.checkpoints[start] != chunk_hash:
        raise Exception(
            f"Checkpoint mismatch at height {start}. Expected {self.checkpoints[start]}, but got {chunk_hash} instead."
        )
    await asyncio.get_running_loop().run_in_executor(self.executor, self._write, start, chunk)
    # The chunk is now on disk, so it is no longer missing.
    self.known_missing_checkpointed_chunks.discard(start)
    return
async def has_header(self, height):
    """Tell whether a real header is stored at ``height``.

    For heights inside a checkpointed chunk the answer comes from the set of
    chunks known to be missing. Otherwise the stored bytes are hashed and
    compared against the hashes of an empty read and of an all-zero header.
    """
    chunk_start = (height // 1000) * 1000
    if chunk_start in self.checkpoints:
        return chunk_start not in self.known_missing_checkpointed_chunks

    placeholder_hashes = (
        '56944c5d3f98413ef45cf54545538103cc9f298e0575820ad3591376e2e0f65d',  # empty
        '789d737d4f448e554b318c94063bbfa63e9ccda6e208f5648ca76ee68896557b',  # all zeroes
    )

    def stored_header_is_real(at_height):
        return self.chunk_hash(at_height, 1) not in placeholder_hashes

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(self.executor, stored_header_is_real, height)
2020-03-18 04:10:55 -03:00
2020-03-23 01:19:34 -03:00
async def get_all_missing_headers(self):
    """Record every checkpointed chunk whose stored bytes do not match.

    Runs as a single job on the executor. Returns the (mutated) set of chunk
    start heights known to be missing.
    """
    # Heavy operation done in one optimized shot
    def scan_checkpoints():
        missing = self.known_missing_checkpointed_chunks
        for chunk_height, expected_hash in reversed(list(self.checkpoints.items())):
            if chunk_height in missing:
                continue
            if self.chunk_hash(chunk_height, 1000) != expected_hash:
                missing.add(chunk_height)
        return self.known_missing_checkpointed_chunks

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(self.executor, scan_checkpoints)
2020-01-02 22:18:49 -05:00
@property
def height(self) -> int:
    """Height of the last stored header (-1 when nothing is stored)."""
    count = len(self)
    return count - 1
@property
def bytes_size(self):
    """Total size in bytes of all stored headers."""
    count = len(self)
    return self.header_size * count
2020-03-18 00:59:03 -03:00
async def hash(self, height=None) -> bytes:
    """Return the hash of the header at ``height`` (default: the last one)."""
    if height is None:
        height = self.height
    raw_header = await self.get_raw_header(height)
    return self.hash_header(raw_header)
@staticmethod
def hash_header(header: bytes) -> bytes:
    """Return the hex-encoded, byte-reversed double-SHA256 of ``header``.

    A ``None`` header yields 64 ASCII zero characters.
    """
    if header is not None:
        digest = double_sha256(header)
        return hexlify(digest[::-1])
    return b'0' * 64
async def connect(self, start: int, headers: bytes) -> int:
    """Validate and append ``headers`` starting at height ``start``.

    The data is processed chunk by chunk. When validation of a chunk fails,
    only the headers that precede the failing height are written and
    processing stops.

    Args:
        start: height of the first header in ``headers``.
        headers: concatenated raw headers; length must be a multiple of
            ``header_size``.

    Returns:
        The number of headers written.
    """
    added = 0
    loop = asyncio.get_running_loop()
    for height, chunk in self._iterate_chunks(start, headers):
        rejected = False
        try:
            # validate_chunk() is CPU bound and reads previous chunks from file system
            await self.validate_chunk(height, chunk)
        except InvalidHeader as e:
            rejected = True
            # Keep the headers in [height, e.height). The original computed
            # ``height - e.height``, a non-positive count whose negative
            # slice bound kept the wrong prefix of the chunk.
            valid_count = max(0, e.height - height)
            chunk = chunk[:valid_count * self.header_size]
        if chunk:
            added += await loop.run_in_executor(self.executor, self._write, height, chunk)
        if rejected:
            break
    return added
def _write(self, height, verified_chunk):
    """Write raw headers at ``height`` and return how many were written.

    Blocking. The tracked size only ever grows: it becomes the larger of
    its previous value and the position reached by this write.
    """
    self.io.seek(height * self.header_size, os.SEEK_SET)
    bytes_written = self.io.write(verified_chunk)
    # .seek()/.write()/.truncate() might also .flush() when needed
    # the goal here is mainly to ensure we're definitely flush()'ing
    self.io.flush()
    headers_up_to_cursor = self.io.tell() // self.header_size
    self._size = max(self._size or 0, headers_up_to_cursor)
    return bytes_written // self.header_size
2020-03-16 11:28:11 -03:00
async def validate_chunk(self, height, chunk):
    """Validate every header in ``chunk``, which starts at ``height``.

    The two headers stored before ``height`` (when they exist) seed the
    link and target checks; each header in the chunk is then checked against
    its predecessor. ``validate_header`` raises ``InvalidHeader`` on failure.
    """
    prior_hash = prior_header = before_prior_header = None
    if height > 0:
        prior_raw = await self.get_raw_header(height-1)
        prior_header = self.deserialize(height-1, prior_raw)
        prior_hash = self.hash_header(prior_raw)
    if height > 1:
        before_prior_header = await self.get(height-2)
    chunk_target = self.get_next_chunk_target(height // 2016 - 1)
    for header_hash, header in self._iterate_headers(height, chunk):
        required_target = self.get_next_block_target(chunk_target, before_prior_header, prior_header)
        # NOTE(review): the chunk's start height is passed for every header,
        # so a raised InvalidHeader carries that height rather than the
        # failing header's own height -- confirm this is intended.
        self.validate_header(height, header_hash, header, prior_hash, required_target)
        # Slide the two-header window forward.
        before_prior_header, prior_header, prior_hash = prior_header, header, header_hash
def validate_header(self, height: int, current_hash: bytes,
                    header: dict, previous_hash: bytes, target: ArithUint256):
    """Check one header against its predecessor and the required target.

    Args:
        height: height attached to any raised ``InvalidHeader``.
        current_hash: hex-encoded hash of ``header``.
        header: deserialized header dict.
        previous_hash: hash of the preceding header, or None for the first
            header of the chain.
        target: target the header must meet.

    Raises:
        InvalidHeader: on a genesis mismatch, a broken link to the previous
            header, or (when ``validate_difficulty`` is set) wrong bits or
            insufficient proof of work.
    """
    if previous_hash is None:
        # No predecessor: only the genesis hash can be checked.
        expected_genesis = self.genesis_hash
        if expected_genesis is not None and expected_genesis != current_hash:
            raise InvalidHeader(
                height, f"genesis header doesn't match: {current_hash.decode()} "
                        f"vs expected {self.genesis_hash.decode()}")
        return

    claimed_previous = header['prev_block_hash']
    if claimed_previous != previous_hash:
        raise InvalidHeader(
            height, "previous hash mismatch: {} vs expected {}".format(
                claimed_previous.decode(), previous_hash.decode())
        )

    if not self.validate_difficulty:
        return

    if header['bits'] != target.compact:
        raise InvalidHeader(
            height, "bits mismatch: {} vs expected {}".format(
                header['bits'], target.compact)
        )

    proof_of_work = self.get_proof_of_work(current_hash)
    if proof_of_work > target:
        raise InvalidHeader(
            height, f"insufficient proof of work: {proof_of_work.value} vs target {target.value}"
        )
async def repair(self):
    """Scan the stored chain and truncate it at the first broken link.

    Headers are read in batches of 36. The header at height 0 must hash to
    ``genesis_hash``; every other header must reference the hash of the one
    before it. On the first violation the file is cut at the header
    preceding the offending one and the scan stops.
    """
    loop = asyncio.get_running_loop()

    def truncate_before(at_height):
        # Drops the offending header and the one before it.
        self.io.seek(max(0, (at_height - 1)) * self.header_size, os.SEEK_SET)
        self.io.truncate()
        self.io.flush()
        self._size = self.io.seek(0, os.SEEK_END) // self.header_size

    expected_previous = None
    corrupted = None
    batch_size = 36
    for batch_start in range(0, self.height, batch_size):
        raw = await loop.run_in_executor(self.executor, self._read, batch_start, batch_size)
        trailing = len(raw) % self.header_size
        if trailing:
            # Ignore a partial header at the end of the read.
            raw = raw[:len(raw) - trailing]
        for header_hash, header in self._iterate_headers(batch_start, raw):
            height = header['block_height']
            if height:
                if header['prev_block_hash'] != expected_previous:
                    corrupted = True
            elif header_hash != self.genesis_hash:
                corrupted = True
            if corrupted:
                log.warning("Header file corrupted at height %s, truncating it.", height - 1)
                return await loop.run_in_executor(self.executor, truncate_before, height)
            expected_previous = header_hash
2020-01-02 22:50:27 -05:00
@classmethod
def get_proof_of_work(cls, header_hash: bytes):
    """Return the proof-of-work value of ``header_hash`` as an ArithUint256.

    The header hash is mapped to its proof-of-work hash (hex bytes), which
    is then parsed as a base-16 integer.
    """
    pow_hex = cls.header_hash_to_pow_hash(header_hash)
    pow_value = int(b'0x' + pow_hex, 16)
    return ArithUint256(pow_value)
2020-01-02 22:18:49 -05:00
def _iterate_chunks(self, height: int, headers: bytes) -> Iterator[Tuple[int, bytes]]:
    """Split ``headers`` into pieces aligned to ``chunk_size`` boundaries.

    Yields ``(start_height, raw_bytes)`` pairs. The first piece runs up to
    the next chunk boundary after ``height``; later pieces hold at most
    ``chunk_size`` headers. Empty input still yields one empty piece.
    """
    assert len(headers) % self.header_size == 0, f"{len(headers)} {len(headers)%self.header_size}"
    bytes_per_chunk = self.chunk_size * self.header_size
    lo = 0
    # Byte offset of the first chunk boundary at or after ``height``.
    hi = (self.chunk_size - height % self.chunk_size) * self.header_size
    while lo < hi:
        yield height + (lo // self.header_size), headers[lo:hi]
        lo, hi = hi, min(len(headers), hi + bytes_per_chunk)
def _iterate_headers(self, height: int, headers: bytes) -> Iterator[Tuple[bytes, dict]]:
    """Yield ``(hash, header_dict)`` for each raw header in ``headers``.

    The first header is assigned ``height``; each following one is one higher.
    """
    assert len(headers) % self.header_size == 0, len(headers)
    size = self.header_size
    whole_length = (len(headers) // size) * size
    for position, offset in enumerate(range(0, whole_length, size)):
        raw_header = headers[offset:offset + size]
        yield self.hash_header(raw_header), self.deserialize(height + position, raw_header)
2018-08-15 15:23:00 -04:00
@staticmethod
def header_hash_to_pow_hash(header_hash: bytes):
    """Derive the hex-encoded proof-of-work hash from a hex header hash.

    The header hash is decoded and byte-reversed, hashed with SHA-512, each
    half of that digest is hashed with RIPEMD-160, and the concatenation is
    double-SHA256 hashed. The result is byte-reversed and hex-encoded.
    """
    raw_hash = unhexlify(header_hash)[::-1]
    wide_digest = sha512(raw_hash)
    midpoint = len(wide_digest) // 2
    left_half = ripemd160(wide_digest[:midpoint])
    right_half = ripemd160(wide_digest[midpoint:])
    pow_hash = double_sha256(left_half + right_half)
    return hexlify(pow_hash[::-1])
class UnvalidatedHeaders(Headers):
    """Header store variant that skips difficulty checks and checkpoints."""

    # Disables the bits and proof-of-work checks in validate_header().
    validate_difficulty = False
    max_target = 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
    genesis_hash = b'6e3fcf1299d4ec5d79c3a4c91d624a4acf9e2e173d95a1a0504f677669687556'
    # No checkpointed chunks for this chain.
    checkpoints = {}