2021-10-21 01:34:55 +02:00
|
|
|
import os
|
2021-01-20 05:41:54 +01:00
|
|
|
import argparse
|
|
|
|
import asyncio
|
2021-02-12 18:41:03 +01:00
|
|
|
import logging
|
2021-01-20 05:41:54 +01:00
|
|
|
from elasticsearch import AsyncElasticsearch
|
2021-10-19 20:00:39 +02:00
|
|
|
from elasticsearch.helpers import async_streaming_bulk
|
2021-03-29 19:16:49 +02:00
|
|
|
from lbry.wallet.server.env import Env
|
2021-06-18 03:22:23 +02:00
|
|
|
from lbry.wallet.server.leveldb import LevelDB
|
2021-08-06 20:11:28 +02:00
|
|
|
from lbry.wallet.server.db.elasticsearch.search import SearchIndex, IndexVersionMismatch
|
|
|
|
from lbry.wallet.server.db.elasticsearch.constants import ALL_FIELDS
|
2021-01-20 05:41:54 +01:00
|
|
|
|
|
|
|
|
2021-10-21 01:34:55 +02:00
|
|
|
async def get_recent_claims(env, index_name='claims', db=None):
    """Yield ES bulk actions for claims touched or deleted since the last sync.

    Collects the touched/deleted claim sets recorded for every height from
    the DB's stored ES sync height up to the current DB height, yields
    delete actions for removed claims and upsert actions for touched ones,
    then advances the stored ES sync height to the current DB height.

    :param env: server `Env` holding database configuration
    :param index_name: target Elasticsearch index name
    :param db: optional externally managed `LevelDB`; when ``None`` a DB is
        opened here and closed again in the ``finally`` block
    """
    need_open = db is None
    db = db or LevelDB(env)
    try:
        if need_open:
            db.open_db()
        # Nothing to do when ES is already caught up (or the DB is empty).
        if db.es_sync_height == db.db_height or db.db_height <= 0:
            return
        if need_open:
            await db.initialize_caches()
        cnt = 0
        touched_claims = set()
        deleted_claims = set()
        for height in range(db.es_sync_height, db.db_height + 1):
            touched_or_deleted = db.prefix_db.touched_or_deleted.get(height)
            touched_claims.update(touched_or_deleted.touched_claims)
            deleted_claims.update(touched_or_deleted.deleted_claims)
        # A claim that was touched and later deleted only needs the delete.
        touched_claims.difference_update(deleted_claims)

        for deleted in deleted_claims:
            yield {
                '_index': index_name,
                '_op_type': 'delete',
                '_id': deleted.hex()
            }
        for touched in touched_claims:
            claim = db.claim_producer(touched)
            if claim:
                yield {
                    'doc': {key: value for key, value in claim.items() if key in ALL_FIELDS},
                    '_id': claim['claim_id'],
                    '_index': index_name,
                    '_op_type': 'update',
                    'doc_as_upsert': True
                }
                cnt += 1
                # Progress log once per 10k produced claims.  Checked only
                # after a successful increment: previously the modulo test ran
                # on every touched claim, so a run of unproducible claims
                # re-logged the same count (including "0 claims sent to ES").
                if cnt % 10000 == 0:
                    logging.info("%i claims sent to ES", cnt)
            else:
                logging.warning("could not sync claim %s", touched.hex())

        # Persist the new sync height so the next run starts from here.
        db.es_sync_height = db.db_height
        db.write_db_state()
        db.prefix_db.unsafe_commit()
        db.assert_db_state()

        logging.info("finished sending %i claims to ES, deleted %i", cnt, len(deleted_claims))
    finally:
        if need_open:
            db.close()
|
|
|
|
|
|
|
|
|
2021-10-21 01:34:55 +02:00
|
|
|
async def get_all_claims(env, index_name='claims', db=None):
    """Yield an ES upsert action for every claim in the database.

    Used for a full (re)index: streams every claim from
    ``db.all_claims_producer()`` as a bulk ``update``/``doc_as_upsert``
    action, logging progress every 10,000 claims.

    :param env: server `Env` holding database configuration
    :param index_name: target Elasticsearch index name
    :param db: optional externally managed `LevelDB`; when ``None`` a DB is
        opened here and closed again when the generator finishes
    """
    owns_db = db is None
    db = db or LevelDB(env)
    if owns_db:
        db.open_db()
        await db.initialize_caches()
    logging.info("Fetching claims to send ES from leveldb")
    try:
        sent = 0
        async for claim in db.all_claims_producer():
            # Restrict the document to the fields the index schema knows.
            doc = {field: claim[field] for field in claim if field in ALL_FIELDS}
            yield {
                'doc': doc,
                '_id': claim['claim_id'],
                '_index': index_name,
                '_op_type': 'update',
                'doc_as_upsert': True
            }
            sent += 1
            if not sent % 10000:
                logging.info("sent %i claims to ES", sent)
    finally:
        if owns_db:
            db.close()
|
2021-02-12 05:10:30 +01:00
|
|
|
|
|
|
|
|
2021-10-21 01:34:55 +02:00
|
|
|
async def make_es_index_and_run_sync(env: Env, clients=32, force=False, db=None, index_name='claims'):
    """Ensure the ES search index exists, then stream claims into it.

    Starts (and, on an `IndexVersionMismatch`, drops and recreates) the
    search index.  If the index was newly created — or ``force`` is set —
    every claim is sent via `get_all_claims`; otherwise only claims changed
    since the last recorded sync height are sent via `get_recent_claims`.

    :param env: server `Env` with elastic host/port and DB configuration
    :param clients: accepted for CLI compatibility; not used here
    :param force: send all claims even when the index already existed
    :param db: optional already-open `LevelDB` passed through to the
        claim generators
    :param index_name: target Elasticsearch index name
    """
    index = SearchIndex(env.es_index_prefix, elastic_host=env.elastic_host, elastic_port=env.elastic_port)
    logging.info("ES sync host: %s:%i", env.elastic_host, env.elastic_port)
    try:
        created = await index.start()
    except IndexVersionMismatch as err:
        # Schema version changed: drop the old index and build a fresh one.
        logging.info(
            "dropping ES search index (version %s) for upgrade to version %s", err.got_version, err.expected_version
        )
        await index.delete_index()
        await index.stop()
        created = await index.start()
    finally:
        # Bug fix: this was `index.stop()` without `await`, discarding the
        # awaitable (the same call is awaited above), so client shutdown
        # never actually completed on the normal path.
        await index.stop()

    es = AsyncElasticsearch([{'host': env.elastic_host, 'port': env.elastic_port}])
    if force or created:
        # Fresh (or forced) sync: send every claim.
        claim_generator = get_all_claims(env, index_name=index_name, db=db)
    else:
        # Incremental sync: only claims changed since the last sync height.
        claim_generator = get_recent_claims(env, index_name=index_name, db=db)
    try:
        async for ok, item in async_streaming_bulk(es, claim_generator, request_timeout=600, raise_on_error=False):
            if not ok:
                logging.warning("indexing failed for an item: %s", item)
        await es.indices.refresh(index=index_name)
    finally:
        await es.close()
|
2021-01-27 05:43:06 +01:00
|
|
|
|
2021-02-12 05:10:30 +01:00
|
|
|
|
|
|
|
def run_elastic_sync():
    """CLI entry point: parse arguments and run a one-shot ES claim sync.

    Configures logging, builds the argument parser (including the shared
    `Env` options), bails out early when the leveldb directory does not
    exist, and otherwise drives `make_es_index_and_run_sync` to completion.
    """
    logging.basicConfig(level=logging.INFO)
    # Quiet the chatty third-party loggers.
    for noisy in ('aiohttp', 'elasticsearch'):
        logging.getLogger(noisy).setLevel(logging.WARNING)

    logging.info('lbry.server starting')
    arg_parser = argparse.ArgumentParser(prog="lbry-hub-elastic-sync")
    arg_parser.add_argument("-c", "--clients", type=int, default=32)
    arg_parser.add_argument("-f", "--force", default=False, action='store_true')
    Env.contribute_to_arg_parser(arg_parser)
    parsed = arg_parser.parse_args()
    env = Env.from_arg_parser(parsed)

    # Nothing to sync when the wallet-server DB has never been created.
    leveldb_path = os.path.join(parsed.db_dir, 'lbry-leveldb')
    if not os.path.exists(leveldb_path):
        logging.info("DB path doesnt exist, nothing to sync to ES")
        return

    asyncio.run(make_es_index_and_run_sync(env, clients=parsed.clients, force=parsed.force))
|