check ES synced without a process and wait for ES

Victor Shyba 2021-02-12 14:41:03 -03:00
parent 24d11de5a7
commit 67817005b5
2 changed files with 16 additions and 26 deletions

File 1 of 2: the SearchIndex class

@@ -35,7 +35,7 @@ class SearchIndex:
             except ConnectionError:
                 self.logger.warning("Failed to connect to Elasticsearch. Waiting for it!")
                 await asyncio.sleep(1)
-        await self.client.indices.create(
+        res = await self.client.indices.create(
             self.index,
             {
                 "settings":
@@ -70,6 +70,7 @@ class SearchIndex:
                 }
             }, ignore=400
         )
+        return res.get('acknowledged', False)

     def stop(self):
         client = self.client
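Taken together, the two hunks turn SearchIndex.start() from a fire-and-forget index creation into a call that both waits for Elasticsearch and reports whether the index was freshly created. Below is a minimal sketch of that pattern, assuming elasticsearch-py's async client; the name create_when_ready and the INDEX_SETTINGS placeholder are illustrative, standing in for the method and settings body elided in the hunks.

    # Sketch only: INDEX_SETTINGS stands in for the real settings/mappings body.
    import asyncio
    from elasticsearch import AsyncElasticsearch, ConnectionError

    INDEX_SETTINGS = {"settings": {}}

    async def create_when_ready(client: AsyncElasticsearch, index: str) -> bool:
        # Poll until the cluster answers instead of failing on the first
        # refused connection.
        while True:
            try:
                await client.cluster.health(wait_for_status='yellow')
                break
            except ConnectionError:
                await asyncio.sleep(1)
        # ignore=400 swallows resource_already_exists_exception; in that case
        # the response body carries no 'acknowledged' key, so the return value
        # doubles as an "index was created just now" flag.
        res = await client.indices.create(index, INDEX_SETTINGS, ignore=400)
        return res.get('acknowledged', False)

Because ignore=400 converts an already-exists error into a plain response body rather than an exception, a falsy return is what lets the caller treat the index as already initialized.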

File 2 of 2: the elastic sync script

@@ -1,5 +1,6 @@
 import argparse
 import asyncio
+import logging
 from collections import namedtuple
 from multiprocessing import Process
@@ -13,6 +14,7 @@ INDEX = 'claims'
 async def get_all(db, shard_num, shards_total):
+    logging.info("shard %d starting", shard_num)
     def exec_factory(cursor, statement, bindings):
         tpl = namedtuple('row', (d[0] for d in cursor.getdescription()))
         cursor.setrowtrace(lambda cursor, row: tpl(*row))
@@ -35,7 +37,7 @@ WHERE claim.height % {shards_total} = {shard_num}
         claim['tags'] = claim['tags'].split(',,') if claim['tags'] else []
         claim['languages'] = claim['languages'].split(' ') if claim['languages'] else []
         if num % 10_000 == 0:
-            print(num, total)
+            logging.info("%d/%d", num, total)
         yield extract_doc(claim, INDEX)
@@ -49,26 +51,21 @@ async def consume(producer):
 async def make_es_index():
-    es = AsyncElasticsearch()
-    try:
-        if await es.indices.exists(index=INDEX):
-            print("already synced ES")
-            return 1
-        index = SearchIndex('')
-        await index.start()
-        await index.stop()
-        return 0
-    finally:
-        await es.close()
+    index = SearchIndex('')
+    try:
+        return await index.start()
+    finally:
+        index.stop()


 async def run(args, shard):
+    def itsbusy(*args):
+        logging.info("shard %d: db is busy, retry", shard)
+        return True
     db = apsw.Connection(args.db_path, flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI)
+    db.setbusyhandler(itsbusy)
     db.cursor().execute('pragma journal_mode=wal;')
     db.cursor().execute('pragma temp_store=memory;')

     producer = get_all(db.cursor(), shard, args.clients)
     await asyncio.gather(*(consume(producer) for _ in range(min(8, args.clients))))
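The new busy handler is what lets many reader processes share one SQLite file without aborting on transient locks. A sketch of the mechanism, as I understand apsw's API (the helper name open_readonly is illustrative): apsw invokes the handler when a lock is contended, and a True return tells SQLite to retry instead of raising BusyError.

    # Sketch of the apsw busy-handler pattern used in the hunk above.
    import logging
    import apsw

    def open_readonly(db_path: str, shard: int) -> apsw.Connection:
        db = apsw.Connection(db_path, flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI)

        def itsbusy(*args):
            logging.info("shard %d: db is busy, retry", shard)
            return True  # keep retrying rather than aborting the shard

        db.setbusyhandler(itsbusy)
        return db

Combined with pragma journal_mode=wal, this lets the shard readers tolerate brief contention from a concurrent writer.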
@@ -78,26 +75,18 @@ def __run(args, shard):
     asyncio.run(run(args, shard))


-def __make_index():
-    return asyncio.run(make_es_index())
-
-
 def run_elastic_sync():
+    logging.basicConfig(level=logging.INFO)
+    logging.info('lbry.server starting')
     parser = argparse.ArgumentParser()
     parser.add_argument("db_path", type=str)
     parser.add_argument("-c", "--clients", type=int, default=16)
     args = parser.parse_args()

     processes = []
-    init_proc = Process(target=__make_index, args=())
-    init_proc.start()
-    init_proc.join()
-    exitcode = init_proc.exitcode
-    init_proc.close()
-    if exitcode:
-        print("ES is already initialized")
+    if not asyncio.run(make_es_index()):
+        logging.info("ES is already initialized")
         return
-    print("bulk-loading ES")
     for i in range(args.clients):
         processes.append(Process(target=__run, args=(args, i)))
         processes[-1].start()
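The net effect on the entry point: index initialization now runs in-process via asyncio.run, and its boolean return replaces the subprocess exit-code plumbing. A condensed sketch of the resulting control flow, with names mirroring the diff; the worker join at the end is an assumption added for completeness, not shown in the hunks.

    # Sketch of the final startup flow; make_es_index and __run are the
    # functions from the diff above.
    import asyncio
    import logging
    from multiprocessing import Process

    def run_elastic_sync_sketch(args):
        logging.basicConfig(level=logging.INFO)
        # True only when the index was created just now (see SearchIndex.start()).
        if not asyncio.run(make_es_index()):
            logging.info("ES is already initialized")
            return
        workers = [Process(target=__run, args=(args, i)) for i in range(args.clients)]
        for w in workers:
            w.start()
        for w in workers:
            w.join()  # assumed: wait for every shard to finish bulk-loading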