check ES synced without a process and wait for ES
This commit is contained in:
parent
24d11de5a7
commit
67817005b5
2 changed files with 16 additions and 26 deletions
|
@ -35,7 +35,7 @@ class SearchIndex:
|
||||||
except ConnectionError:
|
except ConnectionError:
|
||||||
self.logger.warning("Failed to connect to Elasticsearch. Waiting for it!")
|
self.logger.warning("Failed to connect to Elasticsearch. Waiting for it!")
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
await self.client.indices.create(
|
res = await self.client.indices.create(
|
||||||
self.index,
|
self.index,
|
||||||
{
|
{
|
||||||
"settings":
|
"settings":
|
||||||
|
@ -70,6 +70,7 @@ class SearchIndex:
|
||||||
}
|
}
|
||||||
}, ignore=400
|
}, ignore=400
|
||||||
)
|
)
|
||||||
|
return res.get('acknowledged', False)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
client = self.client
|
client = self.client
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import logging
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from multiprocessing import Process
|
from multiprocessing import Process
|
||||||
|
|
||||||
|
@ -13,6 +14,7 @@ INDEX = 'claims'
|
||||||
|
|
||||||
|
|
||||||
async def get_all(db, shard_num, shards_total):
|
async def get_all(db, shard_num, shards_total):
|
||||||
|
logging.info("shard %d starting", shard_num)
|
||||||
def exec_factory(cursor, statement, bindings):
|
def exec_factory(cursor, statement, bindings):
|
||||||
tpl = namedtuple('row', (d[0] for d in cursor.getdescription()))
|
tpl = namedtuple('row', (d[0] for d in cursor.getdescription()))
|
||||||
cursor.setrowtrace(lambda cursor, row: tpl(*row))
|
cursor.setrowtrace(lambda cursor, row: tpl(*row))
|
||||||
|
@ -35,7 +37,7 @@ WHERE claim.height % {shards_total} = {shard_num}
|
||||||
claim['tags'] = claim['tags'].split(',,') if claim['tags'] else []
|
claim['tags'] = claim['tags'].split(',,') if claim['tags'] else []
|
||||||
claim['languages'] = claim['languages'].split(' ') if claim['languages'] else []
|
claim['languages'] = claim['languages'].split(' ') if claim['languages'] else []
|
||||||
if num % 10_000 == 0:
|
if num % 10_000 == 0:
|
||||||
print(num, total)
|
logging.info("%d/%d", num, total)
|
||||||
yield extract_doc(claim, INDEX)
|
yield extract_doc(claim, INDEX)
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,26 +51,21 @@ async def consume(producer):
|
||||||
|
|
||||||
|
|
||||||
async def make_es_index():
|
async def make_es_index():
|
||||||
es = AsyncElasticsearch()
|
index = SearchIndex('')
|
||||||
try:
|
try:
|
||||||
if await es.indices.exists(index=INDEX):
|
return await index.start()
|
||||||
print("already synced ES")
|
|
||||||
return 1
|
|
||||||
index = SearchIndex('')
|
|
||||||
await index.start()
|
|
||||||
await index.stop()
|
|
||||||
return 0
|
|
||||||
finally:
|
finally:
|
||||||
await es.close()
|
index.stop()
|
||||||
|
|
||||||
|
|
||||||
async def run(args, shard):
|
async def run(args, shard):
|
||||||
|
def itsbusy():
|
||||||
|
logging.info("shard %d: db is busy, retry")
|
||||||
|
return True
|
||||||
db = apsw.Connection(args.db_path, flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI)
|
db = apsw.Connection(args.db_path, flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI)
|
||||||
|
db.setbusyhandler(itsbusy)
|
||||||
db.cursor().execute('pragma journal_mode=wal;')
|
db.cursor().execute('pragma journal_mode=wal;')
|
||||||
db.cursor().execute('pragma temp_store=memory;')
|
db.cursor().execute('pragma temp_store=memory;')
|
||||||
index = SearchIndex('')
|
|
||||||
await index.start()
|
|
||||||
await index.stop()
|
|
||||||
|
|
||||||
producer = get_all(db.cursor(), shard, args.clients)
|
producer = get_all(db.cursor(), shard, args.clients)
|
||||||
await asyncio.gather(*(consume(producer) for _ in range(min(8, args.clients))))
|
await asyncio.gather(*(consume(producer) for _ in range(min(8, args.clients))))
|
||||||
|
@ -78,26 +75,18 @@ def __run(args, shard):
|
||||||
asyncio.run(run(args, shard))
|
asyncio.run(run(args, shard))
|
||||||
|
|
||||||
|
|
||||||
def __make_index():
|
|
||||||
return asyncio.run(make_es_index())
|
|
||||||
|
|
||||||
|
|
||||||
def run_elastic_sync():
|
def run_elastic_sync():
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logging.info('lbry.server starting')
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("db_path", type=str)
|
parser.add_argument("db_path", type=str)
|
||||||
parser.add_argument("-c", "--clients", type=int, default=16)
|
parser.add_argument("-c", "--clients", type=int, default=16)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
processes = []
|
processes = []
|
||||||
|
|
||||||
init_proc = Process(target=__make_index, args=())
|
if not asyncio.run(make_es_index()):
|
||||||
init_proc.start()
|
logging.info("ES is already initialized")
|
||||||
init_proc.join()
|
|
||||||
exitcode = init_proc.exitcode
|
|
||||||
init_proc.close()
|
|
||||||
if exitcode:
|
|
||||||
print("ES is already initialized")
|
|
||||||
return
|
return
|
||||||
print("bulk-loading ES")
|
|
||||||
for i in range(args.clients):
|
for i in range(args.clients):
|
||||||
processes.append(Process(target=__run, args=(args, i)))
|
processes.append(Process(target=__run, args=(args, i)))
|
||||||
processes[-1].start()
|
processes[-1].start()
|
||||||
|
|
Loading…
Add table
Reference in a new issue