full text search in wallet server
This commit is contained in:
parent
37161057c6
commit
48a18ff771
5 changed files with 119 additions and 6 deletions
|
@ -2094,7 +2094,7 @@ class Daemon(metaclass=JSONRPCServerType):
|
|||
eg. --height=">400000" would limit results to only claims above 400k block height.
|
||||
|
||||
Usage:
|
||||
claim_search [<name> | --name=<name>] [--txid=<txid>] [--nout=<nout>]
|
||||
claim_search [<name> | --name=<name>] [--text=<text>] [--txid=<txid>] [--nout=<nout>]
|
||||
[--claim_id=<claim_id> | --claim_ids=<claim_ids>...]
|
||||
[--channel=<channel> |
|
||||
[[--channel_ids=<channel_ids>...] [--not_channel_ids=<not_channel_ids>...]]]
|
||||
|
@ -2119,6 +2119,7 @@ class Daemon(metaclass=JSONRPCServerType):
|
|||
|
||||
Options:
|
||||
--name=<name> : (str) claim name (normalized)
|
||||
--text=<text> : (str) full text search
|
||||
--claim_id=<claim_id> : (str) full or partial claim id
|
||||
--claim_ids=<claim_ids> : (list) list of full claim ids
|
||||
--txid=<txid> : (str) transaction id
|
||||
|
|
53
lbry/lbry/wallet/server/db/full_text_search.py
Normal file
53
lbry/lbry/wallet/server/db/full_text_search.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
import sqlite3
|
||||
from torba.client.basedatabase import constraints_to_sql
|
||||
|
||||
CREATE_FULL_TEXT_SEARCH = """
|
||||
create virtual table if not exists search using fts5(
|
||||
claim_name, channel_name, title, description, author, tags,
|
||||
content=claim, tokenize=porter
|
||||
);
|
||||
"""
|
||||
|
||||
FTS_ORDER_BY = "bm25(search, 4.0, 8.0, 1.0, 0.5, 1.0, 0.5)"
|
||||
|
||||
|
||||
def fts_action_sql(claims=None, action='insert'):
|
||||
select = {
|
||||
'rowid': "claim.rowid",
|
||||
'claim_name': "claim.normalized",
|
||||
'channel_name': "channel.normalized",
|
||||
'title': "claim.title",
|
||||
'description': "claim.description",
|
||||
'author': "claim.author",
|
||||
'tags': "(select group_concat(tag, ' ') from tag where tag.claim_hash=claim.claim_hash)"
|
||||
}
|
||||
if action == 'delete':
|
||||
select['search'] = 'delete'
|
||||
|
||||
where, values = "", {}
|
||||
if claims:
|
||||
where, values = constraints_to_sql({
|
||||
'claim.claim_hash__in': [sqlite3.Binary(claim_hash) for claim_hash in claims]
|
||||
})
|
||||
where = 'WHERE '+where
|
||||
|
||||
return f"""
|
||||
INSERT INTO search ({','.join(select.keys())})
|
||||
SELECT {','.join(select.values())} FROM claim
|
||||
LEFT JOIN claim as channel ON (claim.channel_hash=channel.claim_hash) {where}
|
||||
""", values
|
||||
|
||||
|
||||
def update_full_text_search(action, outputs, db, height, final_height, is_first_sync):
|
||||
if is_first_sync:
|
||||
if height == final_height:
|
||||
db.execute(*fts_action_sql())
|
||||
return
|
||||
if not outputs:
|
||||
return
|
||||
if action in ("before-delete", "before-update"):
|
||||
db.execute(*fts_action_sql(outputs, 'delete'))
|
||||
elif action in ("after-insert", "after-update"):
|
||||
db.execute(*fts_action_sql(outputs, 'insert'))
|
||||
else:
|
||||
raise ValueError(f"Invalid action for updating full text search: '{action}'")
|
|
@ -18,6 +18,7 @@ from lbry.schema.result import Outputs
|
|||
from lbry.wallet.ledger import BaseLedger, MainNetLedger, RegTestLedger
|
||||
|
||||
from .common import CLAIM_TYPES, STREAM_TYPES, COMMON_TAGS
|
||||
from .full_text_search import FTS_ORDER_BY
|
||||
|
||||
|
||||
class SQLiteOperationalError(sqlite3.OperationalError):
|
||||
|
@ -45,7 +46,7 @@ INTEGER_PARAMS = {
|
|||
}
|
||||
|
||||
SEARCH_PARAMS = {
|
||||
'name', 'claim_id', 'claim_ids', 'txid', 'nout', 'channel', 'channel_ids', 'not_channel_ids',
|
||||
'name', 'text', 'claim_id', 'claim_ids', 'txid', 'nout', 'channel', 'channel_ids', 'not_channel_ids',
|
||||
'public_key_id', 'claim_type', 'stream_types', 'media_types', 'fee_currency',
|
||||
'has_channel_signature', 'signature_valid',
|
||||
'any_tags', 'all_tags', 'not_tags',
|
||||
|
@ -295,6 +296,11 @@ def _get_claims(cols, for_count=False, **constraints) -> Tuple[str, Dict]:
|
|||
_apply_constraints_for_array_attributes(constraints, 'language', lambda _: _, for_count)
|
||||
_apply_constraints_for_array_attributes(constraints, 'location', lambda _: _, for_count)
|
||||
|
||||
if 'text' in constraints:
|
||||
constraints["search"] = constraints.pop("text")
|
||||
constraints["order_by"] = FTS_ORDER_BY
|
||||
select = f"SELECT {cols} FROM search JOIN claim ON (search.rowid=claim.rowid)"
|
||||
else:
|
||||
select = f"SELECT {cols} FROM claim"
|
||||
|
||||
sql, values = query(
|
||||
|
|
|
@ -13,6 +13,7 @@ from lbry.schema.mime_types import guess_stream_type
|
|||
from lbry.wallet.ledger import MainNetLedger, RegTestLedger
|
||||
from lbry.wallet.transaction import Transaction, Output
|
||||
from lbry.wallet.server.db.canonical import register_canonical_functions
|
||||
from lbry.wallet.server.db.full_text_search import update_full_text_search, CREATE_FULL_TEXT_SEARCH
|
||||
from lbry.wallet.server.db.trending import (
|
||||
CREATE_TREND_TABLE, calculate_trending, register_trending_functions
|
||||
)
|
||||
|
@ -49,6 +50,10 @@ class SQLDB:
|
|||
short_url text not null, -- normalized#shortest-unique-claim_id
|
||||
canonical_url text, -- channel's-short_url/normalized#shortest-unique-claim_id-within-channel
|
||||
|
||||
title text,
|
||||
author text,
|
||||
description text,
|
||||
|
||||
claim_type integer,
|
||||
|
||||
-- streams
|
||||
|
@ -151,6 +156,7 @@ class SQLDB:
|
|||
PRAGMAS +
|
||||
CREATE_CLAIM_TABLE +
|
||||
CREATE_TREND_TABLE +
|
||||
CREATE_FULL_TEXT_SEARCH +
|
||||
CREATE_SUPPORT_TABLE +
|
||||
CREATE_CLAIMTRIE_TABLE +
|
||||
CREATE_TAG_TABLE
|
||||
|
@ -233,12 +239,15 @@ class SQLDB:
|
|||
'amount': txo.amount,
|
||||
'timestamp': header['timestamp'],
|
||||
'height': tx.height,
|
||||
'title': None,
|
||||
'description': None,
|
||||
'author': None,
|
||||
'claim_type': None,
|
||||
'stream_type': None,
|
||||
'media_type': None,
|
||||
'release_time': None,
|
||||
'fee_currency': None,
|
||||
'fee_amount': 0
|
||||
'fee_amount': 0,
|
||||
}
|
||||
claims.append(claim_record)
|
||||
|
||||
|
@ -252,6 +261,9 @@ class SQLDB:
|
|||
claim_record['claim_type'] = CLAIM_TYPES['stream']
|
||||
claim_record['media_type'] = claim.stream.source.media_type
|
||||
claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(claim_record['media_type'])]
|
||||
claim_record['title'] = claim.stream.title
|
||||
claim_record['description'] = claim.stream.description
|
||||
claim_record['author'] = claim.stream.author
|
||||
if claim.stream.release_time:
|
||||
claim_record['release_time'] = claim.stream.release_time
|
||||
if claim.stream.has_fee:
|
||||
|
@ -283,12 +295,12 @@ class SQLDB:
|
|||
INSERT OR IGNORE INTO claim (
|
||||
claim_hash, claim_id, claim_name, normalized, txo_hash, tx_position, amount,
|
||||
claim_type, media_type, stream_type, timestamp, creation_timestamp,
|
||||
fee_currency, fee_amount, height,
|
||||
fee_currency, fee_amount, title, description, author, height,
|
||||
creation_height, release_time, activation_height, expiration_height, short_url)
|
||||
VALUES (
|
||||
:claim_hash, :claim_id, :claim_name, :normalized, :txo_hash, :tx_position, :amount,
|
||||
:claim_type, :media_type, :stream_type, :timestamp, :timestamp,
|
||||
:fee_currency, :fee_amount, :height, :height,
|
||||
:fee_currency, :fee_amount, :title, :description, :author, :height, :height,
|
||||
CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE :timestamp END,
|
||||
CASE WHEN :normalized NOT IN (SELECT normalized FROM claimtrie) THEN :height END,
|
||||
CASE WHEN :height >= 137181 THEN :height+2102400 ELSE :height+262974 END,
|
||||
|
@ -306,6 +318,7 @@ class SQLDB:
|
|||
txo_hash=:txo_hash, tx_position=:tx_position, amount=:amount, height=:height,
|
||||
claim_type=:claim_type, media_type=:media_type, stream_type=:stream_type,
|
||||
timestamp=:timestamp, fee_amount=:fee_amount, fee_currency=:fee_currency,
|
||||
title=:title, description=:description, author=:author,
|
||||
release_time=CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE release_time END
|
||||
WHERE claim_hash=:claim_hash;
|
||||
""", claims)
|
||||
|
@ -692,10 +705,18 @@ class SQLDB:
|
|||
expire_timer.stop()
|
||||
|
||||
r = timer.run
|
||||
r(update_full_text_search, 'before-delete',
|
||||
delete_claim_hashes, self.db, height, daemon_height, self.main.first_sync)
|
||||
affected_channels = r(self.delete_claims, delete_claim_hashes)
|
||||
r(self.delete_supports, delete_support_txo_hashes)
|
||||
r(self.insert_claims, insert_claims, header)
|
||||
r(update_full_text_search, 'after-insert',
|
||||
[txo.claim_hash for txo in insert_claims], self.db, height, daemon_height, self.main.first_sync)
|
||||
r(update_full_text_search, 'before-update',
|
||||
[txo.claim_hash for txo in update_claims], self.db, height, daemon_height, self.main.first_sync)
|
||||
r(self.update_claims, update_claims, header)
|
||||
r(update_full_text_search, 'after-update',
|
||||
[txo.claim_hash for txo in update_claims], self.db, height, daemon_height, self.main.first_sync)
|
||||
r(self.validate_channel_signatures, height, insert_claims,
|
||||
update_claims, delete_claim_hashes, affected_channels, forward_timer=True)
|
||||
r(self.insert_supports, insert_supports)
|
||||
|
|
|
@ -328,6 +328,38 @@ class ClaimSearchCommand(ClaimTestCase):
|
|||
await self.assertFindsClaims([image], media_types=['image/png'])
|
||||
await self.assertFindsClaims([image, video], media_types=['video/mp4', 'image/png'])
|
||||
|
||||
async def test_search_by_text(self):
|
||||
chan1_id = self.get_claim_id(await self.channel_create('@SatoshiNakamoto'))
|
||||
chan2_id = self.get_claim_id(await self.channel_create('@Bitcoin'))
|
||||
chan3_id = self.get_claim_id(await self.channel_create('@IAmSatoshi'))
|
||||
|
||||
claim1 = await self.stream_create(
|
||||
"the-real-satoshi", title="The Real Satoshi Nakamoto",
|
||||
description="Documentary about the real Satoshi Nakamoto, creator of bitcoin.",
|
||||
tags=['satoshi nakamoto', 'bitcoin', 'documentary']
|
||||
)
|
||||
claim2 = await self.stream_create(
|
||||
"about-me", channel_id=chan1_id, title="Satoshi Nakamoto Autobiography",
|
||||
description="I am Satoshi Nakamoto and this is my autobiography.",
|
||||
tags=['satoshi nakamoto', 'bitcoin', 'documentary', 'autobiography']
|
||||
)
|
||||
claim3 = await self.stream_create(
|
||||
"history-of-bitcoin", channel_id=chan2_id, title="History of Bitcoin",
|
||||
description="History of bitcoin and its creator Satoshi Nakamoto.",
|
||||
tags=['satoshi nakamoto', 'bitcoin', 'documentary', 'history']
|
||||
)
|
||||
claim4 = await self.stream_create(
|
||||
"satoshi-conspiracies", channel_id=chan3_id, title="Satoshi Nakamoto Conspiracies",
|
||||
description="Documentary detailing various conspiracies surrounding Satoshi Nakamoto.",
|
||||
tags=['conspiracies', 'bitcoin', 'satoshi nakamoto']
|
||||
)
|
||||
|
||||
await self.assertFindsClaims([], text='cheese')
|
||||
await self.assertFindsClaims([claim3], text='history')
|
||||
await self.assertFindsClaims([claim4], text='conspiracy')
|
||||
await self.assertFindsClaims([claim1, claim4, claim2, claim3], text='documentary')
|
||||
await self.assertFindsClaims([claim4, claim1, claim2, claim3], text='satoshi')
|
||||
|
||||
|
||||
class ChannelCommands(CommandTestCase):
|
||||
|
||||
|
|
Loading…
Reference in a new issue