full text search in wallet server
This commit is contained in:
parent
37161057c6
commit
48a18ff771
5 changed files with 119 additions and 6 deletions
|
@ -2094,7 +2094,7 @@ class Daemon(metaclass=JSONRPCServerType):
|
||||||
eg. --height=">400000" would limit results to only claims above 400k block height.
|
eg. --height=">400000" would limit results to only claims above 400k block height.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
claim_search [<name> | --name=<name>] [--txid=<txid>] [--nout=<nout>]
|
claim_search [<name> | --name=<name>] [--text=<text>] [--txid=<txid>] [--nout=<nout>]
|
||||||
[--claim_id=<claim_id> | --claim_ids=<claim_ids>...]
|
[--claim_id=<claim_id> | --claim_ids=<claim_ids>...]
|
||||||
[--channel=<channel> |
|
[--channel=<channel> |
|
||||||
[[--channel_ids=<channel_ids>...] [--not_channel_ids=<not_channel_ids>...]]]
|
[[--channel_ids=<channel_ids>...] [--not_channel_ids=<not_channel_ids>...]]]
|
||||||
|
@ -2119,6 +2119,7 @@ class Daemon(metaclass=JSONRPCServerType):
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--name=<name> : (str) claim name (normalized)
|
--name=<name> : (str) claim name (normalized)
|
||||||
|
--text=<text> : (str) full text search
|
||||||
--claim_id=<claim_id> : (str) full or partial claim id
|
--claim_id=<claim_id> : (str) full or partial claim id
|
||||||
--claim_ids=<claim_ids> : (list) list of full claim ids
|
--claim_ids=<claim_ids> : (list) list of full claim ids
|
||||||
--txid=<txid> : (str) transaction id
|
--txid=<txid> : (str) transaction id
|
||||||
|
|
53
lbry/lbry/wallet/server/db/full_text_search.py
Normal file
53
lbry/lbry/wallet/server/db/full_text_search.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
import sqlite3
|
||||||
|
from torba.client.basedatabase import constraints_to_sql
|
||||||
|
|
||||||
|
CREATE_FULL_TEXT_SEARCH = """
|
||||||
|
create virtual table if not exists search using fts5(
|
||||||
|
claim_name, channel_name, title, description, author, tags,
|
||||||
|
content=claim, tokenize=porter
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
|
FTS_ORDER_BY = "bm25(search, 4.0, 8.0, 1.0, 0.5, 1.0, 0.5)"
|
||||||
|
|
||||||
|
|
||||||
|
def fts_action_sql(claims=None, action='insert'):
|
||||||
|
select = {
|
||||||
|
'rowid': "claim.rowid",
|
||||||
|
'claim_name': "claim.normalized",
|
||||||
|
'channel_name': "channel.normalized",
|
||||||
|
'title': "claim.title",
|
||||||
|
'description': "claim.description",
|
||||||
|
'author': "claim.author",
|
||||||
|
'tags': "(select group_concat(tag, ' ') from tag where tag.claim_hash=claim.claim_hash)"
|
||||||
|
}
|
||||||
|
if action == 'delete':
|
||||||
|
select['search'] = 'delete'
|
||||||
|
|
||||||
|
where, values = "", {}
|
||||||
|
if claims:
|
||||||
|
where, values = constraints_to_sql({
|
||||||
|
'claim.claim_hash__in': [sqlite3.Binary(claim_hash) for claim_hash in claims]
|
||||||
|
})
|
||||||
|
where = 'WHERE '+where
|
||||||
|
|
||||||
|
return f"""
|
||||||
|
INSERT INTO search ({','.join(select.keys())})
|
||||||
|
SELECT {','.join(select.values())} FROM claim
|
||||||
|
LEFT JOIN claim as channel ON (claim.channel_hash=channel.claim_hash) {where}
|
||||||
|
""", values
|
||||||
|
|
||||||
|
|
||||||
|
def update_full_text_search(action, outputs, db, height, final_height, is_first_sync):
|
||||||
|
if is_first_sync:
|
||||||
|
if height == final_height:
|
||||||
|
db.execute(*fts_action_sql())
|
||||||
|
return
|
||||||
|
if not outputs:
|
||||||
|
return
|
||||||
|
if action in ("before-delete", "before-update"):
|
||||||
|
db.execute(*fts_action_sql(outputs, 'delete'))
|
||||||
|
elif action in ("after-insert", "after-update"):
|
||||||
|
db.execute(*fts_action_sql(outputs, 'insert'))
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid action for updating full text search: '{action}'")
|
|
@ -18,6 +18,7 @@ from lbry.schema.result import Outputs
|
||||||
from lbry.wallet.ledger import BaseLedger, MainNetLedger, RegTestLedger
|
from lbry.wallet.ledger import BaseLedger, MainNetLedger, RegTestLedger
|
||||||
|
|
||||||
from .common import CLAIM_TYPES, STREAM_TYPES, COMMON_TAGS
|
from .common import CLAIM_TYPES, STREAM_TYPES, COMMON_TAGS
|
||||||
|
from .full_text_search import FTS_ORDER_BY
|
||||||
|
|
||||||
|
|
||||||
class SQLiteOperationalError(sqlite3.OperationalError):
|
class SQLiteOperationalError(sqlite3.OperationalError):
|
||||||
|
@ -45,7 +46,7 @@ INTEGER_PARAMS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
SEARCH_PARAMS = {
|
SEARCH_PARAMS = {
|
||||||
'name', 'claim_id', 'claim_ids', 'txid', 'nout', 'channel', 'channel_ids', 'not_channel_ids',
|
'name', 'text', 'claim_id', 'claim_ids', 'txid', 'nout', 'channel', 'channel_ids', 'not_channel_ids',
|
||||||
'public_key_id', 'claim_type', 'stream_types', 'media_types', 'fee_currency',
|
'public_key_id', 'claim_type', 'stream_types', 'media_types', 'fee_currency',
|
||||||
'has_channel_signature', 'signature_valid',
|
'has_channel_signature', 'signature_valid',
|
||||||
'any_tags', 'all_tags', 'not_tags',
|
'any_tags', 'all_tags', 'not_tags',
|
||||||
|
@ -295,6 +296,11 @@ def _get_claims(cols, for_count=False, **constraints) -> Tuple[str, Dict]:
|
||||||
_apply_constraints_for_array_attributes(constraints, 'language', lambda _: _, for_count)
|
_apply_constraints_for_array_attributes(constraints, 'language', lambda _: _, for_count)
|
||||||
_apply_constraints_for_array_attributes(constraints, 'location', lambda _: _, for_count)
|
_apply_constraints_for_array_attributes(constraints, 'location', lambda _: _, for_count)
|
||||||
|
|
||||||
|
if 'text' in constraints:
|
||||||
|
constraints["search"] = constraints.pop("text")
|
||||||
|
constraints["order_by"] = FTS_ORDER_BY
|
||||||
|
select = f"SELECT {cols} FROM search JOIN claim ON (search.rowid=claim.rowid)"
|
||||||
|
else:
|
||||||
select = f"SELECT {cols} FROM claim"
|
select = f"SELECT {cols} FROM claim"
|
||||||
|
|
||||||
sql, values = query(
|
sql, values = query(
|
||||||
|
|
|
@ -13,6 +13,7 @@ from lbry.schema.mime_types import guess_stream_type
|
||||||
from lbry.wallet.ledger import MainNetLedger, RegTestLedger
|
from lbry.wallet.ledger import MainNetLedger, RegTestLedger
|
||||||
from lbry.wallet.transaction import Transaction, Output
|
from lbry.wallet.transaction import Transaction, Output
|
||||||
from lbry.wallet.server.db.canonical import register_canonical_functions
|
from lbry.wallet.server.db.canonical import register_canonical_functions
|
||||||
|
from lbry.wallet.server.db.full_text_search import update_full_text_search, CREATE_FULL_TEXT_SEARCH
|
||||||
from lbry.wallet.server.db.trending import (
|
from lbry.wallet.server.db.trending import (
|
||||||
CREATE_TREND_TABLE, calculate_trending, register_trending_functions
|
CREATE_TREND_TABLE, calculate_trending, register_trending_functions
|
||||||
)
|
)
|
||||||
|
@ -49,6 +50,10 @@ class SQLDB:
|
||||||
short_url text not null, -- normalized#shortest-unique-claim_id
|
short_url text not null, -- normalized#shortest-unique-claim_id
|
||||||
canonical_url text, -- channel's-short_url/normalized#shortest-unique-claim_id-within-channel
|
canonical_url text, -- channel's-short_url/normalized#shortest-unique-claim_id-within-channel
|
||||||
|
|
||||||
|
title text,
|
||||||
|
author text,
|
||||||
|
description text,
|
||||||
|
|
||||||
claim_type integer,
|
claim_type integer,
|
||||||
|
|
||||||
-- streams
|
-- streams
|
||||||
|
@ -151,6 +156,7 @@ class SQLDB:
|
||||||
PRAGMAS +
|
PRAGMAS +
|
||||||
CREATE_CLAIM_TABLE +
|
CREATE_CLAIM_TABLE +
|
||||||
CREATE_TREND_TABLE +
|
CREATE_TREND_TABLE +
|
||||||
|
CREATE_FULL_TEXT_SEARCH +
|
||||||
CREATE_SUPPORT_TABLE +
|
CREATE_SUPPORT_TABLE +
|
||||||
CREATE_CLAIMTRIE_TABLE +
|
CREATE_CLAIMTRIE_TABLE +
|
||||||
CREATE_TAG_TABLE
|
CREATE_TAG_TABLE
|
||||||
|
@ -233,12 +239,15 @@ class SQLDB:
|
||||||
'amount': txo.amount,
|
'amount': txo.amount,
|
||||||
'timestamp': header['timestamp'],
|
'timestamp': header['timestamp'],
|
||||||
'height': tx.height,
|
'height': tx.height,
|
||||||
|
'title': None,
|
||||||
|
'description': None,
|
||||||
|
'author': None,
|
||||||
'claim_type': None,
|
'claim_type': None,
|
||||||
'stream_type': None,
|
'stream_type': None,
|
||||||
'media_type': None,
|
'media_type': None,
|
||||||
'release_time': None,
|
'release_time': None,
|
||||||
'fee_currency': None,
|
'fee_currency': None,
|
||||||
'fee_amount': 0
|
'fee_amount': 0,
|
||||||
}
|
}
|
||||||
claims.append(claim_record)
|
claims.append(claim_record)
|
||||||
|
|
||||||
|
@ -252,6 +261,9 @@ class SQLDB:
|
||||||
claim_record['claim_type'] = CLAIM_TYPES['stream']
|
claim_record['claim_type'] = CLAIM_TYPES['stream']
|
||||||
claim_record['media_type'] = claim.stream.source.media_type
|
claim_record['media_type'] = claim.stream.source.media_type
|
||||||
claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(claim_record['media_type'])]
|
claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(claim_record['media_type'])]
|
||||||
|
claim_record['title'] = claim.stream.title
|
||||||
|
claim_record['description'] = claim.stream.description
|
||||||
|
claim_record['author'] = claim.stream.author
|
||||||
if claim.stream.release_time:
|
if claim.stream.release_time:
|
||||||
claim_record['release_time'] = claim.stream.release_time
|
claim_record['release_time'] = claim.stream.release_time
|
||||||
if claim.stream.has_fee:
|
if claim.stream.has_fee:
|
||||||
|
@ -283,12 +295,12 @@ class SQLDB:
|
||||||
INSERT OR IGNORE INTO claim (
|
INSERT OR IGNORE INTO claim (
|
||||||
claim_hash, claim_id, claim_name, normalized, txo_hash, tx_position, amount,
|
claim_hash, claim_id, claim_name, normalized, txo_hash, tx_position, amount,
|
||||||
claim_type, media_type, stream_type, timestamp, creation_timestamp,
|
claim_type, media_type, stream_type, timestamp, creation_timestamp,
|
||||||
fee_currency, fee_amount, height,
|
fee_currency, fee_amount, title, description, author, height,
|
||||||
creation_height, release_time, activation_height, expiration_height, short_url)
|
creation_height, release_time, activation_height, expiration_height, short_url)
|
||||||
VALUES (
|
VALUES (
|
||||||
:claim_hash, :claim_id, :claim_name, :normalized, :txo_hash, :tx_position, :amount,
|
:claim_hash, :claim_id, :claim_name, :normalized, :txo_hash, :tx_position, :amount,
|
||||||
:claim_type, :media_type, :stream_type, :timestamp, :timestamp,
|
:claim_type, :media_type, :stream_type, :timestamp, :timestamp,
|
||||||
:fee_currency, :fee_amount, :height, :height,
|
:fee_currency, :fee_amount, :title, :description, :author, :height, :height,
|
||||||
CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE :timestamp END,
|
CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE :timestamp END,
|
||||||
CASE WHEN :normalized NOT IN (SELECT normalized FROM claimtrie) THEN :height END,
|
CASE WHEN :normalized NOT IN (SELECT normalized FROM claimtrie) THEN :height END,
|
||||||
CASE WHEN :height >= 137181 THEN :height+2102400 ELSE :height+262974 END,
|
CASE WHEN :height >= 137181 THEN :height+2102400 ELSE :height+262974 END,
|
||||||
|
@ -306,6 +318,7 @@ class SQLDB:
|
||||||
txo_hash=:txo_hash, tx_position=:tx_position, amount=:amount, height=:height,
|
txo_hash=:txo_hash, tx_position=:tx_position, amount=:amount, height=:height,
|
||||||
claim_type=:claim_type, media_type=:media_type, stream_type=:stream_type,
|
claim_type=:claim_type, media_type=:media_type, stream_type=:stream_type,
|
||||||
timestamp=:timestamp, fee_amount=:fee_amount, fee_currency=:fee_currency,
|
timestamp=:timestamp, fee_amount=:fee_amount, fee_currency=:fee_currency,
|
||||||
|
title=:title, description=:description, author=:author,
|
||||||
release_time=CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE release_time END
|
release_time=CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE release_time END
|
||||||
WHERE claim_hash=:claim_hash;
|
WHERE claim_hash=:claim_hash;
|
||||||
""", claims)
|
""", claims)
|
||||||
|
@ -692,10 +705,18 @@ class SQLDB:
|
||||||
expire_timer.stop()
|
expire_timer.stop()
|
||||||
|
|
||||||
r = timer.run
|
r = timer.run
|
||||||
|
r(update_full_text_search, 'before-delete',
|
||||||
|
delete_claim_hashes, self.db, height, daemon_height, self.main.first_sync)
|
||||||
affected_channels = r(self.delete_claims, delete_claim_hashes)
|
affected_channels = r(self.delete_claims, delete_claim_hashes)
|
||||||
r(self.delete_supports, delete_support_txo_hashes)
|
r(self.delete_supports, delete_support_txo_hashes)
|
||||||
r(self.insert_claims, insert_claims, header)
|
r(self.insert_claims, insert_claims, header)
|
||||||
|
r(update_full_text_search, 'after-insert',
|
||||||
|
[txo.claim_hash for txo in insert_claims], self.db, height, daemon_height, self.main.first_sync)
|
||||||
|
r(update_full_text_search, 'before-update',
|
||||||
|
[txo.claim_hash for txo in update_claims], self.db, height, daemon_height, self.main.first_sync)
|
||||||
r(self.update_claims, update_claims, header)
|
r(self.update_claims, update_claims, header)
|
||||||
|
r(update_full_text_search, 'after-update',
|
||||||
|
[txo.claim_hash for txo in update_claims], self.db, height, daemon_height, self.main.first_sync)
|
||||||
r(self.validate_channel_signatures, height, insert_claims,
|
r(self.validate_channel_signatures, height, insert_claims,
|
||||||
update_claims, delete_claim_hashes, affected_channels, forward_timer=True)
|
update_claims, delete_claim_hashes, affected_channels, forward_timer=True)
|
||||||
r(self.insert_supports, insert_supports)
|
r(self.insert_supports, insert_supports)
|
||||||
|
|
|
@ -328,6 +328,38 @@ class ClaimSearchCommand(ClaimTestCase):
|
||||||
await self.assertFindsClaims([image], media_types=['image/png'])
|
await self.assertFindsClaims([image], media_types=['image/png'])
|
||||||
await self.assertFindsClaims([image, video], media_types=['video/mp4', 'image/png'])
|
await self.assertFindsClaims([image, video], media_types=['video/mp4', 'image/png'])
|
||||||
|
|
||||||
|
async def test_search_by_text(self):
|
||||||
|
chan1_id = self.get_claim_id(await self.channel_create('@SatoshiNakamoto'))
|
||||||
|
chan2_id = self.get_claim_id(await self.channel_create('@Bitcoin'))
|
||||||
|
chan3_id = self.get_claim_id(await self.channel_create('@IAmSatoshi'))
|
||||||
|
|
||||||
|
claim1 = await self.stream_create(
|
||||||
|
"the-real-satoshi", title="The Real Satoshi Nakamoto",
|
||||||
|
description="Documentary about the real Satoshi Nakamoto, creator of bitcoin.",
|
||||||
|
tags=['satoshi nakamoto', 'bitcoin', 'documentary']
|
||||||
|
)
|
||||||
|
claim2 = await self.stream_create(
|
||||||
|
"about-me", channel_id=chan1_id, title="Satoshi Nakamoto Autobiography",
|
||||||
|
description="I am Satoshi Nakamoto and this is my autobiography.",
|
||||||
|
tags=['satoshi nakamoto', 'bitcoin', 'documentary', 'autobiography']
|
||||||
|
)
|
||||||
|
claim3 = await self.stream_create(
|
||||||
|
"history-of-bitcoin", channel_id=chan2_id, title="History of Bitcoin",
|
||||||
|
description="History of bitcoin and its creator Satoshi Nakamoto.",
|
||||||
|
tags=['satoshi nakamoto', 'bitcoin', 'documentary', 'history']
|
||||||
|
)
|
||||||
|
claim4 = await self.stream_create(
|
||||||
|
"satoshi-conspiracies", channel_id=chan3_id, title="Satoshi Nakamoto Conspiracies",
|
||||||
|
description="Documentary detailing various conspiracies surrounding Satoshi Nakamoto.",
|
||||||
|
tags=['conspiracies', 'bitcoin', 'satoshi nakamoto']
|
||||||
|
)
|
||||||
|
|
||||||
|
await self.assertFindsClaims([], text='cheese')
|
||||||
|
await self.assertFindsClaims([claim3], text='history')
|
||||||
|
await self.assertFindsClaims([claim4], text='conspiracy')
|
||||||
|
await self.assertFindsClaims([claim1, claim4, claim2, claim3], text='documentary')
|
||||||
|
await self.assertFindsClaims([claim4, claim1, claim2, claim3], text='satoshi')
|
||||||
|
|
||||||
|
|
||||||
class ChannelCommands(CommandTestCase):
|
class ChannelCommands(CommandTestCase):
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue