Popular tags are indexed and use a different search strategy than less popular tags
This commit is contained in:
parent
8d5f50b695
commit
ad2df4871e
5 changed files with 272 additions and 39 deletions
|
@ -6,7 +6,7 @@ WEIRD_CHARS_RE = re.compile(r"[#!~]")
|
||||||
|
|
||||||
|
|
||||||
def normalize_tag(tag: str):
|
def normalize_tag(tag: str):
|
||||||
return MULTI_SPACE_RE.sub(' ', WEIRD_CHARS_RE.sub(' ', tag.lower())).strip()
|
return MULTI_SPACE_RE.sub(' ', WEIRD_CHARS_RE.sub(' ', tag.lower().replace("'", ""))).strip()
|
||||||
|
|
||||||
|
|
||||||
def clean_tags(tags: List[str]):
|
def clean_tags(tags: List[str]):
|
||||||
|
|
|
@ -11,3 +11,210 @@ STREAM_TYPES = {
|
||||||
'binary': 5,
|
'binary': 5,
|
||||||
'model': 6
|
'model': 6
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MATURE_TAGS = [
|
||||||
|
'nsfw', 'porn', 'xxx', 'mature', 'adult', 'sex'
|
||||||
|
]
|
||||||
|
|
||||||
|
COMMON_TAGS = {
|
||||||
|
"gaming": "gaming",
|
||||||
|
"people & blogs": "people_and_blogs",
|
||||||
|
"pop culture": "pop_culture",
|
||||||
|
"entertainment": "entertainment",
|
||||||
|
"technology": "technology",
|
||||||
|
"music": "music",
|
||||||
|
"funny": "funny",
|
||||||
|
"education": "education",
|
||||||
|
"learning": "learning",
|
||||||
|
"news": "news",
|
||||||
|
"gameplay": "gameplay",
|
||||||
|
"science & technology": "science_and_technology",
|
||||||
|
"playstation 4": "playstation_4",
|
||||||
|
"beliefs": "beliefs",
|
||||||
|
"nature": "nature",
|
||||||
|
"news & politics": "news_and_politics",
|
||||||
|
"comedy": "comedy",
|
||||||
|
"games": "games",
|
||||||
|
"sony interactive entertainment": "sony_interactive_entertainment",
|
||||||
|
"film & animation": "film_and_animation",
|
||||||
|
"game": "game",
|
||||||
|
"howto & style": "howto_and_style",
|
||||||
|
"weapons": "weapons",
|
||||||
|
"blockchain": "blockchain",
|
||||||
|
"video game": "video_game",
|
||||||
|
"sports": "sports",
|
||||||
|
"walkthrough": "walkthrough",
|
||||||
|
"ps4live": "ps4live",
|
||||||
|
"art": "art",
|
||||||
|
"pc": "pc",
|
||||||
|
"economics": "economics",
|
||||||
|
"automotive": "automotive",
|
||||||
|
"minecraft": "minecraft",
|
||||||
|
"playthrough": "playthrough",
|
||||||
|
"ps4share": "ps4share",
|
||||||
|
"tutorial": "tutorial",
|
||||||
|
"play": "play",
|
||||||
|
"twitch": "twitch",
|
||||||
|
"how to": "how_to",
|
||||||
|
"ps4": "ps4",
|
||||||
|
"bitcoin": "bitcoin",
|
||||||
|
"fortnite": "fortnite",
|
||||||
|
"commentary": "commentary",
|
||||||
|
"lets play": "lets_play",
|
||||||
|
"fun": "fun",
|
||||||
|
"politics": "politics",
|
||||||
|
"xbox": "xbox",
|
||||||
|
"autos & vehicles": "autos_and_vehicles",
|
||||||
|
"travel & events": "travel_and_events",
|
||||||
|
"food": "food",
|
||||||
|
"science": "science",
|
||||||
|
"mature": "mature",
|
||||||
|
"xbox one": "xbox_one",
|
||||||
|
"liberal": "liberal",
|
||||||
|
"democrat": "democrat",
|
||||||
|
"progressive": "progressive",
|
||||||
|
"survival": "survival",
|
||||||
|
"nonprofits & activism": "nonprofits_and_activism",
|
||||||
|
"cryptocurrency": "cryptocurrency",
|
||||||
|
"playstation": "playstation",
|
||||||
|
"nintendo": "nintendo",
|
||||||
|
"government": "government",
|
||||||
|
"steam": "steam",
|
||||||
|
"podcast": "podcast",
|
||||||
|
"horror": "horror",
|
||||||
|
"conservative": "conservative",
|
||||||
|
"reaction": "reaction",
|
||||||
|
"trailer": "trailer",
|
||||||
|
"love": "love",
|
||||||
|
"cnn": "cnn",
|
||||||
|
"republican": "republican",
|
||||||
|
"gamer": "gamer",
|
||||||
|
"political": "political",
|
||||||
|
"hangoutsonair": "hangoutsonair",
|
||||||
|
"hoa": "hoa",
|
||||||
|
"msnbc": "msnbc",
|
||||||
|
"cbs": "cbs",
|
||||||
|
"donald trump": "donald_trump",
|
||||||
|
"fiction": "fiction",
|
||||||
|
"fox news": "fox_news",
|
||||||
|
"anime": "anime",
|
||||||
|
"crypto": "crypto",
|
||||||
|
"ethereum": "ethereum",
|
||||||
|
"call of duty": "call_of_duty",
|
||||||
|
"multiplayer": "multiplayer",
|
||||||
|
"android": "android",
|
||||||
|
"epic": "epic",
|
||||||
|
"rpg": "rpg",
|
||||||
|
"adventure": "adventure",
|
||||||
|
"secular talk": "secular_talk",
|
||||||
|
"btc": "btc",
|
||||||
|
"atheist": "atheist",
|
||||||
|
"atheism": "atheism",
|
||||||
|
"ps3": "ps3",
|
||||||
|
"video games": "video_games",
|
||||||
|
"cod": "cod",
|
||||||
|
"agnostic": "agnostic",
|
||||||
|
"movie": "movie",
|
||||||
|
"online": "online",
|
||||||
|
"fps": "fps",
|
||||||
|
"mod": "mod",
|
||||||
|
"reviews": "reviews",
|
||||||
|
"sharefactory": "sharefactory",
|
||||||
|
"world": "world",
|
||||||
|
"space": "space",
|
||||||
|
"hilarious": "hilarious",
|
||||||
|
"stream": "stream",
|
||||||
|
"lol": "lol",
|
||||||
|
"sony": "sony",
|
||||||
|
"god": "god",
|
||||||
|
"lets": "lets",
|
||||||
|
"dance": "dance",
|
||||||
|
"pvp": "pvp",
|
||||||
|
"tech": "tech",
|
||||||
|
"zombies": "zombies",
|
||||||
|
"pokemon": "pokemon",
|
||||||
|
"fail": "fail",
|
||||||
|
"xbox 360": "xbox_360",
|
||||||
|
"film": "film",
|
||||||
|
"unboxing": "unboxing",
|
||||||
|
"animation": "animation",
|
||||||
|
"travel": "travel",
|
||||||
|
"money": "money",
|
||||||
|
"wwe": "wwe",
|
||||||
|
"how": "how",
|
||||||
|
"mods": "mods",
|
||||||
|
"pubg": "pubg",
|
||||||
|
"indie": "indie",
|
||||||
|
"strategy": "strategy",
|
||||||
|
"history": "history",
|
||||||
|
"rap": "rap",
|
||||||
|
"ios": "ios",
|
||||||
|
"sony computer entertainment": "sony_computer_entertainment",
|
||||||
|
"mobile": "mobile",
|
||||||
|
"trump": "trump",
|
||||||
|
"flat earth": "flat_earth",
|
||||||
|
"hack": "hack",
|
||||||
|
"trap": "trap",
|
||||||
|
"fox": "fox",
|
||||||
|
"vlogging": "vlogging",
|
||||||
|
"news radio": "news_radio",
|
||||||
|
"humor": "humor",
|
||||||
|
"facebook": "facebook",
|
||||||
|
"edm": "edm",
|
||||||
|
"fitness": "fitness",
|
||||||
|
"vaping": "vaping",
|
||||||
|
"hip hop": "hip_hop",
|
||||||
|
"secular": "secular",
|
||||||
|
"jesus": "jesus",
|
||||||
|
"vape": "vape",
|
||||||
|
"song": "song",
|
||||||
|
"remix": "remix",
|
||||||
|
"guitar": "guitar",
|
||||||
|
"daily": "daily",
|
||||||
|
"mining": "mining",
|
||||||
|
"diy": "diy",
|
||||||
|
"videogame": "videogame",
|
||||||
|
"pets & animals": "pets_and_animals",
|
||||||
|
"funny moments": "funny_moments",
|
||||||
|
"religion": "religion",
|
||||||
|
"death": "death",
|
||||||
|
"media": "media",
|
||||||
|
"nbc": "nbc",
|
||||||
|
"war": "war",
|
||||||
|
"freedom": "freedom",
|
||||||
|
"viral": "viral",
|
||||||
|
"meme": "meme",
|
||||||
|
"family": "family",
|
||||||
|
"gold": "gold",
|
||||||
|
"photography": "photography",
|
||||||
|
"chill": "chill",
|
||||||
|
"zombie": "zombie",
|
||||||
|
"computer": "computer",
|
||||||
|
"sniper": "sniper",
|
||||||
|
"bible": "bible",
|
||||||
|
"linux": "linux",
|
||||||
|
"overwatch": "overwatch",
|
||||||
|
"pro": "pro",
|
||||||
|
"dragon": "dragon",
|
||||||
|
"litecoin": "litecoin",
|
||||||
|
"gta": "gta",
|
||||||
|
"iphone": "iphone",
|
||||||
|
"house": "house",
|
||||||
|
"bass": "bass",
|
||||||
|
"bitcoin news": "bitcoin_news",
|
||||||
|
"wii": "wii",
|
||||||
|
"crash": "crash",
|
||||||
|
"league of legends": "league_of_legends",
|
||||||
|
"grand theft auto v": "grand_theft_auto_v",
|
||||||
|
"mario": "mario",
|
||||||
|
"mmorpg": "mmorpg",
|
||||||
|
"satire": "satire",
|
||||||
|
"fire": "fire",
|
||||||
|
"racing": "racing",
|
||||||
|
"apple": "apple",
|
||||||
|
"health": "health",
|
||||||
|
"instrumental": "instrumental",
|
||||||
|
"destiny": "destiny",
|
||||||
|
"truth": "truth",
|
||||||
|
"race": "race"
|
||||||
|
}
|
||||||
|
|
|
@ -16,7 +16,7 @@ from lbry.schema.tags import clean_tags
|
||||||
from lbry.schema.result import Outputs
|
from lbry.schema.result import Outputs
|
||||||
from lbry.wallet.ledger import BaseLedger, MainNetLedger, RegTestLedger
|
from lbry.wallet.ledger import BaseLedger, MainNetLedger, RegTestLedger
|
||||||
|
|
||||||
from .common import CLAIM_TYPES, STREAM_TYPES
|
from .common import CLAIM_TYPES, STREAM_TYPES, COMMON_TAGS
|
||||||
|
|
||||||
|
|
||||||
class SQLiteOperationalError(sqlite3.OperationalError):
|
class SQLiteOperationalError(sqlite3.OperationalError):
|
||||||
|
@ -433,24 +433,36 @@ def _apply_constraints_for_array_attributes(constraints, attr, cleaner, for_coun
|
||||||
any_items = {item for item in any_items if item not in not_items}
|
any_items = {item for item in any_items if item not in not_items}
|
||||||
|
|
||||||
if any_items:
|
if any_items:
|
||||||
|
|
||||||
|
any_queries = {}
|
||||||
|
|
||||||
|
common_items = any_items & COMMON_TAGS.keys()
|
||||||
|
if common_items:
|
||||||
|
any_items -= common_items
|
||||||
|
for item in common_items:
|
||||||
|
index_name = COMMON_TAGS[item]
|
||||||
|
any_queries[f'$any_{attr}_{index_name}'] = item
|
||||||
|
any_queries[f'#_any_{attr}_{index_name}'] = f"""
|
||||||
|
EXISTS(
|
||||||
|
SELECT 1 FROM {attr} INDEXED BY tag_{index_name}_idx WHERE
|
||||||
|
claim.claim_hash={attr}.claim_hash
|
||||||
|
AND {attr} = '{item}'
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
constraints.update({
|
constraints.update({
|
||||||
f'$any_{attr}{i}': item for i, item in enumerate(any_items)
|
f'$any_{attr}{i}': item for i, item in enumerate(any_items)
|
||||||
})
|
})
|
||||||
values = ', '.join(
|
values = ', '.join(
|
||||||
f':$any_{attr}{i}' for i in range(len(any_items))
|
f':$any_{attr}{i}' for i in range(len(any_items))
|
||||||
)
|
)
|
||||||
if for_count:
|
any_queries[f'claim.claim_hash__in#_any_{attr}'] = f"""
|
||||||
constraints[f'claim.claim_hash__in#_any_{attr}'] = f"""
|
|
||||||
SELECT claim_hash FROM {attr} WHERE {attr} IN ({values})
|
SELECT claim_hash FROM {attr} WHERE {attr} IN ({values})
|
||||||
"""
|
"""
|
||||||
else:
|
if len(any_queries) == 1:
|
||||||
constraints[f'#_any_{attr}'] = f"""
|
constraints.update(any_queries)
|
||||||
EXISTS(
|
elif len(any_queries) > 1:
|
||||||
SELECT 1 FROM {attr} WHERE
|
constraints[f'ORed_{attr}_queries__any'] = any_queries
|
||||||
claim.claim_hash={attr}.claim_hash
|
|
||||||
AND {attr} IN ({values})
|
|
||||||
)
|
|
||||||
"""
|
|
||||||
|
|
||||||
if all_items:
|
if all_items:
|
||||||
constraints[f'$all_{attr}_count'] = len(all_items)
|
constraints[f'$all_{attr}_count'] = len(all_items)
|
||||||
|
|
|
@ -16,7 +16,7 @@ from lbry.wallet.server.db.trending import (
|
||||||
CREATE_TREND_TABLE, calculate_trending, register_trending_functions
|
CREATE_TREND_TABLE, calculate_trending, register_trending_functions
|
||||||
)
|
)
|
||||||
|
|
||||||
from .common import CLAIM_TYPES, STREAM_TYPES
|
from .common import CLAIM_TYPES, STREAM_TYPES, COMMON_TAGS
|
||||||
|
|
||||||
|
|
||||||
ATTRIBUTE_ARRAY_MAX_LENGTH = 100
|
ATTRIBUTE_ARRAY_MAX_LENGTH = 100
|
||||||
|
@ -76,30 +76,12 @@ class SQLDB:
|
||||||
trending_global integer not null default 0
|
trending_global integer not null default 0
|
||||||
);
|
);
|
||||||
|
|
||||||
create index if not exists claim_resolve_idx on claim (normalized, claim_id);
|
create index if not exists claim_normalized_idx on claim (normalized, activation_height);
|
||||||
|
create index if not exists claim_channel_hash_idx on claim (channel_hash, signature, claim_hash);
|
||||||
create index if not exists claim_claims_in_channel_idx on claim (signature_valid, channel_hash, normalized);
|
create index if not exists claim_claims_in_channel_idx on claim (signature_valid, channel_hash, normalized);
|
||||||
|
|
||||||
create index if not exists claim_id_idx on claim (claim_id);
|
|
||||||
create index if not exists claim_normalized_idx on claim (normalized);
|
|
||||||
create index if not exists claim_txo_hash_idx on claim (txo_hash);
|
create index if not exists claim_txo_hash_idx on claim (txo_hash);
|
||||||
create index if not exists claim_channel_hash_idx on claim (channel_hash);
|
create index if not exists claim_activation_height_idx on claim (activation_height, claim_hash);
|
||||||
create index if not exists claim_timestamp_idx on claim (timestamp);
|
|
||||||
create index if not exists claim_height_idx on claim (height);
|
|
||||||
create index if not exists claim_activation_height_idx on claim (activation_height);
|
|
||||||
create index if not exists claim_expiration_height_idx on claim (expiration_height);
|
create index if not exists claim_expiration_height_idx on claim (expiration_height);
|
||||||
create index if not exists claim_public_key_hash_idx on claim (public_key_hash);
|
|
||||||
|
|
||||||
create index if not exists claim_claim_type_idx on claim (claim_type);
|
|
||||||
create index if not exists claim_stream_type_idx on claim (stream_type);
|
|
||||||
create index if not exists claim_media_type_idx on claim (media_type);
|
|
||||||
create index if not exists claim_fee_amount_idx on claim (fee_amount);
|
|
||||||
create index if not exists claim_fee_currency_idx on claim (fee_currency);
|
|
||||||
|
|
||||||
create index if not exists claim_signature_valid_idx on claim (signature_valid);
|
|
||||||
|
|
||||||
create unique index if not exists claim_effective_amount_idx on claim (effective_amount, claim_hash, release_time);
|
|
||||||
create unique index if not exists claim_release_time_idx on claim (release_time, claim_hash);
|
|
||||||
create unique index if not exists claim_trending_global_mixed_idx on claim (trending_global, trending_mixed, claim_hash);
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
CREATE_SUPPORT_TABLE = """
|
CREATE_SUPPORT_TABLE = """
|
||||||
|
@ -110,7 +92,6 @@ class SQLDB:
|
||||||
claim_hash bytes not null,
|
claim_hash bytes not null,
|
||||||
amount integer not null
|
amount integer not null
|
||||||
);
|
);
|
||||||
create index if not exists support_txo_hash_idx on support (txo_hash);
|
|
||||||
create index if not exists support_claim_hash_idx on support (claim_hash, height);
|
create index if not exists support_claim_hash_idx on support (claim_hash, height);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -120,7 +101,6 @@ class SQLDB:
|
||||||
claim_hash bytes not null,
|
claim_hash bytes not null,
|
||||||
height integer not null
|
height integer not null
|
||||||
);
|
);
|
||||||
create index if not exists tag_tag_idx on tag (tag);
|
|
||||||
create unique index if not exists tag_claim_hash_tag_idx on tag (claim_hash, tag);
|
create unique index if not exists tag_claim_hash_tag_idx on tag (claim_hash, tag);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -133,6 +113,37 @@ class SQLDB:
|
||||||
create index if not exists claimtrie_claim_hash_idx on claimtrie (claim_hash);
|
create index if not exists claimtrie_claim_hash_idx on claimtrie (claim_hash);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
SEARCH_INDEXES = """
|
||||||
|
-- used by any tag clouds
|
||||||
|
create index if not exists tag_tag_idx on tag (tag, claim_hash);
|
||||||
|
{custom_tags_indexes}
|
||||||
|
|
||||||
|
-- common ORDER BY
|
||||||
|
create unique index if not exists claim_effective_amount_idx on claim (effective_amount, claim_hash, release_time);
|
||||||
|
create unique index if not exists claim_release_time_idx on claim (release_time, claim_hash);
|
||||||
|
create unique index if not exists claim_trending_global_mixed_idx on claim (trending_global, trending_mixed, claim_hash);
|
||||||
|
|
||||||
|
-- TODO: verify that all indexes below are used
|
||||||
|
create index if not exists claim_height_normalized_idx on claim (height, normalized asc);
|
||||||
|
|
||||||
|
create index if not exists claim_resolve_idx on claim (normalized, claim_id);
|
||||||
|
|
||||||
|
create index if not exists claim_id_idx on claim (claim_id, claim_hash);
|
||||||
|
create index if not exists claim_timestamp_idx on claim (timestamp);
|
||||||
|
create index if not exists claim_public_key_hash_idx on claim (public_key_hash);
|
||||||
|
|
||||||
|
create index if not exists claim_claim_type_idx on claim (claim_type);
|
||||||
|
create index if not exists claim_stream_type_idx on claim (stream_type);
|
||||||
|
create index if not exists claim_media_type_idx on claim (media_type);
|
||||||
|
create index if not exists claim_fee_amount_idx on claim (fee_amount);
|
||||||
|
create index if not exists claim_fee_currency_idx on claim (fee_currency);
|
||||||
|
|
||||||
|
create index if not exists claim_signature_valid_idx on claim (signature_valid);
|
||||||
|
""".format(custom_tags_indexes='\n'.join(
|
||||||
|
f'create unique index if not exists tag_{tag_key}_idx on tag (tag, claim_hash) WHERE tag="{tag_value}";'
|
||||||
|
for tag_value, tag_key in COMMON_TAGS.items()
|
||||||
|
))
|
||||||
|
|
||||||
CREATE_TABLES_QUERY = (
|
CREATE_TABLES_QUERY = (
|
||||||
PRAGMAS +
|
PRAGMAS +
|
||||||
CREATE_CLAIM_TABLE +
|
CREATE_CLAIM_TABLE +
|
||||||
|
@ -688,6 +699,9 @@ class SQLDB:
|
||||||
r(self.update_claimtrie, height, recalculate_claim_hashes, deleted_claim_names, forward_timer=True)
|
r(self.update_claimtrie, height, recalculate_claim_hashes, deleted_claim_names, forward_timer=True)
|
||||||
r(calculate_trending, self.db, height, self.main.first_sync, daemon_height)
|
r(calculate_trending, self.db, height, self.main.first_sync, daemon_height)
|
||||||
|
|
||||||
|
if self.main.first_sync and height == daemon_height:
|
||||||
|
self.db.executescript(self.SEARCH_INDEXES)
|
||||||
|
|
||||||
|
|
||||||
class LBRYDB(DB):
|
class LBRYDB(DB):
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ class TestTagNormalization(unittest.TestCase):
|
||||||
def test_normalize_tag(self):
|
def test_normalize_tag(self):
|
||||||
tag = self.assertNormalizedTag
|
tag = self.assertNormalizedTag
|
||||||
tag('', ' \t #!~')
|
tag('', ' \t #!~')
|
||||||
tag('tag', 'Tag')
|
tag('tag', 'T\'ag')
|
||||||
tag('t ag', '\tT \nAG ')
|
tag('t ag', '\tT \nAG ')
|
||||||
tag('tag hash', '#tag~#hash!')
|
tag('tag hash', '#tag~#hash!')
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue