implement blocking and filtering

commit 08d635322e
parent bc0c1b9a3e

6 changed files with 98 additions and 37 deletions
@@ -1,6 +1,5 @@
 import base64
-import struct
-from typing import List, TYPE_CHECKING, Union
+from typing import List, TYPE_CHECKING, Union, Optional
 from binascii import hexlify
 from itertools import chain
 
@@ -43,19 +42,19 @@ class Censor:
     def apply(self, rows):
         return [row for row in rows if not self.censor(row)]
 
-    def censor(self, row) -> bool:
+    def censor(self, row) -> Optional[bytes]:
         if self.is_censored(row):
             censoring_channel_hash = row['censoring_channel_hash']
             self.censored.setdefault(censoring_channel_hash, set())
             self.censored[censoring_channel_hash].add(row['tx_hash'])
-            return True
-        return False
+            return censoring_channel_hash
+        return None
 
     def to_message(self, outputs: OutputsMessage, extra_txo_rows: dict):
         for censoring_channel_hash, count in self.censored.items():
             blocked = outputs.blocked.add()
             blocked.count = len(count)
-            set_reference(blocked.channel, extra_txo_rows.get(censoring_channel_hash))
+            set_reference(blocked.channel, censoring_channel_hash, extra_txo_rows)
             outputs.blocked_total += len(count)
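Note on the `censor` change above: returning the censoring channel hash (or `None`) instead of a bool tells callers *who* blocked a row, while `apply` keeps working unchanged because a 20-byte hash is truthy and `None` is falsy. A standalone sketch of the new contract (toy rows and a stand-in class, not the library code):

# Sketch only: mirrors the new censor()/apply() contract with stand-in data.
from typing import Optional

class CensorSketch:
    def __init__(self):
        self.censored = {}  # censoring channel hash -> set of censored tx hashes

    def censor(self, row) -> Optional[bytes]:
        if row.get('censor_type'):  # stand-in for is_censored(row)
            blocker = row['censoring_channel_hash']
            self.censored.setdefault(blocker, set()).add(row['tx_hash'])
            return blocker  # truthy, so apply() still filters the row out
        return None

    def apply(self, rows):
        return [row for row in rows if not self.censor(row)]

censor = CensorSketch()
kept = censor.apply([
    {'censor_type': 2, 'censoring_channel_hash': b'\x01' * 20, 'tx_hash': b'\xaa' * 32},
    {'censor_type': 0, 'censoring_channel_hash': None, 'tx_hash': b'\xbb' * 32},
])
assert len(kept) == 1 and b'\x01' * 20 in censor.censored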
@@ -178,8 +177,8 @@ class Outputs:
         page.offset = offset
         if total is not None:
             page.total = total
-        # if blocked is not None:
-        #     blocked.to_message(page, extra_txo_rows)
+        if blocked is not None:
+            blocked.to_message(page, extra_txo_rows)
         for row in extra_txo_rows:
             cls.encode_txo(page.extra_txos.add(), row)
@@ -192,7 +191,8 @@ class Outputs:
                 set_reference(txo_message.claim.channel, row.channel_hash, extra_txo_rows)
                 if row.reposted_claim_hash:
                     set_reference(txo_message.claim.repost, row.reposted_claim_hash, extra_txo_rows)
-            # set_reference(txo_message.error.blocked.channel, row.censor_hash, extra_txo_rows)
+            elif isinstance(row, ResolveCensoredError):
+                set_reference(txo_message.error.blocked.channel, row.censor_hash, extra_txo_rows)
         return page.SerializeToString()
 
     @classmethod
@@ -318,7 +318,10 @@ class BlockProcessor:
             await self.run_in_thread(self.db.apply_expiration_extension_fork)
         # TODO: we shouldnt wait on the search index updating before advancing to the next block
         if not self.db.first_sync:
+            self.db.reload_blocking_filtering_streams()
             await self.db.search_index.claim_consumer(self.claim_producer())
+            await self.db.search_index.apply_filters(self.db.blocked_streams, self.db.blocked_channels,
+                                                     self.db.filtered_streams, self.db.filtered_channels)
             self.db.search_index.clear_caches()
             self.touched_claims_to_send_es.clear()
             self.removed_claims_to_send_es.clear()
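The ordering in this hunk is the point: reload the blocking/filtering maps from reposts, index the touched claims, then push the filters to the search index so freshly indexed documents pick up their censor levels before caches are cleared. A stand-in coroutine sketching that sequence (names mirror the diff; `db` and `search_index` are hypothetical objects, not the real classes):

# Sketch of the per-block ES update sequence.
async def sync_search_index(db, search_index, claim_producer):
    db.reload_blocking_filtering_streams()               # refresh block/filter maps first
    await search_index.claim_consumer(claim_producer())  # index touched claims
    await search_index.apply_filters(db.blocked_streams, db.blocked_channels,
                                     db.filtered_streams, db.filtered_channels)
    search_index.clear_caches()                          # drop stale cached results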
@@ -130,7 +130,7 @@ class SearchIndex:
         self.logger.debug("Indexing done.")
 
     def update_filter_query(self, censor_type, blockdict, channels=False):
-        blockdict = {key[::-1].hex(): value[::-1].hex() for key, value in blockdict.items()}
+        blockdict = {key.hex(): value.hex() for key, value in blockdict.items()}
         if channels:
             update = expand_query(channel_id__in=list(blockdict.keys()), censor_type=f"<{censor_type}")
         else:
@@ -483,7 +483,7 @@ def extract_doc(doc, index):
     channel_hash = doc.pop('channel_hash')
     doc['channel_id'] = channel_hash[::-1].hex() if channel_hash else channel_hash
     channel_hash = doc.pop('censoring_channel_hash')
-    doc['censoring_channel_hash'] = channel_hash[::-1].hex() if channel_hash else channel_hash
+    doc['censoring_channel_hash'] = channel_hash.hex() if channel_hash else channel_hash
     # txo_hash = doc.pop('txo_hash')
     # doc['tx_id'] = txo_hash[:32][::-1].hex()
     # doc['tx_nout'] = struct.unpack('<I', txo_hash[32:])[0]
@@ -23,6 +23,9 @@ from functools import partial
 from asyncio import sleep
 from bisect import bisect_right
 from collections import defaultdict, OrderedDict
+
+from lbry.error import ResolveCensoredError
+from lbry.schema.result import Censor
 from lbry.utils import LRUCacheWithMetrics
 from lbry.schema.url import URL
 from lbry.wallet.server import util
@@ -115,6 +118,20 @@ class LevelDB:
         self.hist_comp_flush_count = -1
         self.hist_comp_cursor = -1
 
+        # blocking/filtering dicts
+        blocking_channels = self.env.default('BLOCKING_CHANNEL_IDS', '').split(' ')
+        filtering_channels = self.env.default('FILTERING_CHANNEL_IDS', '').split(' ')
+        self.blocked_streams = {}
+        self.blocked_channels = {}
+        self.blocking_channel_hashes = {
+            bytes.fromhex(channel_id)[::-1] for channel_id in blocking_channels if channel_id
+        }
+        self.filtered_streams = {}
+        self.filtered_channels = {}
+        self.filtering_channel_hashes = {
+            bytes.fromhex(channel_id)[::-1] for channel_id in filtering_channels if channel_id
+        }
+
         self.tx_counts = None
         self.headers = None
         self.encoded_headers = LRUCacheWithMetrics(1 << 21, metric_name='encoded_headers', namespace='wallet_server')
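`BLOCKING_CHANNEL_IDS` and `FILTERING_CHANNEL_IDS` hold space-separated hex claim IDs; a claim ID is the byte-reversed hex of the raw 20-byte claim hash, hence the `bytes.fromhex(...)[::-1]`. A standalone sketch of the parsing (the env value here is hypothetical):

import os

# Hypothetical value; real deployments list the channels to block.
os.environ.setdefault('BLOCKING_CHANNEL_IDS', '404e56e4d1ef2b939f1cbe7b03fbcd9f1e4b8a68')

blocking_channels = os.environ.get('BLOCKING_CHANNEL_IDS', '').split(' ')
blocking_channel_hashes = {
    bytes.fromhex(channel_id)[::-1]                     # hex claim id -> raw hash bytes
    for channel_id in blocking_channels if channel_id   # skip empty split results
}
assert all(len(h) == 20 for h in blocking_channel_hashes)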
@@ -352,6 +369,16 @@ class LevelDB:
         if not resolved_stream:
             return LookupError(f'Could not find claim at "{url}".'), None
 
+        if resolved_stream or resolved_channel:
+            claim_hash = resolved_stream.claim_hash if resolved_stream else resolved_channel.claim_hash
+            claim = resolved_stream if resolved_stream else resolved_channel
+            reposted_claim_hash = resolved_stream.reposted_claim_hash if resolved_stream else None
+            blocker_hash = self.blocked_streams.get(claim_hash) or self.blocked_streams.get(
+                reposted_claim_hash) or self.blocked_channels.get(claim_hash) or self.blocked_channels.get(
+                reposted_claim_hash) or self.blocked_channels.get(claim.channel_hash)
+            if blocker_hash:
+                reason_row = self._fs_get_claim_by_hash(blocker_hash)
+                return None, ResolveCensoredError(url, blocker_hash, censor_row=reason_row)
         return resolved_stream, resolved_channel
 
     async def fs_resolve(self, url) -> typing.Tuple[OptionalResolveResultOrError, OptionalResolveResultOrError]:
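The `blocker_hash` expression checks five maps in order: the claim and its repost target against both the blocked-streams and blocked-channels maps, then the claim's own channel. The same chain factored into a helper for readability (a sketch, not code from the commit; `dict.get(None)` returning `None` is what lets a claim with no repost fall through each step harmlessly):

from typing import Optional

def find_blocker(claim_hash: bytes, reposted_claim_hash: Optional[bytes],
                 channel_hash: Optional[bytes],
                 blocked_streams: dict, blocked_channels: dict) -> Optional[bytes]:
    # first match wins; each map yields the censoring channel's hash
    return (blocked_streams.get(claim_hash)
            or blocked_streams.get(reposted_claim_hash)
            or blocked_channels.get(claim_hash)
            or blocked_channels.get(reposted_claim_hash)
            or blocked_channels.get(channel_hash))

blocked_channels = {b'\x02' * 20: b'\x0f' * 20}  # blocked channel -> censoring channel
assert find_blocker(b'\x01' * 20, None, b'\x02' * 20, {}, blocked_channels) == b'\x0f' * 20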
@@ -435,6 +462,31 @@ class LevelDB:
                 count += 1
         return count
 
+    def reload_blocking_filtering_streams(self):
+        self.blocked_streams, self.blocked_channels = self.get_streams_and_channels_reposted_by_channel_hashes(self.blocking_channel_hashes)
+        self.filtered_streams, self.filtered_channels = self.get_streams_and_channels_reposted_by_channel_hashes(self.filtering_channel_hashes)
+
+    def get_streams_and_channels_reposted_by_channel_hashes(self, reposter_channel_hashes: bytes):
+        streams, channels = {}, {}
+        for reposter_channel_hash in reposter_channel_hashes:
+            reposts = self.get_reposts_in_channel(reposter_channel_hash)
+            for repost in reposts:
+                txo = self.get_claim_txo(repost)
+                if txo.name.startswith('@'):
+                    channels[repost] = reposter_channel_hash
+                else:
+                    streams[repost] = reposter_channel_hash
+        return streams, channels
+
+    def get_reposts_in_channel(self, channel_hash):
+        reposts = set()
+        for value in self.db.iterator(prefix=Prefixes.channel_to_claim.pack_partial_key(channel_hash), include_key=False):
+            stream = Prefixes.channel_to_claim.unpack_value(value)
+            repost = self.get_repost(stream.claim_hash)
+            if repost:
+                reposts.add(repost)
+        return reposts
+
     def get_channel_for_claim(self, claim_hash, tx_num, position) -> Optional[bytes]:
         return self.db.get(Prefixes.claim_to_channel.pack_key(claim_hash, tx_num, position))
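Blocking and filtering are driven entirely by reposts: everything a designated channel reposts becomes blocked or filtered, and a reposted channel claim (name starting with `@`) blocks the whole channel rather than a single stream. A toy sketch of that classification with stand-in data in place of the DB iterator:

# Stand-in for get_claim_txo(...).name lookups; data is hypothetical.
claim_names = {
    b'\x01' * 20: '@some-channel',  # repost target is a channel claim
    b'\x02' * 20: 'some-stream',    # repost target is a stream claim
}
reposter_channel_hash = b'\x0f' * 20  # the blocking/filtering channel

streams, channels = {}, {}
for repost in claim_names:
    if claim_names[repost].startswith('@'):
        channels[repost] = reposter_channel_hash  # block the whole channel
    else:
        streams[repost] = reposter_channel_hash   # block just this stream

assert b'\x01' * 20 in channels and b'\x02' * 20 in streams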
@@ -542,18 +594,22 @@ class LevelDB:
             )
             return
         if reposted_metadata:
-            reposted_tags = [] if not reposted_metadata.is_stream else [tag for tag in reposted_metadata.stream.tags]
-            reposted_languages = [] if not reposted_metadata.is_stream else (
-                [lang.language or 'none' for lang in reposted_metadata.stream.languages] or ['none']
-            )
+            meta = reposted_metadata.stream if reposted_metadata.is_stream else reposted_metadata.channel
+            reposted_tags = [tag for tag in meta.tags]
+            reposted_languages = [lang.language or 'none' for lang in meta.languages] or ['none']
             reposted_has_source = False if not reposted_metadata.is_stream else reposted_metadata.stream.has_source
             reposted_claim_type = CLAIM_TYPES[reposted_metadata.claim_type]
-        claim_tags = [] if not metadata.is_stream else [tag for tag in metadata.stream.tags]
-        claim_languages = [] if not metadata.is_stream else (
-            [lang.language or 'none' for lang in metadata.stream.languages] or ['none']
-        )
+        lang_tags = metadata.stream if metadata.is_stream else metadata.channel if metadata.is_channel else metadata.repost
+        claim_tags = [tag for tag in lang_tags.tags]
+        claim_languages = [lang.language or 'none' for lang in lang_tags.languages] or ['none']
         tags = list(set(claim_tags).union(set(reposted_tags)))
         languages = list(set(claim_languages).union(set(reposted_languages)))
+        blocked_hash = self.blocked_streams.get(claim_hash) or self.blocked_streams.get(
+            reposted_claim_hash) or self.blocked_channels.get(claim_hash) or self.blocked_channels.get(
+            reposted_claim_hash) or self.blocked_channels.get(claim.channel_hash)
+        filtered_hash = self.filtered_streams.get(claim_hash) or self.filtered_streams.get(
+            reposted_claim_hash) or self.filtered_channels.get(claim_hash) or self.filtered_channels.get(
+            reposted_claim_hash) or self.filtered_channels.get(claim.channel_hash)
         value = {
             'claim_hash': claim_hash[::-1],
             # 'claim_id': claim_hash.hex(),
@@ -603,8 +659,8 @@ class LevelDB:
             'signature_valid': claim.signature_valid,
             'tags': tags,
             'languages': languages,
-            'censor_type': 0, # TODO: fix
-            'censoring_channel_hash': None, # TODO: fix
+            'censor_type': Censor.RESOLVE if blocked_hash else Censor.SEARCH if filtered_hash else Censor.NOT_CENSORED,
+            'censoring_channel_hash': blocked_hash or filtered_hash or None,
             'claims_in_channel': None if not metadata.is_channel else self.get_claims_in_channel_count(claim_hash)
             # 'trending_group': 0,
             # 'trending_mixed': 0,
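The indexed `censor_type` encodes severity as an ordered level, which is what lets `update_filter_query` earlier match documents with `censor_type < target` and upgrade a filtered document to blocked without ever downgrading. A sketch assuming the conventional ordering of the `Censor` constants (`NOT_CENSORED < SEARCH < RESOLVE`; the numeric values here are assumptions):

# Assumed level values mirroring lbry.schema.result.Censor.
NOT_CENSORED, SEARCH, RESOLVE = 0, 1, 2

def censor_level(blocked_hash, filtered_hash):
    # same conditional shape as the indexed 'censor_type' field above
    return RESOLVE if blocked_hash else SEARCH if filtered_hash else NOT_CENSORED

assert censor_level(b'\x0f' * 20, None) == RESOLVE   # blocked beats filtered
assert censor_level(None, b'\x0f' * 20) == SEARCH
assert censor_level(None, None) == NOT_CENSORED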
@@ -21,7 +21,7 @@ from elasticsearch import ConnectionTimeout
 from prometheus_client import Counter, Info, Histogram, Gauge
 
 import lbry
-from lbry.error import TooManyClaimSearchParametersError
+from lbry.error import ResolveCensoredError, TooManyClaimSearchParametersError
 from lbry.build_info import BUILD, COMMIT_HASH, DOCKER_TAG
 from lbry.schema.result import Outputs
 from lbry.wallet.server.block_processor import BlockProcessor
@@ -997,7 +997,13 @@ class LBRYElectrumX(SessionBase):
             self.session_mgr.urls_to_resolve_count_metric.inc()
             stream, channel = await self.db.fs_resolve(url)
             self.session_mgr.resolved_url_count_metric.inc()
-            if channel and not stream:
+            if isinstance(channel, ResolveCensoredError):
+                rows.append(channel)
+                extra.append(channel.censor_row)
+            elif isinstance(stream, ResolveCensoredError):
+                rows.append(stream)
+                extra.append(stream.censor_row)
+            elif channel and not stream:
                 rows.append(channel)
                 # print("resolved channel", channel.name.decode())
             elif stream:
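When resolution hits a block, the session receives a `ResolveCensoredError` in place of the stream or channel; the error itself goes into `rows` and the censoring channel's claim (`censor_row`) into `extra`, so the encoded response can reference who did the blocking. A shape-only sketch with a stand-in error class (not the real `lbry.error.ResolveCensoredError`):

class CensoredStandIn(Exception):  # stand-in for lbry.error.ResolveCensoredError
    def __init__(self, url, censor_hash, censor_row):
        super().__init__(f'Resolve of "{url}" was censored.')
        self.censor_hash = censor_hash
        self.censor_row = censor_row

rows, extra = [], []
stream = CensoredStandIn('lbry://@some_channel/bad_content', b'\x0f' * 20,
                         {'name': '@blocking'})
channel = None

if isinstance(channel, CensoredStandIn):
    rows.append(channel)
    extra.append(channel.censor_row)
elif isinstance(stream, CensoredStandIn):
    rows.append(stream)
    extra.append(stream.censor_row)

assert extra[0]['name'] == '@blocking'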
@@ -1492,12 +1492,10 @@ class StreamCommands(ClaimTestCase):
         filtering_channel_id = self.get_claim_id(
             await self.channel_create('@filtering', '0.1')
         )
-        # self.conductor.spv_node.server.db.sql.filtering_channel_hashes.add(
-        #     unhexlify(filtering_channel_id)[::-1]
-        # )
-        self.assertEqual(0, len(self.conductor.spv_node.server.db.sql.filtered_streams))
+        self.conductor.spv_node.server.db.filtering_channel_hashes.add(bytes.fromhex(filtering_channel_id))
+        self.assertEqual(0, len(self.conductor.spv_node.server.db.filtered_streams))
         await self.stream_repost(bad_content_id, 'filter1', '0.1', channel_name='@filtering')
-        self.assertEqual(1, len(self.conductor.spv_node.server.db.sql.filtered_streams))
+        self.assertEqual(1, len(self.conductor.spv_node.server.db.filtered_streams))
 
         # search for filtered content directly
         result = await self.out(self.daemon.jsonrpc_claim_search(name='bad_content'))
@@ -1539,12 +1537,10 @@ class StreamCommands(ClaimTestCase):
         blocking_channel_id = self.get_claim_id(
             await self.channel_create('@blocking', '0.1')
         )
-        self.conductor.spv_node.server.db.sql.blocking_channel_hashes.add(
-            unhexlify(blocking_channel_id)[::-1]
-        )
-        self.assertEqual(0, len(self.conductor.spv_node.server.db.sql.blocked_streams))
+        self.conductor.spv_node.server.db.blocking_channel_hashes.add(bytes.fromhex(blocking_channel_id))
+        self.assertEqual(0, len(self.conductor.spv_node.server.db.blocked_streams))
         await self.stream_repost(bad_content_id, 'block1', '0.1', channel_name='@blocking')
-        self.assertEqual(1, len(self.conductor.spv_node.server.db.sql.blocked_streams))
+        self.assertEqual(1, len(self.conductor.spv_node.server.db.blocked_streams))
 
         # blocked content is not resolveable
         error = (await self.resolve('lbry://@some_channel/bad_content'))['error']
@@ -1567,9 +1563,9 @@ class StreamCommands(ClaimTestCase):
         self.assertEqual('@bad_channel', result['items'][1]['name'])
 
         # filter channel out
-        self.assertEqual(0, len(self.conductor.spv_node.server.db.sql.filtered_channels))
+        self.assertEqual(0, len(self.conductor.spv_node.server.db.filtered_channels))
         await self.stream_repost(bad_channel_id, 'filter2', '0.1', channel_name='@filtering')
-        self.assertEqual(1, len(self.conductor.spv_node.server.db.sql.filtered_channels))
+        self.assertEqual(1, len(self.conductor.spv_node.server.db.filtered_channels))
 
         # same claim search as previous now returns 0 results
         result = await self.out(self.daemon.jsonrpc_claim_search(any_tags=['bad-stuff'], order_by=['height']))
@@ -1594,9 +1590,9 @@ class StreamCommands(ClaimTestCase):
         self.assertEqual(worse_content_id, result['claim_id'])
 
         # block channel
-        self.assertEqual(0, len(self.conductor.spv_node.server.db.sql.blocked_channels))
+        self.assertEqual(0, len(self.conductor.spv_node.server.db.blocked_channels))
         await self.stream_repost(bad_channel_id, 'block2', '0.1', channel_name='@blocking')
-        self.assertEqual(1, len(self.conductor.spv_node.server.db.sql.blocked_channels))
+        self.assertEqual(1, len(self.conductor.spv_node.server.db.blocked_channels))
 
         # channel, claim in channel or claim individually no longer resolve
         self.assertEqual((await self.resolve('lbry://@bad_channel'))['error']['name'], 'BLOCKED')