lbry-sdk/lbry/schema/url.py
2020-10-30 11:37:54 -04:00

129 lines
3.6 KiB
Python

import re
import unicodedata
from typing import NamedTuple, Tuple
def _create_url_regex():
    """Build the regular-expression source used to parse LBRY URLs.

    The pattern accepts an optional ``lbry://`` scheme followed by exactly
    one of:

    * ``@channel/stream`` (groups prefixed ``channel_with_stream_`` and
      ``stream_in_channel_``),
    * ``@channel`` on its own (groups prefixed ``channel_``), or
    * ``stream`` on its own (groups prefixed ``stream_``).

    Every claim exposes four named groups: ``<prefix>_name`` plus at most
    one modifier out of ``<prefix>_claim_id`` (introduced by ``:`` or
    ``#``), ``<prefix>_sequence`` (introduced by ``*``) and
    ``<prefix>_amount_order`` (introduced by ``$``).

    Returns:
        The pattern as a ``str``, anchored at both ends of the input.
    """
    # see https://spec.lbry.com/ and test_url.py
    # A name is one or more characters outside this set: reserved
    # punctuation, U+0000-U+0020, surrogates U+D800-U+DFFF, U+FFFE-U+FFFF.
    invalid_names_regex = \
        r"[^=&#:$@%*?;\"/\\<>%{}|^~`\[\]" \
        r"\u0000-\u0020\uD800-\uDFFF\uFFFE-\uFFFF]+"
    # Decimal number without a leading zero (so never 0).
    positive_int = '[1-9][0-9]*'

    def _named(name, regex):
        # Named capturing group.
        return f"(?P<{name}>{regex})"

    def _group(regex):
        # Non-capturing group.
        return f"(?:{regex})"

    def _oneof(*choices):
        # Non-capturing alternation of the given sub-patterns.
        return _group('|'.join(choices))

    def _claim(name, prefix=""):
        # A claim name (optionally starting with `prefix`) followed by at
        # most one modifier.
        modifier = _oneof(
            _group('[:#]' + _named(f"{name}_claim_id", "[0-9a-f]{1,40}")),
            _group(r'\*' + _named(f"{name}_sequence", positive_int)),
            _group(r'\$' + _named(f"{name}_amount_order", positive_int)),
        )
        return _group(
            _named(f"{name}_name", prefix + invalid_names_regex) + modifier + '?'
        )

    return (
        '^'
        + _named("scheme", "lbry://") + '?'
        + _oneof(
            _group(_claim("channel_with_stream", "@") + "/" + _claim("stream_in_channel")),
            _claim("channel", "@"),
            _claim("stream"),
        )
        # Use \Z rather than $: `$` also matches just before a trailing
        # newline, which would let "name\n" through even though the name
        # character class rejects control characters.
        + r'\Z'
    )


# Pattern source shared by every URL parse; built once at import time.
URL_REGEX = _create_url_regex()
def normalize_name(name):
    """Return `name` in Unicode NFD form with case folding applied."""
    decomposed = unicodedata.normalize('NFD', name)
    return decomposed.casefold()
class PathSegment(NamedTuple):
    """A claim name together with at most one optional modifier.

    When several modifiers are set, ``__str__`` renders only the first one
    present, in the order claim_id, sequence, amount_order.
    """
    name: str
    claim_id: str = None  # rendered after ':' by __str__
    sequence: int = None  # rendered after '*' by __str__
    amount_order: int = None  # rendered after '$' by __str__

    @property
    def normalized(self):
        """The segment name passed through `normalize_name`."""
        return normalize_name(self.name)

    def to_dict(self):
        """Return the name plus every modifier that is not None, as a dict."""
        optional_fields = (
            ('claim_id', self.claim_id),
            ('sequence', self.sequence),
            ('amount_order', self.amount_order),
        )
        result = {'name': self.name}
        for key, value in optional_fields:
            if value is not None:
                result[key] = value
        return result

    def __str__(self):
        """Render as ``name``, ``name:claim_id``, ``name*sequence`` or ``name$amount_order``."""
        candidates = (
            (':', self.claim_id),
            ('*', self.sequence),
            ('$', self.amount_order),
        )
        for separator, modifier in candidates:
            if modifier is not None:
                return f"{self.name}{separator}{modifier}"
        return self.name
class URL(NamedTuple):
    """A parsed LBRY URL made of a stream segment and/or a channel segment.

    Either field is None when that part is absent from the URL.
    """
    stream: PathSegment
    channel: PathSegment

    @property
    def has_channel(self):
        """True when a channel segment is present."""
        return self.channel is not None

    @property
    def has_stream(self):
        """True when a stream segment is present."""
        return self.stream is not None

    @property
    def has_stream_in_channel(self):
        """True when both a channel and a stream segment are present."""
        return self.has_channel and self.has_stream

    @property
    def parts(self) -> Tuple:
        """The segments in URL order: channel first (if any), then stream."""
        if not self.has_channel:
            return (self.stream,)
        if not self.has_stream:
            return (self.channel,)
        return (self.channel, self.stream)

    def __str__(self):
        """Render as ``lbry://`` followed by the segments joined with ``/``."""
        path = '/'.join(map(str, self.parts))
        return f"lbry://{path}"

    @classmethod
    def parse(cls, url):
        """Parse `url` against URL_REGEX and return the resulting URL.

        Modifier values are stored as the strings captured by the regex;
        no numeric conversion is performed.

        Raises:
            ValueError: if `url` does not match URL_REGEX.
        """
        matched = re.match(URL_REGEX, url)
        if matched is None:
            raise ValueError('Invalid LBRY URL')

        groups = matched.groupdict()
        # Build a PathSegment for every claim kind whose name group matched.
        segments = {
            kind: PathSegment(
                groups[f'{kind}_name'],
                groups[f'{kind}_claim_id'],
                groups[f'{kind}_sequence'],
                groups[f'{kind}_amount_order'],
            )
            for kind in ('channel', 'stream', 'channel_with_stream', 'stream_in_channel')
            if groups[f'{kind}_name'] is not None
        }

        # The "@channel/stream" form uses its own group names; map them
        # onto the channel and stream fields.
        if 'channel_with_stream' in segments:
            return cls(segments['stream_in_channel'], segments['channel_with_stream'])
        return cls(segments.get('stream'), segments.get('channel'))