Allow either ':' or '#' as the claim_id separator

This removes the fallback that tried multiple patterns in turn (URL_REGEX, then URL_REGEX_LEGACY), together with the legacy-regex setup that supported it.

Adds a few unit tests to check that URLs written with either separator parse to the expected fields.
This commit is contained in:
Kevin Raoofi 2020-10-27 13:53:50 -04:00 committed by Lex Berezhny
parent 7637aa2ab6
commit e8d299d3b6
2 changed files with 19 additions and 28 deletions

View file

@ -1,6 +1,6 @@
import re
import unicodedata
from typing import Iterable, NamedTuple, Pattern, Tuple
from typing import NamedTuple, Tuple
def _create_url_regex(legacy=False):
@ -18,42 +18,29 @@ def _create_url_regex(legacy=False):
def _oneof(*choices):
return _group('|'.join(choices))
def _legacy_claim(name, prefix=""):
return _group(
_named(name + "_name", prefix + invalid_names_regex) +
_oneof(
_group('#' + _named(name + "_claim_id", "[0-9a-f]{1,40}")),
_group(':' + _named(name + "_sequence", '[1-9][0-9]*')),
_group(r'\$' + _named(name + "_amount_order", '[1-9][0-9]*'))
) + '?'
)
def _claim(name, prefix=""):
return _group(
_named(name+"_name", prefix + invalid_names_regex) +
_oneof(
_group(':' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
_group('[:#]' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
_group(r'\*' + _named(name+"_sequence", '[1-9][0-9]*')),
_group(r'\$' + _named(name+"_amount_order", '[1-9][0-9]*'))
) + '?'
)
claim = _claim if not legacy else _legacy_claim
return (
'^' +
_named("scheme", "lbry://") + '?' +
_oneof(
_group(claim("channel_with_stream", "@") + "/" + claim("stream_in_channel")),
claim("channel", "@"),
claim("stream")
_group(_claim("channel_with_stream", "@") + "/" + _claim("stream_in_channel")),
_claim("channel", "@"),
_claim("stream")
) +
'$'
)
URL_REGEX = _create_url_regex()
URL_REGEX_LEGACY = _create_url_regex(legacy=True)
def normalize_name(name):
@ -117,13 +104,9 @@ class URL(NamedTuple):
def __str__(self):
return f"lbry://{'/'.join(str(p) for p in self.parts)}"
@staticmethod
def _first_match(x: str, ptns: Iterable[Pattern[str]]):
return next(filter(None, (re.match(ptn, x) for ptn in ptns)), None)
@classmethod
def parse(cls, url):
match = URL._first_match(url, (URL_REGEX, URL_REGEX_LEGACY))
match = re.match(URL_REGEX, url)
if match is None:
raise ValueError('Invalid LBRY URL')

View file

@ -11,8 +11,10 @@ class TestURLParsing(unittest.TestCase):
segments = 'stream', 'channel'
fields = 'name', 'claim_id', 'sequence', 'amount_order'
def _assert_url(self, url_string, **kwargs):
def _assert_url(self, url_string, strictly=True, **kwargs):
url = URL.parse(url_string)
if strictly:
if url_string.startswith('lbry://'):
self.assertEqual(url_string, str(url))
else:
@ -55,6 +57,12 @@ class TestURLParsing(unittest.TestCase):
url('lbry://@test*1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff')
url('lbry://@test$1/stuff', channel_name='@test', channel_amount_order='1', stream_name='stuff')
url(f'lbry://@test:{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff')
# legacy/new conversions
url(f'test#{claim_id}', stream_name='test', stream_claim_id=claim_id, strictly=False)
url('@test:1/stuff#2', channel_claim_id='1', stream_claim_id='2',
channel_name='@test', stream_name='stuff', strictly=False)
url('@test*1/stuff#2', channel_sequence='1', stream_claim_id='2',
channel_name='@test', stream_name='stuff', strictly=False)
# unicode regex edges
_url = lambda name: url(name, stream_name=name)
_url('\uD799')