Allow : or # for claim_id

This removes the code for trying multiple patterns and the setup for it. It also adds a few unit tests to check that the parsed URL is as expected.
2020-10-27 13:53:50 -04:00 · 2020-10-27 13:53:50 -04:00 · e8d299d3b6
parent 7637aa2ab6
commit e8d299d3b6
2 changed files with 19 additions and 28 deletions
--- a/lbry/schema/url.py
+++ b/lbry/schema/url.py
@ -1,6 +1,6 @@
 import re
 import unicodedata
-from typing import Iterable, NamedTuple, Pattern, Tuple
+from typing import NamedTuple, Tuple


 def _create_url_regex(legacy=False):
@ -18,42 +18,29 @@ def _create_url_regex(legacy=False):
    def _oneof(*choices):
        return _group('|'.join(choices))

-    def _legacy_claim(name, prefix=""):
-        return _group(
-            _named(name + "_name", prefix + invalid_names_regex) +
-            _oneof(
-                _group('#' + _named(name + "_claim_id", "[0-9a-f]{1,40}")),
-                _group(':' + _named(name + "_sequence", '[1-9][0-9]*')),
-                _group(r'\$' + _named(name + "_amount_order", '[1-9][0-9]*'))
-            ) + '?'
-        )
-
    def _claim(name, prefix=""):
        return _group(
            _named(name+"_name", prefix + invalid_names_regex) +
            _oneof(
-                _group(':' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
+                _group('[:#]' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
                _group(r'\*' + _named(name+"_sequence", '[1-9][0-9]*')),
                _group(r'\$' + _named(name+"_amount_order", '[1-9][0-9]*'))
            ) + '?'
        )

-    claim = _claim if not legacy else _legacy_claim
-
    return (
        '^' +
        _named("scheme", "lbry://") + '?' +
        _oneof(
-            _group(claim("channel_with_stream", "@") + "/" + claim("stream_in_channel")),
-            claim("channel", "@"),
-            claim("stream")
+            _group(_claim("channel_with_stream", "@") + "/" + _claim("stream_in_channel")),
+            _claim("channel", "@"),
+            _claim("stream")
        ) +
        '$'
    )


 URL_REGEX = _create_url_regex()
-URL_REGEX_LEGACY = _create_url_regex(legacy=True)


 def normalize_name(name):
@ -117,13 +104,9 @@ class URL(NamedTuple):
    def __str__(self):
        return f"lbry://{'/'.join(str(p) for p in self.parts)}"

-    @staticmethod
-    def _first_match(x: str, ptns: Iterable[Pattern[str]]):
-        return next(filter(None, (re.match(ptn, x) for ptn in ptns)), None)
-
    @classmethod
    def parse(cls, url):
-        match = URL._first_match(url, (URL_REGEX, URL_REGEX_LEGACY))
+        match = re.match(URL_REGEX, url)

        if match is None:
            raise ValueError('Invalid LBRY URL')
--- a/tests/unit/schema/test_url.py
+++ b/tests/unit/schema/test_url.py
@ -11,8 +11,10 @@ class TestURLParsing(unittest.TestCase):
    segments = 'stream', 'channel'
    fields = 'name', 'claim_id', 'sequence', 'amount_order'

-    def _assert_url(self, url_string, **kwargs):
+    def _assert_url(self, url_string, strictly=True, **kwargs):
        url = URL.parse(url_string)
+
+        if strictly:
            if url_string.startswith('lbry://'):
                self.assertEqual(url_string, str(url))
            else:
@ -55,6 +57,12 @@ class TestURLParsing(unittest.TestCase):
        url('lbry://@test*1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff')
        url('lbry://@test$1/stuff', channel_name='@test', channel_amount_order='1', stream_name='stuff')
        url(f'lbry://@test:{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff')
+        # legacy/new conversions
+        url(f'test#{claim_id}', stream_name='test', stream_claim_id=claim_id, strictly=False)
+        url('@test:1/stuff#2', channel_claim_id='1', stream_claim_id='2',
+            channel_name='@test', stream_name='stuff', strictly=False)
+        url('@test*1/stuff#2', channel_sequence='1', stream_claim_id='2',
+            channel_name='@test', stream_name='stuff', strictly=False)
        # unicode regex edges
        _url = lambda name: url(name, stream_name=name)
        _url('\uD799')