diff --git a/lbry/lbry/schema/url.py b/lbry/lbry/schema/url.py
index ef3ccf5e5..e6c83ac71 100644
--- a/lbry/lbry/schema/url.py
+++ b/lbry/lbry/schema/url.py
@@ -4,7 +4,12 @@ from typing import NamedTuple, Tuple
 
 
 def _create_url_regex():
-    # see https://spec.lbry.com/
+    # see https://spec.lbry.com/ and test_url.py
+    # A name is one or more characters outside this set: listed punctuation,
+    # C0 controls (tab, LF and CR stay allowed), surrogates, U+FFFE and U+FFFF.
+    invalid_names_regex = \
+        r"[^=&#:$@%?;\"/\\<>{}|^~`\[\]" \
+        r"\u0000-\u0008\u000b-\u000c\u000e-\u001F\uD800-\uDFFF\uFFFE-\uFFFF]+"
 
     def _named(name, regex):
         return "(?P<" + name + ">" + regex + ")"
@@ -17,7 +22,7 @@ def _create_url_regex():
 
     def _claim(name, prefix=""):
         return _group(
-            _named(name+"_name", prefix + "[^=&#:$@%?/]+") +
+            _named(name+"_name", prefix + invalid_names_regex) +
             _oneof(
                 _group('#' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
                 _group(':' + _named(name+"_sequence", '[1-9][0-9]*')),
diff --git a/lbry/tests/unit/schema/test_url.py b/lbry/tests/unit/schema/test_url.py
index 83cc203fd..c496f6e5a 100644
--- a/lbry/tests/unit/schema/test_url.py
+++ b/lbry/tests/unit/schema/test_url.py
@@ -55,10 +55,44 @@ class TestURLParsing(unittest.TestCase):
         url('lbry://@test:1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff')
         url('lbry://@test$1/stuff', channel_name='@test', channel_amount_order='1', stream_name='stuff')
         url(f'lbry://@test#{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff')
+        # unicode regex edges
+        _url = lambda name: url(name, stream_name=name)
+        _url('\u0009')
+        _url('\u000a')
+        _url('\u000d')
+        _url('\u0020')
+        _url('\uD799')
+        _url('\uE000')
+        _url('\uFFFD')
 
     def test_parser_invalid_urls(self):
         fail = self._fail_url
         fail("lbry://")
+        fail("lbry://\u0000")
+        fail("lbry://\u0008")
+        fail("lbry://\u000b")
+        fail("lbry://\u000c")
+        fail("lbry://\u000e")
+        fail("lbry://\u001f")
+        fail("lbry://\uD800")
+        fail("lbry://\uDFFF")
+        fail("lbry://\uDFFE")
+        fail("lbry://\uFFFF")
+        fail("lbry://;")
+        fail("lbry://\"")
+        fail("lbry://\\")
+        fail("lbry:///")
+        fail("lbry://<")
+        fail("lbry://>")
+        fail("lbry://{")
+        fail("lbry://}")
+        fail("lbry://[")
+        fail("lbry://]")
+        fail("lbry://%")
+        fail("lbry://|")
+        fail("lbry://^")
+        fail("lbry://~")
+        fail("lbry://`")
         fail("lbry://test:3$1")
         fail("lbry://test$1:1")
         fail("lbry://test#x")