Match the app's URL name regex in our URL parser

This commit is contained in:
Victor Shyba 2019-09-19 17:07:20 -03:00 committed by Lex Berezhny
parent 34d19c4a72
commit 965972a719
2 changed files with 36 additions and 2 deletions

View file

@ -4,7 +4,10 @@ from typing import NamedTuple, Tuple
def _create_url_regex(): def _create_url_regex():
# see https://spec.lbry.com/ # see https://spec.lbry.com/ and test_url.py
invalid_names_regex = \
r"[^=&#:$@%?;\"/\\<>%{}|^~`\[\]" \
r"\u0000-\u0008\u000b-\u000c\u000e-\u001F\uD800-\uDFFF\uFFFE-\uFFFF]+"
def _named(name, regex): def _named(name, regex):
return "(?P<" + name + ">" + regex + ")" return "(?P<" + name + ">" + regex + ")"
@ -17,7 +20,7 @@ def _create_url_regex():
def _claim(name, prefix=""): def _claim(name, prefix=""):
return _group( return _group(
_named(name+"_name", prefix + "[^=&#:$@%?/]+") + _named(name+"_name", prefix + invalid_names_regex) +
_oneof( _oneof(
_group('#' + _named(name+"_claim_id", "[0-9a-f]{1,40}")), _group('#' + _named(name+"_claim_id", "[0-9a-f]{1,40}")),
_group(':' + _named(name+"_sequence", '[1-9][0-9]*')), _group(':' + _named(name+"_sequence", '[1-9][0-9]*')),

View file

@ -55,10 +55,41 @@ class TestURLParsing(unittest.TestCase):
url('lbry://@test:1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff') url('lbry://@test:1/stuff', channel_name='@test', channel_sequence='1', stream_name='stuff')
url('lbry://@test$1/stuff', channel_name='@test', channel_amount_order='1', stream_name='stuff') url('lbry://@test$1/stuff', channel_name='@test', channel_amount_order='1', stream_name='stuff')
url(f'lbry://@test#{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff') url(f'lbry://@test#{claim_id}/stuff', channel_name='@test', channel_claim_id=claim_id, stream_name='stuff')
# unicode regex edges
_url = lambda name: url(name, stream_name=name)
_url('\u0009')
_url('\u000a')
_url('\u000d')
_url('\u0020')
_url('\uD799')
_url('\uE000')
_url('\uFFFD')
def test_parser_invalid_urls(self): def test_parser_invalid_urls(self):
fail = self._fail_url fail = self._fail_url
fail("lbry://") fail("lbry://")
fail("lbry://\u0000")
fail("lbry://\u0008")
fail("lbry://\u000b")
fail("lbry://\u000c")
fail("lbry://\u000e")
fail("lbry://\u001f")
fail("lbry://\uD800")
fail("lbry://\uDFFF")
fail("lbry://\uDFFE")
fail("lbry://\uFFFF")
fail("lbry://;")
fail("lbry://\"")
fail("lbry://\\")
fail("lbry:///")
fail("lbry://<") and fail("lbry://>")
fail("lbry://{") and fail("lbry://}")
fail("lbry://[") and fail("lbry://]")
fail("lbry://%")
fail("lbry://|")
fail("lbry://^")
fail("lbry://~")
fail("lbry://`")
fail("lbry://test:3$1") fail("lbry://test:3$1")
fail("lbry://test$1:1") fail("lbry://test$1:1")
fail("lbry://test#x") fail("lbry://test#x")