lbry-sdk/lbrynet/schema/uri.py
2019-03-25 22:54:08 -04:00

186 lines
5.8 KiB
Python

import re
# Scheme prefix emitted by URI.to_uri_string(); the parsing regex treats it as optional.
PROTOCOL = 'lbry://'
# A name that starts with this character is treated as a channel name.
CHANNEL_CHAR = '@'
# Separates the name from a claim id modifier.
CLAIM_ID_CHAR = '#'
# Separates the name from a claim sequence modifier.
CLAIM_SEQUENCE_CHAR = ':'
# Separates the name from a bid position modifier.
BID_POSITION_CHAR = '$'
# Separates the channel name (plus optional modifier) from the path.
PATH_CHAR = '/'
# NOTE(review): not referenced by the visible code in this module; '?' is only
# excluded from names via the literal character class in get_schema_regex().
QUERY_CHAR = '?'
# Maximum number of hex digits the parsing regex accepts for a claim id.
CLAIM_ID_MAX_LENGTH = 40
# Minimum number of name characters required after CHANNEL_CHAR.
CHANNEL_NAME_MIN_LENGTH = 1
class URIParseError(Exception):
    """Raised when a string cannot be parsed as a valid LBRY URI."""
class URI(object):
    """
    Structured form of an LBRY URI.

    A URI consists of a name (a channel name when it starts with
    CHANNEL_CHAR), at most one modifier (claim_sequence, bid_position or
    claim_id) and, for channel names only, a path.
    """

    __slots__ = ['name', 'claim_sequence', 'bid_position', 'claim_id', 'path']

    def __init__(self, name, claim_sequence=None, bid_position=None, claim_id=None, path=None):
        """
        :param name: content or channel name
        :param claim_sequence: optional claim sequence modifier
        :param bid_position: optional bid position modifier
        :param claim_id: optional claim id modifier
        :param path: optional path, only allowed when name is a channel name
        :raises ValueError: if more than one modifier is given, or if a path
            is given for a name that is not a channel name
        """
        modifiers = (claim_sequence, bid_position, claim_id)
        # Count by "is not None" so that falsy values such as 0 still count.
        supplied = sum(1 for modifier in modifiers if modifier is not None)
        if supplied > 1:
            raise ValueError(
                "Only one of these may be present at a time: claim_sequence, bid_position, claim_id"
            )

        self.name = name
        self.claim_sequence = claim_sequence
        self.bid_position = bid_position
        self.claim_id = claim_id
        self.path = path

        # The channel check reads self.name, so it has to run after assignment.
        if self.path is not None and not self.contains_channel:
            raise ValueError("Content claims cannot have paths")

    def __str__(self):
        """Return the URI string form (see to_uri_string)."""
        return self.to_uri_string()

    def __eq__(self, other):
        """
        Two URIs are equal when every slot value matches and both objects
        are of the same class.
        """
        differs = any(
            not hasattr(other, field) or getattr(self, field) != getattr(other, field)
            for field in self.__slots__
        )
        if differs:
            return False
        return self.__class__ == other.__class__

    @property
    def channel_name(self):
        """The name when it is a channel name, otherwise None."""
        if self.contains_channel:
            return self.name
        return None

    @property
    def claim_name(self):
        """The path for channel URIs, otherwise the name itself."""
        if self.contains_channel:
            return self.path
        return self.name

    @property
    def contains_channel(self):
        """True when the name starts with CHANNEL_CHAR."""
        return self.name.startswith(CHANNEL_CHAR)

    @property
    def is_channel(self):
        """True when the name is a channel name and no path is set."""
        if self.contains_channel:
            return not self.path
        return False

    def to_uri_string(self):
        """
        Serialize the URI as protocol + name + optional modifier + optional path.

        :return: str
        """
        pieces = [PROTOCOL, "%s" % self.name]

        # At most one modifier is written, checked in this fixed order.
        if self.claim_sequence is not None:
            pieces.append(CLAIM_SEQUENCE_CHAR)
            pieces.append("%i" % self.claim_sequence)
        elif self.bid_position is not None:
            pieces.append(BID_POSITION_CHAR)
            pieces.append("%i" % self.bid_position)
        elif self.claim_id is not None:
            pieces.append(CLAIM_ID_CHAR)
            pieces.append("%s" % self.claim_id)

        if self.path is not None:
            pieces.append(PATH_CHAR)
            pieces.append("%s" % self.path)

        return "".join(pieces)

    def to_dict(self):
        """
        Return the URI components as a dict accepted by from_dict.

        :return: dict
        """
        keys = ("name", 'claim_sequence', 'bid_position', 'claim_id', 'path')
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_uri_string(cls, uri_string):
        """
        Build a URI from its string form.

        :param uri_string: format - lbry://name:n$rank#id/path
                            optional modifiers:
                            claim_sequence (int): the nth claim to the name
                            bid_position (int): the bid queue position of the claim for the name
                            claim_id (str): the claim id for the claim
                            path (str): claim within a channel
        :return: URI
        :raises URIParseError: if the string does not match the URI grammar,
            or if a content name is followed by a path
        """
        parsed = re.match(get_schema_regex(), uri_string)
        if parsed is None:
            raise URIParseError('Invalid URI')

        # The regex itself allows a path after any name; reject it for content names here.
        if parsed.group('content_name') and parsed.group('path'):
            raise URIParseError('Only channels may have paths')

        raw_sequence = parsed.group("claim_sequence")
        raw_position = parsed.group("bid_position")

        return cls(
            name=parsed.group("content_or_channel_name"),
            claim_sequence=None if raw_sequence is None else int(raw_sequence),
            bid_position=None if raw_position is None else int(raw_position),
            claim_id=parsed.group("claim_id"),
            path=parsed.group("path"),
        )

    @classmethod
    def from_dict(cls, uri_dict):
        """
        Build a URI from a dict of constructor keyword arguments.

        :param uri_dict: dict with the keys produced by to_dict
        :return: URI
        """
        return cls(**uri_dict)
def get_schema_regex():
    """
    Build the regular expression pattern used to parse LBRY URIs.

    The pattern is anchored at both ends and matches, in order: an optional
    protocol prefix, a content or channel name, an optional modifier
    (claim id, bid position or claim sequence) and an optional path.

    Named groups exposed by the pattern: uri, protocol,
    content_or_channel_name, content_name, channel_name, modifier, claim_id,
    bid_position, claim_sequence and path.

    :return: str - the regex pattern (not compiled)
    """
    def _named(name, regex):
        # Wrap regex in a named capturing group.
        return "(?P<" + name + ">" + regex + ")"

    def _group(regex):
        # Wrap regex in a non-capturing group.
        return "(?:" + regex + ")"

    # TODO: regex should include the fact that content names cannot have paths
    #       right now this is only enforced in code, not in the regex

    # Escape constants
    claim_id_char = re.escape(CLAIM_ID_CHAR)
    claim_sequence_char = re.escape(CLAIM_SEQUENCE_CHAR)
    bid_position_char = re.escape(BID_POSITION_CHAR)
    channel_char = re.escape(CHANNEL_CHAR)
    path_char = re.escape(PATH_CHAR)
    protocol = _named("protocol", re.escape(PROTOCOL))

    # Define basic building blocks
    valid_name_char = "[^=&#:$@%?/]"  # from the grammar section of https://spec.lbry.io/
    name_content = valid_name_char + '+'
    name_min_channel_length = valid_name_char + '{' + str(CHANNEL_NAME_MIN_LENGTH) + ',}'
    # No zero and no leading zeros.
    positive_number = "[1-9][0-9]*"
    # Raw string: '\-' is not a valid string-literal escape, so a plain literal
    # raises a DeprecationWarning/SyntaxWarning. The pattern text is unchanged.
    number = r'\-?' + positive_number

    # Define URI components
    content_name = _named("content_name", name_content)
    channel_name = _named("channel_name", channel_char + name_min_channel_length)
    content_or_channel_name = _named("content_or_channel_name", content_name + "|" + channel_name)

    claim_id_piece = _named("claim_id", "[0-9a-f]{1," + str(CLAIM_ID_MAX_LENGTH) + "}")
    claim_id = _group(claim_id_char + claim_id_piece)

    bid_position_piece = _named("bid_position", number)
    bid_position = _group(bid_position_char + bid_position_piece)

    claim_sequence_piece = _named("claim_sequence", number)
    claim_sequence = _group(claim_sequence_char + claim_sequence_piece)

    # Exactly one of the three modifier forms may match.
    modifier = _named("modifier", claim_id + "|" + bid_position + "|" + claim_sequence)

    path_piece = _named("path", name_content)
    path = _group(path_char + path_piece)

    # Combine components
    uri = _named("uri", (
        '^' +
        protocol + '?' +
        content_or_channel_name +
        modifier + '?' +
        path + '?' +
        '$'
    ))

    return uri
def parse_lbry_uri(lbry_uri):
    """
    Parse an LBRY URI string; convenience wrapper around URI.from_uri_string.

    :param lbry_uri: URI string to parse
    :return: URI
    """
    parsed_uri = URI.from_uri_string(lbry_uri)
    return parsed_uri