# lbry-sdk/lbrynet/schema/uri.py

import re

# Scheme prefix; optional when parsing, always emitted by URI.to_uri_string().
PROTOCOL = 'lbry://'
# Leading character that marks a name as a channel name.
CHANNEL_CHAR = '@'
# Separators introducing the three mutually exclusive claim modifiers.
CLAIM_ID_CHAR = '#'
CLAIM_SEQUENCE_CHAR = ':'
BID_POSITION_CHAR = '$'
# Separator between a channel name and the path of a claim inside it.
PATH_CHAR = '/'
# NOTE(review): not referenced anywhere in the visible part of this file.
QUERY_CHAR = '?'

# Maximum number of hex digits accepted for a claim id by the URI regex.
CLAIM_ID_MAX_LENGTH = 40
# Minimum number of name characters required after CHANNEL_CHAR.
CHANNEL_NAME_MIN_LENGTH = 1


class URIParseError(Exception):
    """Raised when a string cannot be parsed as a LBRY URI."""


class URI(object):
    """
    A parsed LBRY uri

    Holds a content or channel ``name``, at most one modifier
    (``claim_sequence``, ``bid_position`` or ``claim_id``) and, for channel
    names only, an optional ``path``.
    """

    __slots__ = ['name', 'claim_sequence', 'bid_position', 'claim_id', 'path']

    def __init__(self, name, claim_sequence=None, bid_position=None, claim_id=None, path=None):
        """
        Stores the uri components after validating their combination

        :raises ValueError: if more than one modifier is given, or if a path
                            is given for a name that is not a channel name
        """
        modifiers_given = sum(
            1 for modifier in (claim_sequence, bid_position, claim_id) if modifier is not None
        )
        if modifiers_given > 1:
            raise ValueError(
                "Only one of these may be present at a time: claim_sequence, bid_position, claim_id"
            )

        self.name = name
        self.claim_sequence = claim_sequence
        self.bid_position = bid_position
        self.claim_id = claim_id
        self.path = path

        # The name must already be assigned here: contains_channel reads it.
        has_path = self.path is not None
        if has_path and not self.contains_channel:
            raise ValueError("Content claims cannot have paths")

    def __str__(self):
        """Same text as to_uri_string()"""
        return self.to_uri_string()

    def __eq__(self, other):
        """
        Equal when every slot is present on `other` with an equal value and
        both objects have the same class
        """
        differs = any(
            not hasattr(other, slot) or getattr(self, slot) != getattr(other, slot)
            for slot in self.__slots__
        )
        if differs:
            return False
        return self.__class__ == other.__class__

    @property
    def channel_name(self):
        """The name when it is a channel name, otherwise None"""
        if self.contains_channel:
            return self.name
        return None

    @property
    def claim_name(self):
        """The path for a channel uri, otherwise the name itself"""
        if self.contains_channel:
            return self.path
        return self.name

    @property
    def contains_channel(self):
        """True when the name starts with the channel character"""
        return self.name.startswith(CHANNEL_CHAR)

    @property
    def is_channel(self):
        """True when the name is a channel name and the path is empty or None"""
        if not self.contains_channel:
            return False
        return not self.path

    def to_uri_string(self):
        """
        Builds the textual uri, always including the protocol prefix

        :return: str
        """
        pieces = [PROTOCOL, "%s" % self.name]

        # At most one modifier is written, checked in this fixed order.
        if self.claim_sequence is not None:
            pieces.append(CLAIM_SEQUENCE_CHAR + "%i" % self.claim_sequence)
        elif self.bid_position is not None:
            pieces.append(BID_POSITION_CHAR + "%i" % self.bid_position)
        elif self.claim_id is not None:
            pieces.append(CLAIM_ID_CHAR + "%s" % self.claim_id)

        if self.path is not None:
            pieces.append(PATH_CHAR + "%s" % self.path)

        return "".join(pieces)

    def to_dict(self):
        """
        Returns the uri components as a dict usable with from_dict()

        :return: dict
        """
        return dict(
            name=self.name,
            claim_sequence=self.claim_sequence,
            bid_position=self.bid_position,
            claim_id=self.claim_id,
            path=self.path,
        )

    @classmethod
    def from_uri_string(cls, uri_string):
        """
        Parses LBRY uri into its components

        :param uri_string: format - lbry://name:n$rank#id/path
                           optional modifiers:
                           claim_sequence (int): the nth claim to the name
                           bid_position (int): the bid queue position of the claim for the name
                           claim_id (str): the claim id for the claim
                           path (str): claim within a channel
        :return: URI
        :raises URIParseError: if the string does not match the uri regex, or
                               if a content (non-channel) name has a path
        """
        parsed = re.match(get_schema_regex(), uri_string)
        if parsed is None:
            raise URIParseError('Invalid URI')

        # Groups that did not take part in the match are None.
        groups = parsed.groupdict()
        if groups['content_name'] and groups['path']:
            raise URIParseError('Only channels may have paths')

        sequence_text = groups["claim_sequence"]
        position_text = groups["bid_position"]
        return cls(
            name=groups["content_or_channel_name"],
            claim_sequence=None if sequence_text is None else int(sequence_text),
            bid_position=None if position_text is None else int(position_text),
            claim_id=groups["claim_id"],
            path=groups["path"]
        )

    @classmethod
    def from_dict(cls, uri_dict):
        """
        Creates URI from dict

        :param uri_dict: mapping of constructor keyword arguments
        :return: URI
        """
        return cls(**uri_dict)


def get_schema_regex():
    """
    Builds the regular expression used to parse LBRY uris

    The pattern is anchored at both ends and consists of an optional
    protocol prefix, a content or channel name, an optional modifier
    (claim id, bid position or claim sequence) and an optional path.

    Named groups: uri, protocol, content_or_channel_name, content_name,
    channel_name, modifier, claim_id, bid_position, claim_sequence, path.

    :return: the pattern as a str (not compiled)
    """
    def _named(name, regex):
        # Wrap `regex` in a named capturing group.
        return "(?P<" + name + ">" + regex + ")"

    def _group(regex):
        # Wrap `regex` in a non-capturing group.
        return "(?:" + regex + ")"

    # TODO: regex should include the fact that content names cannot have paths
    #       right now this is only enforced in code, not in the regex

    # Escape constants
    claim_id_char = re.escape(CLAIM_ID_CHAR)
    claim_sequence_char = re.escape(CLAIM_SEQUENCE_CHAR)
    bid_position_char = re.escape(BID_POSITION_CHAR)
    channel_char = re.escape(CHANNEL_CHAR)
    path_char = re.escape(PATH_CHAR)
    protocol = _named("protocol", re.escape(PROTOCOL))

    # Define basic building blocks
    valid_name_char = "[^=&#:$@%?/]"  # from the grammar section of https://spec.lbry.io/
    name_content = valid_name_char + '+'
    name_min_channel_length = valid_name_char + '{' + str(CHANNEL_NAME_MIN_LENGTH) + ',}'

    positive_number = "[1-9][0-9]*"
    # Raw string: backslash-hyphen is not a valid Python string escape, so a
    # plain literal triggers an invalid-escape warning. The resulting pattern
    # text is unchanged.
    number = r'\-?' + positive_number

    # Define URI components
    content_name = _named("content_name", name_content)
    channel_name = _named("channel_name", channel_char + name_min_channel_length)
    content_or_channel_name = _named("content_or_channel_name", content_name + "|" + channel_name)

    claim_id_piece = _named("claim_id", "[0-9a-f]{1," + str(CLAIM_ID_MAX_LENGTH) + "}")
    claim_id = _group(claim_id_char + claim_id_piece)

    bid_position_piece = _named("bid_position", number)
    bid_position = _group(bid_position_char + bid_position_piece)

    claim_sequence_piece = _named("claim_sequence", number)
    claim_sequence = _group(claim_sequence_char + claim_sequence_piece)

    # Only one modifier can match: the three forms are alternatives.
    modifier = _named("modifier", claim_id + "|" + bid_position + "|" + claim_sequence)

    path_piece = _named("path", name_content)
    path = _group(path_char + path_piece)

    # Combine components
    uri = _named("uri", (
        '^' +
        protocol + '?' +
        content_or_channel_name +
        modifier + '?' +
        path + '?' +
        '$'
    ))

    return uri


def parse_lbry_uri(lbry_uri):
    """
    Parses a LBRY uri string; shorthand for URI.from_uri_string

    :param lbry_uri: uri string to parse
    :return: URI
    """
    parsed_uri = URI.from_uri_string(lbry_uri)
    return parsed_uri