lbry-sdk/lbrynet/dht/encoding.py

140 lines
4.9 KiB
Python
Raw Normal View History

2018-07-05 05:16:52 +02:00
from __future__ import print_function
from .error import DecodeError
2018-07-18 02:38:54 +02:00
import sys
# Python 2/3 compatibility shims.
# ``long`` is aliased to ``int`` where the separate ``long`` type is gone, and
# ``raw(char)`` maps a one-character string to the value obtained by indexing
# a byte string: an integer on Python 3, the character itself on Python 2.
if sys.version_info[0] < 3:
    def raw(x):
        return x
else:
    long = int
    raw = ord
2017-03-31 19:32:43 +02:00
class Encoding:
    """ Base interface for RPC message encoders/decoders

    Every encoding implementation used with this library is expected to
    subclass this interface and provide both of its methods.
    """

    def encode(self, data):
        """ Encode the specified data

        @param data: The data to encode
                     Implementations must be able to encode at least the
                     types C{str}, C{int} and C{long}. Further data types
                     may be accepted, provided the same class's C{decode()}
                     method is able to decode them again.

        @return: The encoded data
        @rtype: str
        """
        return None

    def decode(self, data):
        """ Decode the specified data string

        @param data: The data (byte string) to decode.
        @type data: str

        @return: The decoded data (in its correct type)
        """
        return None
2017-03-31 19:32:43 +02:00
2015-08-20 17:27:15 +02:00
class Bencode(Encoding):
    """ Implementation of a Bencode-based algorithm (Bencode is the encoding
    algorithm used by Bittorrent).

    @note: The decoder differs from the "official" Bencode algorithm in
           that it additionally understands floating point values
           (C{f<float>e}) and a null marker (C{n}). The encoder only emits
           integers, byte strings, lists/tuples and dicts.
    """

    def encode(self, data):
        """ Encoder implementation of the Bencode algorithm

        @param data: The data to encode
        @type data: int, long, bytes, tuple, list or dict

        @return: The encoded data
        @rtype: bytes

        @raise TypeError: if C{data}, or any item nested inside it, is of a
                          type that is not listed above
        """
        if isinstance(data, (int, long)):
            return b'i%de' % data
        elif isinstance(data, bytes):
            return b'%d:%s' % (len(data), data)
        elif isinstance(data, (list, tuple)):
            # join once instead of growing a byte string item by item
            return b'l%se' % b''.join([self.encode(item) for item in data])
        elif isinstance(data, dict):
            encodedItems = []
            # Keys are emitted in sorted order so the output is deterministic
            for key in sorted(data):
                encodedItems.append(self.encode(key))  # TODO: keys should always be bytestrings
                encodedItems.append(self.encode(data[key]))
            return b'd%se' % b''.join(encodedItems)
        else:
            raise TypeError("Cannot bencode '%s' object" % type(data))

    def decode(self, data):
        """ Decoder implementation of the Bencode algorithm

        @param data: The encoded data
        @type data: bytes

        @note: This is a convenience wrapper for the recursive decoding
               algorithm, C{_decodeRecursive}

        @return: The decoded data, as a native Python type
        @rtype: int, float, list, dict, bytes or None

        @raise DecodeError: if C{data} is empty or cannot be parsed
        """
        assert isinstance(data, bytes)  # fixme: _maybe_ remove this after porting
        if len(data) == 0:
            raise DecodeError('Cannot decode empty string')
        try:
            return self._decodeRecursive(data)[0]
        except ValueError as e:
            # str(e) rather than e.message: exceptions have no ``message``
            # attribute on Python 3, which turned every malformed number
            # into an AttributeError instead of a DecodeError.
            raise DecodeError(str(e))

    @staticmethod
    def _decodeRecursive(data, startIndex=0):
        """ Actual implementation of the recursive Bencode algorithm

        Do not call this; use C{decode()} instead

        @param data: The complete encoded byte string
        @param startIndex: Offset in C{data} at which the value to decode
                           begins

        @return: A tuple of the decoded value and the offset of the first
                 byte following it
        @rtype: tuple

        @raise ValueError: if an integer or float is unterminated or is not
                           a valid number
        @raise DecodeError: if a byte string has a missing, non-numeric,
                            negative or too large length prefix
        """
        marker = data[startIndex]
        if marker == raw('i'):
            # Search from startIndex instead of slicing: no copy, and a
            # missing terminator is reported as -1 rather than being folded
            # into a bogus (possibly negative) slice bound.
            endPos = data.find(b'e', startIndex)
            if endPos == -1:
                raise ValueError('Unterminated integer at offset %d' % startIndex)
            return int(data[startIndex + 1:endPos]), endPos + 1
        elif marker == raw('l'):
            startIndex += 1
            decodedList = []
            while data[startIndex] != raw('e'):
                item, startIndex = Bencode._decodeRecursive(data, startIndex)
                decodedList.append(item)
            return decodedList, startIndex + 1
        elif marker == raw('d'):
            startIndex += 1
            decodedDict = {}
            while data[startIndex] != raw('e'):
                key, startIndex = Bencode._decodeRecursive(data, startIndex)
                value, startIndex = Bencode._decodeRecursive(data, startIndex)
                decodedDict[key] = value
            # Step past the closing 'e'. Without this an enclosing list or
            # dict mistakes it for its own terminator and silently drops
            # every item that follows the nested dict.
            return decodedDict, startIndex + 1
        elif marker == raw('f'):
            # This (float data type) is a non-standard extension to the original Bencode algorithm
            endPos = data.find(b'e', startIndex)
            if endPos == -1:
                raise ValueError('Unterminated float at offset %d' % startIndex)
            return float(data[startIndex + 1:endPos]), endPos + 1
        elif marker == raw('n'):
            # This (None/NULL data type) is a non-standard extension
            # to the original Bencode algorithm
            return None, startIndex + 1
        else:
            # Anything else is treated as a length-prefixed byte string
            splitPos = data.find(b':', startIndex)
            if splitPos == -1:
                raise DecodeError('Missing length separator at offset %d' % startIndex)
            try:
                length = int(data[startIndex:splitPos])
            except ValueError:
                raise DecodeError()
            startIndex = splitPos + 1
            endPos = startIndex + length
            # A negative length would move the cursor backwards (an endless
            # loop inside a list/dict); an oversized one would silently
            # return truncated data.
            if length < 0 or endPos > len(data):
                raise DecodeError('Invalid byte string length %d' % length)
            return data[startIndex:endPos], endPos