diff --git a/lbry/schema/mime_types.py b/lbry/schema/mime_types.py
index 95e6c08dc..5dee9ccbc 100644
--- a/lbry/schema/mime_types.py
+++ b/lbry/schema/mime_types.py
@@ -1,4 +1,6 @@
 import os
+import filetype
+import logging
 
 types_map = {
     # http://www.iana.org/assignments/media-types
@@ -166,10 +168,50 @@ types_map = {
     '.wmv': ('video/x-ms-wmv', 'video')
 }
 
+# Maps an extension detected from file contents to the file-name extensions
+# that are legitimate aliases of it, e.g. a .cbz file is really a .zip archive.
+synonyms_map = {
+    '.zip': ['.cbz'],
+    '.rar': ['.cbr'],
+    '.ar': ['.a']
+}
+
+log = logging.getLogger(__name__)
+
 
 def guess_media_type(path):
+    """Guess the media type of ``path`` from its extension and contents.
+
+    If ``path`` is a readable file whose detected format disagrees with its
+    extension (or it has none), the detected extension is used for the
+    ``types_map`` lookup, unless the two are listed as synonyms in
+    ``synonyms_map``.
+    """
     _, ext = os.path.splitext(path)
     extension = ext.strip().lower()
+
+    # try detecting the real file format if path points to a readable file
+    try:
+        kind = filetype.guess(path)
+        if kind:
+            realext = f".{kind.extension}"
+
+            # the detected extension overrides the one parsed from the file name...
+            if extension != realext:
+                if extension:
+                    log.warning(f"file extension does not match it's contents {path}, identified as {realext}")
+                else:
+                    log.debug(f"file {path} does not have extension, identified by contents as {realext}")
+
+                # ...unless the name's extension is a known synonym of it; most
+                # detected types have no synonyms, so default to an empty list
+                if extension not in synonyms_map.get(realext, []):
+                    extension = realext
+
+    except OSError:
+        # best effort: a missing or unreadable file falls back to the extension
+        pass
+
     if extension[1:]:
         if extension in types_map:
             return types_map[extension]
diff --git a/setup.py b/setup.py
index 56832e8eb..da749bfd9 100644
--- a/setup.py
+++ b/setup.py
@@ -56,7 +56,8 @@ setup(
         'attrs==18.2.0',
         'pylru==1.1.0',
         'elasticsearch==7.10.1',
-        'grpcio==1.38.0'
+        'grpcio==1.38.0',
+        'filetype==1.0.9'
     ] + PLYVEL,
     extras_require={
         'torrent': ['lbry-libtorrent'],