"""Extension -> (media type, stream type) lookup used to classify files."""
import os

# Each value is a ``(media_type, stream_type)`` pair.  ``stream_type`` is one
# of: video, audio, image, document, binary, model.
#
# NOTE: for extensions with no obvious stream type, document / model / binary
# was picked as a best-effort choice; these may be adjusted later.
types_map = {
    # http://www.iana.org/assignments/media-types
    # Type mapping for automated metadata extraction (video, audio, image, document, binary, model)
    '.a': ('application/octet-stream', 'binary'),
    '.ai': ('application/postscript', 'image'),
    '.aif': ('audio/x-aiff', 'audio'),
    '.aifc': ('audio/x-aiff', 'audio'),
    '.aiff': ('audio/x-aiff', 'audio'),
    '.au': ('audio/basic', 'audio'),
    '.avi': ('video/x-msvideo', 'video'),
    '.bat': ('text/plain', 'document'),
    '.bcpio': ('application/x-bcpio', 'binary'),
    '.bin': ('application/octet-stream', 'binary'),
    '.bmp': ('image/bmp', 'image'),
    '.c': ('text/plain', 'document'),
    '.cdf': ('application/x-netcdf', 'binary'),
    '.cpio': ('application/x-cpio', 'binary'),
    '.csh': ('application/x-csh', 'binary'),
    '.css': ('text/css', 'document'),
    '.csv': ('text/csv', 'document'),
    '.dll': ('application/octet-stream', 'binary'),
    '.doc': ('application/msword', 'document'),
    '.dot': ('application/msword', 'document'),
    '.dvi': ('application/x-dvi', 'binary'),
    '.eml': ('message/rfc822', 'document'),
    '.eps': ('application/postscript', 'document'),
    '.epub': ('application/epub+zip', 'document'),
    '.etx': ('text/x-setext', 'document'),
    '.exe': ('application/octet-stream', 'binary'),
    '.gif': ('image/gif', 'image'),
    '.gtar': ('application/x-gtar', 'binary'),
    '.h': ('text/plain', 'document'),
    '.hdf': ('application/x-hdf', 'binary'),
    '.htm': ('text/html', 'document'),
    '.html': ('text/html', 'document'),
    '.ico': ('image/vnd.microsoft.icon', 'image'),
    '.ief': ('image/ief', 'image'),
    '.iges': ('model/iges', 'model'),
    '.jpe': ('image/jpeg', 'image'),
    '.jpeg': ('image/jpeg', 'image'),
    '.jpg': ('image/jpeg', 'image'),
    '.js': ('application/javascript', 'document'),
    '.json': ('application/json', 'document'),
    '.ksh': ('text/plain', 'document'),
    '.latex': ('application/x-latex', 'binary'),
    '.m1v': ('video/mpeg', 'video'),
    '.m3u': ('application/vnd.apple.mpegurl', 'audio'),
    '.m3u8': ('application/vnd.apple.mpegurl', 'audio'),
    '.man': ('application/x-troff-man', 'document'),
    '.markdown': ('text/markdown', 'document'),
    '.md': ('text/markdown', 'document'),
    '.me': ('application/x-troff-me', 'binary'),
    '.mht': ('message/rfc822', 'document'),
    '.mhtml': ('message/rfc822', 'document'),
    '.mif': ('application/x-mif', 'binary'),
    '.mov': ('video/quicktime', 'video'),
    '.movie': ('video/x-sgi-movie', 'video'),
    '.mp2': ('audio/mpeg', 'audio'),
    '.mp3': ('audio/mpeg', 'audio'),
    '.mp4': ('video/mp4', 'video'),
    '.mpa': ('video/mpeg', 'video'),
    '.mpe': ('video/mpeg', 'video'),
    '.mpeg': ('video/mpeg', 'video'),
    '.mpg': ('video/mpeg', 'video'),
    '.ms': ('application/x-troff-ms', 'binary'),
    '.nc': ('application/x-netcdf', 'binary'),
    '.nws': ('message/rfc822', 'document'),
    '.o': ('application/octet-stream', 'binary'),
    '.obj': ('application/octet-stream', 'model'),
    '.oda': ('application/oda', 'binary'),
    '.p12': ('application/x-pkcs12', 'binary'),
    '.p7c': ('application/pkcs7-mime', 'binary'),
    '.pbm': ('image/x-portable-bitmap', 'image'),
    '.pdf': ('application/pdf', 'document'),
    '.pfx': ('application/x-pkcs12', 'binary'),
    '.pgm': ('image/x-portable-graymap', 'image'),
    '.pl': ('text/plain', 'document'),
    '.png': ('image/png', 'image'),
    '.pnm': ('image/x-portable-anymap', 'image'),
    '.pot': ('application/vnd.ms-powerpoint', 'document'),
    '.ppa': ('application/vnd.ms-powerpoint', 'document'),
    '.ppm': ('image/x-portable-pixmap', 'image'),
    '.pps': ('application/vnd.ms-powerpoint', 'document'),
    '.ppt': ('application/vnd.ms-powerpoint', 'document'),
    '.ps': ('application/postscript', 'document'),
    '.pwz': ('application/vnd.ms-powerpoint', 'document'),
    '.py': ('text/x-python', 'document'),
    '.pyc': ('application/x-python-code', 'binary'),
    '.pyo': ('application/x-python-code', 'binary'),
    '.qt': ('video/quicktime', 'video'),
    '.ra': ('audio/x-pn-realaudio', 'audio'),
    '.ram': ('application/x-pn-realaudio', 'audio'),
    '.ras': ('image/x-cmu-raster', 'image'),
    '.rdf': ('application/xml', 'binary'),
    '.rgb': ('image/x-rgb', 'image'),
    '.roff': ('application/x-troff', 'binary'),
    '.rtx': ('text/richtext', 'document'),
    '.sgm': ('text/x-sgml', 'document'),
    '.sgml': ('text/x-sgml', 'document'),
    '.sh': ('application/x-sh', 'document'),
    '.shar': ('application/x-shar', 'binary'),
    '.snd': ('audio/basic', 'audio'),
    '.so': ('application/octet-stream', 'binary'),
    '.src': ('application/x-wais-source', 'binary'),
    '.stl': ('model/stl', 'model'),
    '.sv4cpio': ('application/x-sv4cpio', 'binary'),
    '.sv4crc': ('application/x-sv4crc', 'binary'),
    '.svg': ('image/svg+xml', 'image'),
    '.swf': ('application/x-shockwave-flash', 'binary'),
    '.t': ('application/x-troff', 'binary'),
    '.tar': ('application/x-tar', 'binary'),
    '.tcl': ('application/x-tcl', 'binary'),
    '.tex': ('application/x-tex', 'binary'),
    '.texi': ('application/x-texinfo', 'binary'),
    '.texinfo': ('application/x-texinfo', 'binary'),
    '.tif': ('image/tiff', 'image'),
    '.tiff': ('image/tiff', 'image'),
    '.tr': ('application/x-troff', 'binary'),
    '.tsv': ('text/tab-separated-values', 'document'),
    '.txt': ('text/plain', 'document'),
    '.ustar': ('application/x-ustar', 'binary'),
    '.vcf': ('text/x-vcard', 'document'),
    '.wav': ('audio/x-wav', 'audio'),
    '.webm': ('video/webm', 'video'),
    '.wiz': ('application/msword', 'document'),
    '.wsdl': ('application/xml', 'document'),
    '.xbm': ('image/x-xbitmap', 'image'),
    '.xlb': ('application/vnd.ms-excel', 'document'),
    '.xls': ('application/vnd.ms-excel', 'document'),
    '.xml': ('text/xml', 'document'),
    '.xpdl': ('application/xml', 'document'),
    '.xpm': ('image/x-xpixmap', 'image'),
    '.xsl': ('application/xml', 'document'),
    '.xwd': ('image/x-xwindowdump', 'image'),
    '.zip': ('application/zip', 'binary'),

    # These are non-standard types, commonly found in the wild.
    # .cbr gets its own media type, distinct from the ``+zip`` type used
    # for .cbz, rather than sharing one entry value.
    '.cbr': ('application/vnd.comicbook-rar', 'document'),
    '.cbz': ('application/vnd.comicbook+zip', 'document'),
    '.lbry': ('application/x-ext-lbry', 'document'),
    '.mid': ('audio/midi', 'audio'),
    '.midi': ('audio/midi', 'audio'),
    '.mobi': ('application/x-mobipocket-ebook', 'document'),
    '.pct': ('image/pict', 'image'),
    '.pic': ('image/pict', 'image'),
    '.pict': ('image/pict', 'image'),
    '.prc': ('application/x-mobipocket-ebook', 'document'),
    '.rtf': ('application/rtf', 'document'),
    '.xul': ('text/xul', 'document'),
    '.m4v': ('video/m4v', 'video'),

    # microsoft is special and has its own 'standard'
    # https://docs.microsoft.com/en-us/windows/desktop/wmp/file-name-extensions
    '.wmv': ('video/x-ms-wmv', 'video'),
}


def guess_media_type(path):
    """Return the media type string guessed from the extension of *path*.

    The extension is taken with ``os.path.splitext``, stripped of
    surrounding whitespace and lower-cased before the ``types_map`` lookup.

    Returns:
        * the first element of the matching ``types_map`` entry, if any;
        * ``'application/octet-stream'`` when there is no extension, or the
          extension is only a dot (optionally followed by whitespace);
        * ``'application/x-ext-<name>'`` for any other unknown extension,
          where ``<name>`` is the stripped, lower-cased text after the dot.
    """
    _, ext = os.path.splitext(path)
    extension = ext.strip().lower()
    # Text after the leading dot.  Empty for '' and for a bare '.', both of
    # which must fall back to octet-stream instead of 'application/x-ext-'.
    name = extension[1:].strip()
    if not name:
        return 'application/octet-stream'
    try:
        return types_map[extension][0]
    except KeyError:
        # Use the stripped name so '. txt' yields 'x-ext-txt', not 'x-ext- txt'.
        return f'application/x-ext-{name}'