stream type mapping

Mapping extensions to stream types. I went with document / model / binary for the ones we were missing. This can be adjusted later on if not correct, but at least we'll have the main ones. Added extensions for lbry, cbz, cbr also.
This commit is contained in:
Thomas Zarebczan 2019-04-01 12:47:07 -04:00 committed by Lex Berezhny
parent 6356818557
commit eb2cd8fec1

View file

@ -1,160 +1,167 @@
import os import os
# Maps a lowercase file extension (including the leading dot) to a
# (media type, stream type) tuple.  The stream type is one of:
# video, audio, image, document, binary, model.
types_map = {
    # http://www.iana.org/assignments/media-types
    # Type mapping for automated metadata extraction (video, audio, image, document, binary, model)
    '.a': ('application/octet-stream', 'binary'),
    '.ai': ('application/postscript', 'image'),
    '.aif': ('audio/x-aiff', 'audio'),
    '.aifc': ('audio/x-aiff', 'audio'),
    '.aiff': ('audio/x-aiff', 'audio'),
    '.au': ('audio/basic', 'audio'),
    '.avi': ('video/x-msvideo', 'video'),
    '.bat': ('text/plain', 'document'),
    '.bcpio': ('application/x-bcpio', 'binary'),
    '.bin': ('application/octet-stream', 'binary'),
    '.bmp': ('image/bmp', 'image'),
    '.c': ('text/plain', 'document'),
    '.cdf': ('application/x-netcdf', 'binary'),
    '.cpio': ('application/x-cpio', 'binary'),
    '.csh': ('application/x-csh', 'binary'),
    '.css': ('text/css', 'document'),
    '.csv': ('text/csv', 'document'),
    '.dll': ('application/octet-stream', 'binary'),
    '.doc': ('application/msword', 'document'),
    '.dot': ('application/msword', 'document'),
    '.dvi': ('application/x-dvi', 'binary'),
    '.eml': ('message/rfc822', 'document'),
    '.eps': ('application/postscript', 'document'),
    '.epub': ('application/epub+zip', 'document'),
    '.etx': ('text/x-setext', 'document'),
    '.exe': ('application/octet-stream', 'binary'),
    '.gif': ('image/gif', 'image'),
    '.gtar': ('application/x-gtar', 'binary'),
    '.h': ('text/plain', 'document'),
    '.hdf': ('application/x-hdf', 'binary'),
    '.htm': ('text/html', 'document'),
    '.html': ('text/html', 'document'),
    '.ico': ('image/vnd.microsoft.icon', 'image'),
    '.ief': ('image/ief', 'image'),
    '.iges': ('model/iges', 'model'),
    '.jpe': ('image/jpeg', 'image'),
    '.jpeg': ('image/jpeg', 'image'),
    '.jpg': ('image/jpeg', 'image'),
    '.js': ('application/javascript', 'document'),
    '.json': ('application/json', 'document'),
    '.ksh': ('text/plain', 'document'),
    '.latex': ('application/x-latex', 'binary'),
    '.m1v': ('video/mpeg', 'video'),
    '.m3u': ('application/vnd.apple.mpegurl', 'audio'),
    '.m3u8': ('application/vnd.apple.mpegurl', 'audio'),
    '.man': ('application/x-troff-man', 'document'),
    '.markdown': ('text/markdown', 'document'),
    '.md': ('text/markdown', 'document'),
    '.me': ('application/x-troff-me', 'binary'),
    '.mht': ('message/rfc822', 'document'),
    '.mhtml': ('message/rfc822', 'document'),
    '.mif': ('application/x-mif', 'binary'),
    '.mov': ('video/quicktime', 'video'),
    '.movie': ('video/x-sgi-movie', 'video'),
    '.mp2': ('audio/mpeg', 'audio'),
    '.mp3': ('audio/mpeg', 'audio'),
    '.mp4': ('video/mp4', 'video'),
    '.mpa': ('video/mpeg', 'video'),
    '.mpe': ('video/mpeg', 'video'),
    '.mpeg': ('video/mpeg', 'video'),
    '.mpg': ('video/mpeg', 'video'),
    '.ms': ('application/x-troff-ms', 'binary'),
    '.nc': ('application/x-netcdf', 'binary'),
    '.nws': ('message/rfc822', 'document'),
    '.o': ('application/octet-stream', 'binary'),
    '.obj': ('application/octet-stream', 'model'),
    '.oda': ('application/oda', 'binary'),
    '.p12': ('application/x-pkcs12', 'binary'),
    '.p7c': ('application/pkcs7-mime', 'binary'),
    '.pbm': ('image/x-portable-bitmap', 'image'),
    '.pdf': ('application/pdf', 'document'),
    '.pfx': ('application/x-pkcs12', 'binary'),
    '.pgm': ('image/x-portable-graymap', 'image'),
    '.pl': ('text/plain', 'document'),
    '.png': ('image/png', 'image'),
    '.pnm': ('image/x-portable-anymap', 'image'),
    '.pot': ('application/vnd.ms-powerpoint', 'document'),
    '.ppa': ('application/vnd.ms-powerpoint', 'document'),
    '.ppm': ('image/x-portable-pixmap', 'image'),
    '.pps': ('application/vnd.ms-powerpoint', 'document'),
    '.ppt': ('application/vnd.ms-powerpoint', 'document'),
    '.ps': ('application/postscript', 'document'),
    '.pwz': ('application/vnd.ms-powerpoint', 'document'),
    '.py': ('text/x-python', 'document'),
    '.pyc': ('application/x-python-code', 'binary'),
    '.pyo': ('application/x-python-code', 'binary'),
    '.qt': ('video/quicktime', 'video'),
    '.ra': ('audio/x-pn-realaudio', 'audio'),
    '.ram': ('application/x-pn-realaudio', 'audio'),
    '.ras': ('image/x-cmu-raster', 'image'),
    # RDF/XML is text, so it is classed like the other application/xml entries.
    '.rdf': ('application/xml', 'document'),
    '.rgb': ('image/x-rgb', 'image'),
    '.roff': ('application/x-troff', 'binary'),
    '.rtx': ('text/richtext', 'document'),
    '.sgm': ('text/x-sgml', 'document'),
    '.sgml': ('text/x-sgml', 'document'),
    '.sh': ('application/x-sh', 'document'),
    '.shar': ('application/x-shar', 'binary'),
    '.snd': ('audio/basic', 'audio'),
    '.so': ('application/octet-stream', 'binary'),
    '.src': ('application/x-wais-source', 'binary'),
    '.stl': ('model/stl', 'model'),
    '.sv4cpio': ('application/x-sv4cpio', 'binary'),
    '.sv4crc': ('application/x-sv4crc', 'binary'),
    '.svg': ('image/svg+xml', 'image'),
    '.swf': ('application/x-shockwave-flash', 'binary'),
    '.t': ('application/x-troff', 'binary'),
    '.tar': ('application/x-tar', 'binary'),
    '.tcl': ('application/x-tcl', 'binary'),
    '.tex': ('application/x-tex', 'binary'),
    '.texi': ('application/x-texinfo', 'binary'),
    '.texinfo': ('application/x-texinfo', 'binary'),
    '.tif': ('image/tiff', 'image'),
    '.tiff': ('image/tiff', 'image'),
    '.tr': ('application/x-troff', 'binary'),
    '.tsv': ('text/tab-separated-values', 'document'),
    '.txt': ('text/plain', 'document'),
    '.ustar': ('application/x-ustar', 'binary'),
    '.vcf': ('text/x-vcard', 'document'),
    '.wav': ('audio/x-wav', 'audio'),
    '.webm': ('video/webm', 'video'),
    '.wiz': ('application/msword', 'document'),
    '.wsdl': ('application/xml', 'document'),
    '.xbm': ('image/x-xbitmap', 'image'),
    '.xlb': ('application/vnd.ms-excel', 'document'),
    '.xls': ('application/vnd.ms-excel', 'document'),
    '.xml': ('text/xml', 'document'),
    '.xpdl': ('application/xml', 'document'),
    '.xpm': ('image/x-xpixmap', 'image'),
    '.xsl': ('application/xml', 'document'),
    '.xwd': ('image/x-xwindowdump', 'image'),
    '.zip': ('application/zip', 'binary'),

    # These are non-standard types, commonly found in the wild.
    # .cbr is a RAR-based comic archive, so it gets the -rar media type
    # rather than the +zip one used by .cbz.
    '.cbr': ('application/vnd.comicbook-rar', 'document'),
    '.cbz': ('application/vnd.comicbook+zip', 'document'),
    '.lbry': ('application/x-ext-lbry', 'document'),
    '.mid': ('audio/midi', 'audio'),
    '.midi': ('audio/midi', 'audio'),
    '.mobi': ('application/x-mobipocket-ebook', 'document'),
    '.pct': ('image/pict', 'image'),
    '.pic': ('image/pict', 'image'),
    '.pict': ('image/pict', 'image'),
    '.prc': ('application/x-mobipocket-ebook', 'document'),
    '.rtf': ('application/rtf', 'document'),
    '.xul': ('text/xul', 'document'),
    '.m4v': ('video/m4v', 'video'),

    # microsoft is special and has its own 'standard'
    # https://docs.microsoft.com/en-us/windows/desktop/wmp/file-name-extensions
    '.wmv': ('video/x-ms-wmv', 'video'),
}
def guess_media_type(path):
    """Return the media (MIME) type guessed from the extension of *path*.

    The extension is looked up case-insensitively in ``types_map``, whose
    values are ``(media type, stream type)`` tuples; only the media type is
    returned.

    Fallbacks:
      * no extension, or an extension that is only a dot and/or whitespace
        (``"foo"``, ``"foo."``, ``"foo. "``) -> ``'application/octet-stream'``
      * an extension missing from ``types_map`` ->
        ``'application/x-ext-<extension>'``, with the extension lowercased
        and stripped of the leading dot and surrounding whitespace.
    """
    _, ext = os.path.splitext(path)
    # Text after the dot; empty for "foo", "foo." and "foo. ".
    suffix = ext[1:].strip().lower()
    if not suffix:
        # Guard against a bare dot producing a malformed 'application/x-ext-'.
        return 'application/octet-stream'
    entry = types_map.get(ext.strip().lower())
    if entry is not None:
        return entry[0]
    return f'application/x-ext-{suffix}'