stream type mapping

Mapping extensions to stream types. I went with document / model / binary for the ones we were missing. This can be adjusted later on if not correct, but at least we'll have the main ones. Added extensions for lbry, cbz, cbr also.
This commit is contained in:
Thomas Zarebczan 2019-04-01 12:47:07 -04:00 committed by Lex Berezhny
parent 6356818557
commit eb2cd8fec1

View file

@ -1,160 +1,167 @@
import os
types_map = {
# http://www.iana.org/assignments/media-types
'.a': 'application/octet-stream',
'.ai': 'application/postscript',
'.aif': 'audio/x-aiff',
'.aifc': 'audio/x-aiff',
'.aiff': 'audio/x-aiff',
'.au': 'audio/basic',
'.avi': 'video/x-msvideo',
'.bat': 'text/plain',
'.bcpio': 'application/x-bcpio',
'.bin': 'application/octet-stream',
'.bmp': 'image/bmp',
'.c': 'text/plain',
'.cdf': 'application/x-netcdf',
'.cpio': 'application/x-cpio',
'.csh': 'application/x-csh',
'.css': 'text/css',
'.csv': 'text/csv',
'.dll': 'application/octet-stream',
'.doc': 'application/msword',
'.dot': 'application/msword',
'.dvi': 'application/x-dvi',
'.eml': 'message/rfc822',
'.eps': 'application/postscript',
'.epub': 'application/epub+zip',
'.etx': 'text/x-setext',
'.exe': 'application/octet-stream',
'.gif': 'image/gif',
'.gtar': 'application/x-gtar',
'.h': 'text/plain',
'.hdf': 'application/x-hdf',
'.htm': 'text/html',
'.html': 'text/html',
'.ico': 'image/vnd.microsoft.icon',
'.ief': 'image/ief',
'.iges': 'model/iges',
'.jpe': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.jpg': 'image/jpeg',
'.js': 'application/javascript',
'.json': 'application/json',
'.ksh': 'text/plain',
'.latex': 'application/x-latex',
'.m1v': 'video/mpeg',
'.m3u': 'application/vnd.apple.mpegurl',
'.m3u8': 'application/vnd.apple.mpegurl',
'.man': 'application/x-troff-man',
'.markdown': 'text/markdown',
'.md': 'text/markdown',
'.me': 'application/x-troff-me',
'.mht': 'message/rfc822',
'.mhtml': 'message/rfc822',
'.mif': 'application/x-mif',
'.mov': 'video/quicktime',
'.movie': 'video/x-sgi-movie',
'.mp2': 'audio/mpeg',
'.mp3': 'audio/mpeg',
'.mp4': 'video/mp4',
'.mpa': 'video/mpeg',
'.mpe': 'video/mpeg',
'.mpeg': 'video/mpeg',
'.mpg': 'video/mpeg',
'.ms': 'application/x-troff-ms',
'.nc': 'application/x-netcdf',
'.nws': 'message/rfc822',
'.o': 'application/octet-stream',
'.obj': 'application/octet-stream',
'.oda': 'application/oda',
'.p12': 'application/x-pkcs12',
'.p7c': 'application/pkcs7-mime',
'.pbm': 'image/x-portable-bitmap',
'.pdf': 'application/pdf',
'.pfx': 'application/x-pkcs12',
'.pgm': 'image/x-portable-graymap',
'.pl': 'text/plain',
'.png': 'image/png',
'.pnm': 'image/x-portable-anymap',
'.pot': 'application/vnd.ms-powerpoint',
'.ppa': 'application/vnd.ms-powerpoint',
'.ppm': 'image/x-portable-pixmap',
'.pps': 'application/vnd.ms-powerpoint',
'.ppt': 'application/vnd.ms-powerpoint',
'.ps': 'application/postscript',
'.pwz': 'application/vnd.ms-powerpoint',
'.py': 'text/x-python',
'.pyc': 'application/x-python-code',
'.pyo': 'application/x-python-code',
'.qt': 'video/quicktime',
'.ra': 'audio/x-pn-realaudio',
'.ram': 'application/x-pn-realaudio',
'.ras': 'image/x-cmu-raster',
'.rdf': 'application/xml',
'.rgb': 'image/x-rgb',
'.roff': 'application/x-troff',
'.rtx': 'text/richtext',
'.sgm': 'text/x-sgml',
'.sgml': 'text/x-sgml',
'.sh': 'application/x-sh',
'.shar': 'application/x-shar',
'.snd': 'audio/basic',
'.so': 'application/octet-stream',
'.src': 'application/x-wais-source',
'.stl': 'model/stl',
'.sv4cpio': 'application/x-sv4cpio',
'.sv4crc': 'application/x-sv4crc',
'.svg': 'image/svg+xml',
'.swf': 'application/x-shockwave-flash',
'.t': 'application/x-troff',
'.tar': 'application/x-tar',
'.tcl': 'application/x-tcl',
'.tex': 'application/x-tex',
'.texi': 'application/x-texinfo',
'.texinfo': 'application/x-texinfo',
'.tif': 'image/tiff',
'.tiff': 'image/tiff',
'.tr': 'application/x-troff',
'.tsv': 'text/tab-separated-values',
'.txt': 'text/plain',
'.ustar': 'application/x-ustar',
'.vcf': 'text/x-vcard',
'.wav': 'audio/x-wav',
'.webm': 'video/webm',
'.wiz': 'application/msword',
'.wsdl': 'application/xml',
'.xbm': 'image/x-xbitmap',
'.xlb': 'application/vnd.ms-excel',
'.xls': 'application/vnd.ms-excel',
'.xml': 'text/xml',
'.xpdl': 'application/xml',
'.xpm': 'image/x-xpixmap',
'.xsl': 'application/xml',
'.xwd': 'image/x-xwindowdump',
'.zip': 'application/zip',
# Type mapping for automated metadata extraction (video, audio, image, document, binary, model)
'.a': ('application/octet-stream', 'binary'),
'.ai': ('application/postscript', 'image'),
'.aif': ('audio/x-aiff', 'audio'),
'.aifc': ('audio/x-aiff', 'audio'),
'.aiff': ('audio/x-aiff', 'audio'),
'.au': ('audio/basic', 'audio'),
'.avi': ('video/x-msvideo', 'video'),
'.bat': ('text/plain', 'document'),
'.bcpio': ('application/x-bcpio', 'binary'),
'.bin': ('application/octet-stream', 'binary'),
'.bmp': ('image/bmp', 'image'),
'.c': ('text/plain', 'document'),
'.cdf': ('application/x-netcdf', 'binary'),
'.cpio': ('application/x-cpio', 'binary'),
'.csh': ('application/x-csh', 'binary'),
'.css': ('text/css', 'document'),
'.csv': ('text/csv', 'document'),
'.dll': ('application/octet-stream', 'binary'),
'.doc': ('application/msword', 'document'),
'.dot': ('application/msword', 'document'),
'.dvi': ('application/x-dvi', 'binary'),
'.eml': ('message/rfc822', 'document'),
'.eps': ('application/postscript', 'document'),
'.epub': ('application/epub+zip', 'document'),
'.etx': ('text/x-setext', 'document'),
'.exe': ('application/octet-stream', 'binary'),
'.gif': ('image/gif', 'image'),
'.gtar': ('application/x-gtar', 'binary'),
'.h': ('text/plain', 'document'),
'.hdf': ('application/x-hdf', 'binary'),
'.htm': ('text/html', 'document'),
'.html': ('text/html', 'document'),
'.ico': ('image/vnd.microsoft.icon', 'image'),
'.ief': ('image/ief', 'image'),
'.iges': ('model/iges', 'model'),
'.jpe': ('image/jpeg', 'image'),
'.jpeg': ('image/jpeg', 'image'),
'.jpg': ('image/jpeg', 'image'),
'.js': ('application/javascript', 'document'),
'.json': ('application/json', 'document'),
'.ksh': ('text/plain', 'document'),
'.latex': ('application/x-latex', 'binary'),
'.m1v': ('video/mpeg', 'video'),
'.m3u': ('application/vnd.apple.mpegurl', 'audio'),
'.m3u8': ('application/vnd.apple.mpegurl', 'audio'),
'.man': ('application/x-troff-man', 'document'),
'.markdown': ('text/markdown', 'document'),
'.md': ('text/markdown', 'document'),
'.me': ('application/x-troff-me', 'binary'),
'.mht': ('message/rfc822', 'document'),
'.mhtml': ('message/rfc822', 'document'),
'.mif': ('application/x-mif', 'binary'),
'.mov': ('video/quicktime', 'video'),
'.movie': ('video/x-sgi-movie', 'video'),
'.mp2': ('audio/mpeg', 'audio'),
'.mp3': ('audio/mpeg', 'audio'),
'.mp4': ('video/mp4', 'video'),
'.mpa': ('video/mpeg', 'video'),
'.mpe': ('video/mpeg', 'video'),
'.mpeg': ('video/mpeg', 'video'),
'.mpg': ('video/mpeg', 'video'),
'.ms': ('application/x-troff-ms', 'binary'),
'.nc': ('application/x-netcdf', 'binary'),
'.nws': ('message/rfc822', 'document'),
'.o': ('application/octet-stream', 'binary'),
'.obj': ('application/octet-stream', 'model'),
'.oda': ('application/oda', 'binary'),
'.p12': ('application/x-pkcs12', 'binary'),
'.p7c': ('application/pkcs7-mime', 'binary'),
'.pbm': ('image/x-portable-bitmap', 'image'),
'.pdf': ('application/pdf', 'document'),
'.pfx': ('application/x-pkcs12', 'binary'),
'.pgm': ('image/x-portable-graymap', 'image'),
'.pl': ('text/plain', 'document'),
'.png': ('image/png', 'image'),
'.pnm': ('image/x-portable-anymap', 'image'),
'.pot': ('application/vnd.ms-powerpoint', 'document'),
'.ppa': ('application/vnd.ms-powerpoint', 'document'),
'.ppm': ('image/x-portable-pixmap', 'image'),
'.pps': ('application/vnd.ms-powerpoint', 'document'),
'.ppt': ('application/vnd.ms-powerpoint', 'document'),
'.ps': ('application/postscript', 'document'),
'.pwz': ('application/vnd.ms-powerpoint', 'document'),
'.py': ('text/x-python', 'document'),
'.pyc': ('application/x-python-code', 'binary'),
'.pyo': ('application/x-python-code', 'binary'),
'.qt': ('video/quicktime', 'video'),
'.ra': ('audio/x-pn-realaudio', 'audio'),
'.ram': ('application/x-pn-realaudio', 'audio'),
'.ras': ('image/x-cmu-raster', 'image'),
'.rdf': ('application/xml', 'binary'),
'.rgb': ('image/x-rgb', 'image'),
'.roff': ('application/x-troff', 'binary'),
'.rtx': ('text/richtext', 'document'),
'.sgm': ('text/x-sgml', 'document'),
'.sgml': ('text/x-sgml', 'document'),
'.sh': ('application/x-sh', 'document'),
'.shar': ('application/x-shar', 'binary'),
'.snd': ('audio/basic', 'audio'),
'.so': ('application/octet-stream', 'binary'),
'.src': ('application/x-wais-source', 'binary'),
'.stl': ('model/stl', 'model'),
'.sv4cpio': ('application/x-sv4cpio', 'binary'),
'.sv4crc': ('application/x-sv4crc', 'binary'),
'.svg': ('image/svg+xml', 'image'),
'.swf': ('application/x-shockwave-flash', 'binary'),
'.t': ('application/x-troff', 'binary'),
'.tar': ('application/x-tar', 'binary'),
'.tcl': ('application/x-tcl', 'binary'),
'.tex': ('application/x-tex', 'binary'),
'.texi': ('application/x-texinfo', 'binary'),
'.texinfo': ('application/x-texinfo', 'binary'),
'.tif': ('image/tiff', 'image'),
'.tiff': ('image/tiff', 'image'),
'.tr': ('application/x-troff', 'binary'),
'.tsv': ('text/tab-separated-values', 'document'),
'.txt': ('text/plain', 'document'),
'.ustar': ('application/x-ustar', 'binary'),
'.vcf': ('text/x-vcard', 'document'),
'.wav': ('audio/x-wav', 'audio'),
'.webm': ('video/webm', 'video'),
'.wiz': ('application/msword', 'document'),
'.wsdl': ('application/xml', 'document'),
'.xbm': ('image/x-xbitmap', 'image'),
'.xlb': ('application/vnd.ms-excel', 'document'),
'.xls': ('application/vnd.ms-excel', 'document'),
'.xml': ('text/xml', 'document'),
'.xpdl': ('application/xml', 'document'),
'.xpm': ('image/x-xpixmap', 'image'),
'.xsl': ('application/xml', 'document'),
'.xwd': ('image/x-xwindowdump', 'image'),
'.zip': ('application/zip', 'binary'),
# These are non-standard types, commonly found in the wild.
'.mid': 'audio/midi',
'.midi': 'audio/midi',
'.pct': 'image/pict',
'.pic': 'image/pict',
'.pict': 'image/pict',
'.rtf': 'application/rtf',
'.xul': 'text/xul',
'.m4v': 'video/m4v',
'.cbr': ('application/vnd.comicbook+zip', 'document'),
'.cbz': ('application/vnd.comicbook+zip', 'document'),
'.lbry': ('application/x-ext-lbry', 'document'),
'.mid': ('audio/midi', 'audio'),
'.midi': ('audio/midi', 'audio'),
'.mobi': ('application/x-mobipocket-ebook', 'document'),
'.pct': ('image/pict', 'image'),
'.pic': ('image/pict', 'image'),
'.pict': ('image/pict', 'image'),
'.prc': ('application/x-mobipocket-ebook', 'document'),
'.rtf': ('application/rtf', 'document'),
'.xul': ('text/xul', 'document'),
'.m4v': ('video/m4v', 'video'),
# microsoft is special and has its own 'standard'
# https://docs.microsoft.com/en-us/windows/desktop/wmp/file-name-extensions
'.wmv': 'video/x-ms-wmv'
'.wmv': ('video/x-ms-wmv', 'video')
}
def guess_media_type(path):
_, ext = os.path.splitext(path)
return types_map.get(
ext.strip().lower(),
'application/octet-stream' if not (ext and ext[1:].strip()) else f'application/x-ext-{ext[1:].strip().lower()}'
)
extension = ext.strip().lower()
if extension:
if extension in types_map:
return types_map[extension][0]
return f'application/x-ext-{extension[1:]}'
return 'application/octet-stream'