detect media_type from the file contents
This commit is contained in:
parent
9adfec6b00
commit
557348e345
2 changed files with 35 additions and 1 deletions
|
@ -1,4 +1,6 @@
|
|||
import os
|
||||
import filetype
|
||||
import logging
|
||||
|
||||
types_map = {
|
||||
# http://www.iana.org/assignments/media-types
|
||||
|
@ -166,10 +168,41 @@ types_map = {
|
|||
'.wmv': ('video/x-ms-wmv', 'video')
|
||||
}
|
||||
|
||||
# Maps the extension detected from a file's contents to the filename
# extensions that are accepted aliases of that same format, e.g. a .cbz
# file is really a .zip archive.  guess_media_type() keeps the extension
# taken from the path when it appears in the alias list for the detected
# extension, instead of overriding it with the detected one.
#
# NOTE(review): guess_media_type() indexes this map directly
# (synonyms_map[realext]); a detected extension with no entry here would
# raise KeyError there -- consider synonyms_map.get(realext, ()) at the
# call site.
synonyms_map = {
    '.zip': ['.cbz'],
    '.rar': ['.cbr'],
    '.ar': ['.a'],
}
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_media_type(path):
|
||||
_, ext = os.path.splitext(path)
|
||||
extension = ext.strip().lower()
|
||||
|
||||
# try detecting real file format if path points to a readable file
|
||||
try:
|
||||
kind = filetype.guess(path)
|
||||
if kind:
|
||||
realext = f".{kind.extension}"
|
||||
|
||||
# override extension parsed from file...
|
||||
if extension != realext:
|
||||
if extension:
|
||||
log.warning(f"file extension does not match it's contents {path}, identified as {realext}")
|
||||
else:
|
||||
log.debug(f"file {path} does not have extension, identified by contents as {realext}")
|
||||
|
||||
# don't do anything if extension is in synonyms
|
||||
if not extension in synonyms_map[realext]:
|
||||
extension = realext
|
||||
|
||||
except OSError as error:
|
||||
pass
|
||||
|
||||
if extension[1:]:
|
||||
if extension in types_map:
|
||||
return types_map[extension]
|
||||
|
|
3
setup.py
3
setup.py
|
@ -56,7 +56,8 @@ setup(
|
|||
'attrs==18.2.0',
|
||||
'pylru==1.1.0',
|
||||
'elasticsearch==7.10.1',
|
||||
'grpcio==1.38.0'
|
||||
'grpcio==1.38.0',
|
||||
'filetype==1.0.9'
|
||||
] + PLYVEL,
|
||||
extras_require={
|
||||
'torrent': ['lbry-libtorrent'],
|
||||
|
|
Loading…
Reference in a new issue