# Unit tests for lbry.schema.mime_types.guess_media_type
# (the module this patch adds as tests/unit/schema/test_mime_types.py).
import os
import tempfile
import unittest

from lbry.schema.mime_types import guess_media_type


class MediaTypeTests(unittest.TestCase):
    """Exercise ``guess_media_type`` with bare paths and with spoofed files.

    Every test asserts the ``(media_type, stream_type)`` tuple returned by
    ``guess_media_type`` for a given path.
    """

    @staticmethod
    def _guess_for_missing_file(filename):
        """Return the guess for *filename* inside a fresh, empty directory.

        The file is never created, so no content is available at that path.
        A private temporary directory is used instead of a fixed ``/tmp``
        path so a stray file on the host cannot influence the result.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            return guess_media_type(os.path.join(temp_dir, filename))

    @staticmethod
    def _guess_for_content(filename, content):
        """Write *content* to *filename* in a temp dir and return the guess.

        The file is closed by the ``with`` block before guessing, and the
        guess is taken while the temporary directory still exists.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, filename)
            with open(path, 'wb') as fd:
                fd.write(content)
            return guess_media_type(path)

    def test_guess_media_type_from_path_only(self):
        """A known extension on a missing file maps to its media type."""
        kind = self._guess_for_missing_file('test.mkv')
        self.assertEqual(kind, ('video/x-matroska', 'video'))

    def test_defaults_for_no_extension(self):
        """A missing file without an extension yields the binary default."""
        kind = self._guess_for_missing_file('test')
        self.assertEqual(kind, ('application/octet-stream', 'binary'))

    def test_defaults_for_unknown_extension(self):
        """An unrecognised extension yields an ``x-ext-`` media type."""
        kind = self._guess_for_missing_file('test.unk')
        self.assertEqual(kind, ('application/x-ext-unk', 'binary'))

    def test_spoofed_unknown(self):
        """LZ4 magic bytes in a ``.txt`` file: the content wins."""
        bytes_lz4 = bytes([0x04, 0x22, 0x4d, 0x18])
        kind = self._guess_for_content('spoofed_unknown.txt', bytes_lz4)
        self.assertEqual(kind, ('application/x-ext-lz4', 'binary'))

    def test_spoofed_known(self):
        """ZIP magic bytes in a ``.avi`` file: the content wins."""
        bytes_zip = bytes([0x50, 0x4b, 0x03, 0x06])
        kind = self._guess_for_content('spoofed_known.avi', bytes_zip)
        self.assertEqual(kind, ('application/zip', 'binary'))

    def test_spoofed_synonym(self):
        """ZIP magic bytes in a ``.cbz`` file: the extension is kept."""
        bytes_zip = bytes([0x50, 0x4b, 0x03, 0x06])
        kind = self._guess_for_content('spoofed_known.cbz', bytes_zip)
        self.assertEqual(kind, ('application/vnd.comicbook+zip', 'document'))