Merge pull request #2880 from lbryio/handle_mkv_files

Handle MKV files
2020-03-26 16:45:20 -04:00 · 2020-03-26 16:45:20 -04:00 · 39b4031684
commit 39b4031684
parent 15eb5d47eb 71f8965393
5 changed files with 77 additions and 31 deletions
--- a/lbry/extras/daemon/daemon.py
+++ b/lbry/extras/daemon/daemon.py
@ -3168,9 +3168,11 @@ class Daemon(metaclass=JSONRPCServerType):
                    f"Use --allow-duplicate-name flag to override."
                )

-        file_path = await self._video_file_analyzer.verify_or_repair(
+        file_path, spec = await self._video_file_analyzer.verify_or_repair(
            validate_file, optimize_file, file_path, ignore_non_video=True
        )
+        kwargs.update(spec)
+
        claim = Claim()
        claim.stream.update(file_path=file_path, sd_hash='0' * 96, **kwargs)
        tx = await Transaction.claim_create(
@ -3364,9 +3366,10 @@ class Daemon(metaclass=JSONRPCServerType):
        if fee_address:
            kwargs['fee_address'] = fee_address

-        file_path = await self._video_file_analyzer.verify_or_repair(
+        file_path, spec = await self._video_file_analyzer.verify_or_repair(
            validate_file, optimize_file, file_path, ignore_non_video=True
        )
+        kwargs.update(spec)

        if replace:
            claim = Claim()
--- a/lbry/file_analysis.py
+++ b/lbry/file_analysis.py
@ -113,9 +113,24 @@ class VideoFileAnalyzer:
    def _verify_container(scan_data: json):
        container = scan_data["format"]["format_name"]
        log.debug("   Detected container is %s", container)
-        if not {"webm", "mp4", "3gp", "ogg"}.intersection(container.split(",")):
+        splits = container.split(",")
+        if not {"webm", "mp4", "3gp", "ogg"}.intersection(splits):
            return "Container format is not in the approved list of WebM, MP4. " \
                   f"Actual: {container} [{scan_data['format']['format_long_name']}]"
+
+        if "matroska" in splits:
+            for stream in scan_data["streams"]:
+                if stream["codec_type"] == "video":
+                    codec = stream["codec_name"]
+                    if not {"vp8", "vp9", "av1"}.intersection(codec.split(",")):
+                        return "WebM format requires VP8/9 or AV1 video. " \
+                               f"Actual: {codec} [{stream['codec_long_name']}]"
+                elif stream["codec_type"] == "audio":
+                    codec = stream["codec_name"]
+                    if not {"vorbis", "opus"}.intersection(codec.split(",")):
+                        return "WebM format requires Vorbis or Opus audio. " \
+                               f"Actual: {codec} [{stream['codec_long_name']}]"
+
        return ""

    @staticmethod
@ -289,20 +304,22 @@ class VideoFileAnalyzer:
        # if we are vp8/vp9/av1 we want webm
        # use mp4 for anything else

-        if not video_encoder:  # not re-encoding video
-            for stream in scan_data["streams"]:
-                if stream["codec_type"] != "video":
-                    continue
-                codec = stream["codec_name"].split(",")
-                if "theora" in codec:
-                    return "ogv"
-                if {"vp8", "vp9", "av1"}.intersection(codec):
-                    return "webm"
+        if video_encoder:  # re-encoding video: the chosen encoder decides the container
+            if "theora" in video_encoder:
+                return "ogv"
+            if re.search(r"vp[89x]|av1", video_encoder.split(" ", 1)[0]):
+                return "webm"
+            return "mp4"
+
+        for stream in scan_data["streams"]:
+            if stream["codec_type"] != "video":
+                continue
+            codec = stream["codec_name"].split(",")
+            if "theora" in codec:
+                return "ogv"
+            if {"vp8", "vp9", "av1"}.intersection(codec):
+                return "webm"

-        if "theora" in video_encoder:
-            return "ogv"
-        elif re.search(r"vp[89x]|av1", video_encoder.split(" ", 1)[0]):
-            return "webm"
        return "mp4"

    async def _get_scan_data(self, validate, file_path):
@ -324,23 +341,46 @@ class VideoFileAnalyzer:

        return scan_data

+    @staticmethod
+    def _build_spec(scan_data):
+        assert scan_data
+
+        duration = float(scan_data["format"]["duration"])  # existence verified when scan_data made
+        width = -1
+        height = -1
+        for stream in scan_data["streams"]:
+            if stream["codec_type"] != "video":
+                continue
+            width = max(width, int(stream["width"]))
+            height = max(height, int(stream["height"]))
+
+        log.debug("   Detected duration: %f sec. with resolution: %d x %d", duration, width, height)
+
+        spec = {"duration": duration}
+        if height >= 0:
+            spec["height"] = height
+        if width >= 0:
+            spec["width"] = width
+        return spec
+
    async def verify_or_repair(self, validate, repair, file_path, ignore_non_video=False):
        if not validate and not repair:
-            return file_path
+            return file_path, {}

        if ignore_non_video and not file_path:
-            return file_path
+            return file_path, {}

        await self._verify_ffmpeg_installed()
        try:
            scan_data = await self._get_scan_data(validate, file_path)
        except ValueError:
            if ignore_non_video:
-                return file_path
+                return file_path, {}
            raise

        fast_start_msg = await self._verify_fast_start(scan_data, file_path)
        log.debug("Analyzing %s:", file_path)
+        spec = self._build_spec(scan_data)
        log.debug("   Detected faststart is %s", "false" if fast_start_msg else "true")
        container_msg = self._verify_container(scan_data)
        bitrate_msg = self._verify_bitrate(scan_data, file_path)
@ -350,7 +390,7 @@ class VideoFileAnalyzer:
        messages = [container_msg, bitrate_msg, fast_start_msg, video_msg, audio_msg, volume_msg]

        if not any(messages):
-            return file_path
+            return file_path, spec

        if not repair:
            errors = ["Streamability verification failed:"]
@ -401,6 +441,6 @@ class VideoFileAnalyzer:
                raise
            log.info("Unable to transcode %s . Message: %s", file_path, str(e))
            # TODO: delete partial output file here if it exists?
-            return file_path
+            return file_path, spec

-        return str(output)
+        return str(output), spec
--- a/lbry/schema/claim.py
+++ b/lbry/schema/claim.py
@ -253,7 +253,7 @@ class Stream(BaseClaim):
        if stream_type in ('image', 'video', 'audio'):
            media = getattr(self, stream_type)
            media_args = {'file_metadata': None}
-            if file_path is not None:
+            if file_path is not None and not all((duration, width, height)):
                try:
                    media_args['file_metadata'] = binary_file_metadata(binary_file_parser(file_path))
                except:
--- a/scripts/check_video.py
+++ b/scripts/check_video.py
@ -33,7 +33,7 @@ async def process_video(analyzer, video_file):
        transcode = input("Would you like to make a repaired clone now? [y/N] ")
        if transcode == "y":
            try:
-                new_video_file = await analyzer.verify_or_repair(True, True, video_file)
+                new_video_file, _ = await analyzer.verify_or_repair(True, True, video_file)
                print("Successfully created ", new_video_file)
            except Exception as e:
                print("Unable to complete the transcode. Message: ", str(e))
--- a/tests/integration/other/test_transcoding.py
+++ b/tests/integration/other/test_transcoding.py
@ -52,12 +52,15 @@ class TranscodeValidation(ClaimTestCase):
                self.assertEqual(code, 0, output)

    async def test_should_work(self):
-        new_file_name = await self.analyzer.verify_or_repair(True, False, self.video_file_name)
+        new_file_name, _ = await self.analyzer.verify_or_repair(True, False, self.video_file_name)
        self.assertEqual(self.video_file_name, new_file_name)
-        new_file_name = await self.analyzer.verify_or_repair(True, False, self.video_file_ogg)
+        new_file_name, _ = await self.analyzer.verify_or_repair(True, False, self.video_file_ogg)
        self.assertEqual(self.video_file_ogg, new_file_name)
-        new_file_name = await self.analyzer.verify_or_repair(True, False, self.video_file_webm)
+        new_file_name, spec = await self.analyzer.verify_or_repair(True, False, self.video_file_webm)
        self.assertEqual(self.video_file_webm, new_file_name)
+        self.assertEqual(spec["width"], 1280)
+        self.assertEqual(spec["height"], 720)
+        self.assertEqual(spec["duration"], 15.054)

    async def test_volume(self):
        self.conf.volume_analysis_time = 200
@ -75,7 +78,7 @@ class TranscodeValidation(ClaimTestCase):
        with self.assertRaisesRegex(Exception, "Container format is not in the approved list"):
            await self.analyzer.verify_or_repair(True, False, file_name)

-        fixed_file = await self.analyzer.verify_or_repair(True, True, file_name)
+        fixed_file, _ = await self.analyzer.verify_or_repair(True, True, file_name)
        pathlib.Path(fixed_file).unlink()

    async def test_video_codec(self):
@ -91,7 +94,7 @@ class TranscodeValidation(ClaimTestCase):
        with self.assertRaisesRegex(Exception, "faststart flag was not used"):
            await self.analyzer.verify_or_repair(True, False, file_name)

-        fixed_file = await self.analyzer.verify_or_repair(True, True, file_name)
+        fixed_file, _ = await self.analyzer.verify_or_repair(True, True, file_name)
        pathlib.Path(fixed_file).unlink()

    async def test_max_bit_rate(self):
@ -111,7 +114,7 @@ class TranscodeValidation(ClaimTestCase):
        with self.assertRaisesRegex(Exception, "pixel format does not match the approved"):
            await self.analyzer.verify_or_repair(True, False, file_name)

-        fixed_file = await self.analyzer.verify_or_repair(True, True, file_name)
+        fixed_file, _ = await self.analyzer.verify_or_repair(True, True, file_name)
        pathlib.Path(fixed_file).unlink()

    async def test_audio_codec(self):
@ -125,7 +128,7 @@ class TranscodeValidation(ClaimTestCase):
        with self.assertRaisesRegex(Exception, "Audio codec is not in the approved list"):
            await self.analyzer.verify_or_repair(True, False, file_name)

-        fixed_file = await self.analyzer.verify_or_repair(True, True, file_name)
+        fixed_file, _ = await self.analyzer.verify_or_repair(True, True, file_name)
        pathlib.Path(fixed_file).unlink()

    async def test_extension_choice(self):