Merge pull request #2793 from lbryio/dont_validate_non_video

don't validate and repair files that aren't videos; add maximum video bit rate setting
2020-03-04 09:16:37 -05:00 · 2020-03-04 09:16:37 -05:00 · c4905d02b9
commit c4905d02b9
parent 75a1cc0d33 ee39880fb5
6 changed files with 52 additions and 39 deletions
--- a/lbry/conf.py
+++ b/lbry/conf.py
@@ -467,12 +467,17 @@ class TranscodeConfig(BaseConfig):
    ffmpeg_folder = String('The path to ffmpeg and ffprobe', '')
    video_encoder = String('FFmpeg codec and parameters for the video encoding. '
                           'Example: libaom-av1 -crf 25 -b:v 0 -strict experimental',
-                           'libx264 -crf 18 -vf "format=yuv420p"')
+                           'libx264 -crf 21 -preset faster -pix_fmt yuv420p')
+    video_bitrate_maximum = Integer('Maximum bits per second allowed for video streams (0 to disable).', 8400000)
+    video_scaler = String('FFmpeg scaling parameters for reducing bitrate. '
+                          'Example: -vf "scale=-2:720,fps=24" -maxrate 5M -bufsize 3M',
+                          r'-vf "scale=if(gte(iw\,ih)\,min(2560\,iw)\,-2):if(lt(iw\,ih)\,min(2560\,ih)\,-2)" '
+                          r'-maxrate 8400K -bufsize 5000K')
    audio_encoder = String('FFmpeg codec and parameters for the audio encoding. '
                           'Example: libopus -b:a 128k',
-                           'aac -b:a 192k')
+                           'aac -b:a 160k')
    volume_filter = String('FFmpeg filter for audio normalization.', '-af loudnorm')
-    volume_analysis_time = Integer('Maximum seconds into the file that we examine audio volume (0 to disable).', '240')
+    volume_analysis_time = Integer('Maximum seconds into the file that we examine audio volume (0 to disable).', 240)
 class CLIConfig(TranscodeConfig):
--- a/lbry/extras/daemon/daemon.py
+++ b/lbry/extras/daemon/daemon.py
@@ -3142,8 +3142,9 @@ class Daemon(metaclass=JSONRPCServerType):
                    f"Use --allow-duplicate-name flag to override."
                )
-        file_path = await self._video_file_analyzer.verify_or_repair(validate_file, optimize_file, file_path)
+        file_path = await self._video_file_analyzer.verify_or_repair(
-
+            validate_file, optimize_file, file_path, ignore_non_video=True
+        )
        claim = Claim()
        claim.stream.update(file_path=file_path, sd_hash='0' * 96, **kwargs)
        tx = await Transaction.claim_create(
--- a/lbry/file_analysis.py
+++ b/lbry/file_analysis.py
@@ -47,7 +47,7 @@ class VideoFileAnalyzer:
            return
        await self._verify_executable("ffprobe")
        version = await self._verify_executable("ffmpeg")
-        self._which = shutil.which("ffmpeg")
+        self._which = shutil.which(os.path.join(self._conf.ffmpeg_folder, "ffmpeg"))
        self._ffmpeg_installed = True
        log.debug("Using %s at %s", version.splitlines()[0].split(" Copyright")[0], self._which)
@@ -97,24 +97,21 @@ class VideoFileAnalyzer:
        return ""
-    @staticmethod
+    def _verify_bitrate(self, scan_data: json, file_path):
-    def _verify_bitrate(scan_data: json):
+        bit_rate_max = float(self._conf.video_bitrate_maximum)
-        if "bit_rate" not in scan_data["format"]:
+        if bit_rate_max <= 0:
            return ""
+        if "bit_rate" in scan_data["format"]:
            bit_rate = float(scan_data["format"]["bit_rate"])
-        log.debug("   Detected bitrate is %s Mbps", str(bit_rate / 1000000.0))
+        else:
-        pixels = -1.0
+            bit_rate = os.stat(file_path).st_size / float(scan_data["format"]["duration"])
-        for stream in scan_data["streams"]:
+        log.debug("   Detected bitrate is %s Mbps. Allowed is %s Mbps",
-            if stream["codec_type"] == "video":
+                  str(bit_rate / 1000000.0), str(bit_rate_max / 1000000.0))
-                pieces = stream["r_frame_rate"].split('/', 1)
-                frame_rate = float(pieces[0]) if len(pieces) == 1 \
-                    else float(pieces[0]) / float(pieces[1])
-                pixels = max(pixels, float(stream["height"]) * float(stream["width"]) * frame_rate)
-        if pixels > 0.0 and pixels / bit_rate < 3.0:
+        if bit_rate > bit_rate_max:
-            return "Bits per second is excessive for this data; this may impact web streaming performance. " \
+            return "The bit rate is above the configured maximum. Actual: " \
-                   f"Actual: {str(bit_rate / 1000000.0)} Mbps"
+                   f"{bit_rate / 1000000.0} Mbps; Allowed: {bit_rate_max / 1000000.0} Mbps"
        return ""
@@ -178,6 +175,9 @@ class VideoFileAnalyzer:
        # https://developers.google.com/media/vp9/settings/vod/
        return int(-0.011 * height + 40)
+    def _get_video_scaler(self):
+        return self._conf.video_scaler
    async def _get_video_encoder(self, scan_data):
        # use what the user said if it's there:
        # if it's not there, use h264 if we can because it's way faster than the others
@@ -257,12 +257,12 @@ class VideoFileAnalyzer:
                    continue
                codec = stream["codec_name"].split(",")
                if "theora" in codec:
-                    return "ogg"
+                    return "ogv"
                if {"vp8", "vp9", "av1"}.intersection(codec):
                    return "webm"
        if "theora" in video_encoder:
-            return "ogg"
+            return "ogv"
        elif re.search(r"vp[89x]|av1", video_encoder.split(" ", 1)[0]):
            return "webm"
        return "mp4"
@@ -274,31 +274,35 @@ class VideoFileAnalyzer:
            scan_data = json.loads(result)
        except Exception as e:
            log.debug("Failure in JSON parsing ffprobe results. Message: %s", str(e))
-            if validate:
+            raise ValueError(f'Absent or unreadable video file: {file_path}')
-                raise Exception(f'Invalid video file: {file_path}')
-            log.info("Unable to optimize %s . FFmpeg output was unreadable.", file_path)
-            return
-        if "format" not in scan_data:
+        if "format" not in scan_data or "duration" not in scan_data["format"]:
-            if validate:
+            log.debug("Format data is missing from ffprobe results for: %s", file_path)
-                raise FileNotFoundError(f'Unexpected or absent video file contents at: {file_path}')
+            raise ValueError(f'Media file does not appear to contain video content at: {file_path}')
-            log.info("Unable to optimize %s . FFmpeg output is missing the format section.", file_path)
+
-            return
+        if float(scan_data["format"]["duration"]) < 0.1:
+            log.debug("Media file appears to be an image: %s", file_path)
+            raise ValueError(f'Assuming image file at: {file_path}')
        return scan_data
-    async def verify_or_repair(self, validate, repair, file_path):
+    async def verify_or_repair(self, validate, repair, file_path, ignore_non_video=False):
        if not validate and not repair:
            return file_path
        await self._verify_ffmpeg_installed()
+        try:
            scan_data = await self._get_scan_data(validate, file_path)
+        except ValueError:
+            if ignore_non_video:
+                return file_path
+            raise
        fast_start_msg = await self._verify_fast_start(scan_data, file_path)
        log.debug("Analyzing %s:", file_path)
        log.debug("   Detected faststart is %s", "false" if fast_start_msg else "true")
        container_msg = self._verify_container(scan_data)
-        bitrate_msg = self._verify_bitrate(scan_data)
+        bitrate_msg = self._verify_bitrate(scan_data, file_path)
        video_msg = self._verify_video_encoding(scan_data)
        audio_msg = self._verify_audio_encoding(scan_data)
        volume_msg = await self._verify_audio_volume(self._conf.volume_analysis_time, file_path)
@@ -324,6 +328,8 @@ class VideoFileAnalyzer:
            if video_msg or bitrate_msg:
                video_encoder = await self._get_video_encoder(scan_data)
                transcode_command.append(video_encoder)
+                # could do the scaling only if bitrate_msg, but if we're going to the effort to re-encode anyway...
+                transcode_command.append(self._get_video_scaler())
            else:
                transcode_command.append("copy")
--- a/lbry/schema/mime_types.py
+++ b/lbry/schema/mime_types.py
@@ -148,6 +148,7 @@ types_map = {
    '.mobi': ('application/x-mobipocket-ebook', 'document'),
    '.oga': ('audio/ogg', 'audio'),
    '.ogv': ('video/ogg', 'video'),
    '.ogg': ('video/ogg', 'video'),
    '.pct': ('image/pict', 'image'),
    '.pic': ('image/pict', 'image'),
    '.pict': ('image/pict', 'image'),
--- a/scripts/check_video.py
+++ b/scripts/check_video.py
@@ -26,8 +26,8 @@ async def process_video(analyzer, video_file):
    try:
        await analyzer.verify_or_repair(True, False, video_file)
        print("No concerns. Ship it!")
-    except FileNotFoundError as e:
+    except (FileNotFoundError, ValueError) as e:
-        print(str(e))
+        print("Analysis failed.", str(e))
    except Exception as e:
        print(str(e))
        transcode = input("Would you like to make a repaired clone now? [y/N] ")
--- a/tests/integration/other/test_transcoding.py
+++ b/tests/integration/other/test_transcoding.py
@@ -130,7 +130,7 @@ class TranscodeValidation(ClaimTestCase):
        scan_data = await self.analyzer._get_scan_data(True, self.video_file_ogg)
        extension = self.analyzer._get_best_container_extension(scan_data, "")
-        self.assertEqual(extension, "ogg")
+        self.assertEqual(extension, "ogv")
        scan_data = await self.analyzer._get_scan_data(True, self.video_file_webm)
        extension = self.analyzer._get_best_container_extension(scan_data, "")
@@ -143,7 +143,7 @@ class TranscodeValidation(ClaimTestCase):
        self.assertEqual("webm", extension)
        extension = self.analyzer._get_best_container_extension("", "libtheora")
-        self.assertEqual("ogg", extension)
+        self.assertEqual("ogv", extension)
    async def test_no_ffmpeg(self):
        self.conf.ffmpeg_folder = "I don't really exist/"