add new throttling detection

This commit is contained in:
Niko Storni 2019-12-16 00:29:56 +01:00
parent 2bde06e4b9
commit 68c0fd9ed7
2 changed files with 112 additions and 1 deletions

111
0001-lbry-patch.patch Normal file
View file

@ -0,0 +1,111 @@
From 30380338ba9af01696c94b61f0597131638eaec1 Mon Sep 17 00:00:00 2001
From: Niko Storni <niko@lbry.io>
Date: Mon, 16 Dec 2019 00:13:36 +0100
Subject: [PATCH] lbry-patch
---
youtube_dl/extractor/youtube.py | 45 +++++++++++++++++++++++++--------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b913d07a6..cd66a5b01 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -10,6 +10,7 @@ import random
import re
import time
import traceback
+import subprocess
from .common import InfoExtractor, SearchInfoExtractor
from ..jsinterp import JSInterpreter
@@ -536,6 +537,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_GEO_BYPASS = False
+ _WGET_429_RATE_LIMIT = 8191
+ _WGET_BINARY = "wget"
+
IE_NAME = 'youtube'
_TESTS = [
{
@@ -1254,6 +1258,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
""" Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+ def _rate_limit_download(self, url, video_id, note=None):
+ if note is None:
+ self.report_download_webpage(video_id)
+ elif note is not False:
+ if video_id is None:
+ self.to_screen('%s' % (note,))
+ else:
+ self.to_screen('%s: %s' % (video_id, note))
+ source_address = self._downloader.params.get('source_address')
+ return subprocess.run([self._WGET_BINARY, '-q', '--limit-rate', str(self._WGET_429_RATE_LIMIT), '--bind-address', source_address, '-O', '-', url], check=True, stdout=subprocess.PIPE).stdout.decode(encoding='UTF-8')
+
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
@@ -1678,7 +1693,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
- video_webpage = self._download_webpage(url, video_id)
+ video_webpage = self._rate_limit_download(url, video_id)
# Attempt to extract SWF player URL
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1736,10 +1751,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(
+ video_info_webpage = self._rate_limit_download(
video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
+ note='Refetching age-gated info webpage')
video_info = compat_parse_qs(video_info_webpage)
pl_response = video_info.get('player_response', [None])[0]
player_response = extract_player_response(pl_response, video_id)
@@ -1777,7 +1791,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# The general idea is to take a union of itags of both DASH manifests (for example
# video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
self.report_video_info_webpage_download(video_id)
- for el in ('embedded', 'detailpage', 'vevo', ''):
+ for el in ('', 'embedded', 'detailpage', 'vevo'):
query = {
'video_id': video_id,
'ps': 'default',
@@ -1789,11 +1803,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
query['el'] = el
if sts:
query['sts'] = sts
- video_info_webpage = self._download_webpage(
- '%s://www.youtube.com/get_video_info' % proto,
- video_id, note=False,
- errnote='unable to download video info webpage',
- fatal=False, query=query)
+
+ if el == '':
+ base_url = 'https://youtube.com/get_video_info?video_id={}'.format(video_id)
+ else:
+ base_url = 'https://youtube.com/get_video_info'
+
+ for q in query:
+ if q is None or q is "":
+ continue
+ if query[q] is None or query[q] is "":
+ continue
+
+ base_url = base_url + "?{}={}".format(q, query[q])
+
+ video_info_webpage = self._rate_limit_download(base_url, video_id)
+
if not video_info_webpage:
continue
get_video_info = compat_parse_qs(video_info_webpage)
--
2.17.1

View file

@ -277,7 +277,7 @@ runcmd:
if err = cmd.Wait(); err != nil { if err = cmd.Wait(); err != nil {
if strings.Contains(err.Error(), "exit status 1") { if strings.Contains(err.Error(), "exit status 1") {
if strings.Contains(string(errorLog), "HTTP Error 429") { if strings.Contains(string(errorLog), "HTTP Error 429") || strings.Contains(string(errorLog), "returned non-zero exit status 8") {
v.pool.SetThrottled(sourceAddress, v.stopGroup) v.pool.SetThrottled(sourceAddress, v.stopGroup)
} else if strings.Contains(string(errorLog), "giving up after 0 fragment retries") && qualityIndex < len(qualities)-1 { } else if strings.Contains(string(errorLog), "giving up after 0 fragment retries") && qualityIndex < len(qualities)-1 {
qualityIndex++ qualityIndex++