From 30380338ba9af01696c94b61f0597131638eaec1 Mon Sep 17 00:00:00 2001
From: Niko Storni
Date: Mon, 16 Dec 2019 00:13:36 +0100
Subject: [PATCH] lbry-patch

---
 youtube_dl/extractor/youtube.py | 58 +++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b913d07a6..cd66a5b01 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -10,6 +10,7 @@ import random
 import re
 import time
 import traceback
+import subprocess
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..jsinterp import JSInterpreter
@@ -536,6 +537,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     _GEO_BYPASS = False
 
+    # Bytes per second passed to `wget --limit-rate` by _rate_limit_download
+    _WGET_429_RATE_LIMIT = 8191
+    # wget executable used by _rate_limit_download
+    _WGET_BINARY = "wget"
+
     IE_NAME = 'youtube'
     _TESTS = [
         {
@@ -1254,6 +1260,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         """ Return a string representation of a signature """
         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 
+    def _rate_limit_download(self, url, video_id, note=None, fatal=True):
+        """
+        Fetch url through a bandwidth-limited wget and return the body as text.
+
+        note=None prints the default download message, note=False prints
+        nothing, anything else is printed (prefixed with video_id if set).
+        With fatal=False a failed fetch is reported as a warning and False
+        is returned; otherwise the underlying exception propagates.
+        """
+        if note is None:
+            self.report_download_webpage(video_id)
+        elif note is not False:
+            if video_id is None:
+                self.to_screen('%s' % (note,))
+            else:
+                self.to_screen('%s: %s' % (video_id, note))
+        cmd = [self._WGET_BINARY, '-q', '--limit-rate', str(self._WGET_429_RATE_LIMIT)]
+        source_address = self._downloader.params.get('source_address')
+        if source_address:
+            # Only bind when configured: a None entry in argv makes subprocess raise TypeError
+            cmd += ['--bind-address', source_address]
+        cmd += ['-O', '-', url]
+        try:
+            # subprocess.run needs Python 3.5+
+            output = subprocess.run(cmd, check=True, stdout=subprocess.PIPE).stdout
+        except (subprocess.CalledProcessError, OSError) as err:
+            if fatal:
+                raise
+            self.report_warning('%s: unable to download webpage: %s' % (video_id, err))
+            return False
+        return output.decode('utf-8')
+
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
@@ -1678,7 +1716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # Get video webpage
         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
-        video_webpage = self._download_webpage(url, video_id)
+        video_webpage = self._rate_limit_download(url, video_id)
 
         # Attempt to extract SWF player URL
         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1736,10 +1774,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
             })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
-            video_info_webpage = self._download_webpage(
+            video_info_webpage = self._rate_limit_download(
                 video_info_url, video_id,
-                note='Refetching age-gated info webpage',
-                errnote='unable to download video info webpage')
+                note='Refetching age-gated info webpage')
             video_info = compat_parse_qs(video_info_webpage)
             pl_response = video_info.get('player_response', [None])[0]
             player_response = extract_player_response(pl_response, video_id)
@@ -1777,7 +1814,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 # The general idea is to take a union of itags of both DASH manifests (for example
                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
                 self.report_video_info_webpage_download(video_id)
-                for el in ('embedded', 'detailpage', 'vevo', ''):
+                for el in ('', 'embedded', 'detailpage', 'vevo'):
                     query = {
                         'video_id': video_id,
                         'ps': 'default',
@@ -1789,11 +1826,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         query['el'] = el
                     if sts:
                         query['sts'] = sts
-                    video_info_webpage = self._download_webpage(
-                        '%s://www.youtube.com/get_video_info' % proto,
-                        video_id, note=False,
-                        errnote='unable to download video info webpage',
-                        fatal=False, query=query)
+                    # Drop empty values; urlencode joins with '&' and escapes the rest
+                    params = dict((k, v) for k, v in query.items() if v not in (None, ''))
+                    video_info_webpage = self._rate_limit_download(
+                        '%s://www.youtube.com/get_video_info?%s' % (
+                            proto, compat_urllib_parse_urlencode(params)),
+                        video_id, note=False, fatal=False)
                     if not video_info_webpage:
                         continue
                     get_video_info = compat_parse_qs(video_info_webpage)
-- 
2.17.1