111 lines
5 KiB
Diff
111 lines
5 KiB
Diff
From 30380338ba9af01696c94b61f0597131638eaec1 Mon Sep 17 00:00:00 2001
|
|
From: Niko Storni <niko@lbry.io>
|
|
Date: Mon, 16 Dec 2019 00:13:36 +0100
|
|
Subject: [PATCH] lbry-patch
|
|
|
|
---
|
|
youtube_dl/extractor/youtube.py | 45 +++++++++++++++++++++++++--------
|
|
1 file changed, 35 insertions(+), 10 deletions(-)
|
|
|
|
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
|
|
index b913d07a6..cd66a5b01 100644
|
|
--- a/youtube_dl/extractor/youtube.py
|
|
+++ b/youtube_dl/extractor/youtube.py
|
|
@@ -10,6 +10,7 @@ import random
|
|
import re
|
|
import time
|
|
import traceback
|
|
+import subprocess
|
|
|
|
from .common import InfoExtractor, SearchInfoExtractor
|
|
from ..jsinterp import JSInterpreter
|
|
@@ -536,6 +537,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
_GEO_BYPASS = False
|
|
|
|
+ _WGET_429_RATE_LIMIT = 8191
|
|
+ _WGET_BINARY = "wget"
|
|
+
|
|
IE_NAME = 'youtube'
|
|
_TESTS = [
|
|
{
|
|
@@ -1254,6 +1258,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
""" Return a string representation of a signature """
|
|
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
|
|
|
+    def _rate_limit_download(self, url, video_id, note=None):
+        """Fetch ``url`` with the external wget binary at a capped rate.
+
+        ``note`` controls progress output: ``None`` calls
+        ``report_download_webpage(video_id)``, ``False`` prints nothing,
+        and any other value is printed via ``to_screen`` (prefixed with
+        ``video_id`` when one is given).
+
+        Returns the bytes wget wrote to stdout, decoded as UTF-8.
+        Raises ``subprocess.CalledProcessError`` when wget exits with a
+        non-zero status (``check=True``).
+        """
+        if note is None:
+            self.report_download_webpage(video_id)
+        elif note is not False:
+            if video_id is None:
+                self.to_screen('%s' % (note,))
+            else:
+                self.to_screen('%s: %s' % (video_id, note))
+
+        cmd = [
+            self._WGET_BINARY, '-q',
+            '--limit-rate', str(self._WGET_429_RATE_LIMIT),
+        ]
+        # 'source_address' may be absent from params; a None entry in the
+        # argument list makes subprocess raise TypeError before wget even
+        # starts, so only ask wget to bind when an address is configured.
+        source_address = self._downloader.params.get('source_address')
+        if source_address:
+            cmd += ['--bind-address', source_address]
+        # '-O -' sends the downloaded document to stdout so it can be captured.
+        cmd += ['-O', '-', url]
+
+        completed = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
+        return completed.stdout.decode(encoding='UTF-8')
|
|
+
|
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
|
id_m = re.match(
|
|
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
|
@@ -1678,7 +1693,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
# Get video webpage
|
|
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
|
|
- video_webpage = self._download_webpage(url, video_id)
|
|
+ video_webpage = self._rate_limit_download(url, video_id)
|
|
|
|
# Attempt to extract SWF player URL
|
|
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
|
@@ -1736,10 +1751,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
|
})
|
|
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
|
- video_info_webpage = self._download_webpage(
|
|
+ video_info_webpage = self._rate_limit_download(
|
|
video_info_url, video_id,
|
|
- note='Refetching age-gated info webpage',
|
|
- errnote='unable to download video info webpage')
|
|
+ note='Refetching age-gated info webpage')
|
|
video_info = compat_parse_qs(video_info_webpage)
|
|
pl_response = video_info.get('player_response', [None])[0]
|
|
player_response = extract_player_response(pl_response, video_id)
|
|
@@ -1777,7 +1791,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
# The general idea is to take a union of itags of both DASH manifests (for example
|
|
# video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
|
|
self.report_video_info_webpage_download(video_id)
|
|
- for el in ('embedded', 'detailpage', 'vevo', ''):
|
|
+ for el in ('', 'embedded', 'detailpage', 'vevo'):
|
|
query = {
|
|
'video_id': video_id,
|
|
'ps': 'default',
|
|
@@ -1789,11 +1803,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
query['el'] = el
|
|
if sts:
|
|
query['sts'] = sts
|
|
- video_info_webpage = self._download_webpage(
|
|
- '%s://www.youtube.com/get_video_info' % proto,
|
|
- video_id, note=False,
|
|
- errnote='unable to download video info webpage',
|
|
- fatal=False, query=query)
|
|
+
|
|
+                    # Build the get_video_info URL by hand.  Parameters with
+                    # an empty/None key or value are dropped; the remaining
+                    # ones are joined with '&' and introduced by a single '?'
+                    # (appending each one with its own '?' yields an invalid
+                    # URL).  'video_id' is already a key of ``query``, so it
+                    # needs no special handling when ``el`` is empty.
+                    # Values are inserted as-is (not percent-encoded).
+                    # NOTE(review): a wget failure still raises
+                    # CalledProcessError here, whereas the replaced call used
+                    # fatal=False -- confirm that is intended.
+                    query_string = '&'.join(
+                        '{}={}'.format(key, value)
+                        for key, value in query.items()
+                        if key not in (None, '') and value not in (None, ''))
+
+                    base_url = 'https://youtube.com/get_video_info'
+                    if query_string:
+                        base_url = base_url + '?' + query_string
+
+                    video_info_webpage = self._rate_limit_download(base_url, video_id)
|
|
+
|
|
if not video_info_webpage:
|
|
continue
|
|
get_video_info = compat_parse_qs(video_info_webpage)
|
|
--
|
|
2.17.1
|
|
|