refactor youtube-dl execution process
This commit is contained in:
parent
ddca850c17
commit
a56166ee51
1 changed file with 79 additions and 83 deletions
|
@ -3,7 +3,6 @@ package downloader
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
@ -25,8 +24,8 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
|
func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
|
||||||
args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName, "--get-id", "--flat-playlist"}
|
args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName, "--get-id", "--flat-playlist", "--cookies", "cookies.txt"}
|
||||||
ids, err := run(channelName, args, true, true, stopChan, pool)
|
ids, err := run(channelName, args, stopChan, pool)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Err(err)
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
|
@ -44,8 +43,8 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan,
|
||||||
const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)"
|
const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)"
|
||||||
|
|
||||||
func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
|
func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
|
||||||
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
|
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID, "--cookies", "cookies.txt"}
|
||||||
results, err := run(videoID, args, true, true, stopChan, pool)
|
results, err := run(videoID, args, stopChan, pool)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Err(err)
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
|
@ -244,100 +243,97 @@ func getClient(ip *net.TCPAddr) *http.Client {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func run(use string, args []string, withStdErr, withStdOut bool, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
|
const (
|
||||||
var maxTries = 10
|
googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||||
var attempts int
|
chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
|
||||||
|
maxAttempts = 3
|
||||||
|
extractionError = "YouTube said: Unable to extract video data"
|
||||||
|
throttledError = "HTTP Error 429"
|
||||||
|
AlternateThrottledError = "returned non-zero exit status 8"
|
||||||
|
youtubeDlError = "exit status 1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func run(use string, args []string, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
|
||||||
var useragent []string
|
var useragent []string
|
||||||
for {
|
var lastError error
|
||||||
|
for attempts := 0; attempts < maxAttempts; attempts++ {
|
||||||
sourceAddress, err := getIPFromPool(use, stopChan, pool)
|
sourceAddress, err := getIPFromPool(use, stopChan, pool)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer pool.ReleaseIP(sourceAddress)
|
|
||||||
argsForCommand := append(args, "--source-address", sourceAddress)
|
argsForCommand := append(args, "--source-address", sourceAddress)
|
||||||
argsForCommand = append(argsForCommand, useragent...)
|
argsForCommand = append(argsForCommand, useragent...)
|
||||||
cmd := exec.Command("youtube-dl", argsForCommand...)
|
cmd := exec.Command("youtube-dl", argsForCommand...)
|
||||||
logrus.Printf("Running command youtube-dl %s", strings.Join(argsForCommand, " "))
|
|
||||||
|
|
||||||
var stderr io.ReadCloser
|
res, err := runCmd(cmd, stopChan)
|
||||||
var errorLog []byte
|
pool.ReleaseIP(sourceAddress)
|
||||||
if withStdErr {
|
if err == nil {
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
lastError = err
|
||||||
|
if strings.Contains(err.Error(), youtubeDlError) {
|
||||||
|
if strings.Contains(err.Error(), extractionError) {
|
||||||
|
logrus.Warnf("known extraction error: %s", errors.FullTrace(err))
|
||||||
|
useragent = nextUA(useragent)
|
||||||
|
}
|
||||||
|
if strings.Contains(err.Error(), throttledError) || strings.Contains(err.Error(), AlternateThrottledError) {
|
||||||
|
pool.SetThrottled(sourceAddress)
|
||||||
|
//we don't want throttle errors to count toward the max retries
|
||||||
|
attempts--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, lastError
|
||||||
|
}
|
||||||
|
|
||||||
|
func nextUA(current []string) []string {
|
||||||
|
if len(current) == 0 {
|
||||||
|
return []string{"--user-agent", googleBotUA}
|
||||||
|
}
|
||||||
|
return []string{"--user-agent", chromeUA}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runCmd(cmd *exec.Cmd, stopChan stop.Chan) ([]string, error) {
|
||||||
|
logrus.Infof("running youtube-dl cmd: %s", strings.Join(cmd.Args, " "))
|
||||||
var err error
|
var err error
|
||||||
stderr, err = cmd.StderrPipe()
|
stderr, err := cmd.StderrPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Err(err)
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
}
|
stdout, err := cmd.StdoutPipe()
|
||||||
|
|
||||||
var stdout io.ReadCloser
|
|
||||||
var outLog []byte
|
|
||||||
if withStdOut {
|
|
||||||
var err error
|
|
||||||
stdout, err = cmd.StdoutPipe()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Err(err)
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
|
err = cmd.Start()
|
||||||
if err := cmd.Start(); err != nil {
|
|
||||||
return nil, errors.Err(err)
|
|
||||||
}
|
|
||||||
outLog, err = ioutil.ReadAll(stdout)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Err(err)
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
if withStdErr {
|
outLog, err := ioutil.ReadAll(stdout)
|
||||||
errorLog, err = ioutil.ReadAll(stderr)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Err(err)
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
|
errorLog, err := ioutil.ReadAll(stderr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
done := make(chan error, 1)
|
done := make(chan error, 1)
|
||||||
go func() {
|
go func() {
|
||||||
attempts++
|
|
||||||
done <- cmd.Wait()
|
done <- cmd.Wait()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-stopChan:
|
case <-stopChan:
|
||||||
if err := cmd.Process.Kill(); err != nil {
|
err := cmd.Process.Kill()
|
||||||
|
if err != nil {
|
||||||
return nil, errors.Prefix("failed to kill command after stopper cancellation", err)
|
return nil, errors.Prefix("failed to kill command after stopper cancellation", err)
|
||||||
}
|
}
|
||||||
return nil, errors.Err("canceled by stopper")
|
return nil, errors.Err("canceled by stopper")
|
||||||
case err := <-done:
|
case err := <-done:
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "exit status 1") {
|
return nil, errors.Prefix("youtube-dl "+strings.Join(cmd.Args, " ")+" ["+string(errorLog)+"]", err)
|
||||||
if strings.Contains(string(errorLog), "HTTP Error 429") || strings.Contains(string(errorLog), "returned non-zero exit status 8") {
|
|
||||||
pool.SetThrottled(sourceAddress)
|
|
||||||
logrus.Debugf("known throttling error...try again (%d)", attempts)
|
|
||||||
}
|
|
||||||
if strings.Contains(string(errorLog), "YouTube said: Unable to extract video data") {
|
|
||||||
useragent = []string{"--user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"}
|
|
||||||
if attempts == 1 {
|
|
||||||
useragent = []string{"--user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
|
|
||||||
}
|
|
||||||
if attempts > 3 {
|
|
||||||
logrus.Debugf("It's pointless to keep trying here... skipping (%d)", attempts)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
logrus.Debugf("known extraction issue, maybe user agent specification will work...try again (%d)", attempts)
|
|
||||||
}
|
|
||||||
if attempts > maxTries {
|
|
||||||
logrus.Debug("too many tries returning failure")
|
|
||||||
break
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
logrus.Debugf("Unknown error, returning failure: %s", err.Error())
|
|
||||||
return nil, errors.Prefix("youtube-dl "+strings.Join(argsForCommand, " ")+" ["+string(errorLog)+"] ", err)
|
|
||||||
}
|
}
|
||||||
return strings.Split(strings.Replace(string(outLog), "\r\n", "\n", -1), "\n"), nil
|
return strings.Split(strings.Replace(string(outLog), "\r\n", "\n", -1), "\n"), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(errorLog) > 0 {
|
|
||||||
return nil, errors.Err(string(errorLog))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getIPFromPool(use string, stopChan stop.Chan, pool *ip_manager.IPPool) (sourceAddress string, err error) {
|
func getIPFromPool(use string, stopChan stop.Chan, pool *ip_manager.IPPool) (sourceAddress string, err error) {
|
||||||
|
|
Loading…
Reference in a new issue