refactor youtube-dl execution process

This commit is contained in:
Niko Storni 2020-08-12 19:44:57 +02:00
parent ddca850c17
commit a56166ee51

View file

@@ -3,7 +3,6 @@ package downloader
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"net" "net"
"net/http" "net/http"
@@ -25,8 +24,8 @@ import (
) )
func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) { func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName, "--get-id", "--flat-playlist"} args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName, "--get-id", "--flat-playlist", "--cookies", "cookies.txt"}
ids, err := run(channelName, args, true, true, stopChan, pool) ids, err := run(channelName, args, stopChan, pool)
if err != nil { if err != nil {
return nil, errors.Err(err) return nil, errors.Err(err)
} }
@@ -44,8 +43,8 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan,
const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)" const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)"
func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) { func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID} args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID, "--cookies", "cookies.txt"}
results, err := run(videoID, args, true, true, stopChan, pool) results, err := run(videoID, args, stopChan, pool)
if err != nil { if err != nil {
return nil, errors.Err(err) return nil, errors.Err(err)
} }
@@ -244,99 +243,96 @@ func getClient(ip *net.TCPAddr) *http.Client {
} }
} }
func run(use string, args []string, withStdErr, withStdOut bool, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) { const (
var maxTries = 10 googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
var attempts int chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
maxAttempts = 3
extractionError = "YouTube said: Unable to extract video data"
throttledError = "HTTP Error 429"
AlternateThrottledError = "returned non-zero exit status 8"
youtubeDlError = "exit status 1"
)
func run(use string, args []string, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
var useragent []string var useragent []string
for { var lastError error
for attempts := 0; attempts < maxAttempts; attempts++ {
sourceAddress, err := getIPFromPool(use, stopChan, pool) sourceAddress, err := getIPFromPool(use, stopChan, pool)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer pool.ReleaseIP(sourceAddress)
argsForCommand := append(args, "--source-address", sourceAddress) argsForCommand := append(args, "--source-address", sourceAddress)
argsForCommand = append(argsForCommand, useragent...) argsForCommand = append(argsForCommand, useragent...)
cmd := exec.Command("youtube-dl", argsForCommand...) cmd := exec.Command("youtube-dl", argsForCommand...)
logrus.Printf("Running command youtube-dl %s", strings.Join(argsForCommand, " "))
var stderr io.ReadCloser res, err := runCmd(cmd, stopChan)
var errorLog []byte pool.ReleaseIP(sourceAddress)
if withStdErr { if err == nil {
var err error return res, nil
stderr, err = cmd.StderrPipe() }
if err != nil { lastError = err
return nil, errors.Err(err) if strings.Contains(err.Error(), youtubeDlError) {
if strings.Contains(err.Error(), extractionError) {
logrus.Warnf("known extraction error: %s", errors.FullTrace(err))
useragent = nextUA(useragent)
}
if strings.Contains(err.Error(), throttledError) || strings.Contains(err.Error(), AlternateThrottledError) {
pool.SetThrottled(sourceAddress)
//we don't want throttle errors to count toward the max retries
attempts--
} }
} }
}
return nil, lastError
}
var stdout io.ReadCloser func nextUA(current []string) []string {
var outLog []byte if len(current) == 0 {
if withStdOut { return []string{"--user-agent", googleBotUA}
var err error }
stdout, err = cmd.StdoutPipe() return []string{"--user-agent", chromeUA}
if err != nil { }
return nil, errors.Err(err)
}
if err := cmd.Start(); err != nil { func runCmd(cmd *exec.Cmd, stopChan stop.Chan) ([]string, error) {
return nil, errors.Err(err) logrus.Infof("running youtube-dl cmd: %s", strings.Join(cmd.Args, " "))
} var err error
outLog, err = ioutil.ReadAll(stdout) stderr, err := cmd.StderrPipe()
if err != nil { if err != nil {
return nil, errors.Err(err) return nil, errors.Err(err)
} }
if withStdErr { stdout, err := cmd.StdoutPipe()
errorLog, err = ioutil.ReadAll(stderr) if err != nil {
if err != nil { return nil, errors.Err(err)
return nil, errors.Err(err) }
} err = cmd.Start()
} if err != nil {
return nil, errors.Err(err)
}
outLog, err := ioutil.ReadAll(stdout)
if err != nil {
return nil, errors.Err(err)
}
errorLog, err := ioutil.ReadAll(stderr)
if err != nil {
return nil, errors.Err(err)
}
done := make(chan error, 1)
go func() {
done <- cmd.Wait()
}()
select {
case <-stopChan:
err := cmd.Process.Kill()
if err != nil {
return nil, errors.Prefix("failed to kill command after stopper cancellation", err)
} }
return nil, errors.Err("canceled by stopper")
done := make(chan error, 1) case err := <-done:
go func() { if err != nil {
attempts++ return nil, errors.Prefix("youtube-dl "+strings.Join(cmd.Args, " ")+" ["+string(errorLog)+"]", err)
done <- cmd.Wait()
}()
select {
case <-stopChan:
if err := cmd.Process.Kill(); err != nil {
return nil, errors.Prefix("failed to kill command after stopper cancellation", err)
}
return nil, errors.Err("canceled by stopper")
case err := <-done:
if err != nil {
if strings.Contains(err.Error(), "exit status 1") {
if strings.Contains(string(errorLog), "HTTP Error 429") || strings.Contains(string(errorLog), "returned non-zero exit status 8") {
pool.SetThrottled(sourceAddress)
logrus.Debugf("known throttling error...try again (%d)", attempts)
}
if strings.Contains(string(errorLog), "YouTube said: Unable to extract video data") {
useragent = []string{"--user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"}
if attempts == 1 {
useragent = []string{"--user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
}
if attempts > 3 {
logrus.Debugf("It's pointless to keep trying here... skipping (%d)", attempts)
break
}
logrus.Debugf("known extraction issue, maybe user agent specification will work...try again (%d)", attempts)
}
if attempts > maxTries {
logrus.Debug("too many tries returning failure")
break
}
continue
}
logrus.Debugf("Unknown error, returning failure: %s", err.Error())
return nil, errors.Prefix("youtube-dl "+strings.Join(argsForCommand, " ")+" ["+string(errorLog)+"] ", err)
}
return strings.Split(strings.Replace(string(outLog), "\r\n", "\n", -1), "\n"), nil
}
if len(errorLog) > 0 {
return nil, errors.Err(string(errorLog))
} }
return strings.Split(strings.Replace(string(outLog), "\r\n", "\n", -1), "\n"), nil
} }
} }