From 0b002c82284a4af858db2597620f41e5f676a1f8 Mon Sep 17 00:00:00 2001 From: Niko Storni Date: Tue, 18 Aug 2020 00:03:38 +0200 Subject: [PATCH] lots of bug fixes and improvements --- downloader/downloader.go | 31 ++++++++++++++++++------------- manager/ytsync.go | 13 +------------ sdk/api.go | 40 ++++++++++++++++++++++++++++++++++++++++ shared/shared.go | 13 +++++++++++++ sources/youtubeVideo.go | 23 +++++++++++++++++++---- ytapi/ytapi.go | 17 ++++++++++++----- 6 files changed, 103 insertions(+), 34 deletions(-) diff --git a/downloader/downloader.go b/downloader/downloader.go index 4479402..404bab6 100644 --- a/downloader/downloader.go +++ b/downloader/downloader.go @@ -125,7 +125,7 @@ func triggerScrape(videoID string, ip *net.TCPAddr) error { if err != nil { return errors.Err(err) } - req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36") + req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36") res, err := client.Do(req) if err != nil { @@ -168,20 +168,25 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa if err != nil { logrus.Error(err) } - if release != nil { - //const sqlTimeFormat = "2006-01-02 15:04:05" - sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC) - if err == nil { - return sqlTime.Format(releaseTimeFormat), nil - } else { - logrus.Error(err) - } - } ytdlUploadDate, err := time.Parse("20060102", uploadDate) if err != nil { logrus.Error(err) } - if time.Now().AddDate(0, 0, -2).After(ytdlUploadDate) { + if release != nil { + //const sqlTimeFormat = "2006-01-02 15:04:05" + sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC) + if err == nil { + if sqlTime.Day() != ytdlUploadDate.Day() { + logrus.Infof("upload day from APIs differs from the ytdl one by more than 1 day.") + } else { + return sqlTime.Format(releaseTimeFormat), nil + } + } else { + logrus.Error(err) + } + } + + if time.Now().AddDate(0, 0, -3).After(ytdlUploadDate) { return ytdlUploadDate.Format(releaseTimeFormat), nil } client := getClient(ip) @@ -189,7 +194,7 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa if err != nil { return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err) } - req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36") + req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36") res, err := client.Do(req) if err != nil { @@ -245,7 +250,7 @@ func getClient(ip *net.TCPAddr) *http.Client { const ( googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" - chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" + chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36" maxAttempts = 3 extractionError = "YouTube said: Unable to extract video data" throttledError = "HTTP Error 429" diff --git a/manager/ytsync.go b/manager/ytsync.go index 45f0da1..e1ab683 100644 --- a/manager/ytsync.go +++ b/manager/ytsync.go @@ -918,18 +918,7 @@ func (s *Sync) processVideo(v ytapi.Video) (err error) { alreadyPublished := ok && sv.Published videoRequiresUpgrade := ok && s.Manager.CliFlags.UpgradeMetadata && sv.MetadataVersion < newMetadataVersion - neverRetryFailures := []string{ - "Error extracting sts from embedded url response", - "Unable to extract signature tokens", - "the video is too big to sync, skipping for now", - "video is too long to process", - "This video contains content from", - "no compatible format available for this video", - "Watch this video on YouTube.", - "have blocked it on copyright grounds", - "giving up after 0 fragment retries", - "Sign in to confirm your age", - } + neverRetryFailures := shared.NeverRetryFailures if ok && !sv.Published && util.SubstringInSlice(sv.FailureReason, neverRetryFailures) { log.Println(v.ID() + " can't ever be published") return nil diff --git a/sdk/api.go b/sdk/api.go index 4d15ae6..8dbb5ca 100644 --- a/sdk/api.go +++ b/sdk/api.go @@ -48,6 +48,11 @@ func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([] "channel_id": {cliFlags.ChannelID}, }) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.FetchChannels(status, cliFlags) + } return nil, errors.Err(err) } defer res.Body.Close() @@ -106,6 +111,11 @@ func (a *APIConfig) SetChannelCert(certHex string, channelID string) error { "auth_token": {a.ApiToken}, }) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.SetChannelCert(certHex, channelID) + } return errors.Err(err) } defer res.Body.Close() @@ -149,6 +159,11 @@ func (a *APIConfig) SetChannelStatus(channelID string, status string, failureRea } res, err := http.PostForm(endpoint, params) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.SetChannelStatus(channelID, status, failureReason, transferState) + } return nil, nil, errors.Err(err) } defer res.Body.Close() @@ -194,6 +209,11 @@ func (a *APIConfig) SetChannelClaimID(channelID string, channelClaimID string) e "channel_claim_id": {channelClaimID}, }) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.SetChannelClaimID(channelID, channelClaimID) + } return errors.Err(err) } defer res.Body.Close() @@ -233,6 +253,11 @@ func (a *APIConfig) DeleteVideos(videos []string) error { } res, err := http.PostForm(endpoint, vals) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.DeleteVideos(videos) + } return errors.Err(err) } defer res.Body.Close() @@ -294,6 +319,11 @@ func (a *APIConfig) MarkVideoStatus(status shared.VideoStatus) error { } res, err := http.PostForm(endpoint, vals) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.MarkVideoStatus(status) + } return errors.Err(err) } defer res.Body.Close() @@ -331,6 +361,11 @@ func (a *APIConfig) VideoState(videoID string) (string, error) { res, err := http.PostForm(endpoint, vals) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.VideoState(videoID) + } return "", errors.Err(err) } defer res.Body.Close() @@ -380,6 +415,11 @@ func (a *APIConfig) GetReleasedDate(videoID string) (*VideoRelease, error) { res, err := http.PostForm(endpoint, vals) if err != nil { + if strings.Contains(err.Error(), "EOF") { + util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint) + time.Sleep(30 * time.Second) + return a.GetReleasedDate(videoID) + } return nil, errors.Err(err) } defer res.Body.Close() diff --git a/shared/shared.go b/shared/shared.go index 95f425b..28b8077 100644 --- a/shared/shared.go +++ b/shared/shared.go @@ -27,6 +27,19 @@ type YoutubeChannel struct { LastUploadedVideo string `json:"last_uploaded_video"` } +var NeverRetryFailures = []string{ + "Error extracting sts from embedded url response", + "Unable to extract signature tokens", + "the video is too big to sync, skipping for now", + "video is too long to process", + "This video contains content from", + "no compatible format available for this video", + "Watch this video on YouTube.", + "have blocked it on copyright grounds", + "giving up after 0 fragment retries", + "Sign in to confirm your age", +} + type SyncFlags struct { StopOnError bool TakeOverExistingChannel bool diff --git a/sources/youtubeVideo.go b/sources/youtubeVideo.go index 0b0a3bb..6118745 100644 --- a/sources/youtubeVideo.go +++ b/sources/youtubeVideo.go @@ -209,6 +209,15 @@ func (v *YoutubeVideo) download() error { "480", "320", } + dur := time.Duration(v.youtubeInfo.Duration) * time.Second + if dur.Hours() > 2 { //for videos longer than 2 hours only sync up to 720p + qualities = []string{ + "720", + "480", + "320", + } + } + ytdlArgs := []string{ "--no-progress", "-o" + strings.TrimSuffix(v.getFullPath(), ".mp4"), @@ -219,12 +228,11 @@ func (v *YoutubeVideo) download() error { "-movflags faststart", "--abort-on-unavailable-fragment", "--fragment-retries", - "0", - "--user-agent", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36", + "1", "--cookies", "cookies.txt", } + userAgent := []string{"--user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"} if v.maxVideoSize > 0 { ytdlArgs = append(ytdlArgs, "--max-filesize", @@ -264,8 +272,10 @@ func (v *YoutubeVideo) download() error { "https://www.youtube.com/watch?v="+v.ID(), ) - for i, quality := range qualities { + for i := 0; i < len(qualities); i++ { + quality := qualities[i] argsWithFilters := append(ytdlArgs, "-fbestvideo[ext=mp4][height<="+quality+"]+bestaudio[ext!=webm]") + argsWithFilters = append(argsWithFilters, userAgent...) cmd := exec.Command("youtube-dl", argsWithFilters...) log.Printf("Running command youtube-dl %s", strings.Join(argsWithFilters, " ")) @@ -294,6 +304,11 @@ func (v *YoutubeVideo) download() error { return errors.Err(string(errorLog)) } continue //this bypasses the yt throttling IP redistribution... TODO: don't + } else if strings.Contains(string(errorLog), "YouTube said: Unable to extract video data") && !strings.Contains(userAgent[1], "Googlebot") { + i-- //do not lower quality when trying a different user agent + userAgent = []string{"--user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"} + log.Infof("trying different user agent for video %s", v.ID()) + continue } return errors.Err(string(errorLog)) } diff --git a/ytapi/ytapi.go b/ytapi/ytapi.go index f2c61b9..9cb2acf 100644 --- a/ytapi/ytapi.go +++ b/ytapi/ytapi.go @@ -14,7 +14,7 @@ import ( "time" "github.com/lbryio/ytsync/v5/shared" - "github.com/lbryio/ytsync/v5/util" + logUtils "github.com/lbryio/ytsync/v5/util" "github.com/lbryio/ytsync/v5/downloader/ytdl" @@ -26,6 +26,7 @@ import ( "github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/jsonrpc" "github.com/lbryio/lbry.go/v2/extras/stop" + "github.com/lbryio/lbry.go/v2/extras/util" "github.com/aws/aws-sdk-go/aws" log "github.com/sirupsen/logrus" @@ -56,16 +57,22 @@ type VideoParams struct { var mostRecentlyFailedChannel string // TODO: fix this hack! func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) { - var videos []Video if quickSync && maxVideos > 50 { maxVideos = 50 } - videoIDs, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool) + allVideos, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool) if err != nil { return nil, errors.Err(err) } - + videoIDs := make([]string, 0, len(allVideos)) + for _, video := range allVideos { + sv, ok := syncedVideos[video] + if ok && util.SubstringInSlice(sv.FailureReason, shared.NeverRetryFailures) { + continue + } + videoIDs = append(videoIDs, video) + } log.Infof("Got info for %d videos from youtube downloader", len(videoIDs)) playlistMap := make(map[string]int64) @@ -216,7 +223,7 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC Status: "failed", FailureReason: err.Error(), }) - util.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err))) + logUtils.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err))) if errSDK != nil { return nil, errors.Err(errSDK) }