lots of bug fixes and improvements

This commit is contained in:
Niko Storni 2020-08-18 00:03:38 +02:00
parent a56166ee51
commit 0b002c8228
6 changed files with 103 additions and 34 deletions

View file

@ -125,7 +125,7 @@ func triggerScrape(videoID string, ip *net.TCPAddr) error {
if err != nil { if err != nil {
return errors.Err(err) return errors.Err(err)
} }
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36") req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
res, err := client.Do(req) res, err := client.Do(req)
if err != nil { if err != nil {
@ -168,20 +168,25 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
if err != nil { if err != nil {
logrus.Error(err) logrus.Error(err)
} }
if release != nil {
//const sqlTimeFormat = "2006-01-02 15:04:05"
sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
if err == nil {
return sqlTime.Format(releaseTimeFormat), nil
} else {
logrus.Error(err)
}
}
ytdlUploadDate, err := time.Parse("20060102", uploadDate) ytdlUploadDate, err := time.Parse("20060102", uploadDate)
if err != nil { if err != nil {
logrus.Error(err) logrus.Error(err)
} }
if time.Now().AddDate(0, 0, -2).After(ytdlUploadDate) { if release != nil {
//const sqlTimeFormat = "2006-01-02 15:04:05"
sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
if err == nil {
if sqlTime.Day() != ytdlUploadDate.Day() {
logrus.Infof("upload day from APIs differs from the ytdl one by more than 1 day.")
} else {
return sqlTime.Format(releaseTimeFormat), nil
}
} else {
logrus.Error(err)
}
}
if time.Now().AddDate(0, 0, -3).After(ytdlUploadDate) {
return ytdlUploadDate.Format(releaseTimeFormat), nil return ytdlUploadDate.Format(releaseTimeFormat), nil
} }
client := getClient(ip) client := getClient(ip)
@ -189,7 +194,7 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
if err != nil { if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err) return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
} }
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36") req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
res, err := client.Do(req) res, err := client.Do(req)
if err != nil { if err != nil {
@ -245,7 +250,7 @@ func getClient(ip *net.TCPAddr) *http.Client {
const ( const (
googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
maxAttempts = 3 maxAttempts = 3
extractionError = "YouTube said: Unable to extract video data" extractionError = "YouTube said: Unable to extract video data"
throttledError = "HTTP Error 429" throttledError = "HTTP Error 429"

View file

@ -918,18 +918,7 @@ func (s *Sync) processVideo(v ytapi.Video) (err error) {
alreadyPublished := ok && sv.Published alreadyPublished := ok && sv.Published
videoRequiresUpgrade := ok && s.Manager.CliFlags.UpgradeMetadata && sv.MetadataVersion < newMetadataVersion videoRequiresUpgrade := ok && s.Manager.CliFlags.UpgradeMetadata && sv.MetadataVersion < newMetadataVersion
neverRetryFailures := []string{ neverRetryFailures := shared.NeverRetryFailures
"Error extracting sts from embedded url response",
"Unable to extract signature tokens",
"the video is too big to sync, skipping for now",
"video is too long to process",
"This video contains content from",
"no compatible format available for this video",
"Watch this video on YouTube.",
"have blocked it on copyright grounds",
"giving up after 0 fragment retries",
"Sign in to confirm your age",
}
if ok && !sv.Published && util.SubstringInSlice(sv.FailureReason, neverRetryFailures) { if ok && !sv.Published && util.SubstringInSlice(sv.FailureReason, neverRetryFailures) {
log.Println(v.ID() + " can't ever be published") log.Println(v.ID() + " can't ever be published")
return nil return nil

View file

@ -48,6 +48,11 @@ func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([]
"channel_id": {cliFlags.ChannelID}, "channel_id": {cliFlags.ChannelID},
}) })
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.FetchChannels(status, cliFlags)
}
return nil, errors.Err(err) return nil, errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -106,6 +111,11 @@ func (a *APIConfig) SetChannelCert(certHex string, channelID string) error {
"auth_token": {a.ApiToken}, "auth_token": {a.ApiToken},
}) })
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.SetChannelCert(certHex, channelID)
}
return errors.Err(err) return errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -149,6 +159,11 @@ func (a *APIConfig) SetChannelStatus(channelID string, status string, failureRea
} }
res, err := http.PostForm(endpoint, params) res, err := http.PostForm(endpoint, params)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.SetChannelStatus(channelID, status, failureReason, transferState)
}
return nil, nil, errors.Err(err) return nil, nil, errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -194,6 +209,11 @@ func (a *APIConfig) SetChannelClaimID(channelID string, channelClaimID string) e
"channel_claim_id": {channelClaimID}, "channel_claim_id": {channelClaimID},
}) })
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.SetChannelClaimID(channelID, channelClaimID)
}
return errors.Err(err) return errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -233,6 +253,11 @@ func (a *APIConfig) DeleteVideos(videos []string) error {
} }
res, err := http.PostForm(endpoint, vals) res, err := http.PostForm(endpoint, vals)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.DeleteVideos(videos)
}
return errors.Err(err) return errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -294,6 +319,11 @@ func (a *APIConfig) MarkVideoStatus(status shared.VideoStatus) error {
} }
res, err := http.PostForm(endpoint, vals) res, err := http.PostForm(endpoint, vals)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.MarkVideoStatus(status)
}
return errors.Err(err) return errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -331,6 +361,11 @@ func (a *APIConfig) VideoState(videoID string) (string, error) {
res, err := http.PostForm(endpoint, vals) res, err := http.PostForm(endpoint, vals)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.VideoState(videoID)
}
return "", errors.Err(err) return "", errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
@ -380,6 +415,11 @@ func (a *APIConfig) GetReleasedDate(videoID string) (*VideoRelease, error) {
res, err := http.PostForm(endpoint, vals) res, err := http.PostForm(endpoint, vals)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.GetReleasedDate(videoID)
}
return nil, errors.Err(err) return nil, errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()

View file

@ -27,6 +27,19 @@ type YoutubeChannel struct {
LastUploadedVideo string `json:"last_uploaded_video"` LastUploadedVideo string `json:"last_uploaded_video"`
} }
// NeverRetryFailures lists fragments of failure messages that mark a video as
// permanently unsyncable. Callers pass it to SubstringInSlice together with a
// synced video's FailureReason; when any entry matches, the video is skipped
// instead of being attempted again.
var NeverRetryFailures = []string{
"Error extracting sts from embedded url response",
"Unable to extract signature tokens",
"the video is too big to sync, skipping for now",
"video is too long to process",
"This video contains content from",
"no compatible format available for this video",
"Watch this video on YouTube.",
"have blocked it on copyright grounds",
"giving up after 0 fragment retries",
"Sign in to confirm your age",
}
type SyncFlags struct { type SyncFlags struct {
StopOnError bool StopOnError bool
TakeOverExistingChannel bool TakeOverExistingChannel bool

View file

@ -209,6 +209,15 @@ func (v *YoutubeVideo) download() error {
"480", "480",
"320", "320",
} }
dur := time.Duration(v.youtubeInfo.Duration) * time.Second
if dur.Hours() > 2 { //for videos longer than 2 hours only sync up to 720p
qualities = []string{
"720",
"480",
"320",
}
}
ytdlArgs := []string{ ytdlArgs := []string{
"--no-progress", "--no-progress",
"-o" + strings.TrimSuffix(v.getFullPath(), ".mp4"), "-o" + strings.TrimSuffix(v.getFullPath(), ".mp4"),
@ -219,12 +228,11 @@ func (v *YoutubeVideo) download() error {
"-movflags faststart", "-movflags faststart",
"--abort-on-unavailable-fragment", "--abort-on-unavailable-fragment",
"--fragment-retries", "--fragment-retries",
"0", "1",
"--user-agent",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
"--cookies", "--cookies",
"cookies.txt", "cookies.txt",
} }
userAgent := []string{"--user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"}
if v.maxVideoSize > 0 { if v.maxVideoSize > 0 {
ytdlArgs = append(ytdlArgs, ytdlArgs = append(ytdlArgs,
"--max-filesize", "--max-filesize",
@ -264,8 +272,10 @@ func (v *YoutubeVideo) download() error {
"https://www.youtube.com/watch?v="+v.ID(), "https://www.youtube.com/watch?v="+v.ID(),
) )
for i, quality := range qualities { for i := 0; i < len(qualities); i++ {
quality := qualities[i]
argsWithFilters := append(ytdlArgs, "-fbestvideo[ext=mp4][height<="+quality+"]+bestaudio[ext!=webm]") argsWithFilters := append(ytdlArgs, "-fbestvideo[ext=mp4][height<="+quality+"]+bestaudio[ext!=webm]")
argsWithFilters = append(argsWithFilters, userAgent...)
cmd := exec.Command("youtube-dl", argsWithFilters...) cmd := exec.Command("youtube-dl", argsWithFilters...)
log.Printf("Running command youtube-dl %s", strings.Join(argsWithFilters, " ")) log.Printf("Running command youtube-dl %s", strings.Join(argsWithFilters, " "))
@ -294,6 +304,11 @@ func (v *YoutubeVideo) download() error {
return errors.Err(string(errorLog)) return errors.Err(string(errorLog))
} }
continue //this bypasses the yt throttling IP redistribution... TODO: don't continue //this bypasses the yt throttling IP redistribution... TODO: don't
} else if strings.Contains(string(errorLog), "YouTube said: Unable to extract video data") && !strings.Contains(userAgent[1], "Googlebot") {
i-- //do not lower quality when trying a different user agent
userAgent = []string{"--user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
log.Infof("trying different user agent for video %s", v.ID())
continue
} }
return errors.Err(string(errorLog)) return errors.Err(string(errorLog))
} }

View file

@ -14,7 +14,7 @@ import (
"time" "time"
"github.com/lbryio/ytsync/v5/shared" "github.com/lbryio/ytsync/v5/shared"
"github.com/lbryio/ytsync/v5/util" logUtils "github.com/lbryio/ytsync/v5/util"
"github.com/lbryio/ytsync/v5/downloader/ytdl" "github.com/lbryio/ytsync/v5/downloader/ytdl"
@ -26,6 +26,7 @@ import (
"github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/jsonrpc" "github.com/lbryio/lbry.go/v2/extras/jsonrpc"
"github.com/lbryio/lbry.go/v2/extras/stop" "github.com/lbryio/lbry.go/v2/extras/stop"
"github.com/lbryio/lbry.go/v2/extras/util"
"github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -56,16 +57,22 @@ type VideoParams struct {
var mostRecentlyFailedChannel string // TODO: fix this hack! var mostRecentlyFailedChannel string // TODO: fix this hack!
func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) { func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
var videos []Video var videos []Video
if quickSync && maxVideos > 50 { if quickSync && maxVideos > 50 {
maxVideos = 50 maxVideos = 50
} }
videoIDs, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool) allVideos, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool)
if err != nil { if err != nil {
return nil, errors.Err(err) return nil, errors.Err(err)
} }
videoIDs := make([]string, 0, len(allVideos))
for _, video := range allVideos {
sv, ok := syncedVideos[video]
if ok && util.SubstringInSlice(sv.FailureReason, shared.NeverRetryFailures) {
continue
}
videoIDs = append(videoIDs, video)
}
log.Infof("Got info for %d videos from youtube downloader", len(videoIDs)) log.Infof("Got info for %d videos from youtube downloader", len(videoIDs))
playlistMap := make(map[string]int64) playlistMap := make(map[string]int64)
@ -216,7 +223,7 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC
Status: "failed", Status: "failed",
FailureReason: err.Error(), FailureReason: err.Error(),
}) })
util.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err))) logUtils.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err)))
if errSDK != nil { if errSDK != nil {
return nil, errors.Err(errSDK) return nil, errors.Err(errSDK)
} }