lots of bug fixes and improvements

This commit is contained in:
Niko Storni 2020-08-18 00:03:38 +02:00
parent a56166ee51
commit 0b002c8228
6 changed files with 103 additions and 34 deletions

View file

@ -125,7 +125,7 @@ func triggerScrape(videoID string, ip *net.TCPAddr) error {
if err != nil {
return errors.Err(err)
}
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36")
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
res, err := client.Do(req)
if err != nil {
@ -168,20 +168,25 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
if err != nil {
logrus.Error(err)
}
if release != nil {
//const sqlTimeFormat = "2006-01-02 15:04:05"
sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
if err == nil {
return sqlTime.Format(releaseTimeFormat), nil
} else {
logrus.Error(err)
}
}
ytdlUploadDate, err := time.Parse("20060102", uploadDate)
if err != nil {
logrus.Error(err)
}
if time.Now().AddDate(0, 0, -2).After(ytdlUploadDate) {
if release != nil {
//const sqlTimeFormat = "2006-01-02 15:04:05"
sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
if err == nil {
if sqlTime.Day() != ytdlUploadDate.Day() {
logrus.Infof("upload day from APIs differs from the ytdl one by more than 1 day.")
} else {
return sqlTime.Format(releaseTimeFormat), nil
}
} else {
logrus.Error(err)
}
}
if time.Now().AddDate(0, 0, -3).After(ytdlUploadDate) {
return ytdlUploadDate.Format(releaseTimeFormat), nil
}
client := getClient(ip)
@ -189,7 +194,7 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36")
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
res, err := client.Do(req)
if err != nil {
@ -245,7 +250,7 @@ func getClient(ip *net.TCPAddr) *http.Client {
const (
googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
maxAttempts = 3
extractionError = "YouTube said: Unable to extract video data"
throttledError = "HTTP Error 429"

View file

@ -918,18 +918,7 @@ func (s *Sync) processVideo(v ytapi.Video) (err error) {
alreadyPublished := ok && sv.Published
videoRequiresUpgrade := ok && s.Manager.CliFlags.UpgradeMetadata && sv.MetadataVersion < newMetadataVersion
neverRetryFailures := []string{
"Error extracting sts from embedded url response",
"Unable to extract signature tokens",
"the video is too big to sync, skipping for now",
"video is too long to process",
"This video contains content from",
"no compatible format available for this video",
"Watch this video on YouTube.",
"have blocked it on copyright grounds",
"giving up after 0 fragment retries",
"Sign in to confirm your age",
}
neverRetryFailures := shared.NeverRetryFailures
if ok && !sv.Published && util.SubstringInSlice(sv.FailureReason, neverRetryFailures) {
log.Println(v.ID() + " can't ever be published")
return nil

View file

@ -48,6 +48,11 @@ func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([]
"channel_id": {cliFlags.ChannelID},
})
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.FetchChannels(status, cliFlags)
}
return nil, errors.Err(err)
}
defer res.Body.Close()
@ -106,6 +111,11 @@ func (a *APIConfig) SetChannelCert(certHex string, channelID string) error {
"auth_token": {a.ApiToken},
})
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.SetChannelCert(certHex, channelID)
}
return errors.Err(err)
}
defer res.Body.Close()
@ -149,6 +159,11 @@ func (a *APIConfig) SetChannelStatus(channelID string, status string, failureRea
}
res, err := http.PostForm(endpoint, params)
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.SetChannelStatus(channelID, status, failureReason, transferState)
}
return nil, nil, errors.Err(err)
}
defer res.Body.Close()
@ -194,6 +209,11 @@ func (a *APIConfig) SetChannelClaimID(channelID string, channelClaimID string) e
"channel_claim_id": {channelClaimID},
})
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.SetChannelClaimID(channelID, channelClaimID)
}
return errors.Err(err)
}
defer res.Body.Close()
@ -233,6 +253,11 @@ func (a *APIConfig) DeleteVideos(videos []string) error {
}
res, err := http.PostForm(endpoint, vals)
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.DeleteVideos(videos)
}
return errors.Err(err)
}
defer res.Body.Close()
@ -294,6 +319,11 @@ func (a *APIConfig) MarkVideoStatus(status shared.VideoStatus) error {
}
res, err := http.PostForm(endpoint, vals)
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.MarkVideoStatus(status)
}
return errors.Err(err)
}
defer res.Body.Close()
@ -331,6 +361,11 @@ func (a *APIConfig) VideoState(videoID string) (string, error) {
res, err := http.PostForm(endpoint, vals)
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.VideoState(videoID)
}
return "", errors.Err(err)
}
defer res.Body.Close()
@ -380,6 +415,11 @@ func (a *APIConfig) GetReleasedDate(videoID string) (*VideoRelease, error) {
res, err := http.PostForm(endpoint, vals)
if err != nil {
if strings.Contains(err.Error(), "EOF") {
util.SendErrorToSlack("EOF error while trying to call %s. Waiting to retry", endpoint)
time.Sleep(30 * time.Second)
return a.GetReleasedDate(videoID)
}
return nil, errors.Err(err)
}
defer res.Body.Close()

View file

@ -27,6 +27,19 @@ type YoutubeChannel struct {
LastUploadedVideo string `json:"last_uploaded_video"`
}
// NeverRetryFailures lists fragments of failure messages that mark a video as
// permanently unsyncable. Callers compare a video's recorded failure reason
// against these entries with a substring match, so each entry only needs to
// appear somewhere inside the full error text.
var NeverRetryFailures = []string{
"Error extracting sts from embedded url response",
"Unable to extract signature tokens",
"the video is too big to sync, skipping for now",
"video is too long to process",
"This video contains content from",
"no compatible format available for this video",
"Watch this video on YouTube.",
"have blocked it on copyright grounds",
"giving up after 0 fragment retries",
"Sign in to confirm your age",
}
type SyncFlags struct {
StopOnError bool
TakeOverExistingChannel bool

View file

@ -209,6 +209,15 @@ func (v *YoutubeVideo) download() error {
"480",
"320",
}
dur := time.Duration(v.youtubeInfo.Duration) * time.Second
if dur.Hours() > 2 { //for videos longer than 2 hours only sync up to 720p
qualities = []string{
"720",
"480",
"320",
}
}
ytdlArgs := []string{
"--no-progress",
"-o" + strings.TrimSuffix(v.getFullPath(), ".mp4"),
@ -219,12 +228,11 @@ func (v *YoutubeVideo) download() error {
"-movflags faststart",
"--abort-on-unavailable-fragment",
"--fragment-retries",
"0",
"--user-agent",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
"1",
"--cookies",
"cookies.txt",
}
userAgent := []string{"--user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"}
if v.maxVideoSize > 0 {
ytdlArgs = append(ytdlArgs,
"--max-filesize",
@ -264,8 +272,10 @@ func (v *YoutubeVideo) download() error {
"https://www.youtube.com/watch?v="+v.ID(),
)
for i, quality := range qualities {
for i := 0; i < len(qualities); i++ {
quality := qualities[i]
argsWithFilters := append(ytdlArgs, "-fbestvideo[ext=mp4][height<="+quality+"]+bestaudio[ext!=webm]")
argsWithFilters = append(argsWithFilters, userAgent...)
cmd := exec.Command("youtube-dl", argsWithFilters...)
log.Printf("Running command youtube-dl %s", strings.Join(argsWithFilters, " "))
@ -294,6 +304,11 @@ func (v *YoutubeVideo) download() error {
return errors.Err(string(errorLog))
}
continue //this bypasses the yt throttling IP redistribution... TODO: don't
} else if strings.Contains(string(errorLog), "YouTube said: Unable to extract video data") && !strings.Contains(userAgent[1], "Googlebot") {
i-- //do not lower quality when trying a different user agent
userAgent = []string{"--user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
log.Infof("trying different user agent for video %s", v.ID())
continue
}
return errors.Err(string(errorLog))
}

View file

@ -14,7 +14,7 @@ import (
"time"
"github.com/lbryio/ytsync/v5/shared"
"github.com/lbryio/ytsync/v5/util"
logUtils "github.com/lbryio/ytsync/v5/util"
"github.com/lbryio/ytsync/v5/downloader/ytdl"
@ -26,6 +26,7 @@ import (
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/jsonrpc"
"github.com/lbryio/lbry.go/v2/extras/stop"
"github.com/lbryio/lbry.go/v2/extras/util"
"github.com/aws/aws-sdk-go/aws"
log "github.com/sirupsen/logrus"
@ -56,16 +57,22 @@ type VideoParams struct {
var mostRecentlyFailedChannel string // TODO: fix this hack!
func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
var videos []Video
if quickSync && maxVideos > 50 {
maxVideos = 50
}
videoIDs, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool)
allVideos, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool)
if err != nil {
return nil, errors.Err(err)
}
videoIDs := make([]string, 0, len(allVideos))
for _, video := range allVideos {
sv, ok := syncedVideos[video]
if ok && util.SubstringInSlice(sv.FailureReason, shared.NeverRetryFailures) {
continue
}
videoIDs = append(videoIDs, video)
}
log.Infof("Got info for %d videos from youtube downloader", len(videoIDs))
playlistMap := make(map[string]int64)
@ -216,7 +223,7 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC
Status: "failed",
FailureReason: err.Error(),
})
util.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err)))
logUtils.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err)))
if errSDK != nil {
return nil, errors.Err(errSDK)
}