fix post-live detection

prevent unlisted videos from ever being published (even if they were public before and we already know about them)
fix timestamp on videos
update user agent
This commit is contained in:
Niko Storni 2022-08-10 21:26:26 +02:00
parent 98a10d1269
commit e454cdb4c9
3 changed files with 106 additions and 172 deletions

View file

@ -195,7 +195,7 @@ func getClient(ip *net.TCPAddr) *http.Client {
const (
GoogleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
ChromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
ChromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
maxAttempts = 3
extractionError = "YouTube said: Unable to extract video data"
throttledError = "HTTP Error 429"

View file

@ -1,7 +1,6 @@
package ytdl
import (
"math"
"time"
"github.com/lbryio/ytsync/v5/sdk"
@ -9,192 +8,123 @@ import (
)
type YtdlVideo struct {
ID string `json:"id"`
Title string `json:"title"`
Formats []struct {
FormatId string `json:"format_id"`
FormatNote string `json:"format_note"`
Ext string `json:"ext"`
Protocol string `json:"protocol"`
Acodec string `json:"acodec"`
Vcodec string `json:"vcodec"`
Url string `json:"url"`
Width int `json:"width,omitempty"`
Height int `json:"height,omitempty"`
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration"`
} `json:"fragments,omitempty"`
AudioExt string `json:"audio_ext"`
VideoExt string `json:"video_ext"`
Format string `json:"format"`
Resolution string `json:"resolution"`
HttpHeaders struct {
UserAgent string `json:"User-Agent"`
Accept string `json:"Accept"`
AcceptEncoding string `json:"Accept-Encoding"`
AcceptLanguage string `json:"Accept-Language"`
} `json:"http_headers"`
Asr float64 `json:"asr,omitempty"`
Filesize int64 `json:"filesize,omitempty"`
SourcePreference int `json:"source_preference,omitempty"`
Quality int `json:"quality,omitempty"`
Tbr float64 `json:"tbr,omitempty"`
Language string `json:"language,omitempty"`
LanguagePreference int `json:"language_preference,omitempty"`
Abr float64 `json:"abr,omitempty"`
DownloaderOptions struct {
HttpChunkSize int `json:"http_chunk_size"`
} `json:"downloader_options,omitempty"`
Container string `json:"container,omitempty"`
Fps float64 `json:"fps,omitempty"`
DynamicRange string `json:"dynamic_range,omitempty"`
Vbr float64 `json:"vbr,omitempty"`
FilesizeApprox float64 `json:"filesize_approx,omitempty"`
} `json:"formats"`
Thumbnails []Thumbnail `json:"thumbnails"`
Thumbnail string `json:"thumbnail"`
Description string `json:"description"`
UploadDate string `json:"upload_date"`
uploadDateForReal *time.Time `json:"upload_date_for_real"`
Uploader string `json:"uploader"`
UploaderId string `json:"uploader_id"`
UploaderUrl string `json:"uploader_url"`
ChannelID string `json:"channel_id"`
ChannelUrl string `json:"channel_url"`
Duration int `json:"duration"`
ViewCount int `json:"view_count"`
AgeLimit int `json:"age_limit"`
WebpageUrl string `json:"webpage_url"`
Categories []string `json:"categories"`
Tags []string `json:"tags"`
PlayableInEmbed bool `json:"playable_in_embed"`
IsLive bool `json:"is_live"`
WasLive bool `json:"was_live"`
LiveStatus string `json:"live_status"`
ReleaseTimestamp int64 `json:"release_timestamp"`
LikeCount int `json:"like_count"`
Channel string `json:"channel"`
Availability string `json:"availability"`
WebpageUrlBasename string `json:"webpage_url_basename"`
WebpageUrlDomain string `json:"webpage_url_domain"`
Extractor string `json:"extractor"`
ExtractorKey string `json:"extractor_key"`
DisplayId string `json:"display_id"`
DurationString string `json:"duration_string"`
ReleaseDate string `json:"release_date"`
Asr float64 `json:"asr"`
FormatId string `json:"format_id"`
FormatNote string `json:"format_note"`
SourcePreference int `json:"source_preference"`
Fps float64 `json:"fps"`
Height int `json:"height"`
Quality int `json:"quality"`
Tbr float64 `json:"tbr"`
Url string `json:"url"`
Width int `json:"width"`
Language string `json:"language"`
LanguagePreference int `json:"language_preference"`
Ext string `json:"ext"`
Vcodec string `json:"vcodec"`
Acodec string `json:"acodec"`
DynamicRange string `json:"dynamic_range"`
Protocol string `json:"protocol"`
VideoExt string `json:"video_ext"`
AudioExt string `json:"audio_ext"`
Vbr float64 `json:"vbr"`
Abr float64 `json:"abr"`
Format string `json:"format"`
Resolution string `json:"resolution"`
FilesizeApprox float64 `json:"filesize_approx"`
HttpHeaders struct {
UserAgent string `json:"User-Agent"`
Accept string `json:"Accept"`
AcceptEncoding string `json:"Accept-Encoding"`
AcceptLanguage string `json:"Accept-Language"`
} `json:"http_headers"`
Fulltitle string `json:"fulltitle"`
Epoch int `json:"epoch"`
}
ID string `json:"id"`
Title string `json:"title"`
Thumbnails []Thumbnail `json:"thumbnails"`
Description string `json:"description"`
ChannelID string `json:"channel_id"`
Duration int `json:"duration"`
Categories []string `json:"categories"`
Tags []string `json:"tags"`
IsLive bool `json:"is_live"`
LiveStatus string `json:"live_status"`
ReleaseTimestamp int64 `json:"release_timestamp"`
uploadDateForReal *time.Time
Availability string `json:"availability"`
ReleaseDate string `json:"release_date"`
type Format struct {
Asr int `json:"asr"`
Filesize int `json:"filesize"`
FormatID string `json:"format_id"`
FormatNote string `json:"format_note"`
Fps interface{} `json:"fps"`
Height interface{} `json:"height"`
Quality int `json:"quality"`
Tbr float64 `json:"tbr"`
URL string `json:"url"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
Vcodec string `json:"vcodec"`
Acodec string `json:"acodec"`
Abr float64 `json:"abr,omitempty"`
DownloaderOptions struct {
HTTPChunkSize int `json:"http_chunk_size"`
} `json:"downloader_options,omitempty"`
Container string `json:"container,omitempty"`
Format string `json:"format"`
Protocol string `json:"protocol"`
HTTPHeaders struct {
UserAgent string `json:"User-Agent"`
AcceptCharset string `json:"Accept-Charset"`
Accept string `json:"Accept"`
AcceptEncoding string `json:"Accept-Encoding"`
AcceptLanguage string `json:"Accept-Language"`
} `json:"http_headers"`
Vbr float64 `json:"vbr,omitempty"`
//WasLive bool `json:"was_live"`
//Formats interface{} `json:"formats"`
//Thumbnail string `json:"thumbnail"`
//Uploader string `json:"uploader"`
//UploaderID string `json:"uploader_id"`
//UploaderURL string `json:"uploader_url"`
//ChannelURL string `json:"channel_url"`
//ViewCount int `json:"view_count"`
//AverageRating interface{} `json:"average_rating"`
//AgeLimit int `json:"age_limit"`
//WebpageURL string `json:"webpage_url"`
//PlayableInEmbed bool `json:"playable_in_embed"`
//AutomaticCaptions interface{} `json:"automatic_captions"`
//Subtitles interface{} `json:"subtitles"`
//Chapters interface{} `json:"chapters"`
//LikeCount int `json:"like_count"`
//Channel string `json:"channel"`
//ChannelFollowerCount int `json:"channel_follower_count"`
//UploadDate string `json:"upload_date"`
//OriginalURL string `json:"original_url"`
//WebpageURLBasename string `json:"webpage_url_basename"`
//WebpageURLDomain string `json:"webpage_url_domain"`
//Extractor string `json:"extractor"`
//ExtractorKey string `json:"extractor_key"`
//Playlist interface{} `json:"playlist"`
//PlaylistIndex interface{} `json:"playlist_index"`
//DisplayID string `json:"display_id"`
//Fulltitle string `json:"fulltitle"`
//DurationString string `json:"duration_string"`
//RequestedSubtitles interface{} `json:"requested_subtitles"`
//HasDrm bool `json:"__has_drm"`
//RequestedFormats interface{} `json:"requested_formats"`
//Format string `json:"format"`
//FormatID string `json:"format_id"`
//Ext string `json:"ext"`
//Protocol string `json:"protocol"`
//Language interface{} `json:"language"`
//FormatNote string `json:"format_note"`
//FilesizeApprox int `json:"filesize_approx"`
//Tbr float64 `json:"tbr"`
//Width int `json:"width"`
//Height int `json:"height"`
//Resolution string `json:"resolution"`
//Fps int `json:"fps"`
//DynamicRange string `json:"dynamic_range"`
//Vcodec string `json:"vcodec"`
//Vbr float64 `json:"vbr"`
//StretchedRatio interface{} `json:"stretched_ratio"`
//Acodec string `json:"acodec"`
//Abr float64 `json:"abr"`
//Asr int `json:"asr"`
//Epoch int `json:"epoch"`
//Filename string `json:"filename"`
//Urls string `json:"urls"`
//Type string `json:"_type"`
}
// Thumbnail is one element of the "thumbnails" JSON array that is decoded
// into YtdlVideo.Thumbnails. Each field is filled from the JSON key named in
// its struct tag.
type Thumbnail struct {
URL string `json:"url"`
Preference int `json:"preference"`
ID string `json:"id"`
Height int `json:"height"`
Width int `json:"width"`
Resolution string `json:"resolution"`
}
type HTTPHeaders struct {
AcceptCharset string `json:"Accept-Charset"`
AcceptLanguage string `json:"Accept-Language"`
AcceptEncoding string `json:"Accept-Encoding"`
Accept string `json:"Accept"`
UserAgent string `json:"User-Agent"`
Height int `json:"height,omitempty"`
Width int `json:"width,omitempty"`
Resolution string `json:"resolution,omitempty"`
}
// GetUploadTime returns the time to use as the video's upload time.
// The selected value is stored in v.uploadDateForReal, and once that field is
// set it is returned directly without recomputing anything.
// Errors from the release-date lookup and from time parsing are logged with
// logrus and never returned to the caller.
// NOTE(review): the lines below appear to interleave the previous and the new
// implementation (diff rendered without +/- markers) — confirm the exact
// statement order against the real file.
func (v *YtdlVideo) GetUploadTime() time.Time {
//priority list:
// release timestamp from yt
// release timestamp from morty
// release date from yt
// Reuse the value stored by an earlier call.
if v.uploadDateForReal != nil {
return *v.uploadDateForReal
}
release, err := sdk.GetAPIsConfigs().GetReleasedDate(v.ID)
var ytdlReleaseTimestamp time.Time
// ReleaseTimestamp is interpreted as Unix seconds and converted to UTC;
// values <= 0 leave ytdlReleaseTimestamp at its zero value.
if v.ReleaseTimestamp > 0 {
ytdlReleaseTimestamp = time.Unix(v.ReleaseTimestamp, 0).UTC()
}
//get morty timestamp
var mortyReleaseTimestamp time.Time
mortyRelease, err := sdk.GetAPIsConfigs().GetReleasedDate(v.ID)
if err != nil {
logrus.Error(err)
}
// "20060102" is the Go reference layout for a YYYYMMDD date string.
ytdlUploadDate, err := time.Parse("20060102", v.UploadDate)
if err != nil {
logrus.Error(err)
}
if v.ReleaseTimestamp != 0 {
ytdlUploadDate = time.Unix(v.ReleaseTimestamp, 0)
}
if release != nil {
sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
if err == nil {
// Only accept the API time when it is within 48 hours of the ytdl date.
hoursDiff := math.Abs(sqlTime.Sub(ytdlUploadDate).Hours())
if hoursDiff > 48 {
logrus.Infof("upload day from APIs differs from the ytdl one by more than 2 days.")
} else {
v.uploadDateForReal = &sqlTime
return sqlTime
}
} else {
} else {
// ReleaseTime is parsed as RFC 3339, with UTC as the location passed to the parser.
mortyReleaseTimestamp, err = time.ParseInLocation(time.RFC3339, mortyRelease.ReleaseTime, time.UTC)
if err != nil {
logrus.Error(err)
}
}
v.uploadDateForReal = &ytdlUploadDate
return ytdlUploadDate
// "20060102" is the Go reference layout for a YYYYMMDD date string.
ytdlReleaseDate, err := time.Parse("20060102", v.ReleaseDate)
if err != nil {
logrus.Error(err)
}
// Pick the first non-zero candidate in priority order and store it.
if !ytdlReleaseTimestamp.IsZero() {
v.uploadDateForReal = &ytdlReleaseTimestamp
} else if !mortyReleaseTimestamp.IsZero() {
v.uploadDateForReal = &mortyReleaseTimestamp
} else {
v.uploadDateForReal = &ytdlReleaseDate
}
return *v.uploadDateForReal
}

View file

@ -787,6 +787,9 @@ func (v *YoutubeVideo) downloadAndPublish(daemon *jsonrpc.Client, params SyncPar
if v.youtubeInfo.IsLive == true {
return nil, errors.Err("video is a live stream and hasn't completed yet")
}
if v.youtubeInfo.Availability != "public" {
return nil, errors.Err("video is not public")
}
if dur > v.maxVideoLength {
logUtils.SendErrorToSlack("%s is %s long and the limit is %s", v.id, dur.String(), v.maxVideoLength.String())
return nil, errors.Err("video is too long to process")
@ -795,7 +798,8 @@ func (v *YoutubeVideo) downloadAndPublish(daemon *jsonrpc.Client, params SyncPar
logUtils.SendErrorToSlack("%s is %s long and the minimum is %s", v.id, dur.String(), minDuration.String())
return nil, errors.Err("video is too short to process")
}
buggedLivestream := v.youtubeInfo.WasLive && time.Unix(v.youtubeInfo.ReleaseTimestamp, 0).After(time.Now().AddDate(0, 0, -1)) && int(dur.Seconds())%7200 == 0
buggedLivestream := v.youtubeInfo.LiveStatus == "post_live"
if buggedLivestream {
return nil, errors.Err("livestream is likely bugged as it was published less than 24 hours ago and has a length of %s", dur.String())
}