From 17944fa46a8603bc1957101372ff6ef713fe0917 Mon Sep 17 00:00:00 2001 From: Niko Storni Date: Thu, 30 Dec 2021 13:17:11 -0500 Subject: [PATCH] refactor get video time remove broken time lookup refactor quite some code --- configs/configs.go | 23 +++++++++++ downloader/downloader.go | 86 +--------------------------------------- downloader/ytdl/Video.go | 44 +++++++++++++++++++- main.go | 22 +--------- manager/manager.go | 7 ++-- manager/ytsync.go | 2 +- metrics/metrics.go | 18 +-------- sdk/api.go | 14 +++++++ sources/youtubeVideo.go | 2 +- ytapi/ytapi.go | 14 +++---- 10 files changed, 95 insertions(+), 137 deletions(-) diff --git a/configs/configs.go b/configs/configs.go index baad17c..8d969f3 100644 --- a/configs/configs.go +++ b/configs/configs.go @@ -1,10 +1,14 @@ package configs import ( + "os" + "regexp" + "github.com/lbryio/lbry.go/v2/extras/errors" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" + log "github.com/sirupsen/logrus" "github.com/tkanos/gonfig" ) @@ -50,3 +54,22 @@ func (s *S3Configs) GetS3AWSConfig() *aws.Config { S3ForcePathStyle: aws.Bool(true), } } +func (c *Configs) GetHostname() string { + var hostname string + + var err error + hostname, err = os.Hostname() + if err != nil { + log.Error("could not detect system hostname") + hostname = "ytsync_unknown" + } + reg, err := regexp.Compile("[^a-zA-Z0-9_]+") + if err == nil { + hostname = reg.ReplaceAllString(hostname, "_") + + } + if len(hostname) > 30 { + hostname = hostname[0:30] + } + return hostname +} diff --git a/downloader/downloader.go b/downloader/downloader.go index 2b7ee06..fe63ed7 100644 --- a/downloader/downloader.go +++ b/downloader/downloader.go @@ -50,7 +50,7 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan, const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)" -func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) { +func GetVideoInformation(videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) { args := []string{ "--skip-download", "--write-info-json", @@ -80,50 +80,6 @@ func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Ch return nil, errors.Err(err) } - // now get an accurate time - const maxTries = 5 - tries := 0 -GetTime: - tries++ - t, err := getUploadTime(config, videoID, ip, video.UploadDate) - if err != nil { - //slack(":warning: Upload time error: %v", err) - if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty) || errors.Is(err, errStatusParse) || errors.Is(err, errConnectionIssue)) { - err := triggerScrape(videoID, ip) - if err == nil { - time.Sleep(2 * time.Second) // let them scrape it - goto GetTime - } else { - //slack("triggering scrape returned error: %v", err) - } - } else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) { - //slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err) - if t == "" { - return nil, errors.Err(err) - } else { - t = "" //TODO: get rid of the other piece below? - } - } - // do fallback below - } - //slack("After all that, upload time for %s is %s", videoID, t) - - if t != "" { - parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date - if err != nil { - return nil, errors.Err(err) - } - //slack(":exclamation: Got an accurate time for %s", videoID) - video.UploadDateForReal = parsed - } else { //TODO: this is the piece that isn't needed! - slack(":warning: Could not get accurate time for %s. Falling back to time from upload ytdl: %s.", videoID, video.UploadDate) - // fall back to UploadDate from youtube-dl - video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate) - if err != nil { - return nil, err - } - } - return video, nil } @@ -213,45 +169,7 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa } } - if time.Now().AddDate(0, 0, -3).After(ytdlUploadDate) { - return ytdlUploadDate.Format(releaseTimeFormat), nil - } - client := getClient(ip) - req, err := http.NewRequest(http.MethodGet, "https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v="+videoID, nil) - if err != nil { - return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err) - } - req.Header.Set("User-Agent", ChromeUA) - - res, err := client.Do(req) - if err != nil { - return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err) - } - defer res.Body.Close() - - var uploadTime struct { - Time string `json:"video_upload_time"` - Message string `json:"message"` - Status string `json:"status"` - } - err = json.NewDecoder(res.Body).Decode(&uploadTime) - if err != nil { - return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err) - } - - if uploadTime.Status == "ERROR1" { - return ytdlUploadDate.Format(releaseTimeFormat), errNotScraped - } - - if uploadTime.Status == "" && strings.HasPrefix(uploadTime.Message, "CANNOT_RETRIEVE_REPORT_FOR_VIDEO_") { - return ytdlUploadDate.Format(releaseTimeFormat), errors.Err("cannot retrieve report for video") - } - - if uploadTime.Time == "" { - return ytdlUploadDate.Format(releaseTimeFormat), errUploadTimeEmpty - } - - return uploadTime.Time, nil + return ytdlUploadDate.Format(releaseTimeFormat), nil } func getClient(ip *net.TCPAddr) *http.Client { diff --git a/downloader/ytdl/Video.go b/downloader/ytdl/Video.go index 233e667..9da2db5 100644 --- a/downloader/ytdl/Video.go +++ b/downloader/ytdl/Video.go @@ -1,6 +1,12 @@ package ytdl -import "time" +import ( + "math" + "time" + + "github.com/lbryio/ytsync/v5/sdk" + "github.com/sirupsen/logrus" +) type YtdlVideo struct { ID string `json:"id"` @@ -50,7 +56,7 @@ type YtdlVideo struct { Thumbnail string `json:"thumbnail"` Description string `json:"description"` UploadDate string `json:"upload_date"` - UploadDateForReal time.Time `json:"upload_date_for_real"` + uploadDateForReal *time.Time `json:"upload_date_for_real"` Uploader string `json:"uploader"` UploaderId string `json:"uploader_id"` UploaderUrl string `json:"uploader_url"` @@ -158,3 +164,37 @@ type HTTPHeaders struct { Accept string `json:"Accept"` UserAgent string `json:"User-Agent"` } + +func (v *YtdlVideo) GetUploadTime() time.Time { + if v.uploadDateForReal != nil { + return *v.uploadDateForReal + } + + release, err := sdk.GetAPIsConfigs().GetReleasedDate(v.ID) + if err != nil { + logrus.Error(err) + } + ytdlUploadDate, err := time.Parse("20060102", v.UploadDate) + if err != nil { + logrus.Error(err) + } + if v.ReleaseTimestamp != 0 { + ytdlUploadDate = time.Unix(v.ReleaseTimestamp, 0) + } + if release != nil { + sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC) + if err == nil { + hoursDiff := math.Abs(sqlTime.Sub(ytdlUploadDate).Hours()) + if hoursDiff > 48 { + logrus.Infof("upload day from APIs differs from the ytdl one by more than 2 days.") + } else { + v.uploadDateForReal = &sqlTime + return sqlTime + } + } else { + logrus.Error(err) + } + } + v.uploadDateForReal = &ytdlUploadDate + return ytdlUploadDate +} diff --git a/main.go b/main.go index b8ff1cf..2ff801f 100644 --- a/main.go +++ b/main.go @@ -9,7 +9,6 @@ import ( "github.com/lbryio/ytsync/v5/configs" "github.com/lbryio/ytsync/v5/manager" - "github.com/lbryio/ytsync/v5/sdk" "github.com/lbryio/ytsync/v5/shared" ytUtils "github.com/lbryio/ytsync/v5/util" @@ -75,22 +74,11 @@ func ytSync(cmd *cobra.Command, args []string) { if err != nil { log.Fatalf("could not parse configuration file: %s", errors.FullTrace(err)) } - var hostname string if configs.Configuration.SlackToken == "" { log.Error("A slack token was not present in the config! Slack messages disabled!") } else { - var err error - hostname, err = os.Hostname() - if err != nil { - log.Error("could not detect system hostname") - hostname = "ytsync-unknown" - } - if len(hostname) > 30 { - hostname = hostname[0:30] - } - - util.InitSlack(configs.Configuration.SlackToken, configs.Configuration.SlackChannel, hostname) + util.InitSlack(configs.Configuration.SlackToken, configs.Configuration.SlackChannel, configs.Configuration.GetHostname()) } if cliFlags.Status != "" && !util.InSlice(cliFlags.Status, shared.SyncStatuses) { @@ -131,17 +119,9 @@ func ytSync(cmd *cobra.Command, args []string) { blobsDir := ytUtils.GetBlobsDir() - apiConfig := &sdk.APIConfig{ - ApiURL: configs.Configuration.InternalApisEndpoint, - ApiToken: configs.Configuration.InternalApisAuthToken, - HostName: hostname, - } - sm := manager.NewSyncManager( cliFlags, blobsDir, - configs.Configuration.LbrycrdString, - apiConfig, ) err = sm.Start() if err != nil { diff --git a/manager/manager.go b/manager/manager.go index d5abf79..fa142b1 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -8,6 +8,7 @@ import ( "time" "github.com/lbryio/ytsync/v5/blobs_reflector" + "github.com/lbryio/ytsync/v5/configs" "github.com/lbryio/ytsync/v5/ip_manager" "github.com/lbryio/ytsync/v5/namer" "github.com/lbryio/ytsync/v5/sdk" @@ -29,12 +30,12 @@ type SyncManager struct { channelsToSync []Sync } -func NewSyncManager(cliFlags shared.SyncFlags, blobsDir, lbrycrdDsn string, apiConfig *sdk.APIConfig) *SyncManager { +func NewSyncManager(cliFlags shared.SyncFlags, blobsDir string) *SyncManager { return &SyncManager{ CliFlags: cliFlags, blobsDir: blobsDir, - LbrycrdDsn: lbrycrdDsn, - ApiConfig: apiConfig, + LbrycrdDsn: configs.Configuration.LbrycrdString, + ApiConfig: sdk.GetAPIsConfigs(), } } func (s *SyncManager) enqueueChannel(channel *shared.YoutubeChannel) { diff --git a/manager/ytsync.go b/manager/ytsync.go index a5be285..af96543 100644 --- a/manager/ytsync.go +++ b/manager/ytsync.go @@ -868,7 +868,7 @@ func (s *Sync) enqueueYoutubeVideos() error { return err } - videos, err := ytapi.GetVideosToSync(s.Manager.ApiConfig, s.DbChannelData.ChannelId, s.syncedVideos, s.Manager.CliFlags.QuickSync, s.Manager.CliFlags.VideosToSync(s.DbChannelData.TotalSubscribers), ytapi.VideoParams{ + videos, err := ytapi.GetVideosToSync(s.DbChannelData.ChannelId, s.syncedVideos, s.Manager.CliFlags.QuickSync, s.Manager.CliFlags.VideosToSync(s.DbChannelData.TotalSubscribers), ytapi.VideoParams{ VideoDir: s.videoDirectory, Stopper: s.grp, IPPool: ipPool, diff --git a/metrics/metrics.go b/metrics/metrics.go index c978430..531f478 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -1,31 +1,17 @@ package metrics import ( - "os" - "regexp" + "github.com/lbryio/ytsync/v5/configs" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - log "github.com/sirupsen/logrus" ) var ( Durations = promauto.NewHistogramVec(prometheus.HistogramOpts{ Namespace: "ytsync", - Subsystem: getHostname(), + Subsystem: configs.Configuration.GetHostname(), Name: "duration", Help: "The durations of the individual modules", }, []string{"path"}) ) - -func getHostname() string { - hostname, err := os.Hostname() - if err != nil { - hostname = "ytsync_unknown" - } - reg, err := regexp.Compile("[^a-zA-Z0-9_]+") - if err != nil { - log.Fatal(err) - } - return reg.ReplaceAllString(hostname, "_") -} diff --git a/sdk/api.go b/sdk/api.go index 3314825..d83ce4f 100644 --- a/sdk/api.go +++ b/sdk/api.go @@ -13,6 +13,7 @@ import ( "github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/null" + "github.com/lbryio/ytsync/v5/configs" "github.com/lbryio/ytsync/v5/shared" "github.com/lbryio/ytsync/v5/util" @@ -30,6 +31,19 @@ type APIConfig struct { HostName string } +var instance *APIConfig + +func GetAPIsConfigs() *APIConfig { + if instance == nil { + instance = &APIConfig{ + ApiURL: configs.Configuration.InternalApisEndpoint, + ApiToken: configs.Configuration.InternalApisAuthToken, + HostName: configs.Configuration.GetHostname(), + } + } + return instance +} + func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([]shared.YoutubeChannel, error) { type apiJobsResponse struct { Success bool `json:"success"` diff --git a/sources/youtubeVideo.go b/sources/youtubeVideo.go index ba2fb89..bcbb5e0 100644 --- a/sources/youtubeVideo.go +++ b/sources/youtubeVideo.go @@ -107,7 +107,7 @@ func NewYoutubeVideo(directory string, videoData *ytdl.YtdlVideo, playlistPositi title: videoData.Title, description: videoData.Description, playlistPosition: playlistPosition, - publishedAt: videoData.UploadDateForReal, + publishedAt: videoData.GetUploadTime(), dir: directory, youtubeInfo: videoData, mocked: false, diff --git a/ytapi/ytapi.go b/ytapi/ytapi.go index 915e635..7e34ac6 100644 --- a/ytapi/ytapi.go +++ b/ytapi/ytapi.go @@ -55,7 +55,7 @@ type VideoParams struct { var mostRecentlyFailedChannel string // TODO: fix this hack! -func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) { +func GetVideosToSync(channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) { var videos []Video if quickSync && maxVideos > 50 { maxVideos = 50 @@ -94,7 +94,7 @@ func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[s mostRecentlyFailedChannel = channelID } - vids, err := getVideos(config, channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool) + vids, err := getVideos(channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool) if err != nil { return nil, err } @@ -203,7 +203,8 @@ func ChannelInfo(channelID string) (*YoutubeStatsResponse, error) { return &decodedResponse, nil } -func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) { +func getVideos(channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) { + config := sdk.GetAPIsConfigs() var videos []*ytdl.YtdlVideo for _, videoID := range videoIDs { if len(videoID) < 5 { @@ -215,11 +216,6 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC default: } - //ip, err := ipPool.GetIP(videoID) - //if err != nil { - // return nil, err - //} - //video, err := downloader.GetVideoInformation(videoID, &net.TCPAddr{IP: net.ParseIP(ip)}) state, err := config.VideoState(videoID) if err != nil { return nil, errors.Err(err) @@ -227,7 +223,7 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC if state == "published" { continue } - video, err := downloader.GetVideoInformation(config, videoID, stopChan, nil, ipPool) + video, err := downloader.GetVideoInformation(videoID, stopChan, nil, ipPool) if err != nil { errSDK := config.MarkVideoStatus(shared.VideoStatus{ ChannelID: channelID,