refactor video upload time retrieval

remove broken time lookup
refactor quite some code
This commit is contained in:
Niko Storni 2021-12-30 13:17:11 -05:00
parent 3c18ae8de2
commit 17944fa46a
10 changed files with 95 additions and 137 deletions

View file

@ -1,10 +1,14 @@
package configs package configs
import ( import (
"os"
"regexp"
"github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/credentials"
log "github.com/sirupsen/logrus"
"github.com/tkanos/gonfig" "github.com/tkanos/gonfig"
) )
@ -50,3 +54,22 @@ func (s *S3Configs) GetS3AWSConfig() *aws.Config {
S3ForcePathStyle: aws.Bool(true), S3ForcePathStyle: aws.Bool(true),
} }
} }
// hostnameSanitizer matches every run of characters outside [a-zA-Z0-9_].
// It is compiled once at package initialization rather than on each call.
var hostnameSanitizer = regexp.MustCompile("[^a-zA-Z0-9_]+")

// GetHostname returns the system hostname in a sanitized form: every run of
// characters outside [a-zA-Z0-9_] is replaced by a single underscore and the
// result is truncated to at most 30 bytes.
//
// When the hostname cannot be determined the failure is logged (together with
// the underlying error) and "ytsync_unknown" is used instead.
//
// The receiver is never dereferenced by this method.
func (c *Configs) GetHostname() string {
	const maxHostnameLen = 30

	hostname, err := os.Hostname()
	if err != nil {
		log.WithError(err).Error("could not detect system hostname")
		hostname = "ytsync_unknown"
	}

	hostname = hostnameSanitizer.ReplaceAllString(hostname, "_")

	// After sanitization only single-byte ASCII characters remain, so slicing
	// by byte offset cannot split a multi-byte rune.
	if len(hostname) > maxHostnameLen {
		hostname = hostname[:maxHostnameLen]
	}
	return hostname
}

View file

@ -50,7 +50,7 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan,
const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)" const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)"
func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) { func GetVideoInformation(videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
args := []string{ args := []string{
"--skip-download", "--skip-download",
"--write-info-json", "--write-info-json",
@ -80,50 +80,6 @@ func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Ch
return nil, errors.Err(err) return nil, errors.Err(err)
} }
// now get an accurate time
const maxTries = 5
tries := 0
GetTime:
tries++
t, err := getUploadTime(config, videoID, ip, video.UploadDate)
if err != nil {
//slack(":warning: Upload time error: %v", err)
if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty) || errors.Is(err, errStatusParse) || errors.Is(err, errConnectionIssue)) {
err := triggerScrape(videoID, ip)
if err == nil {
time.Sleep(2 * time.Second) // let them scrape it
goto GetTime
} else {
//slack("triggering scrape returned error: %v", err)
}
} else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) {
//slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
if t == "" {
return nil, errors.Err(err)
} else {
t = "" //TODO: get rid of the other piece below?
}
}
// do fallback below
}
//slack("After all that, upload time for %s is %s", videoID, t)
if t != "" {
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
if err != nil {
return nil, errors.Err(err)
}
//slack(":exclamation: Got an accurate time for %s", videoID)
video.UploadDateForReal = parsed
} else { //TODO: this is the piece that isn't needed!
slack(":warning: Could not get accurate time for %s. Falling back to time from upload ytdl: %s.", videoID, video.UploadDate)
// fall back to UploadDate from youtube-dl
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
if err != nil {
return nil, err
}
}
return video, nil return video, nil
} }
@ -213,46 +169,8 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
} }
} }
if time.Now().AddDate(0, 0, -3).After(ytdlUploadDate) {
return ytdlUploadDate.Format(releaseTimeFormat), nil return ytdlUploadDate.Format(releaseTimeFormat), nil
} }
client := getClient(ip)
req, err := http.NewRequest(http.MethodGet, "https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v="+videoID, nil)
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
req.Header.Set("User-Agent", ChromeUA)
res, err := client.Do(req)
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
defer res.Body.Close()
var uploadTime struct {
Time string `json:"video_upload_time"`
Message string `json:"message"`
Status string `json:"status"`
}
err = json.NewDecoder(res.Body).Decode(&uploadTime)
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
if uploadTime.Status == "ERROR1" {
return ytdlUploadDate.Format(releaseTimeFormat), errNotScraped
}
if uploadTime.Status == "" && strings.HasPrefix(uploadTime.Message, "CANNOT_RETRIEVE_REPORT_FOR_VIDEO_") {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err("cannot retrieve report for video")
}
if uploadTime.Time == "" {
return ytdlUploadDate.Format(releaseTimeFormat), errUploadTimeEmpty
}
return uploadTime.Time, nil
}
func getClient(ip *net.TCPAddr) *http.Client { func getClient(ip *net.TCPAddr) *http.Client {
if ip == nil { if ip == nil {

View file

@ -1,6 +1,12 @@
package ytdl package ytdl
import "time" import (
"math"
"time"
"github.com/lbryio/ytsync/v5/sdk"
"github.com/sirupsen/logrus"
)
type YtdlVideo struct { type YtdlVideo struct {
ID string `json:"id"` ID string `json:"id"`
@ -50,7 +56,7 @@ type YtdlVideo struct {
Thumbnail string `json:"thumbnail"` Thumbnail string `json:"thumbnail"`
Description string `json:"description"` Description string `json:"description"`
UploadDate string `json:"upload_date"` UploadDate string `json:"upload_date"`
UploadDateForReal time.Time `json:"upload_date_for_real"` uploadDateForReal *time.Time `json:"upload_date_for_real"`
Uploader string `json:"uploader"` Uploader string `json:"uploader"`
UploaderId string `json:"uploader_id"` UploaderId string `json:"uploader_id"`
UploaderUrl string `json:"uploader_url"` UploaderUrl string `json:"uploader_url"`
@ -158,3 +164,37 @@ type HTTPHeaders struct {
Accept string `json:"Accept"` Accept string `json:"Accept"`
UserAgent string `json:"User-Agent"` UserAgent string `json:"User-Agent"`
} }
// GetUploadTime returns the upload time of the video and caches the result in
// v.uploadDateForReal, so later calls return the cached value directly.
//
// Two sources are consulted:
//   - the release date returned by sdk.GetAPIsConfigs().GetReleasedDate(v.ID),
//     parsed as RFC3339 in UTC;
//   - the ytdl metadata: v.ReleaseTimestamp when it is non-zero, otherwise
//     v.UploadDate parsed with the "20060102" layout.
//
// The API value is used only when it parses and lies within 48 hours of the
// ytdl value; in every other case the ytdl value is returned. Errors are
// logged and never returned to the caller.
func (v *YtdlVideo) GetUploadTime() time.Time {
	if cached := v.uploadDateForReal; cached != nil {
		return *cached
	}

	release, releaseErr := sdk.GetAPIsConfigs().GetReleasedDate(v.ID)
	if releaseErr != nil {
		logrus.Error(releaseErr)
	}

	// Date derived from ytdl metadata; the release timestamp takes precedence
	// over the day-granularity upload date whenever it is set.
	ytdlTime, parseErr := time.Parse("20060102", v.UploadDate)
	if parseErr != nil {
		logrus.Error(parseErr)
	}
	if v.ReleaseTimestamp != 0 {
		ytdlTime = time.Unix(v.ReleaseTimestamp, 0)
	}

	if release != nil {
		apiTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
		switch {
		case err != nil:
			logrus.Error(err)
		case math.Abs(apiTime.Sub(ytdlTime).Hours()) > 48:
			// The two sources disagree too much; fall through to the ytdl value.
			logrus.Infof("upload day from APIs differs from the ytdl one by more than 2 days.")
		default:
			v.uploadDateForReal = &apiTime
			return apiTime
		}
	}

	v.uploadDateForReal = &ytdlTime
	return ytdlTime
}

22
main.go
View file

@ -9,7 +9,6 @@ import (
"github.com/lbryio/ytsync/v5/configs" "github.com/lbryio/ytsync/v5/configs"
"github.com/lbryio/ytsync/v5/manager" "github.com/lbryio/ytsync/v5/manager"
"github.com/lbryio/ytsync/v5/sdk"
"github.com/lbryio/ytsync/v5/shared" "github.com/lbryio/ytsync/v5/shared"
ytUtils "github.com/lbryio/ytsync/v5/util" ytUtils "github.com/lbryio/ytsync/v5/util"
@ -75,22 +74,11 @@ func ytSync(cmd *cobra.Command, args []string) {
if err != nil { if err != nil {
log.Fatalf("could not parse configuration file: %s", errors.FullTrace(err)) log.Fatalf("could not parse configuration file: %s", errors.FullTrace(err))
} }
var hostname string
if configs.Configuration.SlackToken == "" { if configs.Configuration.SlackToken == "" {
log.Error("A slack token was not present in the config! Slack messages disabled!") log.Error("A slack token was not present in the config! Slack messages disabled!")
} else { } else {
var err error util.InitSlack(configs.Configuration.SlackToken, configs.Configuration.SlackChannel, configs.Configuration.GetHostname())
hostname, err = os.Hostname()
if err != nil {
log.Error("could not detect system hostname")
hostname = "ytsync-unknown"
}
if len(hostname) > 30 {
hostname = hostname[0:30]
}
util.InitSlack(configs.Configuration.SlackToken, configs.Configuration.SlackChannel, hostname)
} }
if cliFlags.Status != "" && !util.InSlice(cliFlags.Status, shared.SyncStatuses) { if cliFlags.Status != "" && !util.InSlice(cliFlags.Status, shared.SyncStatuses) {
@ -131,17 +119,9 @@ func ytSync(cmd *cobra.Command, args []string) {
blobsDir := ytUtils.GetBlobsDir() blobsDir := ytUtils.GetBlobsDir()
apiConfig := &sdk.APIConfig{
ApiURL: configs.Configuration.InternalApisEndpoint,
ApiToken: configs.Configuration.InternalApisAuthToken,
HostName: hostname,
}
sm := manager.NewSyncManager( sm := manager.NewSyncManager(
cliFlags, cliFlags,
blobsDir, blobsDir,
configs.Configuration.LbrycrdString,
apiConfig,
) )
err = sm.Start() err = sm.Start()
if err != nil { if err != nil {

View file

@ -8,6 +8,7 @@ import (
"time" "time"
"github.com/lbryio/ytsync/v5/blobs_reflector" "github.com/lbryio/ytsync/v5/blobs_reflector"
"github.com/lbryio/ytsync/v5/configs"
"github.com/lbryio/ytsync/v5/ip_manager" "github.com/lbryio/ytsync/v5/ip_manager"
"github.com/lbryio/ytsync/v5/namer" "github.com/lbryio/ytsync/v5/namer"
"github.com/lbryio/ytsync/v5/sdk" "github.com/lbryio/ytsync/v5/sdk"
@ -29,12 +30,12 @@ type SyncManager struct {
channelsToSync []Sync channelsToSync []Sync
} }
func NewSyncManager(cliFlags shared.SyncFlags, blobsDir, lbrycrdDsn string, apiConfig *sdk.APIConfig) *SyncManager { func NewSyncManager(cliFlags shared.SyncFlags, blobsDir string) *SyncManager {
return &SyncManager{ return &SyncManager{
CliFlags: cliFlags, CliFlags: cliFlags,
blobsDir: blobsDir, blobsDir: blobsDir,
LbrycrdDsn: lbrycrdDsn, LbrycrdDsn: configs.Configuration.LbrycrdString,
ApiConfig: apiConfig, ApiConfig: sdk.GetAPIsConfigs(),
} }
} }
func (s *SyncManager) enqueueChannel(channel *shared.YoutubeChannel) { func (s *SyncManager) enqueueChannel(channel *shared.YoutubeChannel) {

View file

@ -868,7 +868,7 @@ func (s *Sync) enqueueYoutubeVideos() error {
return err return err
} }
videos, err := ytapi.GetVideosToSync(s.Manager.ApiConfig, s.DbChannelData.ChannelId, s.syncedVideos, s.Manager.CliFlags.QuickSync, s.Manager.CliFlags.VideosToSync(s.DbChannelData.TotalSubscribers), ytapi.VideoParams{ videos, err := ytapi.GetVideosToSync(s.DbChannelData.ChannelId, s.syncedVideos, s.Manager.CliFlags.QuickSync, s.Manager.CliFlags.VideosToSync(s.DbChannelData.TotalSubscribers), ytapi.VideoParams{
VideoDir: s.videoDirectory, VideoDir: s.videoDirectory,
Stopper: s.grp, Stopper: s.grp,
IPPool: ipPool, IPPool: ipPool,

View file

@ -1,31 +1,17 @@
package metrics package metrics
import ( import (
"os" "github.com/lbryio/ytsync/v5/configs"
"regexp"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promauto"
log "github.com/sirupsen/logrus"
) )
var ( var (
Durations = promauto.NewHistogramVec(prometheus.HistogramOpts{ Durations = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "ytsync", Namespace: "ytsync",
Subsystem: getHostname(), Subsystem: configs.Configuration.GetHostname(),
Name: "duration", Name: "duration",
Help: "The durations of the individual modules", Help: "The durations of the individual modules",
}, []string{"path"}) }, []string{"path"})
) )
// getHostname returns the system hostname with every run of characters
// outside [a-zA-Z0-9_] replaced by a single underscore. If the hostname
// cannot be determined, "ytsync_unknown" is used. A failure to compile the
// pattern terminates the program through log.Fatal.
func getHostname() string {
	const fallbackName = "ytsync_unknown"

	sanitizer, compileErr := regexp.Compile("[^a-zA-Z0-9_]+")
	if compileErr != nil {
		log.Fatal(compileErr)
	}

	name, lookupErr := os.Hostname()
	if lookupErr != nil {
		name = fallbackName
	}
	return sanitizer.ReplaceAllString(name, "_")
}

View file

@ -13,6 +13,7 @@ import (
"github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/null" "github.com/lbryio/lbry.go/v2/extras/null"
"github.com/lbryio/ytsync/v5/configs"
"github.com/lbryio/ytsync/v5/shared" "github.com/lbryio/ytsync/v5/shared"
"github.com/lbryio/ytsync/v5/util" "github.com/lbryio/ytsync/v5/util"
@ -30,6 +31,19 @@ type APIConfig struct {
HostName string HostName string
} }
var instance *APIConfig
// GetAPIsConfigs returns the package-wide APIConfig. On the first call the
// value is built from configs.Configuration (endpoint, auth token and
// hostname) and stored in instance; subsequent calls return that same pointer.
func GetAPIsConfigs() *APIConfig {
	if instance != nil {
		return instance
	}

	instance = &APIConfig{
		ApiURL:   configs.Configuration.InternalApisEndpoint,
		ApiToken: configs.Configuration.InternalApisAuthToken,
		HostName: configs.Configuration.GetHostname(),
	}
	return instance
}
func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([]shared.YoutubeChannel, error) { func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([]shared.YoutubeChannel, error) {
type apiJobsResponse struct { type apiJobsResponse struct {
Success bool `json:"success"` Success bool `json:"success"`

View file

@ -107,7 +107,7 @@ func NewYoutubeVideo(directory string, videoData *ytdl.YtdlVideo, playlistPositi
title: videoData.Title, title: videoData.Title,
description: videoData.Description, description: videoData.Description,
playlistPosition: playlistPosition, playlistPosition: playlistPosition,
publishedAt: videoData.UploadDateForReal, publishedAt: videoData.GetUploadTime(),
dir: directory, dir: directory,
youtubeInfo: videoData, youtubeInfo: videoData,
mocked: false, mocked: false,

View file

@ -55,7 +55,7 @@ type VideoParams struct {
var mostRecentlyFailedChannel string // TODO: fix this hack! var mostRecentlyFailedChannel string // TODO: fix this hack!
func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) { func GetVideosToSync(channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
var videos []Video var videos []Video
if quickSync && maxVideos > 50 { if quickSync && maxVideos > 50 {
maxVideos = 50 maxVideos = 50
@ -94,7 +94,7 @@ func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[s
mostRecentlyFailedChannel = channelID mostRecentlyFailedChannel = channelID
} }
vids, err := getVideos(config, channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool) vids, err := getVideos(channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -203,7 +203,8 @@ func ChannelInfo(channelID string) (*YoutubeStatsResponse, error) {
return &decodedResponse, nil return &decodedResponse, nil
} }
func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) { func getVideos(channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) {
config := sdk.GetAPIsConfigs()
var videos []*ytdl.YtdlVideo var videos []*ytdl.YtdlVideo
for _, videoID := range videoIDs { for _, videoID := range videoIDs {
if len(videoID) < 5 { if len(videoID) < 5 {
@ -215,11 +216,6 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC
default: default:
} }
//ip, err := ipPool.GetIP(videoID)
//if err != nil {
// return nil, err
//}
//video, err := downloader.GetVideoInformation(videoID, &net.TCPAddr{IP: net.ParseIP(ip)})
state, err := config.VideoState(videoID) state, err := config.VideoState(videoID)
if err != nil { if err != nil {
return nil, errors.Err(err) return nil, errors.Err(err)
@ -227,7 +223,7 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC
if state == "published" { if state == "published" {
continue continue
} }
video, err := downloader.GetVideoInformation(config, videoID, stopChan, nil, ipPool) video, err := downloader.GetVideoInformation(videoID, stopChan, nil, ipPool)
if err != nil { if err != nil {
errSDK := config.MarkVideoStatus(shared.VideoStatus{ errSDK := config.MarkVideoStatus(shared.VideoStatus{
ChannelID: channelID, ChannelID: channelID,