refactor get video time

remove broken time lookup
refactor quite some code
This commit is contained in:
Niko Storni 2021-12-30 13:17:11 -05:00
parent 3c18ae8de2
commit 17944fa46a
10 changed files with 95 additions and 137 deletions

View file

@ -1,10 +1,14 @@
package configs
import (
"os"
"regexp"
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
log "github.com/sirupsen/logrus"
"github.com/tkanos/gonfig"
)
@ -50,3 +54,22 @@ func (s *S3Configs) GetS3AWSConfig() *aws.Config {
S3ForcePathStyle: aws.Bool(true),
}
}
func (c *Configs) GetHostname() string {
var hostname string
var err error
hostname, err = os.Hostname()
if err != nil {
log.Error("could not detect system hostname")
hostname = "ytsync_unknown"
}
reg, err := regexp.Compile("[^a-zA-Z0-9_]+")
if err == nil {
hostname = reg.ReplaceAllString(hostname, "_")
}
if len(hostname) > 30 {
hostname = hostname[0:30]
}
return hostname
}

View file

@ -50,7 +50,7 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan,
const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)"
func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
func GetVideoInformation(videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
args := []string{
"--skip-download",
"--write-info-json",
@ -80,50 +80,6 @@ func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Ch
return nil, errors.Err(err)
}
// now get an accurate time
const maxTries = 5
tries := 0
GetTime:
tries++
t, err := getUploadTime(config, videoID, ip, video.UploadDate)
if err != nil {
//slack(":warning: Upload time error: %v", err)
if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty) || errors.Is(err, errStatusParse) || errors.Is(err, errConnectionIssue)) {
err := triggerScrape(videoID, ip)
if err == nil {
time.Sleep(2 * time.Second) // let them scrape it
goto GetTime
} else {
//slack("triggering scrape returned error: %v", err)
}
} else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) {
//slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
if t == "" {
return nil, errors.Err(err)
} else {
t = "" //TODO: get rid of the other piece below?
}
}
// do fallback below
}
//slack("After all that, upload time for %s is %s", videoID, t)
if t != "" {
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
if err != nil {
return nil, errors.Err(err)
}
//slack(":exclamation: Got an accurate time for %s", videoID)
video.UploadDateForReal = parsed
} else { //TODO: this is the piece that isn't needed!
slack(":warning: Could not get accurate time for %s. Falling back to time from upload ytdl: %s.", videoID, video.UploadDate)
// fall back to UploadDate from youtube-dl
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
if err != nil {
return nil, err
}
}
return video, nil
}
@ -213,45 +169,7 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
}
}
if time.Now().AddDate(0, 0, -3).After(ytdlUploadDate) {
return ytdlUploadDate.Format(releaseTimeFormat), nil
}
client := getClient(ip)
req, err := http.NewRequest(http.MethodGet, "https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v="+videoID, nil)
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
req.Header.Set("User-Agent", ChromeUA)
res, err := client.Do(req)
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
defer res.Body.Close()
var uploadTime struct {
Time string `json:"video_upload_time"`
Message string `json:"message"`
Status string `json:"status"`
}
err = json.NewDecoder(res.Body).Decode(&uploadTime)
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
if uploadTime.Status == "ERROR1" {
return ytdlUploadDate.Format(releaseTimeFormat), errNotScraped
}
if uploadTime.Status == "" && strings.HasPrefix(uploadTime.Message, "CANNOT_RETRIEVE_REPORT_FOR_VIDEO_") {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err("cannot retrieve report for video")
}
if uploadTime.Time == "" {
return ytdlUploadDate.Format(releaseTimeFormat), errUploadTimeEmpty
}
return uploadTime.Time, nil
return ytdlUploadDate.Format(releaseTimeFormat), nil
}
func getClient(ip *net.TCPAddr) *http.Client {

View file

@ -1,6 +1,12 @@
package ytdl
import "time"
import (
"math"
"time"
"github.com/lbryio/ytsync/v5/sdk"
"github.com/sirupsen/logrus"
)
type YtdlVideo struct {
ID string `json:"id"`
@ -50,7 +56,7 @@ type YtdlVideo struct {
Thumbnail string `json:"thumbnail"`
Description string `json:"description"`
UploadDate string `json:"upload_date"`
UploadDateForReal time.Time `json:"upload_date_for_real"`
uploadDateForReal *time.Time `json:"upload_date_for_real"`
Uploader string `json:"uploader"`
UploaderId string `json:"uploader_id"`
UploaderUrl string `json:"uploader_url"`
@ -158,3 +164,37 @@ type HTTPHeaders struct {
Accept string `json:"Accept"`
UserAgent string `json:"User-Agent"`
}
func (v *YtdlVideo) GetUploadTime() time.Time {
if v.uploadDateForReal != nil {
return *v.uploadDateForReal
}
release, err := sdk.GetAPIsConfigs().GetReleasedDate(v.ID)
if err != nil {
logrus.Error(err)
}
ytdlUploadDate, err := time.Parse("20060102", v.UploadDate)
if err != nil {
logrus.Error(err)
}
if v.ReleaseTimestamp != 0 {
ytdlUploadDate = time.Unix(v.ReleaseTimestamp, 0)
}
if release != nil {
sqlTime, err := time.ParseInLocation(time.RFC3339, release.ReleaseTime, time.UTC)
if err == nil {
hoursDiff := math.Abs(sqlTime.Sub(ytdlUploadDate).Hours())
if hoursDiff > 48 {
logrus.Infof("upload day from APIs differs from the ytdl one by more than 2 days.")
} else {
v.uploadDateForReal = &sqlTime
return sqlTime
}
} else {
logrus.Error(err)
}
}
v.uploadDateForReal = &ytdlUploadDate
return ytdlUploadDate
}

22
main.go
View file

@ -9,7 +9,6 @@ import (
"github.com/lbryio/ytsync/v5/configs"
"github.com/lbryio/ytsync/v5/manager"
"github.com/lbryio/ytsync/v5/sdk"
"github.com/lbryio/ytsync/v5/shared"
ytUtils "github.com/lbryio/ytsync/v5/util"
@ -75,22 +74,11 @@ func ytSync(cmd *cobra.Command, args []string) {
if err != nil {
log.Fatalf("could not parse configuration file: %s", errors.FullTrace(err))
}
var hostname string
if configs.Configuration.SlackToken == "" {
log.Error("A slack token was not present in the config! Slack messages disabled!")
} else {
var err error
hostname, err = os.Hostname()
if err != nil {
log.Error("could not detect system hostname")
hostname = "ytsync-unknown"
}
if len(hostname) > 30 {
hostname = hostname[0:30]
}
util.InitSlack(configs.Configuration.SlackToken, configs.Configuration.SlackChannel, hostname)
util.InitSlack(configs.Configuration.SlackToken, configs.Configuration.SlackChannel, configs.Configuration.GetHostname())
}
if cliFlags.Status != "" && !util.InSlice(cliFlags.Status, shared.SyncStatuses) {
@ -131,17 +119,9 @@ func ytSync(cmd *cobra.Command, args []string) {
blobsDir := ytUtils.GetBlobsDir()
apiConfig := &sdk.APIConfig{
ApiURL: configs.Configuration.InternalApisEndpoint,
ApiToken: configs.Configuration.InternalApisAuthToken,
HostName: hostname,
}
sm := manager.NewSyncManager(
cliFlags,
blobsDir,
configs.Configuration.LbrycrdString,
apiConfig,
)
err = sm.Start()
if err != nil {

View file

@ -8,6 +8,7 @@ import (
"time"
"github.com/lbryio/ytsync/v5/blobs_reflector"
"github.com/lbryio/ytsync/v5/configs"
"github.com/lbryio/ytsync/v5/ip_manager"
"github.com/lbryio/ytsync/v5/namer"
"github.com/lbryio/ytsync/v5/sdk"
@ -29,12 +30,12 @@ type SyncManager struct {
channelsToSync []Sync
}
func NewSyncManager(cliFlags shared.SyncFlags, blobsDir, lbrycrdDsn string, apiConfig *sdk.APIConfig) *SyncManager {
func NewSyncManager(cliFlags shared.SyncFlags, blobsDir string) *SyncManager {
return &SyncManager{
CliFlags: cliFlags,
blobsDir: blobsDir,
LbrycrdDsn: lbrycrdDsn,
ApiConfig: apiConfig,
LbrycrdDsn: configs.Configuration.LbrycrdString,
ApiConfig: sdk.GetAPIsConfigs(),
}
}
func (s *SyncManager) enqueueChannel(channel *shared.YoutubeChannel) {

View file

@ -868,7 +868,7 @@ func (s *Sync) enqueueYoutubeVideos() error {
return err
}
videos, err := ytapi.GetVideosToSync(s.Manager.ApiConfig, s.DbChannelData.ChannelId, s.syncedVideos, s.Manager.CliFlags.QuickSync, s.Manager.CliFlags.VideosToSync(s.DbChannelData.TotalSubscribers), ytapi.VideoParams{
videos, err := ytapi.GetVideosToSync(s.DbChannelData.ChannelId, s.syncedVideos, s.Manager.CliFlags.QuickSync, s.Manager.CliFlags.VideosToSync(s.DbChannelData.TotalSubscribers), ytapi.VideoParams{
VideoDir: s.videoDirectory,
Stopper: s.grp,
IPPool: ipPool,

View file

@ -1,31 +1,17 @@
package metrics
import (
"os"
"regexp"
"github.com/lbryio/ytsync/v5/configs"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
log "github.com/sirupsen/logrus"
)
var (
Durations = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "ytsync",
Subsystem: getHostname(),
Subsystem: configs.Configuration.GetHostname(),
Name: "duration",
Help: "The durations of the individual modules",
}, []string{"path"})
)
func getHostname() string {
hostname, err := os.Hostname()
if err != nil {
hostname = "ytsync_unknown"
}
reg, err := regexp.Compile("[^a-zA-Z0-9_]+")
if err != nil {
log.Fatal(err)
}
return reg.ReplaceAllString(hostname, "_")
}

View file

@ -13,6 +13,7 @@ import (
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/null"
"github.com/lbryio/ytsync/v5/configs"
"github.com/lbryio/ytsync/v5/shared"
"github.com/lbryio/ytsync/v5/util"
@ -30,6 +31,19 @@ type APIConfig struct {
HostName string
}
var instance *APIConfig
func GetAPIsConfigs() *APIConfig {
if instance == nil {
instance = &APIConfig{
ApiURL: configs.Configuration.InternalApisEndpoint,
ApiToken: configs.Configuration.InternalApisAuthToken,
HostName: configs.Configuration.GetHostname(),
}
}
return instance
}
func (a *APIConfig) FetchChannels(status string, cliFlags *shared.SyncFlags) ([]shared.YoutubeChannel, error) {
type apiJobsResponse struct {
Success bool `json:"success"`

View file

@ -107,7 +107,7 @@ func NewYoutubeVideo(directory string, videoData *ytdl.YtdlVideo, playlistPositi
title: videoData.Title,
description: videoData.Description,
playlistPosition: playlistPosition,
publishedAt: videoData.UploadDateForReal,
publishedAt: videoData.GetUploadTime(),
dir: directory,
youtubeInfo: videoData,
mocked: false,

View file

@ -55,7 +55,7 @@ type VideoParams struct {
var mostRecentlyFailedChannel string // TODO: fix this hack!
func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
func GetVideosToSync(channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
var videos []Video
if quickSync && maxVideos > 50 {
maxVideos = 50
@ -94,7 +94,7 @@ func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[s
mostRecentlyFailedChannel = channelID
}
vids, err := getVideos(config, channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool)
vids, err := getVideos(channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool)
if err != nil {
return nil, err
}
@ -203,7 +203,8 @@ func ChannelInfo(channelID string) (*YoutubeStatsResponse, error) {
return &decodedResponse, nil
}
func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) {
func getVideos(channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) {
config := sdk.GetAPIsConfigs()
var videos []*ytdl.YtdlVideo
for _, videoID := range videoIDs {
if len(videoID) < 5 {
@ -215,11 +216,6 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC
default:
}
//ip, err := ipPool.GetIP(videoID)
//if err != nil {
// return nil, err
//}
//video, err := downloader.GetVideoInformation(videoID, &net.TCPAddr{IP: net.ParseIP(ip)})
state, err := config.VideoState(videoID)
if err != nil {
return nil, errors.Err(err)
@ -227,7 +223,7 @@ func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopC
if state == "published" {
continue
}
video, err := downloader.GetVideoInformation(config, videoID, stopChan, nil, ipPool)
video, err := downloader.GetVideoInformation(videoID, stopChan, nil, ipPool)
if err != nil {
errSDK := config.MarkVideoStatus(shared.VideoStatus{
ChannelID: channelID,