WIP: trying to get the accurate api to work

This commit is contained in:
Alex Grintsvayg 2020-07-28 12:47:28 -04:00
parent f942bf8025
commit a3dd3dc626
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
4 changed files with 118 additions and 76 deletions

View file

@ -2,13 +2,16 @@ package downloader
import ( import (
"encoding/json" "encoding/json"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"net/url"
"os/exec" "os/exec"
"strings" "strings"
"time" "time"
"github.com/davecgh/go-spew/spew"
"github.com/lbryio/ytsync/v5/downloader/ytdl" "github.com/lbryio/ytsync/v5/downloader/ytdl"
"github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/errors"
@ -34,30 +37,38 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int) ([]string, error) {
} }
func GetVideoInformation(videoID string) (*ytdl.YtdlVideo, error) { func GetVideoInformation(videoID string) (*ytdl.YtdlVideo, error) {
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID} //args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
results, err := run(args, false, true) //results, err := run(args, false, true)
if err != nil { //if err != nil {
return nil, errors.Err(err) // return nil, errors.Err(err)
} //}
var video *ytdl.YtdlVideo var video *ytdl.YtdlVideo
err = json.Unmarshal([]byte(results[0]), &video) //err = json.Unmarshal([]byte(results[0]), &video)
if err != nil { //if err != nil {
return nil, errors.Err(err) // return nil, errors.Err(err)
} //}
video = &ytdl.YtdlVideo{}
// now get an accurate time // now get an accurate time
const maxTries = 5
tries := 0 tries := 0
GetTime: GetTime:
tries++ tries++
t, err := getUploadTime(videoID) t, err := getUploadTime(videoID)
if err != nil { if err != nil {
if errors.Is(err, errNotScraped) && tries <= 3 { slack(":warning: Upload time error: %v", err)
if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty)) {
triggerScrape(videoID) triggerScrape(videoID)
time.Sleep(2 * time.Second) // let them scrape it time.Sleep(2 * time.Second) // let them scrape it
goto GetTime goto GetTime
} else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) {
slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
return nil, errors.Err(err)
} }
//return video, errors.Err(err) // just swallow this error and do fallback below // do fallback below
} }
slack("After all that, upload time for %s is %s", videoID, t)
if t != "" { if t != "" {
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
@ -66,7 +77,7 @@ GetTime:
} }
video.UploadDateForReal = parsed video.UploadDateForReal = parsed
} else { } else {
_ = util.SendToSlack(":warning: Could not get accurate time for %s. Falling back to estimated time.", videoID) slack(":warning: Could not get accurate time for %s. Falling back to estimated time.", videoID)
// fall back to UploadDate from youtube-dl // fall back to UploadDate from youtube-dl
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate) video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
if err != nil { if err != nil {
@ -78,19 +89,36 @@ GetTime:
} }
var errNotScraped = errors.Base("not yet scraped by caa.iti.gr") var errNotScraped = errors.Base("not yet scraped by caa.iti.gr")
var errUploadTimeEmpty = errors.Base("upload time is empty")
// slack reports a formatted message both locally and to Slack.
//
// The message is printed to stdout followed by a newline, then the same
// format string and arguments are passed to util.SendToSlack. Delivery is best
// effort: if sending fails, the failure is printed to stdout rather than being
// silently dropped, and the caller is never interrupted.
func slack(format string, a ...interface{}) {
	fmt.Printf(format+"\n", a...)

	// Surface delivery problems locally; otherwise a broken Slack hook would
	// make every warning sent through this helper disappear without a trace.
	if err := util.SendToSlack(format, a...); err != nil {
		fmt.Printf("could not send message to slack: %v\n", err)
	}
}
func triggerScrape(videoID string) error { func triggerScrape(videoID string) error {
res, err := http.Get("https://caa.iti.gr/verify_videoV3?twtimeline=0&url=https://www.youtube.com/watch?v=" + videoID) slack("Triggering scrape for %s", videoID)
u, err := url.Parse("https://caa.iti.gr/verify_videoV3")
q := u.Query()
q.Set("twtimeline", "0")
q.Set("url", "https://www.youtube.com/watch?v="+videoID)
u.RawQuery = q.Encode()
slack("GET %s", u.String())
res, err := http.Get(u.String())
if err != nil { if err != nil {
return errors.Err(err) return errors.Err(err)
} }
defer res.Body.Close() defer res.Body.Close()
all, err := ioutil.ReadAll(res.Body)
spew.Dump(string(all), err)
return nil return nil
//https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM //https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM
} }
func getUploadTime(videoID string) (string, error) { func getUploadTime(videoID string) (string, error) {
slack("Getting upload time for %s", videoID)
res, err := http.Get("https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v=" + videoID) res, err := http.Get("https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v=" + videoID)
if err != nil { if err != nil {
return "", errors.Err(err) return "", errors.Err(err)
@ -111,6 +139,10 @@ func getUploadTime(videoID string) (string, error) {
return "", errNotScraped return "", errNotScraped
} }
if uploadTime.Time == "" {
return "", errUploadTimeEmpty
}
return uploadTime.Time, nil return uploadTime.Time, nil
} }

View file

@ -37,40 +37,8 @@ type YtdlVideo struct {
WebpageURLBasename string `json:"webpage_url_basename"` WebpageURLBasename string `json:"webpage_url_basename"`
Acodec string `json:"acodec"` Acodec string `json:"acodec"`
DisplayID string `json:"display_id"` DisplayID string `json:"display_id"`
RequestedFormats []struct { //RequestedFormats []RequestedFormat `json:"requested_formats"`
Asr interface{} `json:"asr"` //AutomaticCaptions struct{} `json:"automatic_captions"`
Tbr float64 `json:"tbr"`
Container string `json:"container"`
Language interface{} `json:"language"`
Format string `json:"format"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
FormatNote string `json:"format_note"`
Height int `json:"height"`
Width int `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url"`
Filesize interface{} `json:"filesize"`
Fps int `json:"fps"`
ManifestURL string `json:"manifest_url"`
Protocol string `json:"protocol"`
FormatID string `json:"format_id"`
HTTPHeaders struct {
AcceptCharset string `json:"Accept-Charset"`
AcceptLanguage string `json:"Accept-Language"`
AcceptEncoding string `json:"Accept-Encoding"`
Accept string `json:"Accept"`
UserAgent string `json:"User-Agent"`
} `json:"http_headers"`
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments"`
Acodec string `json:"acodec"`
Abr int `json:"abr,omitempty"`
} `json:"requested_formats"`
AutomaticCaptions struct {
} `json:"automatic_captions"`
Description string `json:"description"` Description string `json:"description"`
Tags []string `json:"tags"` Tags []string `json:"tags"`
Track interface{} `json:"track"` Track interface{} `json:"track"`
@ -81,8 +49,7 @@ type YtdlVideo struct {
FormatID string `json:"format_id"` FormatID string `json:"format_id"`
EpisodeNumber interface{} `json:"episode_number"` EpisodeNumber interface{} `json:"episode_number"`
UploaderID string `json:"uploader_id"` UploaderID string `json:"uploader_id"`
Subtitles struct { //Subtitles struct{} `json:"subtitles"`
} `json:"subtitles"`
ReleaseYear interface{} `json:"release_year"` ReleaseYear interface{} `json:"release_year"`
Thumbnails []Thumbnail `json:"thumbnails"` Thumbnails []Thumbnail `json:"thumbnails"`
License interface{} `json:"license"` License interface{} `json:"license"`
@ -96,38 +63,73 @@ type YtdlVideo struct {
Width int `json:"width"` Width int `json:"width"`
EndTime interface{} `json:"end_time"` EndTime interface{} `json:"end_time"`
WebpageURL string `json:"webpage_url"` WebpageURL string `json:"webpage_url"`
Formats []struct { //Formats []Format `json:"formats"`
Asr int `json:"asr"`
Tbr float64 `json:"tbr"`
Protocol string `json:"protocol"`
Format string `json:"format"`
FormatNote string `json:"format_note"`
Height interface{} `json:"height"`
ManifestURL string `json:"manifest_url,omitempty"`
FormatID string `json:"format_id"`
Container string `json:"container,omitempty"`
Language interface{} `json:"language,omitempty"`
HTTPHeaders HTTPHeaders `json:"http_headers"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
Abr int `json:"abr,omitempty"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url,omitempty"`
Filesize interface{} `json:"filesize"`
Fps interface{} `json:"fps"`
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments,omitempty"`
Acodec string `json:"acodec"`
PlayerURL interface{} `json:"player_url,omitempty"`
} `json:"formats"`
ChannelURL string `json:"channel_url"` ChannelURL string `json:"channel_url"`
Resolution interface{} `json:"resolution"` Resolution interface{} `json:"resolution"`
Vcodec string `json:"vcodec"` Vcodec string `json:"vcodec"`
} }
// RequestedFormat is the decoded form of one JSON object carrying the keys
// listed in the struct tags below (the shape used for elements of a
// "requested_formats" list).
//
// Fields typed interface{} accept whatever JSON value is present, including
// null. Abr is the only field left out of the encoded output when it is zero.
type RequestedFormat struct {
	Asr             interface{} `json:"asr"`
	Tbr             float64     `json:"tbr"`
	Container       string      `json:"container"`
	Language        interface{} `json:"language"`
	Format          string      `json:"format"`
	URL             string      `json:"url"`
	Vcodec          string      `json:"vcodec"`
	FormatNote      string      `json:"format_note"`
	Height          int         `json:"height"`
	Width           int         `json:"width"`
	Ext             string      `json:"ext"`
	FragmentBaseURL string      `json:"fragment_base_url"`
	Filesize        interface{} `json:"filesize"`
	Fps             int         `json:"fps"`
	ManifestURL     string      `json:"manifest_url"`
	Protocol        string      `json:"protocol"`
	FormatID        string      `json:"format_id"`
	// HTTPHeaders holds the header values found under "http_headers".
	HTTPHeaders struct {
		AcceptCharset  string `json:"Accept-Charset"`
		AcceptLanguage string `json:"Accept-Language"`
		AcceptEncoding string `json:"Accept-Encoding"`
		Accept         string `json:"Accept"`
		UserAgent      string `json:"User-Agent"`
	} `json:"http_headers"`
	// Fragments lists the entries found under "fragments"; each entry's
	// duration is omitted from encoded output when it is zero.
	Fragments []struct {
		Path     string  `json:"path"`
		Duration float64 `json:"duration,omitempty"`
	} `json:"fragments"`
	Acodec string `json:"acodec"`
	Abr    int    `json:"abr,omitempty"`
}
// Format is the decoded form of one JSON object carrying the keys listed in
// the struct tags below (the shape used for elements of a "formats" list).
//
// Height, Width, Fps, Filesize, Language and PlayerURL are typed interface{},
// so any JSON value, including null, is accepted for them. Fields tagged
// omitempty are dropped from encoded output when they hold their zero value.
type Format struct {
	Asr             int         `json:"asr"`
	Tbr             float64     `json:"tbr"`
	Protocol        string      `json:"protocol"`
	Format          string      `json:"format"`
	FormatNote      string      `json:"format_note"`
	Height          interface{} `json:"height"`
	ManifestURL     string      `json:"manifest_url,omitempty"`
	FormatID        string      `json:"format_id"`
	Container       string      `json:"container,omitempty"`
	Language        interface{} `json:"language,omitempty"`
	HTTPHeaders     HTTPHeaders `json:"http_headers"`
	URL             string      `json:"url"`
	Vcodec          string      `json:"vcodec"`
	Abr             int         `json:"abr,omitempty"`
	Width           interface{} `json:"width"`
	Ext             string      `json:"ext"`
	FragmentBaseURL string      `json:"fragment_base_url,omitempty"`
	Filesize        interface{} `json:"filesize"`
	Fps             interface{} `json:"fps"`
	// Fragments lists the entries found under "fragments"; the whole list is
	// omitted from encoded output when it is empty.
	Fragments []struct {
		Path     string  `json:"path"`
		Duration float64 `json:"duration,omitempty"`
	} `json:"fragments,omitempty"`
	Acodec    string      `json:"acodec"`
	PlayerURL interface{} `json:"player_url,omitempty"`
}
type Thumbnail struct { type Thumbnail struct {
URL string `json:"url"` URL string `json:"url"`
Width int `json:"width"` Width int `json:"width"`

View file

@ -7,8 +7,10 @@ import (
"os" "os"
"time" "time"
"github.com/davecgh/go-spew/spew"
"github.com/lbryio/lbry.go/v2/extras/errors" "github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/util" "github.com/lbryio/lbry.go/v2/extras/util"
"github.com/lbryio/ytsync/v5/downloader"
"github.com/lbryio/ytsync/v5/manager" "github.com/lbryio/ytsync/v5/manager"
"github.com/lbryio/ytsync/v5/sdk" "github.com/lbryio/ytsync/v5/sdk"
ytUtils "github.com/lbryio/ytsync/v5/util" ytUtils "github.com/lbryio/ytsync/v5/util"
@ -38,6 +40,9 @@ var (
) )
func main() { func main() {
spew.Dump(downloader.GetVideoInformation("oahaMa3XB0k"))
return
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
log.SetLevel(log.DebugLevel) log.SetLevel(log.DebugLevel)
http.Handle("/metrics", promhttp.Handler()) http.Handle("/metrics", promhttp.Handler())

View file

@ -250,6 +250,7 @@ func (s *Sync) FullCycle() (e error) {
defer signal.Stop(interruptChan) defer signal.Stop(interruptChan)
go func() { go func() {
<-interruptChan <-interruptChan
util.SendToSlack("got interrupt, shutting down")
log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)") log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)")
s.grp.Stop() s.grp.Stop()
}() }()
@ -856,6 +857,8 @@ func (s *Sync) startWorker(workerNum int) {
tryCount++ tryCount++
err := s.processVideo(v) err := s.processVideo(v)
util.SendToSlack("Tried to process %s. Error: %v", v.ID(), err)
if err != nil { if err != nil {
logMsg := fmt.Sprintf("error processing video %s: %s", v.ID(), err.Error()) logMsg := fmt.Sprintf("error processing video %s: %s", v.ID(), err.Error())
log.Errorln(logMsg) log.Errorln(logMsg)