WIP: trying to get the accurate api to work

This commit is contained in:
Alex Grintsvayg 2020-07-28 12:47:28 -04:00
parent f942bf8025
commit a3dd3dc626
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
4 changed files with 118 additions and 76 deletions

View file

@ -2,13 +2,16 @@ package downloader
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"os/exec"
"strings"
"time"
"github.com/davecgh/go-spew/spew"
"github.com/lbryio/ytsync/v5/downloader/ytdl"
"github.com/lbryio/lbry.go/v2/extras/errors"
@ -34,30 +37,38 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int) ([]string, error) {
}
func GetVideoInformation(videoID string) (*ytdl.YtdlVideo, error) {
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
results, err := run(args, false, true)
if err != nil {
return nil, errors.Err(err)
}
//args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
//results, err := run(args, false, true)
//if err != nil {
// return nil, errors.Err(err)
//}
var video *ytdl.YtdlVideo
err = json.Unmarshal([]byte(results[0]), &video)
if err != nil {
return nil, errors.Err(err)
}
//err = json.Unmarshal([]byte(results[0]), &video)
//if err != nil {
// return nil, errors.Err(err)
//}
video = &ytdl.YtdlVideo{}
// now get an accurate time
const maxTries = 5
tries := 0
GetTime:
tries++
t, err := getUploadTime(videoID)
if err != nil {
if errors.Is(err, errNotScraped) && tries <= 3 {
slack(":warning: Upload time error: %v", err)
if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty)) {
triggerScrape(videoID)
time.Sleep(2 * time.Second) // let them scrape it
goto GetTime
} else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) {
slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
return nil, errors.Err(err)
}
//return video, errors.Err(err) // just swallow this error and do fallback below
// do fallback below
}
slack("After all that, upload time for %s is %s", videoID, t)
if t != "" {
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
@ -66,7 +77,7 @@ GetTime:
}
video.UploadDateForReal = parsed
} else {
_ = util.SendToSlack(":warning: Could not get accurate time for %s. Falling back to estimated time.", videoID)
slack(":warning: Could not get accurate time for %s. Falling back to estimated time.", videoID)
// fall back to UploadDate from youtube-dl
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
if err != nil {
@ -78,19 +89,36 @@ GetTime:
}
// errNotScraped is the sentinel returned by getUploadTime when caa.iti.gr has
// not scraped the requested video yet. GetVideoInformation checks for it with
// errors.Is to decide whether to trigger a scrape and retry.
var errNotScraped = errors.Base("not yet scraped by caa.iti.gr")

// errUploadTimeEmpty is the sentinel returned by getUploadTime when the
// response carries an empty upload time. GetVideoInformation treats it the
// same way as errNotScraped when deciding whether to retry.
var errUploadTimeEmpty = errors.Base("upload time is empty")
// slack reports a formatted message both locally and to Slack.
//
// The message is first printed to stdout with a trailing newline, then
// forwarded to util.SendToSlack with the same format and arguments.
//
// Delivery to Slack is best-effort: callers use this helper for diagnostics
// and have no way to act on a failure, so a non-nil result from
// util.SendToSlack is printed to stdout instead of being silently discarded
// or propagated.
func slack(format string, a ...interface{}) {
	fmt.Printf(format+"\n", a...)
	if err := util.SendToSlack(format, a...); err != nil {
		fmt.Printf("could not send message to slack: %v\n", err)
	}
}
// triggerScrape asks caa.iti.gr to scrape the given YouTube video by issuing a
// GET against its verify_videoV3 endpoint, passing twtimeline=0 and the
// video's watch URL as the `url` query parameter.
//
// The query string is built with url.Values so the nested watch URL is
// properly escaped. The response body is read in full and dumped with spew for
// debugging; its contents are not interpreted.
//
// A non-nil error is returned when the endpoint URL cannot be parsed, the
// request fails, or the response body cannot be read.
func triggerScrape(videoID string) error {
	slack("Triggering scrape for %s", videoID)

	u, err := url.Parse("https://caa.iti.gr/verify_videoV3")
	if err != nil {
		return errors.Err(err)
	}
	q := u.Query()
	q.Set("twtimeline", "0")
	q.Set("url", "https://www.youtube.com/watch?v="+videoID)
	u.RawQuery = q.Encode()
	slack("GET %s", u.String())

	res, err := http.Get(u.String())
	if err != nil {
		return errors.Err(err)
	}
	defer res.Body.Close()

	all, err := ioutil.ReadAll(res.Body)
	if err != nil {
		// Previously this error was only dumped and the function still
		// reported success.
		return errors.Err(err)
	}
	spew.Dump(string(all))

	//https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM
	return nil
}
func getUploadTime(videoID string) (string, error) {
slack("Getting upload time for %s", videoID)
res, err := http.Get("https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v=" + videoID)
if err != nil {
return "", errors.Err(err)
@ -111,6 +139,10 @@ func getUploadTime(videoID string) (string, error) {
return "", errNotScraped
}
if uploadTime.Time == "" {
return "", errUploadTimeEmpty
}
return uploadTime.Time, nil
}

View file

@ -37,40 +37,8 @@ type YtdlVideo struct {
WebpageURLBasename string `json:"webpage_url_basename"`
Acodec string `json:"acodec"`
DisplayID string `json:"display_id"`
RequestedFormats []struct {
Asr interface{} `json:"asr"`
Tbr float64 `json:"tbr"`
Container string `json:"container"`
Language interface{} `json:"language"`
Format string `json:"format"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
FormatNote string `json:"format_note"`
Height int `json:"height"`
Width int `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url"`
Filesize interface{} `json:"filesize"`
Fps int `json:"fps"`
ManifestURL string `json:"manifest_url"`
Protocol string `json:"protocol"`
FormatID string `json:"format_id"`
HTTPHeaders struct {
AcceptCharset string `json:"Accept-Charset"`
AcceptLanguage string `json:"Accept-Language"`
AcceptEncoding string `json:"Accept-Encoding"`
Accept string `json:"Accept"`
UserAgent string `json:"User-Agent"`
} `json:"http_headers"`
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments"`
Acodec string `json:"acodec"`
Abr int `json:"abr,omitempty"`
} `json:"requested_formats"`
AutomaticCaptions struct {
} `json:"automatic_captions"`
//RequestedFormats []RequestedFormat `json:"requested_formats"`
//AutomaticCaptions struct{} `json:"automatic_captions"`
Description string `json:"description"`
Tags []string `json:"tags"`
Track interface{} `json:"track"`
@ -81,8 +49,7 @@ type YtdlVideo struct {
FormatID string `json:"format_id"`
EpisodeNumber interface{} `json:"episode_number"`
UploaderID string `json:"uploader_id"`
Subtitles struct {
} `json:"subtitles"`
//Subtitles struct{} `json:"subtitles"`
ReleaseYear interface{} `json:"release_year"`
Thumbnails []Thumbnail `json:"thumbnails"`
License interface{} `json:"license"`
@ -96,38 +63,73 @@ type YtdlVideo struct {
Width int `json:"width"`
EndTime interface{} `json:"end_time"`
WebpageURL string `json:"webpage_url"`
Formats []struct {
Asr int `json:"asr"`
Tbr float64 `json:"tbr"`
Protocol string `json:"protocol"`
Format string `json:"format"`
FormatNote string `json:"format_note"`
Height interface{} `json:"height"`
ManifestURL string `json:"manifest_url,omitempty"`
FormatID string `json:"format_id"`
Container string `json:"container,omitempty"`
Language interface{} `json:"language,omitempty"`
HTTPHeaders HTTPHeaders `json:"http_headers"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
Abr int `json:"abr,omitempty"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url,omitempty"`
Filesize interface{} `json:"filesize"`
Fps interface{} `json:"fps"`
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments,omitempty"`
Acodec string `json:"acodec"`
PlayerURL interface{} `json:"player_url,omitempty"`
} `json:"formats"`
//Formats []Format `json:"formats"`
ChannelURL string `json:"channel_url"`
Resolution interface{} `json:"resolution"`
Vcodec string `json:"vcodec"`
}
// RequestedFormat holds the fields of one format object keyed by the JSON
// names in the struct tags. Judging by its name and the commented-out
// `requested_formats` field in YtdlVideo, it presumably models one element of
// that array in youtube-dl's JSON output (TODO confirm against real output).
//
// Fields typed interface{} accept whatever JSON value is present (number,
// string or null) without failing the decode.
type RequestedFormat struct {
Asr interface{} `json:"asr"`
Tbr float64 `json:"tbr"`
Container string `json:"container"`
Language interface{} `json:"language"`
Format string `json:"format"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
FormatNote string `json:"format_note"`
Height int `json:"height"`
Width int `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url"`
Filesize interface{} `json:"filesize"`
Fps int `json:"fps"`
ManifestURL string `json:"manifest_url"`
Protocol string `json:"protocol"`
FormatID string `json:"format_id"`
// HTTPHeaders is declared inline here as an anonymous struct; each field
// maps to the header name given in its tag.
HTTPHeaders struct {
AcceptCharset string `json:"Accept-Charset"`
AcceptLanguage string `json:"Accept-Language"`
AcceptEncoding string `json:"Accept-Encoding"`
Accept string `json:"Accept"`
UserAgent string `json:"User-Agent"`
} `json:"http_headers"`
// Fragments is a list of path/duration pairs; Duration is omitted when
// marshalling if it is zero.
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments"`
Acodec string `json:"acodec"`
// Abr is omitted when marshalling if it is zero.
Abr int `json:"abr,omitempty"`
}
// Format holds the fields of one format object keyed by the JSON names in the
// struct tags. Judging by its name and the commented-out `formats` field in
// YtdlVideo, it presumably models one element of that array in youtube-dl's
// JSON output (TODO confirm against real output).
//
// Height, Width, Fps, Filesize, Language and PlayerURL are typed interface{},
// so they accept whatever JSON value is present (number, string or null)
// without failing the decode. Fields tagged omitempty are left out when
// marshalling if they hold their zero value.
type Format struct {
Asr int `json:"asr"`
Tbr float64 `json:"tbr"`
Protocol string `json:"protocol"`
Format string `json:"format"`
FormatNote string `json:"format_note"`
Height interface{} `json:"height"`
ManifestURL string `json:"manifest_url,omitempty"`
FormatID string `json:"format_id"`
Container string `json:"container,omitempty"`
Language interface{} `json:"language,omitempty"`
// HTTPHeaders uses the named HTTPHeaders type, which is declared outside
// the part of the file visible here.
HTTPHeaders HTTPHeaders `json:"http_headers"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
Abr int `json:"abr,omitempty"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url,omitempty"`
Filesize interface{} `json:"filesize"`
Fps interface{} `json:"fps"`
// Fragments is a list of path/duration pairs.
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments,omitempty"`
Acodec string `json:"acodec"`
PlayerURL interface{} `json:"player_url,omitempty"`
}
type Thumbnail struct {
URL string `json:"url"`
Width int `json:"width"`

View file

@ -7,8 +7,10 @@ import (
"os"
"time"
"github.com/davecgh/go-spew/spew"
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/util"
"github.com/lbryio/ytsync/v5/downloader"
"github.com/lbryio/ytsync/v5/manager"
"github.com/lbryio/ytsync/v5/sdk"
ytUtils "github.com/lbryio/ytsync/v5/util"
@ -38,6 +40,9 @@ var (
)
func main() {
spew.Dump(downloader.GetVideoInformation("oahaMa3XB0k"))
return
rand.Seed(time.Now().UnixNano())
log.SetLevel(log.DebugLevel)
http.Handle("/metrics", promhttp.Handler())

View file

@ -250,6 +250,7 @@ func (s *Sync) FullCycle() (e error) {
defer signal.Stop(interruptChan)
go func() {
<-interruptChan
util.SendToSlack("got interrupt, shutting down")
log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)")
s.grp.Stop()
}()
@ -856,6 +857,8 @@ func (s *Sync) startWorker(workerNum int) {
tryCount++
err := s.processVideo(v)
util.SendToSlack("Tried to process %s. Error: %v", v.ID(), err)
if err != nil {
logMsg := fmt.Sprintf("error processing video %s: %s", v.ID(), err.Error())
log.Errorln(logMsg)