WIP: trying to get the accurate api to work
This commit is contained in:
parent
f942bf8025
commit
a3dd3dc626
4 changed files with 118 additions and 76 deletions
|
@ -2,13 +2,16 @@ package downloader
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/davecgh/go-spew/spew"
|
||||||
"github.com/lbryio/ytsync/v5/downloader/ytdl"
|
"github.com/lbryio/ytsync/v5/downloader/ytdl"
|
||||||
|
|
||||||
"github.com/lbryio/lbry.go/v2/extras/errors"
|
"github.com/lbryio/lbry.go/v2/extras/errors"
|
||||||
|
@ -34,30 +37,38 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int) ([]string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetVideoInformation(videoID string) (*ytdl.YtdlVideo, error) {
|
func GetVideoInformation(videoID string) (*ytdl.YtdlVideo, error) {
|
||||||
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
|
//args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
|
||||||
results, err := run(args, false, true)
|
//results, err := run(args, false, true)
|
||||||
if err != nil {
|
//if err != nil {
|
||||||
return nil, errors.Err(err)
|
// return nil, errors.Err(err)
|
||||||
}
|
//}
|
||||||
var video *ytdl.YtdlVideo
|
var video *ytdl.YtdlVideo
|
||||||
err = json.Unmarshal([]byte(results[0]), &video)
|
//err = json.Unmarshal([]byte(results[0]), &video)
|
||||||
if err != nil {
|
//if err != nil {
|
||||||
return nil, errors.Err(err)
|
// return nil, errors.Err(err)
|
||||||
}
|
//}
|
||||||
|
|
||||||
|
video = &ytdl.YtdlVideo{}
|
||||||
|
|
||||||
// now get an accurate time
|
// now get an accurate time
|
||||||
|
const maxTries = 5
|
||||||
tries := 0
|
tries := 0
|
||||||
GetTime:
|
GetTime:
|
||||||
tries++
|
tries++
|
||||||
t, err := getUploadTime(videoID)
|
t, err := getUploadTime(videoID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, errNotScraped) && tries <= 3 {
|
slack(":warning: Upload time error: %v", err)
|
||||||
|
if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty)) {
|
||||||
triggerScrape(videoID)
|
triggerScrape(videoID)
|
||||||
time.Sleep(2 * time.Second) // let them scrape it
|
time.Sleep(2 * time.Second) // let them scrape it
|
||||||
goto GetTime
|
goto GetTime
|
||||||
|
} else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) {
|
||||||
|
slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
|
||||||
|
return nil, errors.Err(err)
|
||||||
}
|
}
|
||||||
//return video, errors.Err(err) // just swallow this error and do fallback below
|
// do fallback below
|
||||||
}
|
}
|
||||||
|
slack("After all that, upload time for %s is %s", videoID, t)
|
||||||
|
|
||||||
if t != "" {
|
if t != "" {
|
||||||
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
|
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
|
||||||
|
@ -66,7 +77,7 @@ GetTime:
|
||||||
}
|
}
|
||||||
video.UploadDateForReal = parsed
|
video.UploadDateForReal = parsed
|
||||||
} else {
|
} else {
|
||||||
_ = util.SendToSlack(":warning: Could not get accurate time for %s. Falling back to estimated time.", videoID)
|
slack(":warning: Could not get accurate time for %s. Falling back to estimated time.", videoID)
|
||||||
// fall back to UploadDate from youtube-dl
|
// fall back to UploadDate from youtube-dl
|
||||||
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
|
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -78,19 +89,36 @@ GetTime:
|
||||||
}
|
}
|
||||||
|
|
||||||
var errNotScraped = errors.Base("not yet scraped by caa.iti.gr")
|
var errNotScraped = errors.Base("not yet scraped by caa.iti.gr")
|
||||||
|
var errUploadTimeEmpty = errors.Base("upload time is empty")
|
||||||
|
|
||||||
|
func slack(format string, a ...interface{}) {
|
||||||
|
fmt.Printf(format+"\n", a...)
|
||||||
|
util.SendToSlack(format, a...)
|
||||||
|
}
|
||||||
|
|
||||||
func triggerScrape(videoID string) error {
|
func triggerScrape(videoID string) error {
|
||||||
res, err := http.Get("https://caa.iti.gr/verify_videoV3?twtimeline=0&url=https://www.youtube.com/watch?v=" + videoID)
|
slack("Triggering scrape for %s", videoID)
|
||||||
|
u, err := url.Parse("https://caa.iti.gr/verify_videoV3")
|
||||||
|
q := u.Query()
|
||||||
|
q.Set("twtimeline", "0")
|
||||||
|
q.Set("url", "https://www.youtube.com/watch?v="+videoID)
|
||||||
|
u.RawQuery = q.Encode()
|
||||||
|
slack("GET %s", u.String())
|
||||||
|
res, err := http.Get(u.String())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Err(err)
|
return errors.Err(err)
|
||||||
}
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
|
||||||
|
all, err := ioutil.ReadAll(res.Body)
|
||||||
|
spew.Dump(string(all), err)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
//https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM
|
//https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM
|
||||||
}
|
}
|
||||||
|
|
||||||
func getUploadTime(videoID string) (string, error) {
|
func getUploadTime(videoID string) (string, error) {
|
||||||
|
slack("Getting upload time for %s", videoID)
|
||||||
res, err := http.Get("https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v=" + videoID)
|
res, err := http.Get("https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v=" + videoID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", errors.Err(err)
|
return "", errors.Err(err)
|
||||||
|
@ -111,6 +139,10 @@ func getUploadTime(videoID string) (string, error) {
|
||||||
return "", errNotScraped
|
return "", errNotScraped
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if uploadTime.Time == "" {
|
||||||
|
return "", errUploadTimeEmpty
|
||||||
|
}
|
||||||
|
|
||||||
return uploadTime.Time, nil
|
return uploadTime.Time, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,40 +37,8 @@ type YtdlVideo struct {
|
||||||
WebpageURLBasename string `json:"webpage_url_basename"`
|
WebpageURLBasename string `json:"webpage_url_basename"`
|
||||||
Acodec string `json:"acodec"`
|
Acodec string `json:"acodec"`
|
||||||
DisplayID string `json:"display_id"`
|
DisplayID string `json:"display_id"`
|
||||||
RequestedFormats []struct {
|
//RequestedFormats []RequestedFormat `json:"requested_formats"`
|
||||||
Asr interface{} `json:"asr"`
|
//AutomaticCaptions struct{} `json:"automatic_captions"`
|
||||||
Tbr float64 `json:"tbr"`
|
|
||||||
Container string `json:"container"`
|
|
||||||
Language interface{} `json:"language"`
|
|
||||||
Format string `json:"format"`
|
|
||||||
URL string `json:"url"`
|
|
||||||
Vcodec string `json:"vcodec"`
|
|
||||||
FormatNote string `json:"format_note"`
|
|
||||||
Height int `json:"height"`
|
|
||||||
Width int `json:"width"`
|
|
||||||
Ext string `json:"ext"`
|
|
||||||
FragmentBaseURL string `json:"fragment_base_url"`
|
|
||||||
Filesize interface{} `json:"filesize"`
|
|
||||||
Fps int `json:"fps"`
|
|
||||||
ManifestURL string `json:"manifest_url"`
|
|
||||||
Protocol string `json:"protocol"`
|
|
||||||
FormatID string `json:"format_id"`
|
|
||||||
HTTPHeaders struct {
|
|
||||||
AcceptCharset string `json:"Accept-Charset"`
|
|
||||||
AcceptLanguage string `json:"Accept-Language"`
|
|
||||||
AcceptEncoding string `json:"Accept-Encoding"`
|
|
||||||
Accept string `json:"Accept"`
|
|
||||||
UserAgent string `json:"User-Agent"`
|
|
||||||
} `json:"http_headers"`
|
|
||||||
Fragments []struct {
|
|
||||||
Path string `json:"path"`
|
|
||||||
Duration float64 `json:"duration,omitempty"`
|
|
||||||
} `json:"fragments"`
|
|
||||||
Acodec string `json:"acodec"`
|
|
||||||
Abr int `json:"abr,omitempty"`
|
|
||||||
} `json:"requested_formats"`
|
|
||||||
AutomaticCaptions struct {
|
|
||||||
} `json:"automatic_captions"`
|
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
Tags []string `json:"tags"`
|
Tags []string `json:"tags"`
|
||||||
Track interface{} `json:"track"`
|
Track interface{} `json:"track"`
|
||||||
|
@ -81,8 +49,7 @@ type YtdlVideo struct {
|
||||||
FormatID string `json:"format_id"`
|
FormatID string `json:"format_id"`
|
||||||
EpisodeNumber interface{} `json:"episode_number"`
|
EpisodeNumber interface{} `json:"episode_number"`
|
||||||
UploaderID string `json:"uploader_id"`
|
UploaderID string `json:"uploader_id"`
|
||||||
Subtitles struct {
|
//Subtitles struct{} `json:"subtitles"`
|
||||||
} `json:"subtitles"`
|
|
||||||
ReleaseYear interface{} `json:"release_year"`
|
ReleaseYear interface{} `json:"release_year"`
|
||||||
Thumbnails []Thumbnail `json:"thumbnails"`
|
Thumbnails []Thumbnail `json:"thumbnails"`
|
||||||
License interface{} `json:"license"`
|
License interface{} `json:"license"`
|
||||||
|
@ -96,38 +63,73 @@ type YtdlVideo struct {
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
EndTime interface{} `json:"end_time"`
|
EndTime interface{} `json:"end_time"`
|
||||||
WebpageURL string `json:"webpage_url"`
|
WebpageURL string `json:"webpage_url"`
|
||||||
Formats []struct {
|
//Formats []Format `json:"formats"`
|
||||||
Asr int `json:"asr"`
|
|
||||||
Tbr float64 `json:"tbr"`
|
|
||||||
Protocol string `json:"protocol"`
|
|
||||||
Format string `json:"format"`
|
|
||||||
FormatNote string `json:"format_note"`
|
|
||||||
Height interface{} `json:"height"`
|
|
||||||
ManifestURL string `json:"manifest_url,omitempty"`
|
|
||||||
FormatID string `json:"format_id"`
|
|
||||||
Container string `json:"container,omitempty"`
|
|
||||||
Language interface{} `json:"language,omitempty"`
|
|
||||||
HTTPHeaders HTTPHeaders `json:"http_headers"`
|
|
||||||
URL string `json:"url"`
|
|
||||||
Vcodec string `json:"vcodec"`
|
|
||||||
Abr int `json:"abr,omitempty"`
|
|
||||||
Width interface{} `json:"width"`
|
|
||||||
Ext string `json:"ext"`
|
|
||||||
FragmentBaseURL string `json:"fragment_base_url,omitempty"`
|
|
||||||
Filesize interface{} `json:"filesize"`
|
|
||||||
Fps interface{} `json:"fps"`
|
|
||||||
Fragments []struct {
|
|
||||||
Path string `json:"path"`
|
|
||||||
Duration float64 `json:"duration,omitempty"`
|
|
||||||
} `json:"fragments,omitempty"`
|
|
||||||
Acodec string `json:"acodec"`
|
|
||||||
PlayerURL interface{} `json:"player_url,omitempty"`
|
|
||||||
} `json:"formats"`
|
|
||||||
ChannelURL string `json:"channel_url"`
|
ChannelURL string `json:"channel_url"`
|
||||||
Resolution interface{} `json:"resolution"`
|
Resolution interface{} `json:"resolution"`
|
||||||
Vcodec string `json:"vcodec"`
|
Vcodec string `json:"vcodec"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type RequestedFormat struct {
|
||||||
|
Asr interface{} `json:"asr"`
|
||||||
|
Tbr float64 `json:"tbr"`
|
||||||
|
Container string `json:"container"`
|
||||||
|
Language interface{} `json:"language"`
|
||||||
|
Format string `json:"format"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
Vcodec string `json:"vcodec"`
|
||||||
|
FormatNote string `json:"format_note"`
|
||||||
|
Height int `json:"height"`
|
||||||
|
Width int `json:"width"`
|
||||||
|
Ext string `json:"ext"`
|
||||||
|
FragmentBaseURL string `json:"fragment_base_url"`
|
||||||
|
Filesize interface{} `json:"filesize"`
|
||||||
|
Fps int `json:"fps"`
|
||||||
|
ManifestURL string `json:"manifest_url"`
|
||||||
|
Protocol string `json:"protocol"`
|
||||||
|
FormatID string `json:"format_id"`
|
||||||
|
HTTPHeaders struct {
|
||||||
|
AcceptCharset string `json:"Accept-Charset"`
|
||||||
|
AcceptLanguage string `json:"Accept-Language"`
|
||||||
|
AcceptEncoding string `json:"Accept-Encoding"`
|
||||||
|
Accept string `json:"Accept"`
|
||||||
|
UserAgent string `json:"User-Agent"`
|
||||||
|
} `json:"http_headers"`
|
||||||
|
Fragments []struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
Duration float64 `json:"duration,omitempty"`
|
||||||
|
} `json:"fragments"`
|
||||||
|
Acodec string `json:"acodec"`
|
||||||
|
Abr int `json:"abr,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Format struct {
|
||||||
|
Asr int `json:"asr"`
|
||||||
|
Tbr float64 `json:"tbr"`
|
||||||
|
Protocol string `json:"protocol"`
|
||||||
|
Format string `json:"format"`
|
||||||
|
FormatNote string `json:"format_note"`
|
||||||
|
Height interface{} `json:"height"`
|
||||||
|
ManifestURL string `json:"manifest_url,omitempty"`
|
||||||
|
FormatID string `json:"format_id"`
|
||||||
|
Container string `json:"container,omitempty"`
|
||||||
|
Language interface{} `json:"language,omitempty"`
|
||||||
|
HTTPHeaders HTTPHeaders `json:"http_headers"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
Vcodec string `json:"vcodec"`
|
||||||
|
Abr int `json:"abr,omitempty"`
|
||||||
|
Width interface{} `json:"width"`
|
||||||
|
Ext string `json:"ext"`
|
||||||
|
FragmentBaseURL string `json:"fragment_base_url,omitempty"`
|
||||||
|
Filesize interface{} `json:"filesize"`
|
||||||
|
Fps interface{} `json:"fps"`
|
||||||
|
Fragments []struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
Duration float64 `json:"duration,omitempty"`
|
||||||
|
} `json:"fragments,omitempty"`
|
||||||
|
Acodec string `json:"acodec"`
|
||||||
|
PlayerURL interface{} `json:"player_url,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type Thumbnail struct {
|
type Thumbnail struct {
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
|
|
5
main.go
5
main.go
|
@ -7,8 +7,10 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/davecgh/go-spew/spew"
|
||||||
"github.com/lbryio/lbry.go/v2/extras/errors"
|
"github.com/lbryio/lbry.go/v2/extras/errors"
|
||||||
"github.com/lbryio/lbry.go/v2/extras/util"
|
"github.com/lbryio/lbry.go/v2/extras/util"
|
||||||
|
"github.com/lbryio/ytsync/v5/downloader"
|
||||||
"github.com/lbryio/ytsync/v5/manager"
|
"github.com/lbryio/ytsync/v5/manager"
|
||||||
"github.com/lbryio/ytsync/v5/sdk"
|
"github.com/lbryio/ytsync/v5/sdk"
|
||||||
ytUtils "github.com/lbryio/ytsync/v5/util"
|
ytUtils "github.com/lbryio/ytsync/v5/util"
|
||||||
|
@ -38,6 +40,9 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
spew.Dump(downloader.GetVideoInformation("oahaMa3XB0k"))
|
||||||
|
return
|
||||||
|
|
||||||
rand.Seed(time.Now().UnixNano())
|
rand.Seed(time.Now().UnixNano())
|
||||||
log.SetLevel(log.DebugLevel)
|
log.SetLevel(log.DebugLevel)
|
||||||
http.Handle("/metrics", promhttp.Handler())
|
http.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
|
@ -250,6 +250,7 @@ func (s *Sync) FullCycle() (e error) {
|
||||||
defer signal.Stop(interruptChan)
|
defer signal.Stop(interruptChan)
|
||||||
go func() {
|
go func() {
|
||||||
<-interruptChan
|
<-interruptChan
|
||||||
|
util.SendToSlack("got interrupt, shutting down")
|
||||||
log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)")
|
log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)")
|
||||||
s.grp.Stop()
|
s.grp.Stop()
|
||||||
}()
|
}()
|
||||||
|
@ -856,6 +857,8 @@ func (s *Sync) startWorker(workerNum int) {
|
||||||
tryCount++
|
tryCount++
|
||||||
err := s.processVideo(v)
|
err := s.processVideo(v)
|
||||||
|
|
||||||
|
util.SendToSlack("Tried to process %s. Error: %v", v.ID(), err)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logMsg := fmt.Sprintf("error processing video %s: %s", v.ID(), err.Error())
|
logMsg := fmt.Sprintf("error processing video %s: %s", v.ID(), err.Error())
|
||||||
log.Errorln(logMsg)
|
log.Errorln(logMsg)
|
||||||
|
|
Loading…
Reference in a new issue