267 lines
7 KiB
Go
267 lines
7 KiB
Go
package downloader
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os/exec"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/davecgh/go-spew/spew"
|
|
"github.com/lbryio/ytsync/v5/downloader/ytdl"
|
|
|
|
"github.com/lbryio/lbry.go/v2/extras/errors"
|
|
"github.com/lbryio/lbry.go/v2/extras/stop"
|
|
"github.com/lbryio/lbry.go/v2/extras/util"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan) ([]string, error) {
|
|
args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName, "--get-id", "--flat-playlist"}
|
|
ids, err := run(args, false, true, stopChan)
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
videoIDs := make([]string, maxVideos)
|
|
for i, v := range ids {
|
|
if i >= maxVideos {
|
|
break
|
|
}
|
|
videoIDs[i] = v
|
|
}
|
|
return videoIDs, nil
|
|
}
|
|
|
|
func GetVideoInformation(videoID string, stopChan stop.Chan, ip *net.TCPAddr) (*ytdl.YtdlVideo, error) {
|
|
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID}
|
|
results, err := run(args, false, true, stopChan)
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
var video *ytdl.YtdlVideo
|
|
err = json.Unmarshal([]byte(results[0]), &video)
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
|
|
// now get an accurate time
|
|
const maxTries = 5
|
|
tries := 0
|
|
GetTime:
|
|
tries++
|
|
t, err := getUploadTime(videoID, ip)
|
|
if err != nil {
|
|
//slack(":warning: Upload time error: %v", err)
|
|
if tries <= maxTries && (errors.Is(err, errNotScraped) || errors.Is(err, errUploadTimeEmpty)) {
|
|
err := triggerScrape(videoID, ip)
|
|
if err == nil {
|
|
time.Sleep(2 * time.Second) // let them scrape it
|
|
goto GetTime
|
|
} else {
|
|
//slack("triggering scrape returned error: %v", err)
|
|
}
|
|
} else if !errors.Is(err, errNotScraped) && !errors.Is(err, errUploadTimeEmpty) {
|
|
//slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
|
|
return nil, errors.Err(err)
|
|
}
|
|
// do fallback below
|
|
}
|
|
//slack("After all that, upload time for %s is %s", videoID, t)
|
|
|
|
if t != "" {
|
|
parsed, err := time.Parse("2006-01-02, 15:04:05 (MST)", t) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
slack(":exclamation: Got an accurate time for %s", videoID)
|
|
video.UploadDateForReal = parsed
|
|
} else {
|
|
//slack(":warning: Could not get accurate time for %s. Falling back to time from upload ytdl: %s.", videoID, video.UploadDate)
|
|
// fall back to UploadDate from youtube-dl
|
|
video.UploadDateForReal, err = time.Parse("20060102", video.UploadDate)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return video, nil
|
|
}
|
|
|
|
var errNotScraped = errors.Base("not yet scraped by caa.iti.gr")
|
|
var errUploadTimeEmpty = errors.Base("upload time is empty")
|
|
|
|
func slack(format string, a ...interface{}) {
|
|
fmt.Printf(format+"\n", a...)
|
|
util.SendToSlack(format, a...)
|
|
}
|
|
|
|
func triggerScrape(videoID string, ip *net.TCPAddr) error {
|
|
//slack("Triggering scrape for %s", videoID)
|
|
u, err := url.Parse("https://caa.iti.gr/verify_videoV3")
|
|
q := u.Query()
|
|
q.Set("twtimeline", "0")
|
|
q.Set("url", "https://www.youtube.com/watch?v="+videoID)
|
|
u.RawQuery = q.Encode()
|
|
//slack("GET %s", u.String())
|
|
|
|
client := getClient(ip)
|
|
req, err := http.NewRequest(http.MethodGet, u.String(), nil)
|
|
if err != nil {
|
|
return errors.Err(err)
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36")
|
|
|
|
res, err := client.Do(req)
|
|
if err != nil {
|
|
return errors.Err(err)
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
var response struct {
|
|
Message string `json:"message"`
|
|
Status string `json:"status"`
|
|
VideoURL string `json:"video_url"`
|
|
}
|
|
err = json.NewDecoder(res.Body).Decode(&response)
|
|
if err != nil {
|
|
return errors.Err(err)
|
|
}
|
|
|
|
switch response.Status {
|
|
case "removed_video":
|
|
return errors.Err("video previously removed from service")
|
|
case "no_video":
|
|
return errors.Err("they say 'video cannot be found'. wtf?")
|
|
default:
|
|
spew.Dump(response)
|
|
}
|
|
|
|
return nil
|
|
//https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM
|
|
}
|
|
|
|
func getUploadTime(videoID string, ip *net.TCPAddr) (string, error) {
|
|
//slack("Getting upload time for %s", videoID)
|
|
|
|
client := getClient(ip)
|
|
req, err := http.NewRequest(http.MethodGet, "https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v="+videoID, nil)
|
|
if err != nil {
|
|
return "", errors.Err(err)
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36")
|
|
|
|
res, err := client.Do(req)
|
|
if err != nil {
|
|
return "", errors.Err(err)
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
var uploadTime struct {
|
|
Time string `json:"video_upload_time"`
|
|
Message string `json:"message"`
|
|
Status string `json:"status"`
|
|
}
|
|
err = json.NewDecoder(res.Body).Decode(&uploadTime)
|
|
if err != nil {
|
|
return "", errors.Err(err)
|
|
}
|
|
|
|
if uploadTime.Status == "ERROR1" {
|
|
return "", errNotScraped
|
|
}
|
|
|
|
if uploadTime.Status == "" && strings.HasPrefix(uploadTime.Message, "CANNOT_RETRIEVE_REPORT_FOR_VIDEO_") {
|
|
return "", errors.Err("cannot retrieve report for video")
|
|
}
|
|
|
|
if uploadTime.Time == "" {
|
|
return "", errUploadTimeEmpty
|
|
}
|
|
|
|
return uploadTime.Time, nil
|
|
}
|
|
|
|
// getClient picks the HTTP client used for outgoing requests.
//
// Without a local address (ip == nil) the shared http.DefaultClient is
// returned. Otherwise a new client is built on every call, with a transport
// whose dialer uses ip as its local address.
func getClient(ip *net.TCPAddr) *http.Client {
	if ip != nil {
		dialer := &net.Dialer{
			LocalAddr: ip,
			Timeout:   30 * time.Second,
			KeepAlive: 30 * time.Second,
		}
		transport := &http.Transport{
			Proxy:                 http.ProxyFromEnvironment,
			DialContext:           dialer.DialContext,
			MaxIdleConns:          100,
			IdleConnTimeout:       90 * time.Second,
			TLSHandshakeTimeout:   10 * time.Second,
			ExpectContinueTimeout: 1 * time.Second,
		}
		return &http.Client{Transport: transport}
	}

	return http.DefaultClient
}
|
|
|
|
func run(args []string, withStdErr, withStdOut bool, stopChan stop.Chan) ([]string, error) {
|
|
cmd := exec.Command("youtube-dl", args...)
|
|
logrus.Printf("Running command youtube-dl %s", strings.Join(args, " "))
|
|
|
|
var stderr io.ReadCloser
|
|
var errorLog []byte
|
|
if withStdErr {
|
|
var err error
|
|
stderr, err = cmd.StderrPipe()
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
errorLog, err = ioutil.ReadAll(stderr)
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
}
|
|
|
|
var stdout io.ReadCloser
|
|
var outLog []byte
|
|
if withStdOut {
|
|
var err error
|
|
stdout, err = cmd.StdoutPipe()
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
outLog, err = ioutil.ReadAll(stdout)
|
|
if err != nil {
|
|
return nil, errors.Err(err)
|
|
}
|
|
}
|
|
|
|
done := make(chan error, 1)
|
|
go func() {
|
|
done <- cmd.Wait()
|
|
}()
|
|
select {
|
|
case <-stopChan:
|
|
if err := cmd.Process.Kill(); err != nil {
|
|
return nil, errors.Prefix("failed to kill command after stopper cancellation", err)
|
|
}
|
|
return nil, errors.Err("canceled by stopper")
|
|
case err := <-done:
|
|
if err != nil {
|
|
return nil, errors.Prefix("youtube-dl "+strings.Join(args, " "), err)
|
|
}
|
|
}
|
|
|
|
if len(errorLog) > 0 {
|
|
return nil, errors.Err(string(errorLog))
|
|
}
|
|
return strings.Split(strings.Replace(string(outLog), "\r\n", "\n", -1), "\n"), nil
|
|
}
|