2020-07-27 15:42:45 -04:00
package downloader
import (
2020-07-27 17:14:06 -04:00
"encoding/json"
2020-07-28 12:47:28 -04:00
"fmt"
2020-07-27 17:14:06 -04:00
"io"
2020-07-27 15:42:45 -04:00
"io/ioutil"
2020-07-28 21:34:08 -04:00
"net"
2020-07-28 11:05:24 -04:00
"net/http"
2020-07-28 12:47:28 -04:00
"net/url"
2020-07-27 15:42:45 -04:00
"os/exec"
"strings"
2020-07-28 11:05:24 -04:00
"time"
2020-07-27 15:42:45 -04:00
2020-07-28 12:47:28 -04:00
"github.com/davecgh/go-spew/spew"
2020-07-27 17:14:06 -04:00
"github.com/lbryio/ytsync/v5/downloader/ytdl"
2020-07-27 15:42:45 -04:00
"github.com/lbryio/lbry.go/v2/extras/errors"
2020-07-28 21:34:08 -04:00
"github.com/lbryio/lbry.go/v2/extras/stop"
2020-07-28 11:05:24 -04:00
"github.com/lbryio/lbry.go/v2/extras/util"
2020-07-27 15:42:45 -04:00
"github.com/sirupsen/logrus"
)
2020-07-28 21:34:08 -04:00
func GetPlaylistVideoIDs ( channelName string , maxVideos int , stopChan stop . Chan ) ( [ ] string , error ) {
2020-07-27 15:42:45 -04:00
args := [ ] string { "--skip-download" , "https://www.youtube.com/channel/" + channelName , "--get-id" , "--flat-playlist" }
2020-07-28 21:34:08 -04:00
ids , err := run ( args , false , true , stopChan )
2020-07-27 15:57:19 -04:00
if err != nil {
return nil , errors . Err ( err )
}
videoIDs := make ( [ ] string , maxVideos )
for i , v := range ids {
if i >= maxVideos {
break
}
videoIDs [ i ] = v
}
return videoIDs , nil
2020-07-27 15:42:45 -04:00
}
2020-07-28 21:34:08 -04:00
func GetVideoInformation ( videoID string , stopChan stop . Chan , ip * net . TCPAddr ) ( * ytdl . YtdlVideo , error ) {
args := [ ] string { "--skip-download" , "--print-json" , "https://www.youtube.com/watch?v=" + videoID }
results , err := run ( args , false , true , stopChan )
if err != nil {
return nil , errors . Err ( err )
}
2020-07-27 17:14:06 -04:00
var video * ytdl . YtdlVideo
2020-07-28 21:34:08 -04:00
err = json . Unmarshal ( [ ] byte ( results [ 0 ] ) , & video )
if err != nil {
return nil , errors . Err ( err )
}
2020-07-28 11:05:24 -04:00
// now get an accurate time
2020-07-28 12:47:28 -04:00
const maxTries = 5
2020-07-28 11:05:24 -04:00
tries := 0
GetTime :
tries ++
2020-07-28 21:34:08 -04:00
t , err := getUploadTime ( videoID , ip )
2020-07-28 11:05:24 -04:00
if err != nil {
2020-07-28 21:34:08 -04:00
//slack(":warning: Upload time error: %v", err)
2020-07-28 12:47:28 -04:00
if tries <= maxTries && ( errors . Is ( err , errNotScraped ) || errors . Is ( err , errUploadTimeEmpty ) ) {
2020-07-28 21:34:08 -04:00
err := triggerScrape ( videoID , ip )
if err == nil {
time . Sleep ( 2 * time . Second ) // let them scrape it
goto GetTime
} else {
//slack("triggering scrape returned error: %v", err)
}
2020-07-28 12:47:28 -04:00
} else if ! errors . Is ( err , errNotScraped ) && ! errors . Is ( err , errUploadTimeEmpty ) {
2020-07-28 21:34:08 -04:00
//slack(":warning: Error while trying to get accurate upload time for %s: %v", videoID, err)
2020-07-28 12:47:28 -04:00
return nil , errors . Err ( err )
2020-07-28 11:05:24 -04:00
}
2020-07-28 12:47:28 -04:00
// do fallback below
2020-07-28 11:05:24 -04:00
}
2020-07-28 21:34:08 -04:00
//slack("After all that, upload time for %s is %s", videoID, t)
2020-07-28 11:05:24 -04:00
if t != "" {
parsed , err := time . Parse ( "2006-01-02, 15:04:05 (MST)" , t ) // this will probably be UTC, but Go's timezone parsing is fucked up. it ignores the timezone in the date
if err != nil {
return nil , errors . Err ( err )
}
2020-07-28 21:34:08 -04:00
slack ( ":exclamation: Got an accurate time for %s" , videoID )
2020-07-28 11:05:24 -04:00
video . UploadDateForReal = parsed
} else {
2020-07-28 21:34:08 -04:00
//slack(":warning: Could not get accurate time for %s. Falling back to time from upload ytdl: %s.", videoID, video.UploadDate)
2020-07-28 11:05:24 -04:00
// fall back to UploadDate from youtube-dl
video . UploadDateForReal , err = time . Parse ( "20060102" , video . UploadDate )
if err != nil {
return nil , err
}
}
2020-07-27 17:14:06 -04:00
return video , nil
2020-07-28 11:05:24 -04:00
}
var errNotScraped = errors . Base ( "not yet scraped by caa.iti.gr" )
2020-07-28 12:47:28 -04:00
var errUploadTimeEmpty = errors . Base ( "upload time is empty" )
// slack prints the formatted message to standard output followed by a newline
// and passes the same format and arguments on to util.SendToSlack.
func slack(format string, a ...interface{}) {
	line := format + "\n"
	fmt.Printf(line, a...)
	util.SendToSlack(format, a...)
}
2020-07-28 11:05:24 -04:00
2020-07-28 21:34:08 -04:00
func triggerScrape ( videoID string , ip * net . TCPAddr ) error {
//slack("Triggering scrape for %s", videoID)
2020-07-28 12:47:28 -04:00
u , err := url . Parse ( "https://caa.iti.gr/verify_videoV3" )
q := u . Query ( )
q . Set ( "twtimeline" , "0" )
q . Set ( "url" , "https://www.youtube.com/watch?v=" + videoID )
u . RawQuery = q . Encode ( )
2020-07-28 21:34:08 -04:00
//slack("GET %s", u.String())
client := getClient ( ip )
req , err := http . NewRequest ( http . MethodGet , u . String ( ) , nil )
if err != nil {
return errors . Err ( err )
}
req . Header . Set ( "User-Agent" , "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36" )
res , err := client . Do ( req )
2020-07-28 11:05:24 -04:00
if err != nil {
return errors . Err ( err )
}
defer res . Body . Close ( )
2020-07-28 21:34:08 -04:00
var response struct {
Message string ` json:"message" `
Status string ` json:"status" `
VideoURL string ` json:"video_url" `
}
err = json . NewDecoder ( res . Body ) . Decode ( & response )
if err != nil {
return errors . Err ( err )
}
switch response . Status {
case "removed_video" :
return errors . Err ( "video previously removed from service" )
case "no_video" :
return errors . Err ( "they say 'video cannot be found'. wtf?" )
default :
spew . Dump ( response )
}
2020-07-28 12:47:28 -04:00
2020-07-28 11:05:24 -04:00
return nil
//https://caa.iti.gr/caa/api/v4/videos/reports/h-tuxHS5lSM
}
2020-07-28 21:34:08 -04:00
func getUploadTime ( videoID string , ip * net . TCPAddr ) ( string , error ) {
//slack("Getting upload time for %s", videoID)
client := getClient ( ip )
req , err := http . NewRequest ( http . MethodGet , "https://caa.iti.gr/get_verificationV3?url=https://www.youtube.com/watch?v=" + videoID , nil )
if err != nil {
return "" , errors . Err ( err )
}
req . Header . Set ( "User-Agent" , "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36" )
res , err := client . Do ( req )
2020-07-28 11:05:24 -04:00
if err != nil {
return "" , errors . Err ( err )
}
defer res . Body . Close ( )
var uploadTime struct {
Time string ` json:"video_upload_time" `
Message string ` json:"message" `
Status string ` json:"status" `
}
err = json . NewDecoder ( res . Body ) . Decode ( & uploadTime )
if err != nil {
return "" , errors . Err ( err )
}
if uploadTime . Status == "ERROR1" {
return "" , errNotScraped
}
2020-07-28 21:34:08 -04:00
if uploadTime . Status == "" && strings . HasPrefix ( uploadTime . Message , "CANNOT_RETRIEVE_REPORT_FOR_VIDEO_" ) {
return "" , errors . Err ( "cannot retrieve report for video" )
}
2020-07-28 12:47:28 -04:00
if uploadTime . Time == "" {
return "" , errUploadTimeEmpty
}
2020-07-28 11:05:24 -04:00
return uploadTime . Time , nil
2020-07-27 17:14:06 -04:00
}
2020-07-27 15:42:45 -04:00
2020-07-28 21:34:08 -04:00
// getClient returns the HTTP client to use for outgoing requests.
//
// With a nil ip it returns http.DefaultClient. Otherwise it builds a new
// client whose transport dials from ip as the local address, takes its proxy
// from the environment and uses the timeouts and idle-connection limits set
// below.
func getClient(ip *net.TCPAddr) *http.Client {
	if ip != nil {
		dialer := &net.Dialer{
			LocalAddr: ip,
			Timeout:   30 * time.Second,
			KeepAlive: 30 * time.Second,
		}
		transport := &http.Transport{
			Proxy:                 http.ProxyFromEnvironment,
			DialContext:           dialer.DialContext,
			MaxIdleConns:          100,
			IdleConnTimeout:       90 * time.Second,
			TLSHandshakeTimeout:   10 * time.Second,
			ExpectContinueTimeout: 1 * time.Second,
		}
		return &http.Client{Transport: transport}
	}
	return http.DefaultClient
}
func run ( args [ ] string , withStdErr , withStdOut bool , stopChan stop . Chan ) ( [ ] string , error ) {
2020-07-27 17:14:06 -04:00
cmd := exec . Command ( "youtube-dl" , args ... )
logrus . Printf ( "Running command youtube-dl %s" , strings . Join ( args , " " ) )
var stderr io . ReadCloser
var errorLog [ ] byte
if withStdErr {
var err error
stderr , err = cmd . StderrPipe ( )
if err != nil {
return nil , errors . Err ( err )
}
errorLog , err = ioutil . ReadAll ( stderr )
if err != nil {
return nil , errors . Err ( err )
}
2020-07-27 15:42:45 -04:00
}
2020-07-27 17:14:06 -04:00
var stdout io . ReadCloser
var outLog [ ] byte
if withStdOut {
var err error
stdout , err = cmd . StdoutPipe ( )
if err != nil {
return nil , errors . Err ( err )
}
if err := cmd . Start ( ) ; err != nil {
return nil , errors . Err ( err )
}
outLog , err = ioutil . ReadAll ( stdout )
if err != nil {
return nil , errors . Err ( err )
}
}
2020-07-28 21:34:08 -04:00
done := make ( chan error , 1 )
go func ( ) {
done <- cmd . Wait ( )
} ( )
select {
case <- stopChan :
if err := cmd . Process . Kill ( ) ; err != nil {
return nil , errors . Prefix ( "failed to kill command after stopper cancellation" , err )
}
return nil , errors . Err ( "canceled by stopper" )
case err := <- done :
if err != nil {
return nil , errors . Prefix ( "youtube-dl " + strings . Join ( args , " " ) , err )
}
2020-07-27 15:42:45 -04:00
}
2020-07-28 21:34:08 -04:00
2020-07-27 15:42:45 -04:00
if len ( errorLog ) > 0 {
return nil , errors . Err ( string ( errorLog ) )
}
return strings . Split ( strings . Replace ( string ( outLog ) , "\r\n" , "\n" , - 1 ) , "\n" ) , nil
}