switch to yt-dlp

add progress bars
avoid unnecessary calls to youtube
update user agents
cookie fixes
bug fixes
introduction of new bugs
This commit is contained in:
Niko Storni 2021-06-17 17:51:21 +02:00
parent 087f20c133
commit 519e1e4648
13 changed files with 403 additions and 63 deletions

View file

@ -23,7 +23,7 @@ addons:
- python3-pip
before_script:
- sudo pip3 install -U youtube-dl
- sudo pip3 install -U yt-dlp
- sudo add-apt-repository -y ppa:savoury1/ffmpeg4
env:

View file

@ -8,7 +8,9 @@ import (
"net"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"strings"
"time"
@ -16,6 +18,7 @@ import (
"github.com/lbryio/ytsync/v5/downloader/ytdl"
"github.com/lbryio/ytsync/v5/ip_manager"
"github.com/lbryio/ytsync/v5/sdk"
util2 "github.com/lbryio/ytsync/v5/util"
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/stop"
@ -25,7 +28,7 @@ import (
)
func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan, pool *ip_manager.IPPool) ([]string, error) {
args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName, "--get-id", "--flat-playlist", "--cookies", "cookies.txt"}
args := []string{"--skip-download", "https://www.youtube.com/channel/" + channelName + "/videos", "--get-id", "--flat-playlist", "--cookies", "cookies.txt"}
ids, err := run(channelName, args, stopChan, pool, true)
if err != nil {
return nil, errors.Err(err)
@ -44,13 +47,31 @@ func GetPlaylistVideoIDs(channelName string, maxVideos int, stopChan stop.Chan,
const releaseTimeFormat = "2006-01-02, 15:04:05 (MST)"
func GetVideoInformation(config *sdk.APIConfig, videoID string, stopChan stop.Chan, ip *net.TCPAddr, pool *ip_manager.IPPool) (*ytdl.YtdlVideo, error) {
args := []string{"--skip-download", "--print-json", "https://www.youtube.com/watch?v=" + videoID, "--cookies", "cookies.txt"}
results, err := run(videoID, args, stopChan, pool, false)
args := []string{
"--skip-download",
"--write-info-json",
videoID,
"--cookies",
"cookies.txt",
"-o",
path.Join(util2.GetVideoMetadataDir(), videoID),
}
_, err := run(videoID, args, stopChan, pool, false)
if err != nil {
return nil, errors.Err(err)
}
f, err := os.Open(path.Join(util2.GetVideoMetadataDir(), videoID+".info.json"))
if err != nil {
return nil, errors.Err(err)
}
// defer the closing of our jsonFile so that we can parse it later on
defer f.Close()
// read our opened jsonFile as a byte array.
byteValue, _ := ioutil.ReadAll(f)
var video *ytdl.YtdlVideo
err = json.Unmarshal([]byte(results[0]), &video)
err = json.Unmarshal(byteValue, &video)
if err != nil {
return nil, errors.Err(err)
}
@ -126,7 +147,7 @@ func triggerScrape(videoID string, ip *net.TCPAddr) error {
if err != nil {
return errors.Err(err)
}
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36")
req.Header.Set("User-Agent", ChromeUA)
res, err := client.Do(req)
if err != nil {
@ -196,7 +217,7 @@ func getUploadTime(config *sdk.APIConfig, videoID string, ip *net.TCPAddr, uploa
if err != nil {
return ytdlUploadDate.Format(releaseTimeFormat), errors.Err(err)
}
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36")
req.Header.Set("User-Agent", ChromeUA)
res, err := client.Do(req)
if err != nil {
@ -251,8 +272,8 @@ func getClient(ip *net.TCPAddr) *http.Client {
}
const (
googleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
chromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
GoogleBotUA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
ChromeUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
maxAttempts = 3
extractionError = "YouTube said: Unable to extract video data"
throttledError = "HTTP Error 429"
@ -270,9 +291,9 @@ func run(use string, args []string, stopChan stop.Chan, pool *ip_manager.IPPool,
}
argsForCommand := append(args, "--source-address", sourceAddress)
argsForCommand = append(argsForCommand, useragent...)
binary := "youtube-dl"
binary := "yt-dlp"
if dlc {
binary = "youtube-dlc"
binary = "yt-dlp"
}
cmd := exec.Command(binary, argsForCommand...)
@ -299,13 +320,13 @@ func run(use string, args []string, stopChan stop.Chan, pool *ip_manager.IPPool,
func nextUA(current []string) []string {
if len(current) == 0 {
return []string{"--user-agent", googleBotUA}
return []string{"--user-agent", GoogleBotUA}
}
return []string{"--user-agent", chromeUA}
return []string{"--user-agent", ChromeUA}
}
func runCmd(cmd *exec.Cmd, stopChan stop.Chan) ([]string, error) {
logrus.Infof("running youtube-dl(c) cmd: %s", strings.Join(cmd.Args, " "))
logrus.Infof("running yt-dlp cmd: %s", strings.Join(cmd.Args, " "))
var err error
stderr, err := cmd.StderrPipe()
if err != nil {
@ -341,7 +362,7 @@ func runCmd(cmd *exec.Cmd, stopChan stop.Chan) ([]string, error) {
return nil, errors.Err("interrupted by user")
case err := <-done:
if err != nil {
return nil, errors.Prefix("youtube-dl(c) "+strings.Join(cmd.Args, " ")+" ["+string(errorLog)+"]", err)
return nil, errors.Prefix("yt-dlp "+strings.Join(cmd.Args, " ")+" ["+string(errorLog)+"]", err)
}
return strings.Split(strings.Replace(string(outLog), "\r\n", "\n", -1), "\n"), nil
}

View file

@ -63,10 +63,10 @@ type YtdlVideo struct {
Width int `json:"width"`
EndTime interface{} `json:"end_time"`
WebpageURL string `json:"webpage_url"`
//Formats []Format `json:"formats"`
ChannelURL string `json:"channel_url"`
Resolution interface{} `json:"resolution"`
Vcodec string `json:"vcodec"`
Formats []Format `json:"formats"`
ChannelURL string `json:"channel_url"`
Resolution interface{} `json:"resolution"`
Vcodec string `json:"vcodec"`
}
type RequestedFormat struct {
@ -103,31 +103,34 @@ type RequestedFormat struct {
}
type Format struct {
Asr int `json:"asr"`
Tbr float64 `json:"tbr"`
Protocol string `json:"protocol"`
Format string `json:"format"`
FormatNote string `json:"format_note"`
Height interface{} `json:"height"`
ManifestURL string `json:"manifest_url,omitempty"`
FormatID string `json:"format_id"`
Container string `json:"container,omitempty"`
Language interface{} `json:"language,omitempty"`
HTTPHeaders HTTPHeaders `json:"http_headers"`
URL string `json:"url"`
Vcodec string `json:"vcodec"`
Abr int `json:"abr,omitempty"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
FragmentBaseURL string `json:"fragment_base_url,omitempty"`
Filesize interface{} `json:"filesize"`
Fps float64 `json:"fps"`
Fragments []struct {
Path string `json:"path"`
Duration float64 `json:"duration,omitempty"`
} `json:"fragments,omitempty"`
Acodec string `json:"acodec"`
PlayerURL interface{} `json:"player_url,omitempty"`
Asr int `json:"asr"`
Filesize int `json:"filesize"`
FormatID string `json:"format_id"`
FormatNote string `json:"format_note"`
Fps interface{} `json:"fps"`
Height interface{} `json:"height"`
Quality int `json:"quality"`
Tbr float64 `json:"tbr"`
URL string `json:"url"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
Vcodec string `json:"vcodec"`
Acodec string `json:"acodec"`
Abr float64 `json:"abr,omitempty"`
DownloaderOptions struct {
HTTPChunkSize int `json:"http_chunk_size"`
} `json:"downloader_options,omitempty"`
Container string `json:"container,omitempty"`
Format string `json:"format"`
Protocol string `json:"protocol"`
HTTPHeaders struct {
UserAgent string `json:"User-Agent"`
AcceptCharset string `json:"Accept-Charset"`
Accept string `json:"Accept"`
AcceptEncoding string `json:"Accept-Encoding"`
AcceptLanguage string `json:"Accept-Language"`
} `json:"http_headers"`
Vbr float64 `json:"vbr,omitempty"`
}
type Thumbnail struct {

View file

@ -31,12 +31,14 @@ services:
- BANDWIDTH_LIMIT=80000000000
- SESSION_TIMEOUT=10000000000000000000000000
- TCP_PORT=50001
- ELASTIC_HOST=es01
ports:
- "15300:50001"
expose:
- "50001"
depends_on:
- lbrycrd
- es01
ulimits:
nofile:
soft: 90000
@ -44,10 +46,30 @@ services:
#command: lbry.wallet.server.coin.LBC
command: lbry.wallet.server.coin.LBCRegTest
#############
## elasticsearch ##
#############
es01:
image: docker.elastic.co/elasticsearch/elasticsearch:7.11.0
container_name: es01
environment:
- node.name=es01
- discovery.type=single-node
- indices.query.bool.max_clause_count=8196
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms4g -Xmx4g"
ulimits:
memlock:
soft: -1
hard: -1
ports:
- "9200:9200"
expose:
- "9200"
#############
## Lbrynet ##
#############
lbrynet:
image: lbry/lbrynet:v0.90.1
image: lbry/lbrynet:v0.99.0
restart: always
ports:
- "15100:5279"

View file

@ -95,4 +95,5 @@ if [[ $status != "synced" || $videoStatus != "published" || $channelTransferStat
exit 1;
else
echo "SUCCESSSSSSSSSSSSS!"
fi;
fi;
docker-compose down

View file

@ -8,7 +8,7 @@ services:
## Lbrynet ##
#############
lbrynet:
image: lbry/lbrynet:v0.90.1
image: lbry/lbrynet:v0.99.0
restart: "no"
networks:
lbry-network:

1
go.mod
View file

@ -32,6 +32,7 @@ require (
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.7.0
github.com/vbauerster/mpb/v7 v7.0.2
google.golang.org/appengine v1.6.5 // indirect
gopkg.in/ini.v1 v1.60.2 // indirect
gopkg.in/vansante/go-ffprobe.v2 v2.0.2

13
go.sum
View file

@ -26,8 +26,12 @@ github.com/Microsoft/go-winio v0.4.14 h1:+hMXMk01us9KgxGb7ftKQt2Xpf5hH/yky+TDA+q
github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/StackExchange/wmi v0.0.0-20170410192909-ea383cf3ba6e/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
@ -311,6 +315,8 @@ github.com/marten-seemann/qtls-go1-16 v0.1.3 h1:XEZ1xGorVy9u+lJq+WXNE+hiqRYLNvJG
github.com/marten-seemann/qtls-go1-16 v0.1.3/go.mod h1:gNpI2Ol+lRS3WwSOtIUUtRwZEQMXjYK+dQSBFbethAk=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
@ -391,6 +397,8 @@ github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R
github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084 h1:sofwID9zm4tzrgykg80hfFph1mryUeLRsUfoocVVmRY=
github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rubenv/sql-migrate v0.0.0-20170330050058-38004e7a77f2/go.mod h1:WS0rl9eEliYI8DPnr3TOwz4439pay+qNgzJoVya/DmY=
@ -483,6 +491,8 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/uber-go/atomic v1.3.2/go.mod h1:/Ct5t2lcmbJ4OSe/waGBoaVvVqtO0bmtfVNex1PFV8g=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/vbauerster/mpb/v7 v7.0.2 h1:eN6AD/ytv1nqCO7Dm8MO0/pGMKmMyH/WMnTJhAUuc/w=
github.com/vbauerster/mpb/v7 v7.0.2/go.mod h1:Mnq3gESXJ9eQhccbGZDggJ1faTCrmaA4iN57fUloRGE=
github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU=
github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM=
github.com/volatiletech/inflect v0.0.0-20170731032912-e7201282ae8d h1:gI4/tqP6lCY5k6Sg+4k9qSoBXmPwG+xXgMpK7jivD4M=
@ -616,8 +626,9 @@ golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201231184435-2d18734c6014 h1:joucsQqXmyBVxViHCPFjG3hx8JzIFSaym3l3MM/Jsdg=
golang.org/x/sys v0.0.0-20201231184435-2d18734c6014/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210603125802-9665404d3644 h1:CA1DEQ4NdKphKeL70tvsWNdT5oFh1lOjihRcEDROi0I=
golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=

View file

@ -153,6 +153,10 @@ func (s *SyncManager) Start() error {
logUtils.SendInfoToSlack("A non fatal error was reported by the sync process.\n%s", errors.FullTrace(err))
}
}
err = logUtils.CleanupMetadata()
if err != nil {
log.Errorf("something went wrong while trying to clear out the video metadata directory: %s", errors.FullTrace(err))
}
err = blobs_reflector.ReflectAndClean()
if err != nil {
return errors.Prefix("@Nikooo777 something went wrong while reflecting blobs", err)

View file

@ -21,6 +21,7 @@ import (
"github.com/lbryio/ytsync/v5/timing"
logUtils "github.com/lbryio/ytsync/v5/util"
"github.com/lbryio/ytsync/v5/ytapi"
"github.com/vbauerster/mpb/v7"
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/jsonrpc"
@ -985,8 +986,13 @@ func (s *Sync) processVideo(v ytapi.Video) (err error) {
Fee: s.DbChannelData.Fee,
DefaultAccount: da,
}
var pbWg sync.WaitGroup
// passed &wg will be accounted at p.Wait() call
p := mpb.New(mpb.WithWaitGroup(&pbWg))
summary, err := v.Sync(s.daemon, sp, &sv, videoRequiresUpgrade, s.walletMux)
summary, err := v.Sync(s.daemon, sp, &sv, videoRequiresUpgrade, s.walletMux, &pbWg, p)
// Waiting for passed &wg and for all bars to complete and flush
p.Wait()
if err != nil {
return err
}

View file

@ -2,10 +2,12 @@ package sources
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strconv"
@ -14,8 +16,11 @@ import (
"time"
"github.com/abadojack/whatlanggo"
"github.com/lbryio/ytsync/v5/downloader"
"github.com/lbryio/ytsync/v5/downloader/ytdl"
"github.com/lbryio/ytsync/v5/shared"
"github.com/vbauerster/mpb/v7"
"github.com/vbauerster/mpb/v7/decor"
"gopkg.in/vansante/go-ffprobe.v2"
"github.com/lbryio/ytsync/v5/ip_manager"
@ -56,6 +61,8 @@ type YoutubeVideo struct {
walletLock *sync.RWMutex
stopGroup *stop.Group
pool *ip_manager.IPPool
progressBars *mpb.Progress
progressBarWg *sync.WaitGroup
}
var youtubeCategories = map[string]string{
@ -183,6 +190,25 @@ func (v *YoutubeVideo) getAbbrevDescription() string {
}
return description + "\n..." + additionalDescription
}
// checkCookiesIntegrity verifies that cookies.txt exists and is non-empty.
// When the file is present but has zero length, its content is rewritten
// from cookies-backup.txt. Any stat, read or write failure is returned
// wrapped with errors.Err; a missing cookies.txt is reported as an error and
// is not restored.
func checkCookiesIntegrity() error {
	info, err := os.Stat("cookies.txt")
	if err != nil {
		return errors.Err(err)
	}
	// Nothing to do while the cookie jar still has content.
	if info.Size() != 0 {
		return nil
	}

	log.Errorf("cookies were cleared out. Attempting a restore from cookies-backup.txt")
	backup, err := ioutil.ReadFile("cookies-backup.txt")
	if err != nil {
		return errors.Err(err)
	}
	if err = ioutil.WriteFile("cookies.txt", backup, 0644); err != nil {
		return errors.Err(err)
	}
	return nil
}
func (v *YoutubeVideo) download() error {
start := time.Now()
@ -221,21 +247,39 @@ func (v *YoutubeVideo) download() error {
}
}
metadataPath := path.Join(logUtils.GetVideoMetadataDir(), v.id+".info.json")
_, err = os.Stat(metadataPath)
if err != nil {
if os.IsNotExist(err) {
return errors.Err("metadata information for video %s is missing! Why?", v.id)
}
return errors.Err(err)
}
metadata, err := parseVideoMetadata(metadataPath)
err = checkCookiesIntegrity()
if err != nil {
return err
}
ytdlArgs := []string{
"--no-progress",
"-o" + strings.TrimSuffix(v.getFullPath(), ".mp4"),
"--merge-output-format",
"mp4",
"--rm-cache-dir",
"--postprocessor-args",
"-movflags faststart",
"ffmpeg:-movflags faststart",
"--abort-on-unavailable-fragment",
"--fragment-retries",
"1",
"--cookies",
"cookies.txt",
"--load-info-json",
metadataPath,
}
userAgent := []string{"--user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"}
userAgent := []string{"--user-agent", downloader.ChromeUA}
if v.maxVideoSize > 0 {
ytdlArgs = append(ytdlArgs,
"--max-filesize",
@ -272,15 +316,15 @@ func (v *YoutubeVideo) download() error {
ytdlArgs = append(ytdlArgs,
"--source-address",
sourceAddress,
"https://www.youtube.com/watch?v="+v.ID(),
v.ID(),
)
for i := 0; i < len(qualities); i++ {
quality := qualities[i]
argsWithFilters := append(ytdlArgs, "-fbestvideo[ext=mp4][vcodec!*=av01][height<="+quality+"]+bestaudio[ext!=webm][format_id!=258][format_id!=251][format_id!=256][format_id!=327]")
argsWithFilters = append(argsWithFilters, userAgent...)
cmd := exec.Command("youtube-dl", argsWithFilters...)
log.Printf("Running command youtube-dl %s", strings.Join(argsWithFilters, " "))
cmd := exec.Command("yt-dlp", argsWithFilters...)
log.Printf("Running command yt-dlp %s", strings.Join(argsWithFilters, " "))
stderr, err := cmd.StderrPipe()
if err != nil {
@ -294,10 +338,95 @@ func (v *YoutubeVideo) download() error {
if err := cmd.Start(); err != nil {
return errors.Err(err)
}
ticker := time.NewTicker(400 * time.Millisecond)
done := make(chan bool)
v.progressBarWg.Add(1)
go func() {
defer v.progressBarWg.Done()
//get size of the video before downloading
cmd := exec.Command("yt-dlp", append(argsWithFilters, "-s")...)
stdout, err := cmd.StdoutPipe()
if err != nil {
log.Errorf("error while getting final file size: %s", errors.FullTrace(err))
return
}
if err := cmd.Start(); err != nil {
log.Errorf("error while getting final file size: %s", errors.FullTrace(err))
return
}
outLog, _ := ioutil.ReadAll(stdout)
err = cmd.Wait()
output := string(outLog)
parts := strings.Split(output, ": ")
if len(parts) != 3 {
log.Errorf("couldn't parse audio and video parts from the output (%s)", output)
return
}
formats := strings.Split(parts[2], "+")
if len(formats) != 2 {
log.Errorf("couldn't parse formats from the output (%s)", output)
return
}
log.Debugf("'%s'", output)
videoFormat := formats[0]
audioFormat := strings.Replace(formats[1], "\n", "", -1)
videoSize := 0
audioSize := 0
for _, f := range metadata.Formats {
if f.FormatID == videoFormat {
videoSize = f.Filesize
}
if f.FormatID == audioFormat {
audioSize = f.Filesize
}
}
if audioSize+videoSize == 0 {
videoSize = 50 * 1024 * 1024
}
log.Debugf("(%s) - videoSize: %d (%s), audiosize: %d (%s)", v.id, videoSize, videoFormat, audioSize, audioFormat)
bar := v.progressBars.AddBar(int64(videoSize+audioSize),
mpb.PrependDecorators(
decor.CountersKibiByte("% .2f / % .2f "),
// simple name decorator
decor.Name(fmt.Sprintf("id: %s src-ip: (%s)", v.id, sourceAddress)),
// decor.DSyncWidth bit enables column width synchronization
decor.Percentage(decor.WCSyncSpace),
),
mpb.AppendDecorators(
decor.EwmaETA(decor.ET_STYLE_GO, 90),
decor.Name(" ] "),
decor.EwmaSpeed(decor.UnitKiB, "% .2f ", 60),
),
)
for {
select {
case <-done:
bar.Completed()
bar.Abort(true)
return
case <-ticker.C:
size, err := logUtils.DirSize(v.videoDir())
if err != nil {
log.Errorf("error while getting size of download directory: %s", errors.FullTrace(err))
return
}
bar.SetCurrent(size)
bar.DecoratorEwmaUpdate(400 * time.Millisecond)
}
}
}()
errorLog, _ := ioutil.ReadAll(stderr)
outLog, _ := ioutil.ReadAll(stdout)
err = cmd.Wait()
//stop the progress bar
ticker.Stop()
done <- true
if err != nil {
if strings.Contains(err.Error(), "exit status 1") {
if strings.Contains(string(errorLog), "HTTP Error 429") || strings.Contains(string(errorLog), "returned non-zero exit status 8") {
@ -309,7 +438,7 @@ func (v *YoutubeVideo) download() error {
continue //this bypasses the yt throttling IP redistribution... TODO: don't
} else if strings.Contains(string(errorLog), "YouTube said: Unable to extract video data") && !strings.Contains(userAgent[1], "Googlebot") {
i-- //do not lower quality when trying a different user agent
userAgent = []string{"--user-agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
userAgent = []string{downloader.GoogleBotUA}
log.Infof("trying different user agent for video %s", v.ID())
continue
}
@ -350,9 +479,113 @@ func (v *YoutubeVideo) download() error {
return nil
}
func (v *YoutubeVideo) videoDir() string {
return v.dir + "/" + v.id
// ytMetadata describes the JSON document stored in a video's ".info.json"
// metadata file. parseVideoMetadata decodes such a file into this struct.
// Field names map to JSON keys through the struct tags; keys without a
// matching field are ignored by encoding/json.
type ytMetadata struct {
ID string `json:"id"`
Title string `json:"title"`
// Formats lists the formats described by the metadata file. download()
// matches FormatID against the selected video and audio format ids and adds
// up Filesize to size its progress bar.
Formats []struct {
Asr int `json:"asr"`
Filesize int `json:"filesize"`
FormatID string `json:"format_id"`
FormatNote string `json:"format_note"`
Fps interface{} `json:"fps"`
Height interface{} `json:"height"`
Quality int `json:"quality"`
Tbr float64 `json:"tbr"`
URL string `json:"url"`
Width interface{} `json:"width"`
Ext string `json:"ext"`
Vcodec string `json:"vcodec"`
Acodec string `json:"acodec"`
Abr float64 `json:"abr,omitempty"`
DownloaderOptions struct {
HTTPChunkSize int `json:"http_chunk_size"`
} `json:"downloader_options,omitempty"`
Container string `json:"container,omitempty"`
Format string `json:"format"`
Protocol string `json:"protocol"`
HTTPHeaders struct {
UserAgent string `json:"User-Agent"`
AcceptCharset string `json:"Accept-Charset"`
Accept string `json:"Accept"`
AcceptEncoding string `json:"Accept-Encoding"`
AcceptLanguage string `json:"Accept-Language"`
} `json:"http_headers"`
Vbr float64 `json:"vbr,omitempty"`
} `json:"formats"`
Thumbnails []struct {
Height int `json:"height"`
URL string `json:"url"`
Width int `json:"width"`
Resolution string `json:"resolution"`
ID string `json:"id"`
} `json:"thumbnails"`
Description string `json:"description"`
UploadDate string `json:"upload_date"`
Uploader string `json:"uploader"`
UploaderID string `json:"uploader_id"`
UploaderURL string `json:"uploader_url"`
ChannelID string `json:"channel_id"`
ChannelURL string `json:"channel_url"`
Duration int `json:"duration"`
ViewCount int `json:"view_count"`
AverageRating float64 `json:"average_rating"`
AgeLimit int `json:"age_limit"`
WebpageURL string `json:"webpage_url"`
Categories []string `json:"categories"`
Tags []interface{} `json:"tags"`
IsLive interface{} `json:"is_live"`
LikeCount int `json:"like_count"`
DislikeCount int `json:"dislike_count"`
Channel string `json:"channel"`
Extractor string `json:"extractor"`
WebpageURLBasename string `json:"webpage_url_basename"`
ExtractorKey string `json:"extractor_key"`
Playlist interface{} `json:"playlist"`
PlaylistIndex interface{} `json:"playlist_index"`
Thumbnail string `json:"thumbnail"`
DisplayID string `json:"display_id"`
Format string `json:"format"`
FormatID string `json:"format_id"`
Width int `json:"width"`
Height int `json:"height"`
Resolution interface{} `json:"resolution"`
// NOTE(review): typed as int here while the per-format Fps above is
// interface{}; a fractional value in the JSON would make json.Unmarshal
// return an error for the whole document. Confirm the producer only emits
// integers for this key.
Fps int `json:"fps"`
Vcodec string `json:"vcodec"`
Vbr float64 `json:"vbr"`
StretchedRatio interface{} `json:"stretched_ratio"`
Acodec string `json:"acodec"`
Abr float64 `json:"abr"`
Ext string `json:"ext"`
Fulltitle string `json:"fulltitle"`
Filename string `json:"_filename"`
}
// parseVideoMetadata reads the JSON metadata file at metadataPath and decodes
// it into a ytMetadata value.
//
// It returns an error (wrapped with errors.Err) when the file cannot be read
// or when its content is not valid JSON for ytMetadata. The returned pointer
// is nil whenever the error is non-nil.
func parseVideoMetadata(metadataPath string) (*ytMetadata, error) {
	// ReadFile opens, reads and closes the file in one step. A failed read is
	// reported here instead of being discarded and resurfacing later as a
	// misleading JSON decoding error.
	raw, err := ioutil.ReadFile(metadataPath)
	if err != nil {
		return nil, errors.Err(err)
	}

	var m ytMetadata
	if err := json.Unmarshal(raw, &m); err != nil {
		return nil, errors.Err(err)
	}
	return &m, nil
}
// videoDir returns the directory that holds this video's downloaded files:
// the video's id joined onto the base directory v.dir.
func (v *YoutubeVideo) videoDir() string {
	// This is a filesystem path, so it is joined with filepath (OS-specific
	// separator) rather than path (always forward slashes). The result is the
	// same on Unix-like systems.
	return filepath.Join(v.dir, v.id)
}
func (v *YoutubeVideo) getDownloadedPath() (string, error) {
files, err := ioutil.ReadDir(v.videoDir())
log.Infoln(v.videoDir())
@ -367,7 +600,7 @@ func (v *YoutubeVideo) getDownloadedPath() (string, error) {
continue
}
if strings.Contains(v.getFullPath(), strings.TrimSuffix(f.Name(), filepath.Ext(f.Name()))) {
return v.videoDir() + "/" + f.Name(), nil
return path.Join(v.videoDir(), f.Name()), nil
}
}
return "", errors.Err("could not find any downloaded videos")
@ -466,11 +699,13 @@ type SyncParams struct {
DefaultAccount string
}
func (v *YoutubeVideo) Sync(daemon *jsonrpc.Client, params SyncParams, existingVideoData *sdk.SyncedVideo, reprocess bool, walletLock *sync.RWMutex) (*SyncSummary, error) {
func (v *YoutubeVideo) Sync(daemon *jsonrpc.Client, params SyncParams, existingVideoData *sdk.SyncedVideo, reprocess bool, walletLock *sync.RWMutex, pbWg *sync.WaitGroup, pb *mpb.Progress) (*SyncSummary, error) {
v.maxVideoSize = int64(params.MaxVideoSize)
v.maxVideoLength = params.MaxVideoLength
v.lbryChannelID = params.ChannelID
v.walletLock = walletLock
v.progressBars = pb
v.progressBarWg = pbWg
if reprocess && existingVideoData != nil && existingVideoData.Published {
summary, err := v.reprocess(daemon, params, existingVideoData)
return summary, errors.Prefix("upgrade failed", err)

View file

@ -261,6 +261,27 @@ func CleanupLbrynet() error {
return nil
}
// metadataDirInitialized records whether GetVideoMetadataDir has already
// attempted to create the metadata directory. CleanupMetadata resets it.
var metadataDirInitialized = false

// GetVideoMetadataDir returns the relative path of the directory used for
// video metadata files. On the first call (and on the first call after the
// flag has been reset) it also tries to create the directory; a creation
// failure is deliberately ignored and the path is returned regardless.
func GetVideoMetadataDir() string {
	const dir = "./videos_metadata"
	if metadataDirInitialized {
		return dir
	}
	metadataDirInitialized = true
	// Best effort: callers only get the path back, so there is nowhere to
	// report an error to.
	_ = os.MkdirAll(dir, 0755)
	return dir
}
// CleanupMetadata removes the video metadata directory and everything in it.
// On success it clears metadataDirInitialized so that the next call to
// GetVideoMetadataDir creates the directory again. A removal failure is
// returned wrapped with errors.Err and leaves the flag untouched.
func CleanupMetadata() error {
	if err := os.RemoveAll(GetVideoMetadataDir()); err != nil {
		return errors.Err(err)
	}
	metadataDirInitialized = false
	return nil
}
func SleepUntilQuotaReset() {
PST, _ := time.LoadLocation("America/Los_Angeles")
t := time.Now().In(PST)
@ -384,3 +405,17 @@ func GetBlockchainDirectoryName() string {
}
return ledger
}
// DirSize walks the file tree rooted at path and returns the combined size in
// bytes of every non-directory entry it visits.
//
// The walk stops at the first error it encounters; that error is returned
// together with the size accumulated up to that point.
func DirSize(path string) (int64, error) {
	var total int64
	walkErr := filepath.Walk(path, func(_ string, entry os.FileInfo, err error) error {
		switch {
		case err != nil:
			// Propagate the failure; this aborts the walk.
			return err
		case entry.IsDir():
			// Directories contribute nothing themselves, only their files do.
			return nil
		}
		total += entry.Size()
		return nil
	})
	return total, walkErr
}

View file

@ -15,6 +15,7 @@ import (
"github.com/lbryio/ytsync/v5/shared"
logUtils "github.com/lbryio/ytsync/v5/util"
"github.com/vbauerster/mpb/v7"
"github.com/lbryio/ytsync/v5/downloader/ytdl"
@ -38,7 +39,7 @@ type Video interface {
IDAndNum() string
PlaylistPosition() int
PublishedAt() time.Time
Sync(*jsonrpc.Client, sources.SyncParams, *sdk.SyncedVideo, bool, *sync.RWMutex) (*sources.SyncSummary, error)
Sync(*jsonrpc.Client, sources.SyncParams, *sdk.SyncedVideo, bool, *sync.RWMutex, *sync.WaitGroup, *mpb.Progress) (*sources.SyncSummary, error)
}
type byPublishedAt []Video
@ -129,7 +130,7 @@ func CountVideosInChannel(channelID string) (int, error) {
req, _ := http.NewRequest("GET", url, nil)
req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36")
req.Header.Add("User-Agent", downloader.ChromeUA)
req.Header.Add("Accept", "*/*")
req.Header.Add("Host", "socialblade.com")
@ -173,7 +174,7 @@ func ChannelInfo(channelID string) (*YoutubeStatsResponse, error) {
req, _ := http.NewRequest("GET", url, nil)
req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36")
req.Header.Add("User-Agent", downloader.ChromeUA)
req.Header.Add("Accept", "*/*")
res, err := http.DefaultClient.Do(req)