lbry.go/cmd/ytsync.go

499 lines
12 KiB
Go
Raw Normal View History

2017-09-27 20:10:41 +02:00
package cmd
2017-09-15 13:58:54 +02:00
2017-09-16 00:13:13 +02:00
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
2017-09-27 01:48:10 +02:00
"sort"
"strconv"
2017-09-16 00:13:13 +02:00
"strings"
"sync"
"sync/atomic"
2017-09-16 00:13:13 +02:00
"time"
2017-09-15 13:58:54 +02:00
2017-09-16 00:13:13 +02:00
"github.com/lbryio/lbry.go/jsonrpc"
2017-09-19 20:14:52 +02:00
"github.com/garyburd/redigo/redis"
2017-09-16 00:13:13 +02:00
ytdl "github.com/kkdai/youtube"
log "github.com/sirupsen/logrus"
2017-09-27 20:10:41 +02:00
"github.com/spf13/cobra"
2017-09-16 00:13:13 +02:00
"google.golang.org/api/googleapi/transport"
"google.golang.org/api/youtube/v3"
)
2017-09-27 20:10:41 +02:00
func init() {
var ytSyncCmd = &cobra.Command{
Use: "ytsync <youtube_api_key> <youtube_channel_id> [<lbry_channel_name>]",
Args: cobra.RangeArgs(2, 3),
Short: "Publish youtube channel into LBRY network.",
Run: ytsync,
}
ytSyncCmd.Flags().BoolVar(&stopOnError, "stop-on-error", false, "If a publish fails, stop all publishing and exit")
ytSyncCmd.Flags().IntVar(&maxTries, "max-tries", defaultMaxTries, "Number of times to try a publish that fails")
2017-09-27 20:10:41 +02:00
RootCmd.AddCommand(ytSyncCmd)
}
2017-09-16 00:13:13 +02:00
const (
	// concurrentVideos is the number of worker goroutines started to process
	// videos taken from the queue.
	concurrentVideos = 1

	// redisHashKey names the Redis hash that records, per YouTube video ID,
	// whether the video has been published.
	redisHashKey = "ytsync"
	// redisSyncedVal is the value stored in that hash for a published video.
	redisSyncedVal = "t"

	// defaultMaxTries is the default of the --max-tries flag: one attempt,
	// i.e. no retries.
	defaultMaxTries = 1
)
// video carries the metadata of one YouTube upload that is needed to download
// it and publish it.
type video struct {
	// id is the YouTube video ID.
	id string
	// channelID is the YouTube channel the video was listed from.
	channelID string
	// channelTitle is the channel title taken from the playlist item snippet.
	channelTitle string
	// title is the video title taken from the playlist item snippet.
	title string
	// description is the video description taken from the playlist item snippet.
	description string
	// playlistPosition is the item's position within the uploads playlist.
	playlistPosition int64
	// publishedAt is the parsed publication timestamp of the playlist item.
	publishedAt time.Time
}
2017-09-19 20:14:52 +02:00
// getFilename returns the path of the video's .mp4 file inside the
// package-level videoDirectory.
func (v video) getFilename() string {
	return fmt.Sprintf("%s/%s.mp4", videoDirectory, v.id)
}
2017-09-27 01:48:10 +02:00
// sorting videos
// byPublishedAt implements sort.Interface, ordering videos from the earliest
// publishedAt to the latest.
type byPublishedAt []video

// Len reports the number of videos.
func (vs byPublishedAt) Len() int {
	return len(vs)
}

// Swap exchanges the videos at positions x and y.
func (vs byPublishedAt) Swap(x, y int) {
	vs[x], vs[y] = vs[y], vs[x]
}

// Less reports whether the video at x was published before the video at y.
func (vs byPublishedAt) Less(x, y int) bool {
	earlier, later := vs[x].publishedAt, vs[y].publishedAt
	return earlier.Before(later)
}
// byPlaylistPosition implements sort.Interface, ordering videos by ascending
// playlistPosition.
type byPlaylistPosition []video

// Len reports the number of videos.
func (vs byPlaylistPosition) Len() int {
	return len(vs)
}

// Swap exchanges the videos at positions x and y.
func (vs byPlaylistPosition) Swap(x, y int) {
	vs[x], vs[y] = vs[y], vs[x]
}

// Less reports whether the video at x has a lower playlist position than the
// video at y.
func (vs byPlaylistPosition) Less(x, y int) bool {
	left, right := vs[x].playlistPosition, vs[y].playlistPosition
	return left < right
}
2017-09-16 00:13:13 +02:00
var (
2017-09-27 20:10:41 +02:00
ytAPIKey string
2017-09-16 00:13:13 +02:00
channelID string
lbryChannelName string
stopOnError bool
maxTries int
2017-09-27 20:10:41 +02:00
daemon *jsonrpc.Client
claimAddress string
videoDirectory string
redisPool *redis.Pool
2017-09-16 00:13:13 +02:00
)
2017-09-27 20:10:41 +02:00
func ytsync(cmd *cobra.Command, args []string) {
2017-09-16 00:13:13 +02:00
var err error
2017-09-27 01:48:10 +02:00
2017-09-27 20:10:41 +02:00
ytAPIKey = args[0]
channelID = args[1]
if len(args) > 2 {
lbryChannelName = args[2]
2017-09-16 00:13:13 +02:00
}
if stopOnError && maxTries != defaultMaxTries {
log.Errorln("--stop-on-error and --max-tries are mutually exclusive")
return
}
if maxTries < 1 {
log.Errorln("setting --max-tries less than 1 doesn't make sense")
return
}
2017-09-19 20:14:52 +02:00
redisPool = &redis.Pool{
MaxIdle: 3,
IdleTimeout: 5 * time.Minute,
Dial: func() (redis.Conn, error) { return redis.Dial("tcp", ":6379") },
TestOnBorrow: func(c redis.Conn, t time.Time) error {
if time.Since(t) < time.Minute {
return nil
}
_, err := c.Do("PING")
return err
},
}
2017-09-16 00:13:13 +02:00
var wg sync.WaitGroup
videoQueue := make(chan video)
stopEnqueuing := make(chan struct{})
sendStopEnqueuing := sync.Once{}
var videoErrored atomic.Value
videoErrored.Store(false)
if stopOnError {
log.Println("Will stop publishing if an error is detected")
}
2017-09-16 00:13:13 +02:00
daemon = jsonrpc.NewClient("")
videoDirectory, err = ioutil.TempDir("", "ytsync")
if err != nil {
2017-09-22 15:24:43 +02:00
panic(err)
2017-09-16 00:13:13 +02:00
}
if lbryChannelName != "" {
err = ensureChannelOwnership()
if err != nil {
2017-09-22 15:24:43 +02:00
panic(err)
2017-09-16 00:13:13 +02:00
}
}
addresses, err := daemon.WalletList()
if err != nil {
2017-09-22 15:24:43 +02:00
panic(err)
2017-09-16 00:13:13 +02:00
} else if addresses == nil || len(*addresses) == 0 {
2017-09-27 01:48:10 +02:00
panic(fmt.Errorf("could not find an address in wallet"))
2017-09-16 00:13:13 +02:00
}
claimAddress = (*addresses)[0]
if claimAddress == "" {
2017-09-27 01:48:10 +02:00
panic(fmt.Errorf("found blank claim address"))
2017-09-16 00:13:13 +02:00
}
for i := 0; i < concurrentVideos; i++ {
go func() {
wg.Add(1)
defer wg.Done()
for {
v, more := <-videoQueue
if !more {
return
}
if stopOnError && videoErrored.Load().(bool) {
log.Println("Video errored. Exiting")
return
}
tryCount := 0
for {
tryCount++
err := processVideo(v)
if err != nil {
log.Errorln("error processing video: " + err.Error())
if stopOnError {
videoErrored.Store(true)
sendStopEnqueuing.Do(func() {
stopEnqueuing <- struct{}{}
})
} else if maxTries != defaultMaxTries {
if strings.Contains(err.Error(), "non 200 status code received") ||
strings.Contains(err.Error(), " reason: 'This video contains content from") {
log.Println("This error should not be retried at all")
} else if tryCount >= maxTries {
log.Println("Video failed after " + strconv.Itoa(maxTries) + " retries, moving on")
} else {
log.Println("Retrying")
continue
}
}
}
break
2017-09-16 00:13:13 +02:00
}
}
}()
}
err = enqueueVideosFromChannel(channelID, &videoQueue, &stopEnqueuing)
2017-09-16 00:13:13 +02:00
if err != nil {
2017-09-22 15:24:43 +02:00
panic(err)
2017-09-16 00:13:13 +02:00
}
close(videoQueue)
wg.Wait()
}
func ensureChannelOwnership() error {
channels, err := daemon.ChannelListMine()
if err != nil {
return err
} else if channels == nil {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("no channels")
2017-09-16 00:13:13 +02:00
}
for _, channel := range *channels {
if channel.Name == lbryChannelName {
return nil
}
}
resolveResp, err := daemon.Resolve(lbryChannelName)
if err != nil {
return err
}
channelNotFound := (*resolveResp)[lbryChannelName].Error == nil || strings.Contains(*((*resolveResp)[lbryChannelName].Error), "cannot be resolved")
if !channelNotFound {
return fmt.Errorf("Channel exists and we don't own it. Pick another channel.")
}
_, err = daemon.ChannelNew(lbryChannelName, 0.01)
if err != nil {
return err
}
// niko's code says "unfortunately the queues in the daemon are not yet merged so we must give it some time for the channel to go through"
wait := 15 * time.Second
log.Println("Waiting " + wait.String() + " for channel claim to go through")
time.Sleep(wait)
return nil
}
func enqueueVideosFromChannel(channelID string, videoChan *chan video, stopEnqueuing *chan struct{}) error {
2017-09-16 00:13:13 +02:00
client := &http.Client{
Transport: &transport.APIKey{Key: ytAPIKey},
}
service, err := youtube.New(client)
if err != nil {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("error creating YouTube service: %v", err)
2017-09-16 00:13:13 +02:00
}
response, err := service.Channels.List("contentDetails").Id(channelID).Do()
if err != nil {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("error getting channels: %v", err)
2017-09-16 00:13:13 +02:00
}
if len(response.Items) < 1 {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("youtube channel not found")
2017-09-16 00:13:13 +02:00
}
if response.Items[0].ContentDetails.RelatedPlaylists == nil {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("no related playlists")
2017-09-16 00:13:13 +02:00
}
playlistID := response.Items[0].ContentDetails.RelatedPlaylists.Uploads
if playlistID == "" {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("no channel playlist")
2017-09-16 00:13:13 +02:00
}
2017-09-27 01:48:10 +02:00
videos := []video{}
2017-09-16 00:13:13 +02:00
2017-09-27 01:48:10 +02:00
nextPageToken := ""
for {
req := service.PlaylistItems.List("snippet").
PlaylistId(playlistID).
MaxResults(50).
PageToken(nextPageToken)
2017-09-16 00:13:13 +02:00
playlistResponse, err := req.Do()
if err != nil {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("error getting playlist items: %v", err)
2017-09-16 00:13:13 +02:00
}
if len(playlistResponse.Items) < 1 {
2017-09-27 01:48:10 +02:00
return fmt.Errorf("playlist items not found")
2017-09-16 00:13:13 +02:00
}
for _, item := range playlistResponse.Items {
// todo: there's thumbnail info here. why did we need lambda???
2017-09-27 01:48:10 +02:00
publishedAt, err := time.Parse(time.RFC3339Nano, item.Snippet.PublishedAt)
if err != nil {
return fmt.Errorf("failed to parse time: %v", err.Error())
2017-09-16 00:13:13 +02:00
}
2017-09-27 01:48:10 +02:00
// normally we'd send the video into the channel here, but youtube api doesn't have sorting
// so we have to get ALL the videos, then sort them, then send them in
videos = append(videos, video{
id: item.Snippet.ResourceId.VideoId,
channelID: channelID,
title: item.Snippet.Title,
description: item.Snippet.Description,
channelTitle: item.Snippet.ChannelTitle,
playlistPosition: item.Snippet.Position,
publishedAt: publishedAt,
})
2017-09-16 00:13:13 +02:00
}
2017-09-27 01:48:10 +02:00
log.Infoln("Got info for " + strconv.Itoa(len(videos)) + " videos from youtube API")
2017-09-16 00:13:13 +02:00
nextPageToken = playlistResponse.NextPageToken
2017-09-27 01:48:10 +02:00
if nextPageToken == "" {
break
}
}
sort.Sort(byPublishedAt(videos))
//or sort.Sort(sort.Reverse(byPlaylistPosition(videos)))
for _, v := range videos {
select {
case *videoChan <- v:
case <-*stopEnqueuing:
return nil
}
2017-09-16 00:13:13 +02:00
}
return nil
}
func processVideo(v video) error {
log.Println("========================================")
log.Println("Processing " + v.id + " (" + strconv.Itoa(int(v.playlistPosition)) + " in channel)")
2017-09-16 00:13:13 +02:00
2017-09-19 20:14:52 +02:00
conn := redisPool.Get()
defer conn.Close()
alreadyPublished, err := redis.String(conn.Do("HGET", redisHashKey, v.id))
if err != nil && err != redis.ErrNil {
return fmt.Errorf("redis error: %s", err.Error())
}
if alreadyPublished == redisSyncedVal {
log.Println(v.id + " already published")
return nil
}
2017-09-16 00:13:13 +02:00
//download and thumbnail can be done in parallel
2017-09-19 20:14:52 +02:00
err = downloadVideo(v)
2017-09-16 00:13:13 +02:00
if err != nil {
return fmt.Errorf("download error: %s", err.Error())
}
err = triggerThumbnailSave(v.id)
if err != nil {
return fmt.Errorf("thumbnail error: %s", err.Error())
}
2017-09-19 20:14:52 +02:00
err = publish(v, conn)
2017-09-16 00:13:13 +02:00
if err != nil {
return fmt.Errorf("publish error: %s", err.Error())
}
return nil
}
2017-09-19 20:14:52 +02:00
func downloadVideo(v video) error {
2017-09-16 00:13:13 +02:00
verbose := false
2017-09-19 20:14:52 +02:00
videoPath := v.getFilename()
2017-09-16 00:13:13 +02:00
_, err := os.Stat(videoPath)
if err != nil && !os.IsNotExist(err) {
return err
} else if err == nil {
2017-09-19 20:14:52 +02:00
log.Println(v.id + " already exists at " + videoPath)
2017-09-16 00:13:13 +02:00
return nil
}
downloader := ytdl.NewYoutube(verbose)
2017-09-19 20:14:52 +02:00
err = downloader.DecodeURL("https://www.youtube.com/watch?v=" + v.id)
2017-09-16 00:13:13 +02:00
if err != nil {
return err
}
err = downloader.StartDownload(videoPath)
if err != nil {
return err
}
2017-09-19 20:14:52 +02:00
log.Debugln("Downloaded " + v.id)
2017-09-16 00:13:13 +02:00
return nil
}
// triggerThumbnailSave asks the thumbnail service to create a thumbnail for
// the given YouTube video ID by sending a PUT request with the JSON body
// {"videoid": videoID}.
//
// It returns an error when the request cannot be built or sent, when the
// response body cannot be read or is not valid JSON, or when the decoded
// response carries a non-zero "error" value.
func triggerThumbnailSave(videoID string) error {
	client := &http.Client{Timeout: 30 * time.Second}

	params, err := json.Marshal(map[string]string{"videoid": videoID})
	if err != nil {
		return err
	}

	request, err := http.NewRequest(http.MethodPut, "https://jgp4g1qoud.execute-api.us-east-1.amazonaws.com/prod/thumbnail", bytes.NewBuffer(params))
	if err != nil {
		return err
	}

	response, err := client.Do(request)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	contents, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return err
	}

	// The fields must be exported: encoding/json never fills unexported
	// fields, which would leave Error at 0 and hide every service failure.
	var decoded struct {
		Error   int    `json:"error"`
		URL     string `json:"url,omitempty"`
		Message string `json:"message,omitempty"`
	}
	err = json.Unmarshal(contents, &decoded)
	if err != nil {
		return err
	}

	if decoded.Error != 0 {
		// %s keeps a '%' inside the service message from being treated as a verb.
		return fmt.Errorf("error creating thumbnail: %s", decoded.Message)
	}

	log.Debugln("Created thumbnail for " + videoID)
	return nil
}
// strPtr returns a pointer to a fresh copy of s, for filling optional
// *string fields from string expressions.
func strPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}
// claimNameSeparators matches every run of characters other than ASCII
// letters and digits. It is compiled once instead of on every call.
var claimNameSeparators = regexp.MustCompile(`[^a-zA-Z0-9]+`)

// titleToClaimName turns a video title into a claim name of at most 40
// characters made of lowercase ASCII letters, digits and single dashes.
//
// Runs of other characters become one dash, leading and trailing dashes are
// dropped, and the result is lowercased. Whole dash-separated chunks are then
// appended while the name stays within the limit. When the next chunk does not
// fit, it is cut mid-chunk only if the name so far is shorter than 20
// characters; otherwise the name ends at the previous chunk. A first chunk
// longer than the limit is simply truncated.
//
// A title without any ASCII letter or digit yields the empty string.
func titleToClaimName(name string) string {
	const (
		maxLen = 40
		// Below this length a name is considered too short to stop at a chunk
		// boundary, so the overflowing chunk is truncated instead of dropped.
		minLenBeforeChunkCut = 20
	)

	dashed := claimNameSeparators.ReplaceAllString(name, "-")
	chunks := strings.Split(strings.ToLower(strings.Trim(dashed, "-")), "-")

	name = chunks[0]
	if len(name) > maxLen {
		return name[:maxLen]
	}

	for _, chunk := range chunks[1:] {
		candidate := name + "-" + chunk
		if len(candidate) > maxLen {
			if len(name) < minLenBeforeChunkCut {
				name = candidate[:maxLen]
			}
			break
		}
		name = candidate
	}

	return name
}
2017-09-19 20:14:52 +02:00
// limitDescription trims surrounding whitespace from description and caps it
// at 10 lines. A description with more lines is cut after the tenth line and
// gets a final "..." line appended; shorter descriptions are returned as
// trimmed.
func limitDescription(description string) string {
	const maxLines = 10

	description = strings.TrimSpace(description)

	// Splitting into at most maxLines+1 parts is enough to tell whether the
	// text is too long, and the first maxLines parts are still whole lines.
	lines := strings.SplitN(description, "\n", maxLines+1)
	if len(lines) <= maxLines {
		return description
	}

	return strings.Join(lines[:maxLines], "\n") + "\n..."
}
2017-09-16 00:13:13 +02:00
2017-09-19 20:14:52 +02:00
func publish(v video, conn redis.Conn) error {
2017-09-16 00:13:13 +02:00
options := jsonrpc.PublishOptions{
Title: &v.title,
Author: &v.channelTitle,
2017-09-27 01:48:10 +02:00
Description: strPtr(limitDescription(v.description) + "\nhttps://www.youtube.com/watch?v=" + v.id),
2017-09-16 00:13:13 +02:00
Language: strPtr("en"),
ClaimAddress: &claimAddress,
Thumbnail: strPtr("http://berk.ninja/thumbnails/" + v.id),
2017-09-19 20:14:52 +02:00
License: strPtr("Copyrighted (contact author)"),
2017-09-16 00:13:13 +02:00
}
if lbryChannelName != "" {
options.ChannelName = &lbryChannelName
}
2017-09-19 20:14:52 +02:00
_, err := daemon.Publish(titleToClaimName(v.title), v.getFilename(), 0.01, options)
2017-09-16 00:13:13 +02:00
if err != nil {
return err
}
2017-09-19 20:14:52 +02:00
_, err = redis.Bool(conn.Do("HSET", redisHashKey, v.id, redisSyncedVal))
if err != nil {
return fmt.Errorf("redis error: %s", err.Error())
}
2017-09-16 00:13:13 +02:00
return nil
2017-09-15 13:58:54 +02:00
}