lbry.go/ytsync/ytsync.go

537 lines
12 KiB
Go
Raw Normal View History

2017-10-11 04:02:16 +02:00
package ytsync
import (
2018-02-13 18:47:05 +01:00
"bufio"
"encoding/csv"
"encoding/json"
2018-02-13 18:47:05 +01:00
"io"
2017-10-11 04:02:16 +02:00
"io/ioutil"
"net/http"
"os"
"os/exec"
"os/signal"
2017-10-11 04:02:16 +02:00
"sort"
"strings"
"sync"
"syscall"
2017-10-11 04:02:16 +02:00
"time"
"github.com/lbryio/lbry.go/errors"
2017-10-11 04:02:16 +02:00
"github.com/lbryio/lbry.go/jsonrpc"
"github.com/lbryio/lbry.go/stop"
"github.com/lbryio/lbry.go/ytsync/redisdb"
"github.com/lbryio/lbry.go/ytsync/sources"
2017-10-11 04:02:16 +02:00
"fmt"
"github.com/lbryio/lbry.go/util"
2018-06-13 18:44:09 +02:00
"github.com/mitchellh/go-ps"
2017-10-11 04:02:16 +02:00
log "github.com/sirupsen/logrus"
"google.golang.org/api/googleapi/transport"
"google.golang.org/api/youtube/v3"
)
const (
2017-10-18 02:35:19 +02:00
channelClaimAmount = 0.01
publishAmount = 0.01
2017-10-11 04:02:16 +02:00
)
type video interface {
ID() string
IDAndNum() string
PlaylistPosition() int
PublishedAt() time.Time
Sync(*jsonrpc.Client, string, float64, string) error
}
// sorting videos
type byPublishedAt []video
func (a byPublishedAt) Len() int { return len(a) }
func (a byPublishedAt) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byPublishedAt) Less(i, j int) bool { return a[i].PublishedAt().Before(a[j].PublishedAt()) }
2017-10-11 04:02:16 +02:00
// Sync stores the options that control how syncing happens
type Sync struct {
2017-11-06 22:42:52 +01:00
YoutubeAPIKey string
YoutubeChannelID string
LbryChannelName string
StopOnError bool
MaxTries int
ConcurrentVideos int
TakeOverExistingChannel bool
Refill int
2017-10-11 04:02:16 +02:00
daemon *jsonrpc.Client
claimAddress string
videoDirectory string
db *redisdb.DB
2017-10-11 04:02:16 +02:00
grp *stop.Group
wg sync.WaitGroup
queue chan video
}
func (s *Sync) FullCycle() error {
var err error
2017-11-03 16:46:19 +01:00
if os.Getenv("HOME") == "" {
return errors.Err("no $HOME env var found")
2017-11-03 16:46:19 +01:00
}
if s.YoutubeChannelID == "" {
channelID, err := getChannelIDFromFile(s.LbryChannelName)
if err != nil {
return err
}
s.YoutubeChannelID = channelID
}
2017-12-08 21:11:44 +01:00
defaultWalletDir := os.Getenv("HOME") + "/.lbryum/wallets/default_wallet"
walletBackupDir := os.Getenv("HOME") + "/wallets/" + strings.Replace(s.LbryChannelName, "@", "", 1)
2018-02-13 18:47:05 +01:00
if _, err := os.Stat(defaultWalletDir); !os.IsNotExist(err) {
return errors.Err("default_wallet already exists")
2018-02-13 18:47:05 +01:00
}
2017-11-03 16:46:19 +01:00
2018-02-13 18:47:05 +01:00
if _, err = os.Stat(walletBackupDir); !os.IsNotExist(err) {
2017-12-08 21:11:44 +01:00
err = os.Rename(walletBackupDir, defaultWalletDir)
2017-11-03 16:46:19 +01:00
if err != nil {
return errors.Wrap(err, 0)
}
log.Println("Continuing previous upload")
}
defer func() {
log.Printf("Stopping daemon")
shutdownErr := stopDaemonViaSystemd()
if shutdownErr != nil {
logShutdownError(shutdownErr)
} else {
// the cli will return long before the daemon effectively stops. we must observe the processes running
// before moving the wallet
2018-04-24 20:50:01 +02:00
var waitTimeout time.Duration = 60 * 6
processDeathError := waitForDaemonProcess(waitTimeout)
if processDeathError != nil {
logShutdownError(processDeathError)
} else {
walletErr := os.Rename(defaultWalletDir, walletBackupDir)
if walletErr != nil {
log.Errorf("error moving wallet to backup dir: %v", walletErr)
}
}
2017-11-03 16:46:19 +01:00
}
}()
s.videoDirectory, err = ioutil.TempDir("", "ytsync")
if err != nil {
return errors.Wrap(err, 0)
}
s.db = redisdb.New()
s.grp = stop.New()
s.queue = make(chan video)
interruptChan := make(chan os.Signal, 1)
signal.Notify(interruptChan, os.Interrupt, syscall.SIGTERM)
go func() {
<-interruptChan
log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)")
s.grp.Stop()
}()
log.Printf("Starting daemon")
err = startDaemonViaSystemd()
if err != nil {
return err
}
log.Infoln("Waiting for daemon to finish starting...")
s.daemon = jsonrpc.NewClient("")
s.daemon.SetRPCTimeout(5 * time.Minute)
WaitForDaemonStart:
for {
select {
case <-s.grp.Ch():
return nil
default:
_, err := s.daemon.WalletBalance()
if err == nil {
break WaitForDaemonStart
}
time.Sleep(5 * time.Second)
}
}
err = s.doSync()
if err != nil {
return err
} else {
// wait for reflection to finish???
wait := 15 * time.Second // should bump this up to a few min, but keeping it low for testing
log.Println("Waiting " + wait.String() + " to finish reflecting everything")
time.Sleep(wait)
}
return nil
}
func logShutdownError(shutdownErr error) {
util.SendToSlackError("error shutting down daemon: %v", shutdownErr)
util.SendToSlackError("WALLET HAS NOT BEEN MOVED TO THE WALLET BACKUP DIR")
}
func (s *Sync) doSync() error {
2017-10-11 19:13:47 +02:00
var err error
err = s.walletSetup()
2017-10-11 19:13:47 +02:00
if err != nil {
return err
}
2017-10-11 04:02:16 +02:00
if s.StopOnError {
log.Println("Will stop publishing if an error is detected")
}
for i := 0; i < s.ConcurrentVideos; i++ {
go s.startWorker(i)
2017-10-11 04:02:16 +02:00
}
2018-02-13 18:47:05 +01:00
if s.LbryChannelName == "@UCBerkeley" {
err = s.enqueueUCBVideos()
} else {
err = s.enqueueYoutubeVideos()
}
close(s.queue)
s.wg.Wait()
2017-10-11 04:02:16 +02:00
return err
}
func (s *Sync) startWorker(workerNum int) {
s.wg.Add(1)
defer s.wg.Done()
var v video
var more bool
for {
select {
case <-s.grp.Ch():
log.Printf("Stopping worker %d", workerNum)
return
default:
}
select {
case v, more = <-s.queue:
if !more {
return
}
case <-s.grp.Ch():
log.Printf("Stopping worker %d", workerNum)
return
2017-10-11 04:02:16 +02:00
}
2018-02-13 18:47:05 +01:00
log.Println("================================================================================")
2017-10-11 04:02:16 +02:00
tryCount := 0
for {
tryCount++
err := s.processVideo(v)
2017-10-11 04:02:16 +02:00
if err != nil {
logMsg := fmt.Sprintf("error processing video: " + err.Error())
log.Errorln(logMsg)
fatalErrors := []string{
":5279: read: connection reset by peer",
"net/http: request canceled (Client.Timeout exceeded while awaiting headers)",
}
if util.InSlice(err.Error(), fatalErrors) || s.StopOnError {
s.grp.Stop()
} else if s.MaxTries > 1 {
errorsNoRetry := []string{
"non 200 status code received",
" reason: 'This video contains content from",
"dont know which claim to update",
"uploader has not made this video available in your country",
"download error: AccessDenied: Access Denied",
"Playback on other websites has been disabled by the video owner",
"Error in daemon: Cannot publish empty file",
}
if util.InSlice(err.Error(), errorsNoRetry) {
log.Println("This error should not be retried at all")
} else if tryCount < s.MaxTries {
if strings.Contains(err.Error(), "The transaction was rejected by network rules.(258: txn-mempool-conflict)") ||
strings.Contains(err.Error(), "failed: Not enough funds") ||
strings.Contains(err.Error(), "Error in daemon: Insufficient funds, please deposit additional LBC") {
log.Println("waiting for a block and refilling addresses before retrying")
err = s.walletSetup()
if err != nil {
s.stop.Stop()
util.SendToSlackError("Failed to setup the wallet for a refill: %v", err)
break
}
}
log.Println("Retrying")
continue
}
util.SendToSlackInfo("Video failed after %d retries, skipping. Stack: %s", s.MaxTries, logMsg)
}
}
break
2017-11-06 22:42:52 +01:00
}
2017-10-11 04:02:16 +02:00
}
}
2018-02-13 18:47:05 +01:00
func (s *Sync) enqueueYoutubeVideos() error {
2017-10-11 04:02:16 +02:00
client := &http.Client{
Transport: &transport.APIKey{Key: s.YoutubeAPIKey},
}
service, err := youtube.New(client)
if err != nil {
return errors.Prefix("error creating YouTube service", err)
2017-10-11 04:02:16 +02:00
}
response, err := service.Channels.List("contentDetails").Id(s.YoutubeChannelID).Do()
2017-10-11 04:02:16 +02:00
if err != nil {
return errors.Prefix("error getting channels", err)
2017-10-11 04:02:16 +02:00
}
if len(response.Items) < 1 {
return errors.Err("youtube channel not found")
2017-10-11 04:02:16 +02:00
}
if response.Items[0].ContentDetails.RelatedPlaylists == nil {
return errors.Err("no related playlists")
2017-10-11 04:02:16 +02:00
}
playlistID := response.Items[0].ContentDetails.RelatedPlaylists.Uploads
if playlistID == "" {
return errors.Err("no channel playlist")
2017-10-11 04:02:16 +02:00
}
var videos []video
2017-10-11 04:02:16 +02:00
nextPageToken := ""
for {
req := service.PlaylistItems.List("snippet").
PlaylistId(playlistID).
MaxResults(50).
PageToken(nextPageToken)
playlistResponse, err := req.Do()
if err != nil {
return errors.Prefix("error getting playlist items", err)
2017-10-11 04:02:16 +02:00
}
if len(playlistResponse.Items) < 1 {
return errors.Err("playlist items not found")
2017-10-11 04:02:16 +02:00
}
for _, item := range playlistResponse.Items {
// todo: there's thumbnail info here. why did we need lambda???
// because when videos are taken down from youtube, the thumb is gone too
2017-10-11 04:02:16 +02:00
// normally we'd send the video into the channel here, but youtube api doesn't have sorting
// so we have to get ALL the videos, then sort them, then send them in
videos = append(videos, sources.NewYoutubeVideo(s.videoDirectory, item.Snippet))
2017-10-11 04:02:16 +02:00
}
log.Infof("Got info for %d videos from youtube API", len(videos))
2017-10-11 04:02:16 +02:00
nextPageToken = playlistResponse.NextPageToken
if nextPageToken == "" {
break
}
}
sort.Sort(byPublishedAt(videos))
//or sort.Sort(sort.Reverse(byPlaylistPosition(videos)))
2017-10-11 19:13:47 +02:00
Enqueue:
2017-10-11 04:02:16 +02:00
for _, v := range videos {
select {
case <-s.grp.Ch():
break Enqueue
default:
}
select {
case s.queue <- v:
case <-s.grp.Ch():
2017-10-11 19:13:47 +02:00
break Enqueue
2017-10-11 04:02:16 +02:00
}
}
return nil
}
2018-02-13 18:47:05 +01:00
func (s *Sync) enqueueUCBVideos() error {
var videos []video
csvFile, err := os.Open("ucb.csv")
if err != nil {
return err
}
reader := csv.NewReader(bufio.NewReader(csvFile))
for {
line, err := reader.Read()
if err == io.EOF {
break
} else if err != nil {
return err
}
data := struct {
PublishedAt string `json:"publishedAt"`
}{}
err = json.Unmarshal([]byte(line[4]), &data)
if err != nil {
return err
}
videos = append(videos, sources.NewUCBVideo(line[0], line[2], line[1], line[3], data.PublishedAt, s.videoDirectory))
}
log.Printf("Publishing %d videos\n", len(videos))
sort.Sort(byPublishedAt(videos))
Enqueue:
for _, v := range videos {
select {
case <-s.grp.Ch():
2018-02-13 18:47:05 +01:00
break Enqueue
default:
}
select {
case s.queue <- v:
case <-s.grp.Ch():
2018-02-13 18:47:05 +01:00
break Enqueue
}
}
return nil
}
func (s *Sync) processVideo(v video) (err error) {
defer func() {
if p := recover(); p != nil {
var ok bool
err, ok = p.(error)
if !ok {
err = errors.Err("%v", p)
}
err = errors.Wrap(p, 2)
}
}()
log.Println("Processing " + v.IDAndNum())
defer func(start time.Time) {
log.Println(v.ID() + " took " + time.Since(start).String())
}(time.Now())
2017-10-11 04:02:16 +02:00
alreadyPublished, err := s.db.IsPublished(v.ID())
2017-10-11 04:02:16 +02:00
if err != nil {
return err
}
if alreadyPublished {
log.Println(v.ID() + " already published")
return nil
2017-10-11 04:02:16 +02:00
}
if v.PlaylistPosition() > 1000 {
2018-04-25 23:06:17 +02:00
log.Println(v.ID() + " is old: skipping")
return nil
}
err = v.Sync(s.daemon, s.claimAddress, publishAmount, s.LbryChannelName)
2017-10-11 04:02:16 +02:00
if err != nil {
return err
}
err = s.db.SetPublished(v.ID())
2017-10-11 04:02:16 +02:00
if err != nil {
return err
}
return nil
}
func startDaemonViaSystemd() error {
err := exec.Command("/usr/bin/sudo", "/bin/systemctl", "start", "lbrynet.service").Run()
if err != nil {
return errors.Err(err)
}
return nil
}
func stopDaemonViaSystemd() error {
err := exec.Command("/usr/bin/sudo", "/bin/systemctl", "stop", "lbrynet.service").Run()
if err != nil {
return errors.Err(err)
}
return nil
}
func getChannelIDFromFile(channelName string) (string, error) {
channelsJSON, err := ioutil.ReadFile("./channels")
if err != nil {
return "", errors.Wrap(err, 0)
}
var channels map[string]string
err = json.Unmarshal(channelsJSON, &channels)
if err != nil {
return "", errors.Wrap(err, 0)
}
channelID, ok := channels[channelName]
if !ok {
return "", errors.Err("channel not in list")
}
return channelID, nil
}
// waitForDaemonProcess observes the running processes and returns when the process is no longer running or when the timeout is up
func waitForDaemonProcess(timeout time.Duration) error {
processes, err := ps.Processes()
if err != nil {
return err
}
var daemonProcessId = -1
for _, p := range processes {
if p.Executable() == "lbrynet-daemon" {
daemonProcessId = p.Pid()
break
}
}
if daemonProcessId == -1 {
return nil
}
then := time.Now()
stopTime := then.Add(time.Duration(timeout * time.Second))
for !time.Now().After(stopTime) {
wait := 10 * time.Second
log.Println("the daemon is still running, waiting for it to exit")
time.Sleep(wait)
proc, err := os.FindProcess(daemonProcessId)
if err != nil {
// couldn't find the process, that means the daemon is stopped and can continue
return nil
}
//double check if process is running and alive
//by sending a signal 0
//NOTE : syscall.Signal is not available in Windows
err = proc.Signal(syscall.Signal(0))
//the process doesn't exist anymore! we're free to go
2018-04-24 20:50:01 +02:00
if err != nil && (err == syscall.ESRCH || err.Error() == "os: process already finished") {
return nil
}
}
return errors.Err("timeout reached")
}