38c1883dde
refactoring address reviews
615 lines
16 KiB
Go
615 lines
16 KiB
Go
package ytsync
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/csv"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"os/signal"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/lbryio/lbry.go/errors"
|
|
"github.com/lbryio/lbry.go/jsonrpc"
|
|
"github.com/lbryio/lbry.go/stop"
|
|
"github.com/lbryio/lbry.go/util"
|
|
"github.com/lbryio/lbry.go/ytsync/redisdb"
|
|
"github.com/lbryio/lbry.go/ytsync/sources"
|
|
"github.com/mitchellh/go-ps"
|
|
log "github.com/sirupsen/logrus"
|
|
"google.golang.org/api/googleapi/transport"
|
|
"google.golang.org/api/youtube/v3"
|
|
)
|
|
|
|
const (
|
|
channelClaimAmount = 0.01
|
|
publishAmount = 0.01
|
|
)
|
|
|
|
type video interface {
|
|
ID() string
|
|
IDAndNum() string
|
|
PlaylistPosition() int
|
|
PublishedAt() time.Time
|
|
Sync(*jsonrpc.Client, string, float64, string, int) (*sources.SyncSummary, error)
|
|
}
|
|
|
|
// sorting videos
|
|
type byPublishedAt []video
|
|
|
|
func (a byPublishedAt) Len() int { return len(a) }
|
|
func (a byPublishedAt) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
|
func (a byPublishedAt) Less(i, j int) bool { return a[i].PublishedAt().Before(a[j].PublishedAt()) }
|
|
|
|
// Sync stores the options that control how syncing happens
|
|
type Sync struct {
|
|
YoutubeAPIKey string
|
|
YoutubeChannelID string
|
|
LbryChannelName string
|
|
StopOnError bool
|
|
MaxTries int
|
|
ConcurrentVideos int
|
|
TakeOverExistingChannel bool
|
|
Refill int
|
|
Manager *SyncManager
|
|
|
|
daemon *jsonrpc.Client
|
|
claimAddress string
|
|
videoDirectory string
|
|
db *redisdb.DB
|
|
syncedVideos map[string]syncedVideo
|
|
syncedVideosMux *sync.Mutex
|
|
grp *stop.Group
|
|
lbryChannelID string
|
|
|
|
walletMux *sync.Mutex
|
|
queue chan video
|
|
}
|
|
|
|
func (s *Sync) AppendSyncedVideo(videoID string, published bool, failureReason string) {
|
|
s.syncedVideosMux.Lock()
|
|
defer s.syncedVideosMux.Unlock()
|
|
s.syncedVideos[videoID] = syncedVideo{
|
|
VideoID: videoID,
|
|
Published: published,
|
|
FailureReason: failureReason,
|
|
}
|
|
}
|
|
|
|
// SendErrorToSlack Sends an error message to the default channel and to the process log.
|
|
func SendErrorToSlack(format string, a ...interface{}) error {
|
|
message := format
|
|
if len(a) > 0 {
|
|
message = fmt.Sprintf(format, a...)
|
|
}
|
|
log.Errorln(message)
|
|
return util.SendToSlack(":sos: " + message)
|
|
}
|
|
|
|
// SendInfoToSlack Sends an info message to the default channel and to the process log.
|
|
func SendInfoToSlack(format string, a ...interface{}) error {
|
|
message := format
|
|
if len(a) > 0 {
|
|
message = fmt.Sprintf(format, a...)
|
|
}
|
|
log.Infoln(message)
|
|
return util.SendToSlack(":information_source: " + message)
|
|
}
|
|
|
|
// IsInterrupted can be queried to discover if the sync process was interrupted manually
|
|
func (s *Sync) IsInterrupted() bool {
|
|
select {
|
|
case <-s.grp.Ch():
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
func (s *Sync) FullCycle() (e error) {
|
|
if os.Getenv("HOME") == "" {
|
|
return errors.Err("no $HOME env var found")
|
|
}
|
|
if s.YoutubeChannelID == "" {
|
|
return errors.Err("channel ID not provided")
|
|
}
|
|
s.syncedVideosMux = &sync.Mutex{}
|
|
s.walletMux = &sync.Mutex{}
|
|
s.db = redisdb.New()
|
|
s.grp = stop.New()
|
|
s.queue = make(chan video)
|
|
interruptChan := make(chan os.Signal, 1)
|
|
signal.Notify(interruptChan, os.Interrupt, syscall.SIGTERM)
|
|
go func() {
|
|
<-interruptChan
|
|
log.Println("Got interrupt signal, shutting down (if publishing, will shut down after current publish)")
|
|
s.grp.Stop()
|
|
}()
|
|
syncedVideos, err := s.Manager.setChannelStatus(s.YoutubeChannelID, StatusSyncing)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.syncedVideosMux.Lock()
|
|
s.syncedVideos = syncedVideos
|
|
s.syncedVideosMux.Unlock()
|
|
|
|
defer func() {
|
|
if e != nil {
|
|
//conditions for which a channel shouldn't be marked as failed
|
|
noFailConditions := []string{
|
|
"this youtube channel is being managed by another server",
|
|
}
|
|
if util.SubstringInSlice(e.Error(), noFailConditions) {
|
|
return
|
|
}
|
|
_, err := s.Manager.setChannelStatus(s.YoutubeChannelID, StatusFailed)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("Failed setting failed state for channel %s.", s.LbryChannelName)
|
|
err = errors.Prefix(msg, err)
|
|
e = errors.Prefix(err.Error(), e)
|
|
}
|
|
} else if !s.IsInterrupted() {
|
|
_, err := s.Manager.setChannelStatus(s.YoutubeChannelID, StatusSynced)
|
|
if err != nil {
|
|
e = err
|
|
}
|
|
}
|
|
}()
|
|
|
|
defaultWalletDir := os.Getenv("HOME") + "/.lbryum/wallets/default_wallet"
|
|
if os.Getenv("REGTEST") == "true" {
|
|
defaultWalletDir = os.Getenv("HOME") + "/.lbryum_regtest/wallets/default_wallet"
|
|
}
|
|
walletBackupDir := os.Getenv("HOME") + "/wallets/" + strings.Replace(s.LbryChannelName, "@", "", 1)
|
|
|
|
if _, err := os.Stat(defaultWalletDir); !os.IsNotExist(err) {
|
|
return errors.Err("default_wallet already exists")
|
|
}
|
|
|
|
if _, err = os.Stat(walletBackupDir); !os.IsNotExist(err) {
|
|
err = os.Rename(walletBackupDir, defaultWalletDir)
|
|
if err != nil {
|
|
return errors.Wrap(err, 0)
|
|
}
|
|
log.Println("Continuing previous upload")
|
|
}
|
|
|
|
defer func() {
|
|
log.Printf("Stopping daemon")
|
|
shutdownErr := stopDaemonViaSystemd()
|
|
if shutdownErr != nil {
|
|
logShutdownError(shutdownErr)
|
|
} else {
|
|
// the cli will return long before the daemon effectively stops. we must observe the processes running
|
|
// before moving the wallet
|
|
waitTimeout := 8 * time.Minute
|
|
processDeathError := waitForDaemonProcess(waitTimeout)
|
|
if processDeathError != nil {
|
|
logShutdownError(processDeathError)
|
|
} else {
|
|
walletErr := os.Rename(defaultWalletDir, walletBackupDir)
|
|
if walletErr != nil {
|
|
log.Errorf("error moving wallet to backup dir: %v", walletErr)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
|
|
s.videoDirectory, err = ioutil.TempDir("", "ytsync")
|
|
if err != nil {
|
|
return errors.Wrap(err, 0)
|
|
}
|
|
|
|
log.Printf("Starting daemon")
|
|
err = startDaemonViaSystemd()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
log.Infoln("Waiting for daemon to finish starting...")
|
|
s.daemon = jsonrpc.NewClient("")
|
|
s.daemon.SetRPCTimeout(40 * time.Minute)
|
|
|
|
WaitForDaemonStart:
|
|
for {
|
|
select {
|
|
case <-s.grp.Ch():
|
|
return nil
|
|
default:
|
|
_, err := s.daemon.WalletBalance()
|
|
if err == nil {
|
|
break WaitForDaemonStart
|
|
}
|
|
time.Sleep(5 * time.Second)
|
|
}
|
|
}
|
|
|
|
err = s.doSync()
|
|
if err != nil {
|
|
return err
|
|
} else {
|
|
// wait for reflection to finish???
|
|
wait := 15 * time.Second // should bump this up to a few min, but keeping it low for testing
|
|
log.Println("Waiting " + wait.String() + " to finish reflecting everything")
|
|
time.Sleep(wait)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
func logShutdownError(shutdownErr error) {
|
|
SendErrorToSlack("error shutting down daemon: %v", shutdownErr)
|
|
SendErrorToSlack("WALLET HAS NOT BEEN MOVED TO THE WALLET BACKUP DIR")
|
|
}
|
|
|
|
func (s *Sync) doSync() error {
|
|
var err error
|
|
|
|
err = s.walletSetup()
|
|
if err != nil {
|
|
return errors.Prefix("Initial wallet setup failed! Manual Intervention is required.", err)
|
|
}
|
|
|
|
if s.StopOnError {
|
|
log.Println("Will stop publishing if an error is detected")
|
|
}
|
|
|
|
for i := 0; i < s.ConcurrentVideos; i++ {
|
|
s.grp.Add(1)
|
|
go func() {
|
|
defer s.grp.Done()
|
|
s.startWorker(i)
|
|
}()
|
|
}
|
|
|
|
if s.LbryChannelName == "@UCBerkeley" {
|
|
err = s.enqueueUCBVideos()
|
|
} else {
|
|
err = s.enqueueYoutubeVideos()
|
|
}
|
|
close(s.queue)
|
|
s.grp.Wait()
|
|
return err
|
|
}
|
|
|
|
func (s *Sync) startWorker(workerNum int) {
|
|
var v video
|
|
var more bool
|
|
|
|
for {
|
|
select {
|
|
case <-s.grp.Ch():
|
|
log.Printf("Stopping worker %d", workerNum)
|
|
return
|
|
default:
|
|
}
|
|
|
|
select {
|
|
case v, more = <-s.queue:
|
|
if !more {
|
|
return
|
|
}
|
|
case <-s.grp.Ch():
|
|
log.Printf("Stopping worker %d", workerNum)
|
|
return
|
|
}
|
|
|
|
log.Println("================================================================================")
|
|
|
|
tryCount := 0
|
|
for {
|
|
tryCount++
|
|
err := s.processVideo(v)
|
|
|
|
if err != nil {
|
|
logMsg := fmt.Sprintf("error processing video: " + err.Error())
|
|
log.Errorln(logMsg)
|
|
fatalErrors := []string{
|
|
":5279: read: connection reset by peer",
|
|
"no space left on device",
|
|
"NotEnoughFunds",
|
|
"Cannot publish using channel",
|
|
}
|
|
if util.SubstringInSlice(err.Error(), fatalErrors) || s.StopOnError {
|
|
s.grp.Stop()
|
|
} else if s.MaxTries > 1 {
|
|
errorsNoRetry := []string{
|
|
"non 200 status code received",
|
|
" reason: 'This video contains content from",
|
|
"dont know which claim to update",
|
|
"uploader has not made this video available in your country",
|
|
"download error: AccessDenied: Access Denied",
|
|
"Playback on other websites has been disabled by the video owner",
|
|
"Error in daemon: Cannot publish empty file",
|
|
"Error extracting sts from embedded url response",
|
|
"Client.Timeout exceeded while awaiting headers)",
|
|
"the video is too big to sync, skipping for now",
|
|
}
|
|
if util.SubstringInSlice(err.Error(), errorsNoRetry) {
|
|
log.Println("This error should not be retried at all")
|
|
} else if tryCount < s.MaxTries {
|
|
if strings.Contains(err.Error(), "txn-mempool-conflict") ||
|
|
strings.Contains(err.Error(), "failed: Not enough funds") ||
|
|
strings.Contains(err.Error(), "Error in daemon: Insufficient funds, please deposit additional LBC") ||
|
|
strings.Contains(err.Error(), "too-long-mempool-chain") {
|
|
log.Println("waiting for a block and refilling addresses before retrying")
|
|
err = s.walletSetup()
|
|
if err != nil {
|
|
s.grp.Stop()
|
|
SendErrorToSlack("Failed to setup the wallet for a refill: %v", err)
|
|
break
|
|
}
|
|
}
|
|
log.Println("Retrying")
|
|
continue
|
|
}
|
|
SendErrorToSlack("Video failed after %d retries, skipping. Stack: %s", tryCount, logMsg)
|
|
}
|
|
s.AppendSyncedVideo(v.ID(), false, err.Error())
|
|
err = s.Manager.MarkVideoStatus(s.YoutubeChannelID, v.ID(), VideoStatusFailed, "", "", err.Error())
|
|
if err != nil {
|
|
SendErrorToSlack("Failed to mark video on the database: %s", err.Error())
|
|
}
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Sync) enqueueYoutubeVideos() error {
|
|
client := &http.Client{
|
|
Transport: &transport.APIKey{Key: s.YoutubeAPIKey},
|
|
}
|
|
|
|
service, err := youtube.New(client)
|
|
if err != nil {
|
|
return errors.Prefix("error creating YouTube service", err)
|
|
}
|
|
|
|
response, err := service.Channels.List("contentDetails").Id(s.YoutubeChannelID).Do()
|
|
if err != nil {
|
|
return errors.Prefix("error getting channels", err)
|
|
}
|
|
|
|
if len(response.Items) < 1 {
|
|
return errors.Err("youtube channel not found")
|
|
}
|
|
|
|
if response.Items[0].ContentDetails.RelatedPlaylists == nil {
|
|
return errors.Err("no related playlists")
|
|
}
|
|
|
|
playlistID := response.Items[0].ContentDetails.RelatedPlaylists.Uploads
|
|
if playlistID == "" {
|
|
return errors.Err("no channel playlist")
|
|
}
|
|
|
|
var videos []video
|
|
|
|
nextPageToken := ""
|
|
for {
|
|
req := service.PlaylistItems.List("snippet").
|
|
PlaylistId(playlistID).
|
|
MaxResults(50).
|
|
PageToken(nextPageToken)
|
|
|
|
playlistResponse, err := req.Do()
|
|
if err != nil {
|
|
return errors.Prefix("error getting playlist items", err)
|
|
}
|
|
|
|
if len(playlistResponse.Items) < 1 {
|
|
return errors.Err("playlist items not found")
|
|
}
|
|
|
|
for _, item := range playlistResponse.Items {
|
|
// normally we'd send the video into the channel here, but youtube api doesn't have sorting
|
|
// so we have to get ALL the videos, then sort them, then send them in
|
|
videos = append(videos, sources.NewYoutubeVideo(s.videoDirectory, item.Snippet))
|
|
}
|
|
|
|
log.Infof("Got info for %d videos from youtube API", len(videos))
|
|
|
|
nextPageToken = playlistResponse.NextPageToken
|
|
if nextPageToken == "" {
|
|
break
|
|
}
|
|
}
|
|
|
|
sort.Sort(byPublishedAt(videos))
|
|
//or sort.Sort(sort.Reverse(byPlaylistPosition(videos)))
|
|
|
|
Enqueue:
|
|
for _, v := range videos {
|
|
select {
|
|
case <-s.grp.Ch():
|
|
break Enqueue
|
|
default:
|
|
}
|
|
|
|
select {
|
|
case s.queue <- v:
|
|
case <-s.grp.Ch():
|
|
break Enqueue
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Sync) enqueueUCBVideos() error {
|
|
var videos []video
|
|
|
|
csvFile, err := os.Open("ucb.csv")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
reader := csv.NewReader(bufio.NewReader(csvFile))
|
|
for {
|
|
line, err := reader.Read()
|
|
if err == io.EOF {
|
|
break
|
|
} else if err != nil {
|
|
return err
|
|
}
|
|
data := struct {
|
|
PublishedAt string `json:"publishedAt"`
|
|
}{}
|
|
err = json.Unmarshal([]byte(line[4]), &data)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
videos = append(videos, sources.NewUCBVideo(line[0], line[2], line[1], line[3], data.PublishedAt, s.videoDirectory))
|
|
}
|
|
|
|
log.Printf("Publishing %d videos\n", len(videos))
|
|
|
|
sort.Sort(byPublishedAt(videos))
|
|
|
|
Enqueue:
|
|
for _, v := range videos {
|
|
select {
|
|
case <-s.grp.Ch():
|
|
break Enqueue
|
|
default:
|
|
}
|
|
|
|
select {
|
|
case s.queue <- v:
|
|
case <-s.grp.Ch():
|
|
break Enqueue
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Sync) processVideo(v video) (err error) {
|
|
defer func() {
|
|
if p := recover(); p != nil {
|
|
var ok bool
|
|
err, ok = p.(error)
|
|
if !ok {
|
|
err = errors.Err("%v", p)
|
|
}
|
|
err = errors.Wrap(p, 2)
|
|
}
|
|
}()
|
|
|
|
log.Println("Processing " + v.IDAndNum())
|
|
defer func(start time.Time) {
|
|
log.Println(v.ID() + " took " + time.Since(start).String())
|
|
}(time.Now())
|
|
|
|
s.syncedVideosMux.Lock()
|
|
sv, ok := s.syncedVideos[v.ID()]
|
|
s.syncedVideosMux.Unlock()
|
|
alreadyPublished := ok && sv.Published
|
|
|
|
neverRetryFailures := []string{
|
|
"Error extracting sts from embedded url response",
|
|
"the video is too big to sync, skipping for now",
|
|
}
|
|
if ok && !sv.Published && util.SubstringInSlice(sv.FailureReason, neverRetryFailures) {
|
|
log.Println(v.ID() + " can't ever be published")
|
|
return nil
|
|
}
|
|
|
|
//TODO: remove this after a few runs...
|
|
alreadyPublishedOld, err := s.db.IsPublished(v.ID())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
//TODO: remove this after a few runs...
|
|
if alreadyPublishedOld && !alreadyPublished {
|
|
//seems like something in the migration of blobs didn't go perfectly right so warn about it!
|
|
SendInfoToSlack("A video that was previously published is on the local database but isn't on the remote db! fix it @Nikooo777! \nchannelID: %s, videoID: %s",
|
|
s.YoutubeChannelID, v.ID())
|
|
return nil
|
|
}
|
|
|
|
if alreadyPublished {
|
|
log.Println(v.ID() + " already published")
|
|
return nil
|
|
}
|
|
|
|
if v.PlaylistPosition() > s.Manager.VideosLimit {
|
|
log.Println(v.ID() + " is old: skipping")
|
|
return nil
|
|
}
|
|
summary, err := v.Sync(s.daemon, s.claimAddress, publishAmount, s.lbryChannelID, s.Manager.MaxVideoSize)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = s.Manager.MarkVideoStatus(s.YoutubeChannelID, v.ID(), VideoStatusPublished, summary.ClaimID, summary.ClaimName, "")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.AppendSyncedVideo(v.ID(), true, "")
|
|
|
|
return nil
|
|
}
|
|
|
|
func startDaemonViaSystemd() error {
|
|
err := exec.Command("/usr/bin/sudo", "/bin/systemctl", "start", "lbrynet.service").Run()
|
|
if err != nil {
|
|
return errors.Err(err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func stopDaemonViaSystemd() error {
|
|
err := exec.Command("/usr/bin/sudo", "/bin/systemctl", "stop", "lbrynet.service").Run()
|
|
if err != nil {
|
|
return errors.Err(err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// waitForDaemonProcess observes the running processes and returns when the process is no longer running or when the timeout is up
|
|
func waitForDaemonProcess(timeout time.Duration) error {
|
|
processes, err := ps.Processes()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var daemonProcessId = -1
|
|
for _, p := range processes {
|
|
if p.Executable() == "lbrynet-daemon" {
|
|
daemonProcessId = p.Pid()
|
|
break
|
|
}
|
|
}
|
|
if daemonProcessId == -1 {
|
|
return nil
|
|
}
|
|
then := time.Now()
|
|
stopTime := then.Add(time.Duration(timeout * time.Second))
|
|
for !time.Now().After(stopTime) {
|
|
wait := 10 * time.Second
|
|
log.Println("the daemon is still running, waiting for it to exit")
|
|
time.Sleep(wait)
|
|
proc, err := os.FindProcess(daemonProcessId)
|
|
if err != nil {
|
|
// couldn't find the process, that means the daemon is stopped and can continue
|
|
return nil
|
|
}
|
|
//double check if process is running and alive
|
|
//by sending a signal 0
|
|
//NOTE : syscall.Signal is not available in Windows
|
|
err = proc.Signal(syscall.Signal(0))
|
|
//the process doesn't exist anymore! we're free to go
|
|
if err != nil && (err == syscall.ESRCH || err.Error() == "os: process already finished") {
|
|
return nil
|
|
}
|
|
}
|
|
return errors.Err("timeout reached")
|
|
}
|