2020-07-27 20:48:05 +02:00
|
|
|
package ytapi
|
|
|
|
|
|
|
|
import (
|
2020-07-27 21:51:03 +02:00
|
|
|
"bufio"
|
2020-08-04 00:55:26 +02:00
|
|
|
"encoding/json"
|
2020-08-08 03:29:57 +02:00
|
|
|
"fmt"
|
2020-08-04 00:55:26 +02:00
|
|
|
"io/ioutil"
|
2020-07-27 20:48:05 +02:00
|
|
|
"net/http"
|
2020-07-27 21:51:03 +02:00
|
|
|
"regexp"
|
2020-07-27 20:48:05 +02:00
|
|
|
"sort"
|
2020-07-27 21:51:03 +02:00
|
|
|
"strconv"
|
2020-07-27 20:48:05 +02:00
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
2020-08-08 01:12:55 +02:00
|
|
|
"github.com/lbryio/ytsync/v5/shared"
|
2020-08-18 00:03:38 +02:00
|
|
|
logUtils "github.com/lbryio/ytsync/v5/util"
|
2020-08-03 07:05:03 +02:00
|
|
|
|
2020-07-27 23:14:06 +02:00
|
|
|
"github.com/lbryio/ytsync/v5/downloader/ytdl"
|
|
|
|
|
2020-07-27 21:42:45 +02:00
|
|
|
"github.com/lbryio/ytsync/v5/downloader"
|
2020-07-27 20:48:05 +02:00
|
|
|
"github.com/lbryio/ytsync/v5/ip_manager"
|
|
|
|
"github.com/lbryio/ytsync/v5/sdk"
|
|
|
|
"github.com/lbryio/ytsync/v5/sources"
|
|
|
|
|
|
|
|
"github.com/lbryio/lbry.go/v2/extras/errors"
|
|
|
|
"github.com/lbryio/lbry.go/v2/extras/jsonrpc"
|
|
|
|
"github.com/lbryio/lbry.go/v2/extras/stop"
|
2020-08-18 00:03:38 +02:00
|
|
|
"github.com/lbryio/lbry.go/v2/extras/util"
|
2020-07-27 20:48:05 +02:00
|
|
|
|
|
|
|
"github.com/aws/aws-sdk-go/aws"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
)
|
|
|
|
|
|
|
|
type Video interface {
|
|
|
|
Size() *int64
|
|
|
|
ID() string
|
|
|
|
IDAndNum() string
|
|
|
|
PlaylistPosition() int
|
|
|
|
PublishedAt() time.Time
|
|
|
|
Sync(*jsonrpc.Client, sources.SyncParams, *sdk.SyncedVideo, bool, *sync.RWMutex) (*sources.SyncSummary, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// byPublishedAt is a slice of videos ordered by ascending PublishedAt; it is
// the sort.Interface implementation that GetVideosToSync passes to sort.Sort.
type byPublishedAt []Video
|
|
|
|
|
|
|
|
// Len reports how many videos the slice holds (part of sort.Interface).
func (a byPublishedAt) Len() int {
	count := len(a)
	return count
}
|
|
|
|
// Swap exchanges the videos stored at indexes i and j (part of sort.Interface).
func (a byPublishedAt) Swap(i, j int) {
	first, second := a[i], a[j]
	a[i] = second
	a[j] = first
}
|
|
|
|
// Less reports whether the video at index i was published strictly before the
// video at index j (part of sort.Interface).
func (a byPublishedAt) Less(i, j int) bool {
	left := a[i].PublishedAt()
	right := a[j].PublishedAt()
	return left.Before(right)
}
|
|
|
|
|
|
|
|
type VideoParams struct {
|
|
|
|
VideoDir string
|
|
|
|
S3Config aws.Config
|
2020-07-29 03:34:08 +02:00
|
|
|
Stopper *stop.Group
|
2020-07-27 20:48:05 +02:00
|
|
|
IPPool *ip_manager.IPPool
|
|
|
|
}
|
|
|
|
|
|
|
|
// mostRecentlyFailedChannel holds the ID of the last channel for which
// GetVideosToSync ended up with an empty list of video IDs. GetVideosToSync
// tolerates an empty list once and returns an error when the same channel is
// seen empty again while it is still the value stored here. It is
// package-level mutable state with no synchronization.
var mostRecentlyFailedChannel string // TODO: fix this hack!
|
|
|
|
|
2020-08-06 20:32:49 +02:00
|
|
|
func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
|
2020-07-27 20:48:05 +02:00
|
|
|
var videos []Video
|
2020-08-04 00:55:26 +02:00
|
|
|
if quickSync && maxVideos > 50 {
|
2020-07-27 21:57:19 +02:00
|
|
|
maxVideos = 50
|
|
|
|
}
|
2020-08-18 00:03:38 +02:00
|
|
|
allVideos, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool)
|
2020-07-27 21:42:45 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Err(err)
|
|
|
|
}
|
2020-08-18 00:03:38 +02:00
|
|
|
videoIDs := make([]string, 0, len(allVideos))
|
|
|
|
for _, video := range allVideos {
|
|
|
|
sv, ok := syncedVideos[video]
|
|
|
|
if ok && util.SubstringInSlice(sv.FailureReason, shared.NeverRetryFailures) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
videoIDs = append(videoIDs, video)
|
|
|
|
}
|
2020-07-27 21:42:45 +02:00
|
|
|
log.Infof("Got info for %d videos from youtube downloader", len(videoIDs))
|
2020-07-27 20:48:05 +02:00
|
|
|
|
2020-07-27 21:42:45 +02:00
|
|
|
playlistMap := make(map[string]int64)
|
|
|
|
for i, videoID := range videoIDs {
|
|
|
|
playlistMap[videoID] = int64(i)
|
|
|
|
}
|
2020-08-06 20:32:49 +02:00
|
|
|
//this will ensure that we at least try to sync the video that was marked as last uploaded video in the database.
|
|
|
|
if lastUploadedVideo != "" {
|
|
|
|
_, ok := playlistMap[lastUploadedVideo]
|
|
|
|
if !ok {
|
|
|
|
playlistMap[lastUploadedVideo] = 0
|
|
|
|
videoIDs = append(videoIDs, lastUploadedVideo)
|
|
|
|
}
|
|
|
|
}
|
2020-07-27 20:48:05 +02:00
|
|
|
|
2020-07-27 21:42:45 +02:00
|
|
|
if len(videoIDs) < 1 {
|
|
|
|
if channelID == mostRecentlyFailedChannel {
|
|
|
|
return nil, errors.Err("playlist items not found")
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
2020-07-27 21:42:45 +02:00
|
|
|
mostRecentlyFailedChannel = channelID
|
|
|
|
}
|
2020-07-27 20:48:05 +02:00
|
|
|
|
2020-08-03 07:05:03 +02:00
|
|
|
vids, err := getVideos(config, channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool)
|
2020-07-27 21:42:45 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-07-27 20:48:05 +02:00
|
|
|
|
2020-07-27 21:42:45 +02:00
|
|
|
for _, item := range vids {
|
2020-07-27 23:14:06 +02:00
|
|
|
positionInList := playlistMap[item.ID]
|
2020-07-29 03:34:08 +02:00
|
|
|
videoToAdd, err := sources.NewYoutubeVideo(videoParams.VideoDir, item, positionInList, videoParams.S3Config, videoParams.Stopper, videoParams.IPPool)
|
2020-07-27 23:14:06 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Err(err)
|
|
|
|
}
|
|
|
|
videos = append(videos, videoToAdd)
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
for k, v := range syncedVideos {
|
|
|
|
if !v.Published {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if _, ok := playlistMap[k]; !ok {
|
2020-07-29 03:34:08 +02:00
|
|
|
videos = append(videos, sources.NewMockedVideo(videoParams.VideoDir, k, channelID, videoParams.S3Config, videoParams.Stopper, videoParams.IPPool))
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sort.Sort(byPublishedAt(videos))
|
|
|
|
|
|
|
|
return videos, nil
|
|
|
|
}
|
|
|
|
|
2020-08-21 00:15:14 +02:00
|
|
|
// CountVideosInChannel is unused for now... keeping it here just in case
|
2020-07-27 21:51:03 +02:00
|
|
|
func CountVideosInChannel(channelID string) (int, error) {
|
2020-08-21 00:15:14 +02:00
|
|
|
url := "https://socialblade.com/youtube/channel/" + channelID
|
|
|
|
|
|
|
|
req, _ := http.NewRequest("GET", url, nil)
|
|
|
|
|
|
|
|
req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
|
|
|
|
req.Header.Add("Accept", "*/*")
|
|
|
|
req.Header.Add("Host", "socialblade.com")
|
|
|
|
|
|
|
|
res, err := http.DefaultClient.Do(req)
|
2020-07-27 21:51:03 +02:00
|
|
|
if err != nil {
|
2020-08-21 00:15:14 +02:00
|
|
|
return 0, errors.Err(err)
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
2020-07-27 21:51:03 +02:00
|
|
|
defer res.Body.Close()
|
2020-07-27 20:48:05 +02:00
|
|
|
|
2020-07-27 21:51:03 +02:00
|
|
|
var line string
|
|
|
|
scanner := bufio.NewScanner(res.Body)
|
|
|
|
for scanner.Scan() {
|
|
|
|
if strings.Contains(scanner.Text(), "youtube-stats-header-uploads") {
|
|
|
|
line = scanner.Text()
|
|
|
|
break
|
|
|
|
}
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
2020-07-27 21:51:03 +02:00
|
|
|
if err := scanner.Err(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if line == "" {
|
|
|
|
return 0, errors.Err("upload count line not found")
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
2020-07-27 21:51:03 +02:00
|
|
|
matches := regexp.MustCompile(">([0-9]+)<").FindStringSubmatch(line)
|
|
|
|
if len(matches) != 2 {
|
|
|
|
return 0, errors.Err("upload count not found with regex")
|
|
|
|
}
|
|
|
|
|
|
|
|
num, err := strconv.Atoi(matches[1])
|
|
|
|
if err != nil {
|
|
|
|
return 0, errors.Err(err)
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
2020-07-27 21:51:03 +02:00
|
|
|
return num, nil
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
2020-08-04 00:55:26 +02:00
|
|
|
func ChannelInfo(channelID string) (*YoutubeStatsResponse, error) {
|
|
|
|
url := "https://www.youtube.com/channel/" + channelID + "/about"
|
2020-07-29 03:34:08 +02:00
|
|
|
|
2020-08-04 00:55:26 +02:00
|
|
|
req, _ := http.NewRequest("GET", url, nil)
|
|
|
|
|
|
|
|
req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
|
|
|
|
req.Header.Add("Accept", "*/*")
|
|
|
|
|
|
|
|
res, err := http.DefaultClient.Do(req)
|
2020-07-27 20:48:05 +02:00
|
|
|
if err != nil {
|
2020-08-04 00:55:26 +02:00
|
|
|
return nil, errors.Err(err)
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
2020-08-04 00:55:26 +02:00
|
|
|
defer res.Body.Close()
|
|
|
|
body, err := ioutil.ReadAll(res.Body)
|
2020-07-27 20:48:05 +02:00
|
|
|
if err != nil {
|
2020-08-04 00:55:26 +02:00
|
|
|
return nil, errors.Err(err)
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
2020-08-04 00:55:26 +02:00
|
|
|
pageBody := string(body)
|
|
|
|
dataStartIndex := strings.Index(pageBody, "window[\"ytInitialData\"] = ") + 26
|
|
|
|
dataEndIndex := strings.Index(pageBody, "]}}};") + 4
|
2020-07-27 20:48:05 +02:00
|
|
|
|
2020-08-04 00:55:26 +02:00
|
|
|
data := pageBody[dataStartIndex:dataEndIndex]
|
|
|
|
var decodedResponse YoutubeStatsResponse
|
|
|
|
err = json.Unmarshal([]byte(data), &decodedResponse)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Err(err)
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
2020-08-04 00:55:26 +02:00
|
|
|
return &decodedResponse, nil
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
|
|
|
|
2020-08-03 07:05:03 +02:00
|
|
|
func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) {
|
2020-07-27 23:14:06 +02:00
|
|
|
var videos []*ytdl.YtdlVideo
|
|
|
|
for _, videoID := range videoIDs {
|
2020-07-30 18:48:05 +02:00
|
|
|
if len(videoID) < 5 {
|
|
|
|
continue
|
|
|
|
}
|
2020-07-29 03:34:08 +02:00
|
|
|
select {
|
|
|
|
case <-stopChan:
|
2020-08-26 19:05:16 +02:00
|
|
|
return videos, errors.Err("interrupted by user")
|
2020-07-29 03:34:08 +02:00
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
//ip, err := ipPool.GetIP(videoID)
|
|
|
|
//if err != nil {
|
|
|
|
// return nil, err
|
|
|
|
//}
|
|
|
|
//video, err := downloader.GetVideoInformation(videoID, &net.TCPAddr{IP: net.ParseIP(ip)})
|
2020-07-29 06:12:23 +02:00
|
|
|
state, err := config.VideoState(videoID)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Err(err)
|
|
|
|
}
|
|
|
|
if state == "published" {
|
|
|
|
continue
|
|
|
|
}
|
2020-07-30 18:48:05 +02:00
|
|
|
video, err := downloader.GetVideoInformation(config, videoID, stopChan, nil, ipPool)
|
2020-07-27 23:14:06 +02:00
|
|
|
if err != nil {
|
2020-08-08 01:12:55 +02:00
|
|
|
errSDK := config.MarkVideoStatus(shared.VideoStatus{
|
2020-08-03 07:05:03 +02:00
|
|
|
ChannelID: channelID,
|
|
|
|
VideoID: videoID,
|
|
|
|
Status: "failed",
|
|
|
|
FailureReason: err.Error(),
|
|
|
|
})
|
2020-08-18 00:03:38 +02:00
|
|
|
logUtils.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err)))
|
2020-08-03 07:05:03 +02:00
|
|
|
if errSDK != nil {
|
|
|
|
return nil, errors.Err(errSDK)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
videos = append(videos, video)
|
2020-07-27 23:14:06 +02:00
|
|
|
}
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
2020-07-27 23:14:06 +02:00
|
|
|
return videos, nil
|
2020-07-27 20:48:05 +02:00
|
|
|
}
|
2020-08-04 00:55:26 +02:00
|
|
|
|
|
|
|
// YoutubeStatsResponse mirrors the parts of the ytInitialData JSON object that
// ChannelInfo decodes from a channel's "about" page. Field names follow the
// JSON keys given in the struct tags; keys that are not listed here are
// ignored by encoding/json when decoding.
type YoutubeStatsResponse struct {
	Contents struct {
		TwoColumnBrowseResultsRenderer struct {
			Tabs []struct {
				TabRenderer struct {
					Title    string `json:"title"`
					Selected bool   `json:"selected"`
					Content  struct {
						SectionListRenderer struct {
							Contents []struct {
								ItemSectionRenderer struct {
									Contents []struct {
										ChannelAboutFullMetadataRenderer struct {
											Description struct {
												SimpleText string `json:"simpleText"`
											} `json:"description"`
											ViewCountText struct {
												SimpleText string `json:"simpleText"`
											} `json:"viewCountText"`
											JoinedDateText struct {
												Runs []struct {
													Text string `json:"text"`
												} `json:"runs"`
											} `json:"joinedDateText"`
											CanonicalChannelURL        string `json:"canonicalChannelUrl"`
											BypassBusinessEmailCaptcha bool   `json:"bypassBusinessEmailCaptcha"`
											Title                      struct {
												SimpleText string `json:"simpleText"`
											} `json:"title"`
											Avatar struct {
												Thumbnails []struct {
													URL    string `json:"url"`
													Width  int    `json:"width"`
													Height int    `json:"height"`
												} `json:"thumbnails"`
											} `json:"avatar"`
											ShowDescription  bool `json:"showDescription"`
											DescriptionLabel struct {
												Runs []struct {
													Text string `json:"text"`
												} `json:"runs"`
											} `json:"descriptionLabel"`
											DetailsLabel struct {
												Runs []struct {
													Text string `json:"text"`
												} `json:"runs"`
											} `json:"detailsLabel"`
											ChannelID string `json:"channelId"`
										} `json:"channelAboutFullMetadataRenderer"`
									} `json:"contents"`
								} `json:"itemSectionRenderer"`
							} `json:"contents"`
						} `json:"sectionListRenderer"`
					} `json:"content"`
				} `json:"tabRenderer"`
			} `json:"tabs"`
		} `json:"twoColumnBrowseResultsRenderer"`
	} `json:"contents"`
	Header struct {
		C4TabbedHeaderRenderer struct {
			ChannelID string `json:"channelId"`
			Title     string `json:"title"`
			Avatar    struct {
				Thumbnails []struct {
					URL    string `json:"url"`
					Width  int    `json:"width"`
					Height int    `json:"height"`
				} `json:"thumbnails"`
			} `json:"avatar"`
			Banner struct {
				Thumbnails []struct {
					URL    string `json:"url"`
					Width  int    `json:"width"`
					Height int    `json:"height"`
				} `json:"thumbnails"`
			} `json:"banner"`
			VisitTracking struct {
				RemarketingPing string `json:"remarketingPing"`
			} `json:"visitTracking"`
			SubscriberCountText struct {
				SimpleText string `json:"simpleText"`
			} `json:"subscriberCountText"`
		} `json:"c4TabbedHeaderRenderer"`
	} `json:"header"`
	Metadata struct {
		ChannelMetadataRenderer struct {
			Title                string   `json:"title"`
			Description          string   `json:"description"`
			RssURL               string   `json:"rssUrl"`
			ChannelConversionURL string   `json:"channelConversionUrl"`
			ExternalID           string   `json:"externalId"`
			Keywords             string   `json:"keywords"`
			OwnerUrls            []string `json:"ownerUrls"`
			Avatar               struct {
				Thumbnails []struct {
					URL    string `json:"url"`
					Width  int    `json:"width"`
					Height int    `json:"height"`
				} `json:"thumbnails"`
			} `json:"avatar"`
			ChannelURL       string `json:"channelUrl"`
			IsFamilySafe     bool   `json:"isFamilySafe"`
			VanityChannelURL string `json:"vanityChannelUrl"`
		} `json:"channelMetadataRenderer"`
	} `json:"metadata"`
	Topbar struct {
		DesktopTopbarRenderer struct {
			CountryCode string `json:"countryCode"`
		} `json:"desktopTopbarRenderer"`
	} `json:"topbar"`
	Microformat struct {
		MicroformatDataRenderer struct {
			URLCanonical string `json:"urlCanonical"`
			Title        string `json:"title"`
			Description  string `json:"description"`
			Thumbnail    struct {
				Thumbnails []struct {
					URL    string `json:"url"`
					Width  int    `json:"width"`
					Height int    `json:"height"`
				} `json:"thumbnails"`
			} `json:"thumbnail"`
			SiteName           string   `json:"siteName"`
			AppName            string   `json:"appName"`
			AndroidPackage     string   `json:"androidPackage"`
			IosAppStoreID      string   `json:"iosAppStoreId"`
			IosAppArguments    string   `json:"iosAppArguments"`
			OgType             string   `json:"ogType"`
			URLApplinksWeb     string   `json:"urlApplinksWeb"`
			URLApplinksIos     string   `json:"urlApplinksIos"`
			URLApplinksAndroid string   `json:"urlApplinksAndroid"`
			URLTwitterIos      string   `json:"urlTwitterIos"`
			URLTwitterAndroid  string   `json:"urlTwitterAndroid"`
			TwitterCardType    string   `json:"twitterCardType"`
			TwitterSiteHandle  string   `json:"twitterSiteHandle"`
			SchemaDotOrgType   string   `json:"schemaDotOrgType"`
			Noindex            bool     `json:"noindex"`
			Unlisted           bool     `json:"unlisted"`
			FamilySafe         bool     `json:"familySafe"`
			Tags               []string `json:"tags"`
		} `json:"microformatDataRenderer"`
	} `json:"microformat"`
}
|