// ytsync/ytapi/ytapi.go

package ytapi
import (
"bufio"
"encoding/json"
2020-08-08 03:29:57 +02:00
"fmt"
"io/ioutil"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
2020-08-08 01:12:55 +02:00
"github.com/lbryio/ytsync/v5/shared"
2020-08-18 00:03:38 +02:00
logUtils "github.com/lbryio/ytsync/v5/util"
2020-07-27 23:14:06 +02:00
"github.com/lbryio/ytsync/v5/downloader/ytdl"
"github.com/lbryio/ytsync/v5/downloader"
"github.com/lbryio/ytsync/v5/ip_manager"
"github.com/lbryio/ytsync/v5/sdk"
"github.com/lbryio/ytsync/v5/sources"
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/jsonrpc"
"github.com/lbryio/lbry.go/v2/extras/stop"
2020-08-18 00:03:38 +02:00
"github.com/lbryio/lbry.go/v2/extras/util"
"github.com/aws/aws-sdk-go/aws"
log "github.com/sirupsen/logrus"
)
// Video is the common interface for the items returned by GetVideosToSync.
// Both the values built by sources.NewYoutubeVideo and by sources.NewMockedVideo
// are stored in a []Video in this file.
type Video interface {
// Size returns a pointer to an int64.
// NOTE(review): presumably the size in bytes, nil when unknown — confirm against implementations.
Size() *int64
// ID returns the video's identifier as a string.
ID() string
// IDAndNum returns a string.
// NOTE(review): presumably the ID combined with the playlist position — confirm.
IDAndNum() string
// PlaylistPosition returns the video's position as an int.
PlaylistPosition() int
// PublishedAt returns the publication time; byPublishedAt sorts on this value.
PublishedAt() time.Time
// Sync performs the sync of this video and returns a summary or an error.
// NOTE(review): the meaning of the bool and the RWMutex arguments is not visible in this file — confirm against implementations.
Sync(*jsonrpc.Client, sources.SyncParams, *sdk.SyncedVideo, bool, *sync.RWMutex) (*sources.SyncSummary, error)
}
// byPublishedAt implements sort.Interface over a slice of Video, ordering the
// elements by ascending publication time.
type byPublishedAt []Video

// Len reports how many videos the slice holds.
func (a byPublishedAt) Len() int {
	return len(a)
}

// Swap exchanges the videos stored at positions i and j.
func (a byPublishedAt) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the video at position i was published strictly before
// the video at position j.
func (a byPublishedAt) Less(i, j int) bool {
	left, right := a[i].PublishedAt(), a[j].PublishedAt()
	return left.Before(right)
}
// VideoParams bundles the values GetVideosToSync forwards to the downloader
// and to the sources.NewYoutubeVideo / sources.NewMockedVideo constructors.
type VideoParams struct {
// VideoDir is passed through to the video constructors.
// NOTE(review): presumably the directory where video files are stored — confirm.
VideoDir string
// S3Config is the AWS configuration passed through to the video constructors.
S3Config aws.Config
// Stopper is passed to the video constructors; its channel (Stopper.Ch()) is
// also used to interrupt playlist listing and metadata fetching.
Stopper *stop.Group
// IPPool is passed to the downloader calls and to the video constructors.
IPPool *ip_manager.IPPool
}
// mostRecentlyFailedChannel holds the ID of the last channel for which
// GetVideosToSync ended up with an empty list of video IDs. When the same
// channel comes up empty again while still recorded here, GetVideosToSync
// returns an error instead of continuing. It is package-level mutable state
// and is read and written in GetVideosToSync without any locking.
var mostRecentlyFailedChannel string // TODO: fix this hack!
func GetVideosToSync(config *sdk.APIConfig, channelID string, syncedVideos map[string]sdk.SyncedVideo, quickSync bool, maxVideos int, videoParams VideoParams, lastUploadedVideo string) ([]Video, error) {
var videos []Video
if quickSync && maxVideos > 50 {
2020-07-27 21:57:19 +02:00
maxVideos = 50
}
2020-08-18 00:03:38 +02:00
allVideos, err := downloader.GetPlaylistVideoIDs(channelID, maxVideos, videoParams.Stopper.Ch(), videoParams.IPPool)
if err != nil {
return nil, errors.Err(err)
}
2020-08-18 00:03:38 +02:00
videoIDs := make([]string, 0, len(allVideos))
for _, video := range allVideos {
sv, ok := syncedVideos[video]
if ok && util.SubstringInSlice(sv.FailureReason, shared.NeverRetryFailures) {
continue
}
videoIDs = append(videoIDs, video)
}
log.Infof("Got info for %d videos from youtube downloader", len(videoIDs))
playlistMap := make(map[string]int64)
for i, videoID := range videoIDs {
playlistMap[videoID] = int64(i)
}
//this will ensure that we at least try to sync the video that was marked as last uploaded video in the database.
if lastUploadedVideo != "" {
_, ok := playlistMap[lastUploadedVideo]
if !ok {
playlistMap[lastUploadedVideo] = 0
videoIDs = append(videoIDs, lastUploadedVideo)
}
}
if len(videoIDs) < 1 {
if channelID == mostRecentlyFailedChannel {
return nil, errors.Err("playlist items not found")
}
mostRecentlyFailedChannel = channelID
}
vids, err := getVideos(config, channelID, videoIDs, videoParams.Stopper.Ch(), videoParams.IPPool)
if err != nil {
return nil, err
}
for _, item := range vids {
2020-07-27 23:14:06 +02:00
positionInList := playlistMap[item.ID]
videoToAdd, err := sources.NewYoutubeVideo(videoParams.VideoDir, item, positionInList, videoParams.S3Config, videoParams.Stopper, videoParams.IPPool)
2020-07-27 23:14:06 +02:00
if err != nil {
return nil, errors.Err(err)
}
videos = append(videos, videoToAdd)
}
for k, v := range syncedVideos {
if !v.Published {
continue
}
if _, ok := playlistMap[k]; !ok {
videos = append(videos, sources.NewMockedVideo(videoParams.VideoDir, k, channelID, videoParams.S3Config, videoParams.Stopper, videoParams.IPPool))
}
}
sort.Sort(byPublishedAt(videos))
return videos, nil
}
// uploadCountPattern extracts the first run of digits enclosed between '>' and
// '<' from the upload-count line of the socialblade page.
var uploadCountPattern = regexp.MustCompile(">([0-9]+)<")

// CountVideosInChannel scrapes the socialblade page of channelID and returns
// the upload count found on the first line containing
// "youtube-stats-header-uploads".
//
// It returns an error when the request cannot be built or executed, when the
// response cannot be read, when the upload-count line is missing, or when that
// line does not contain a number in the expected form.
//
// CountVideosInChannel is unused for now... keeping it here just in case
func CountVideosInChannel(channelID string) (int, error) {
	url := "https://socialblade.com/youtube/channel/" + channelID
	// http.NewRequest fails on malformed URLs and returns a nil request, so
	// the error has to be checked before touching req.Header.
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return 0, errors.Err(err)
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
	req.Header.Add("Accept", "*/*")
	req.Header.Add("Host", "socialblade.com")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return 0, errors.Err(err)
	}
	defer res.Body.Close()

	var line string
	scanner := bufio.NewScanner(res.Body)
	// HTML pages can contain lines longer than the default 64 KiB token limit,
	// which would abort the scan with bufio.ErrTooLong; allow up to 1 MiB.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 1<<20)
	for scanner.Scan() {
		if strings.Contains(scanner.Text(), "youtube-stats-header-uploads") {
			line = scanner.Text()
			break
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, errors.Err(err)
	}
	if line == "" {
		return 0, errors.Err("upload count line not found")
	}

	matches := uploadCountPattern.FindStringSubmatch(line)
	if len(matches) != 2 {
		return 0, errors.Err("upload count not found with regex")
	}
	num, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0, errors.Err(err)
	}
	return num, nil
}
// ChannelInfo downloads the "about" page of the YouTube channel channelID,
// extracts the JSON object assigned to window["ytInitialData"] and decodes it
// into a YoutubeStatsResponse.
//
// It returns an error when the request cannot be built or executed, when the
// body cannot be read, when the ytInitialData markers are not present in the
// page, or when the extracted data is not valid JSON.
func ChannelInfo(channelID string) (*YoutubeStatsResponse, error) {
	const (
		// dataStartMarker directly precedes the JSON payload in the page.
		dataStartMarker = "window[\"ytInitialData\"] = "
		// dataEndMarker is the tail of the JSON payload followed by the
		// statement-terminating semicolon.
		dataEndMarker = "]}}};"
	)

	url := "https://www.youtube.com/channel/" + channelID + "/about"
	// http.NewRequest fails on malformed URLs and returns a nil request, so
	// the error has to be checked before touching req.Header.
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, errors.Err(err)
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36")
	req.Header.Add("Accept", "*/*")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, errors.Err(err)
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, errors.Err(err)
	}
	pageBody := string(body)

	// Without these checks a page lacking either marker would make the slice
	// expression below panic with out-of-range bounds.
	markerIndex := strings.Index(pageBody, dataStartMarker)
	if markerIndex < 0 {
		return nil, errors.Err("ytInitialData not found in channel page")
	}
	dataStartIndex := markerIndex + len(dataStartMarker)

	// Look for the end marker only after the start of the payload so that an
	// earlier occurrence cannot produce an end index smaller than the start.
	endOffset := strings.Index(pageBody[dataStartIndex:], dataEndMarker)
	if endOffset < 0 {
		return nil, errors.Err("end of ytInitialData not found in channel page")
	}
	// Keep the closing "]}}}" of the JSON object but drop the trailing ";".
	dataEndIndex := dataStartIndex + endOffset + len(dataEndMarker) - 1
	data := pageBody[dataStartIndex:dataEndIndex]

	var decodedResponse YoutubeStatsResponse
	err = json.Unmarshal([]byte(data), &decodedResponse)
	if err != nil {
		return nil, errors.Err(err)
	}
	return &decodedResponse, nil
}
func getVideos(config *sdk.APIConfig, channelID string, videoIDs []string, stopChan stop.Chan, ipPool *ip_manager.IPPool) ([]*ytdl.YtdlVideo, error) {
2020-07-27 23:14:06 +02:00
var videos []*ytdl.YtdlVideo
for _, videoID := range videoIDs {
2020-07-30 18:48:05 +02:00
if len(videoID) < 5 {
continue
}
select {
case <-stopChan:
return videos, errors.Err("interrupted by user")
default:
}
//ip, err := ipPool.GetIP(videoID)
//if err != nil {
// return nil, err
//}
//video, err := downloader.GetVideoInformation(videoID, &net.TCPAddr{IP: net.ParseIP(ip)})
state, err := config.VideoState(videoID)
if err != nil {
return nil, errors.Err(err)
}
if state == "published" {
continue
}
2020-07-30 18:48:05 +02:00
video, err := downloader.GetVideoInformation(config, videoID, stopChan, nil, ipPool)
2020-07-27 23:14:06 +02:00
if err != nil {
2020-08-08 01:12:55 +02:00
errSDK := config.MarkVideoStatus(shared.VideoStatus{
ChannelID: channelID,
VideoID: videoID,
Status: "failed",
FailureReason: err.Error(),
})
2020-08-18 00:03:38 +02:00
logUtils.SendErrorToSlack(fmt.Sprintf("Skipping video (%s): %s", videoID, errors.FullTrace(err)))
if errSDK != nil {
return nil, errors.Err(errSDK)
}
} else {
videos = append(videos, video)
2020-07-27 23:14:06 +02:00
}
}
2020-07-27 23:14:06 +02:00
return videos, nil
}
// YoutubeStatsResponse is the decoding target ChannelInfo uses for the JSON
// object assigned to window["ytInitialData"] in a YouTube channel "about"
// page. Only the fields declared here are decoded; the nesting follows the
// JSON keys given in the struct tags.
type YoutubeStatsResponse struct {
// Contents holds the tab layout of the page, including the
// channelAboutFullMetadataRenderer entries nested inside the tabs.
Contents struct {
TwoColumnBrowseResultsRenderer struct {
Tabs []struct {
TabRenderer struct {
Title string `json:"title"`
Selected bool `json:"selected"`
Content struct {
SectionListRenderer struct {
Contents []struct {
ItemSectionRenderer struct {
Contents []struct {
ChannelAboutFullMetadataRenderer struct {
Description struct {
SimpleText string `json:"simpleText"`
} `json:"description"`
ViewCountText struct {
SimpleText string `json:"simpleText"`
} `json:"viewCountText"`
JoinedDateText struct {
Runs []struct {
Text string `json:"text"`
} `json:"runs"`
} `json:"joinedDateText"`
CanonicalChannelURL string `json:"canonicalChannelUrl"`
BypassBusinessEmailCaptcha bool `json:"bypassBusinessEmailCaptcha"`
Title struct {
SimpleText string `json:"simpleText"`
} `json:"title"`
Avatar struct {
Thumbnails []struct {
URL string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"thumbnails"`
} `json:"avatar"`
ShowDescription bool `json:"showDescription"`
DescriptionLabel struct {
Runs []struct {
Text string `json:"text"`
} `json:"runs"`
} `json:"descriptionLabel"`
DetailsLabel struct {
Runs []struct {
Text string `json:"text"`
} `json:"runs"`
} `json:"detailsLabel"`
ChannelID string `json:"channelId"`
} `json:"channelAboutFullMetadataRenderer"`
} `json:"contents"`
} `json:"itemSectionRenderer"`
} `json:"contents"`
} `json:"sectionListRenderer"`
} `json:"content"`
} `json:"tabRenderer"`
} `json:"tabs"`
} `json:"twoColumnBrowseResultsRenderer"`
} `json:"contents"`
// Header holds the c4TabbedHeaderRenderer data: channel ID, title, avatar and
// banner thumbnails, and the subscriber count text.
Header struct {
C4TabbedHeaderRenderer struct {
ChannelID string `json:"channelId"`
Title string `json:"title"`
Avatar struct {
Thumbnails []struct {
URL string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"thumbnails"`
} `json:"avatar"`
Banner struct {
Thumbnails []struct {
URL string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"thumbnails"`
} `json:"banner"`
VisitTracking struct {
RemarketingPing string `json:"remarketingPing"`
} `json:"visitTracking"`
SubscriberCountText struct {
SimpleText string `json:"simpleText"`
} `json:"subscriberCountText"`
} `json:"c4TabbedHeaderRenderer"`
} `json:"header"`
// Metadata holds the channelMetadataRenderer data such as title, description,
// URLs and keywords.
Metadata struct {
ChannelMetadataRenderer struct {
Title string `json:"title"`
Description string `json:"description"`
RssURL string `json:"rssUrl"`
ChannelConversionURL string `json:"channelConversionUrl"`
ExternalID string `json:"externalId"`
Keywords string `json:"keywords"`
OwnerUrls []string `json:"ownerUrls"`
Avatar struct {
Thumbnails []struct {
URL string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"thumbnails"`
} `json:"avatar"`
ChannelURL string `json:"channelUrl"`
IsFamilySafe bool `json:"isFamilySafe"`
VanityChannelURL string `json:"vanityChannelUrl"`
} `json:"channelMetadataRenderer"`
} `json:"metadata"`
// Topbar holds the desktopTopbarRenderer data (country code).
Topbar struct {
DesktopTopbarRenderer struct {
CountryCode string `json:"countryCode"`
} `json:"desktopTopbarRenderer"`
} `json:"topbar"`
// Microformat holds the microformatDataRenderer data: canonical URL, app
// links, social-card settings, flags and tags.
Microformat struct {
MicroformatDataRenderer struct {
URLCanonical string `json:"urlCanonical"`
Title string `json:"title"`
Description string `json:"description"`
Thumbnail struct {
Thumbnails []struct {
URL string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"thumbnails"`
} `json:"thumbnail"`
SiteName string `json:"siteName"`
AppName string `json:"appName"`
AndroidPackage string `json:"androidPackage"`
IosAppStoreID string `json:"iosAppStoreId"`
IosAppArguments string `json:"iosAppArguments"`
OgType string `json:"ogType"`
URLApplinksWeb string `json:"urlApplinksWeb"`
URLApplinksIos string `json:"urlApplinksIos"`
URLApplinksAndroid string `json:"urlApplinksAndroid"`
URLTwitterIos string `json:"urlTwitterIos"`
URLTwitterAndroid string `json:"urlTwitterAndroid"`
TwitterCardType string `json:"twitterCardType"`
TwitterSiteHandle string `json:"twitterSiteHandle"`
SchemaDotOrgType string `json:"schemaDotOrgType"`
Noindex bool `json:"noindex"`
Unlisted bool `json:"unlisted"`
FamilySafe bool `json:"familySafe"`
Tags []string `json:"tags"`
} `json:"microformatDataRenderer"`
} `json:"microformat"`
}