package reflector

import (
	"io/ioutil"
	"os"
	"path"
	"sync"
	"time"

	"github.com/lbryio/reflector.go/db"
	"github.com/lbryio/reflector.go/store"

	"github.com/lbryio/lbry.go/v2/extras/errors"
	"github.com/lbryio/lbry.go/v2/extras/stop"

	log "github.com/sirupsen/logrus"
)
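
// increment tells the counter goroutine which Summary counter to bump.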
type increment int

const (
	sdInc increment = iota + 1
	blobInc
	errInc
)
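
// Summary holds the running totals for an upload: how many blobs were found,
// how many the db already had, and how many SD blobs, content blobs, and
// errors were counted.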
type Summary struct {
	Total, AlreadyStored, Sd, Blob, Err int
}
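
// Uploader uploads blob files from disk to a blob store, using a pool of
// workers and a counter goroutine to track progress.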
type Uploader struct {
	db                     *db.SQL
	store                  *store.DBBackedStore // could just be store.BlobStore interface
	workers                int
	skipExistsCheck        bool
	deleteBlobsAfterUpload bool
	stopper                *stop.Group
	countChan              chan increment

	count Summary
}
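
// NewUploader creates an Uploader. A minimal usage sketch (dbInstance and
// dbBackedStore are placeholders for a connected db.SQL and a
// store.DBBackedStore wrapping the destination blob store):
//
//	uploader := NewUploader(dbInstance, dbBackedStore, 4, false, false)
//	if err := uploader.Upload("/path/to/blobs"); err != nil {
//		log.Errorln(err)
//	}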
func NewUploader(db *db.SQL, store *store.DBBackedStore, workers int, skipExistsCheck, deleteBlobsAfterUpload bool) *Uploader {
	return &Uploader{
		db:                     db,
		store:                  store,
		workers:                workers,
		skipExistsCheck:        skipExistsCheck,
		deleteBlobsAfterUpload: deleteBlobsAfterUpload,
		stopper:                stop.New(),
		countChan:              make(chan increment),
	}
}
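
// Stop aborts the upload and waits for the workers to finish.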
func (u *Uploader) Stop() {
	log.Infoln("stopping uploader")
	u.stopper.StopAndWait()
}
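
// Upload uploads the blob file at dirOrFilePath, or every blob file in it if
// it is a directory. Unless skipExistsCheck is set, blobs the db already has
// are skipped. It blocks until all workers and the counter have finished.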
func (u *Uploader) Upload(dirOrFilePath string) error {
	paths, err := getPaths(dirOrFilePath)
	if err != nil {
		return err
	}

	u.count.Total = len(paths)

	hashes := make([]string, len(paths))
	for i, p := range paths {
		hashes[i] = path.Base(p)
	}

	log.Debug("checking for existing blobs")

	var exists map[string]bool
	if !u.skipExistsCheck {
		exists, err = u.db.HasBlobs(hashes, false)
		if err != nil {
			return err
		}
		u.count.AlreadyStored = len(exists)
	}

	log.Debugf("%d new blobs to upload", u.count.Total-u.count.AlreadyStored)

	workerWG := sync.WaitGroup{}
	pathChan := make(chan string)

	for i := 0; i < u.workers; i++ {
		workerWG.Add(1)
		go func(i int) {
			defer workerWG.Done()
			defer func(i int) { log.Debugf("worker %d quitting", i) }(i)
			u.worker(pathChan)
		}(i)
	}

	countWG := sync.WaitGroup{}
	countWG.Add(1)
	go func() {
		defer countWG.Done()
		u.counter()
	}()

Upload:
	for _, f := range paths {
		if exists != nil && exists[path.Base(f)] {
			continue
		}

		select {
		case pathChan <- f:
		case <-u.stopper.Ch():
			break Upload
		}
	}

	close(pathChan)
	workerWG.Wait()

	close(u.countChan)
	countWG.Wait()

	u.stopper.Stop()

	log.Debugf(
		"upload stats: %d blobs total, %d already stored, %d SD blobs uploaded, %d content blobs uploaded, %d errors",
		u.count.Total, u.count.AlreadyStored, u.count.Sd, u.count.Blob, u.count.Err,
	)

	return nil
}

// worker reads paths from a channel, uploads them, and optionally deletes them
func (u *Uploader) worker(pathChan chan string) {
	for {
		select {
		case <-u.stopper.Ch():
			return
		case filepath, ok := <-pathChan:
			if !ok {
				return
			}

			err := u.uploadBlob(filepath)
			if err != nil {
				log.Errorln(err)
			} else if u.deleteBlobsAfterUpload {
				err = os.Remove(filepath)
				if err != nil {
					log.Errorln(errors.Prefix("deleting blob", err))
				}
			}
		}
	}
}

// uploadBlob reads the blob file at filepath, verifies that the file name
// matches the blob's hash, and uploads it as an SD blob (if it is valid JSON)
// or as a content blob. Any error is counted via the errInc increment.
func (u *Uploader) uploadBlob(filepath string) (err error) {
	defer func() {
		if err != nil {
			u.inc(errInc)
		}
	}()

	blob, err := ioutil.ReadFile(filepath)
	if err != nil {
		return errors.Err(err)
	}

	hash := BlobHash(blob)
	if hash != path.Base(filepath) {
		return errors.Err("file name does not match hash (%s != %s), skipping", filepath, hash)
	}

	if IsValidJSON(blob) {
		log.Debugf("uploading SD blob %s", hash)
		err := u.store.PutSD(hash, blob)
		if err != nil {
			return errors.Prefix("uploading SD blob "+hash, err)
		}
		u.inc(sdInc)
	} else {
		log.Debugf("uploading blob %s", hash)
		err = u.store.Put(hash, blob)
		if err != nil {
			return errors.Prefix("uploading blob "+hash, err)
		}
		u.inc(blobInc)
	}

	return nil
}

// counter updates the counts of how many SD blobs and content blobs were uploaded, and how many
// errors were encountered. It periodically logs the upload progress at debug level.
func (u *Uploader) counter() {
	start := time.Now()

	for {
		select {
		case <-u.stopper.Ch():
			return
		case incrementType, ok := <-u.countChan:
			if !ok {
				return
			}

			switch incrementType {
			case sdInc:
				u.count.Sd++
			case blobInc:
				u.count.Blob++
			case errInc:
				u.count.Err++
			}
		}

		// skip the progress log until at least one blob is done, to avoid
		// reporting a meaningless per-blob rate (division by zero)
		if done := u.count.Sd + u.count.Blob; done > 0 && done%50 == 0 {
			log.Debugf("%d of %d done (%s elapsed, %.3fs per blob)", done, u.count.Total-u.count.AlreadyStored, time.Since(start).String(), time.Since(start).Seconds()/float64(done))
		}
	}
}
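
// GetSummary returns the current upload counts. The summary is only
// guaranteed to be complete after Upload has returned, since the counter
// goroutine updates it concurrently.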
func (u *Uploader) GetSummary() Summary {
	return u.count
}
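
// inc reports an increment to the counter goroutine, giving up if the
// uploader is stopped so that workers never block on a dead counter.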
func (u *Uploader) inc(t increment) {
	select {
	case u.countChan <- t:
	case <-u.stopper.Ch():
	}
}

// getPaths returns the paths of the files to upload. It takes a path to a file or a dir. For a
// file, it returns the full path to that file. For a dir, it returns the paths of all regular
// files in that dir (it does not recurse into subdirectories).
func getPaths(dirOrFilePath string) ([]string, error) {
	info, err := os.Stat(dirOrFilePath)
	if err != nil {
		return nil, errors.Err(err)
	}

	if info.Mode().IsRegular() {
		return []string{dirOrFilePath}, nil
	}

	f, err := os.Open(dirOrFilePath)
	if err != nil {
		return nil, errors.Err(err)
	}

	files, err := f.Readdir(-1)
	if err != nil {
		return nil, errors.Err(err)
	}

	err = f.Close()
	if err != nil {
		return nil, errors.Err(err)
	}

	var filenames []string
	for _, file := range files {
		if !file.IsDir() {
			filenames = append(filenames, dirOrFilePath+"/"+file.Name())
		}
	}

	return filenames, nil
}