package cmd
import (
	"encoding/json"
	"io/ioutil"
	"os"
	"os/signal"
	"path"
	"sync"
	"syscall"
	"time"

	"github.com/lbryio/reflector.go/db"
	"github.com/lbryio/reflector.go/peer"
	"github.com/lbryio/reflector.go/store"

	"github.com/lbryio/lbry.go/stop"

	log "github.com/sirupsen/logrus"
	"github.com/spf13/cobra"
)

var uploadWorkers int
var uploadSkipExistsCheck bool
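
// Values sent over countChan by workers to report the outcome of each upload attempt.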
const (
	sdInc   = 1
	blobInc = 2
	errInc  = 3
)
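
// uploaderParams groups the wait groups, channels, and counters shared
// between uploadCmd, the upload workers, and the count receiver.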
type uploaderParams struct {
	workerWG  *sync.WaitGroup
	counterWG *sync.WaitGroup
	stopper   *stop.Group
	pathChan  chan string
	countChan chan int

	sdCount   int
	blobCount int
	errCount  int
}

func init() {
	var cmd = &cobra.Command{
		Use:   "upload PATH",
		Short: "Upload blobs to S3",
		Args:  cobra.ExactArgs(1),
		Run:   uploadCmd,
	}

	cmd.PersistentFlags().IntVar(&uploadWorkers, "workers", 1, "How many worker threads to run at once")
	cmd.PersistentFlags().BoolVar(&uploadSkipExistsCheck, "skipExistsCheck", false, "Don't check if blobs exist before uploading")

	rootCmd.AddCommand(cmd)
}
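
// uploadCmd collects the blob paths under the given PATH, skips blobs the
// database already knows about (unless --skipExistsCheck is set), and feeds
// the rest to a pool of upload workers.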
func uploadCmd(cmd *cobra.Command, args []string) {
	startTime := time.Now()
	db := new(db.SQL)
	err := db.Connect(globalConfig.DBConn)
	checkErr(err)

	params := uploaderParams{
		workerWG:  &sync.WaitGroup{},
		counterWG: &sync.WaitGroup{},
		pathChan:  make(chan string),
		countChan: make(chan int),
		stopper:   stop.New(),
	}

	setInterrupt(params.stopper)

	paths, err := getPaths(args[0])
	checkErr(err)

	// Blob file names are the blob hashes, so the base names are what we
	// check against the database.
	totalCount := len(paths)
	hashes := make([]string, len(paths))
	for i, p := range paths {
		hashes[i] = path.Base(p)
	}

	exists := make(map[string]bool)
	if !uploadSkipExistsCheck {
		log.Println("checking for existing blobs")
		exists, err = db.HasBlobs(hashes)
		checkErr(err)
	}
	existsCount := len(exists)

	log.Printf("%d new blobs to upload", totalCount-existsCount)

	startUploadWorkers(&params)

	params.counterWG.Add(1)
	go func() {
		defer params.counterWG.Done()
		runCountReceiver(&params, startTime, totalCount, existsCount)
	}()

Upload:
	for _, f := range paths {
		if exists[path.Base(f)] {
			continue
		}

		select {
		case params.pathChan <- f:
		case <-params.stopper.Ch():
			log.Warnln("Caught interrupt, quitting at first opportunity...")
			break Upload
		}
	}

	// Shut down in order: workers drain the remaining paths, then the count
	// receiver sees its channel close, then the stop group is released.
	close(params.pathChan)
	params.workerWG.Wait()
	close(params.countChan)
	params.counterWG.Wait()
	params.stopper.Stop()

	log.Println("SUMMARY")
	log.Printf("%d blobs total", totalCount)
	log.Printf("%d SD blobs uploaded", params.sdCount)
	log.Printf("%d content blobs uploaded", params.blobCount)
	log.Printf("%d blobs already stored", existsCount)
	log.Printf("%d errors encountered", params.errCount)
}
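
// isJSON reports whether data is a well-formed JSON document. SD blobs are
// JSON, so this is what distinguishes them from content blobs.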
func isJSON(data []byte) bool {
	var js json.RawMessage
	return json.Unmarshal(data, &js) == nil
}
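
// newBlobStore opens a fresh database connection and wraps the S3 store
// with it, so each worker tracks its uploads in the database.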
func newBlobStore() *store.DBBackedS3Store {
	db := new(db.SQL)
	err := db.Connect(globalConfig.DBConn)
	checkErr(err)

	s3 := store.NewS3BlobStore(globalConfig.AwsID, globalConfig.AwsSecret, globalConfig.BucketRegion, globalConfig.BucketName)
	return store.NewDBBackedS3Store(s3, db)
}
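
// setInterrupt stops the stop group when the process receives SIGINT or
// SIGTERM, letting workers exit at the next safe point.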
func setInterrupt(stopper *stop.Group) {
	interruptChan := make(chan os.Signal, 1)
	signal.Notify(interruptChan, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-interruptChan
		stopper.Stop()
	}()
}
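
// startUploadWorkers launches uploadWorkers goroutines, each with its own
// blob store connection, that consume paths from pathChan until the channel
// closes or the stopper fires.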
func startUploadWorkers(params *uploaderParams) {
	for i := 0; i < uploadWorkers; i++ {
		params.workerWG.Add(1)
		go func(i int) {
			defer params.workerWG.Done()
			defer func(i int) {
				log.Printf("worker %d quitting", i)
			}(i)

			blobStore := newBlobStore()
			launchFileUploader(params, blobStore, i)
		}(i)
	}
}
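
// launchFileUploader is the worker loop: it reads each file, verifies that
// the file name matches the blob's hash, uploads it as an SD blob or a
// content blob, and reports the result on countChan.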
func launchFileUploader(params *uploaderParams, blobStore *store.DBBackedS3Store, worker int) {
	for {
		select {
		case <-params.stopper.Ch():
			return
		case filepath, ok := <-params.pathChan:
			if !ok {
				return
			}

			blob, err := ioutil.ReadFile(filepath)
			checkErr(err)

			hash := peer.GetBlobHash(blob)
			if hash != path.Base(filepath) {
				log.Errorf("worker %d: file name does not match hash (%s != %s), skipping", worker, filepath, hash)
				select {
				case params.countChan <- errInc:
				case <-params.stopper.Ch():
				}
				continue
			}

			if isJSON(blob) {
				log.Printf("worker %d: putting SD blob %s", worker, hash)
				err := blobStore.PutSD(hash, blob)
				if err != nil {
					log.Error("PutSD error: ", err)
				}
				select {
				case params.countChan <- sdInc:
				case <-params.stopper.Ch():
				}
			} else {
				log.Printf("worker %d: putting %s", worker, hash)
				err = blobStore.Put(hash, blob)
				if err != nil {
					log.Error("Put error: ", err)
				}
				select {
				case params.countChan <- blobInc:
				case <-params.stopper.Ch():
				}
			}
		}
	}
}
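
// runCountReceiver tallies the results reported by workers and logs progress
// every 50 uploaded blobs until countChan closes or the stopper fires.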
func runCountReceiver(params *uploaderParams, startTime time.Time, totalCount int, existsCount int) {
	for {
		select {
		case <-params.stopper.Ch():
			return
		case countType, ok := <-params.countChan:
			if !ok {
				return
			}
			switch countType {
			case sdInc:
				params.sdCount++
			case blobInc:
				params.blobCount++
			case errInc:
				params.errCount++
			}
		}

		// Guard against a divide-by-zero (and a misleading "0 of N done" line)
		// before the first successful upload.
		if done := params.sdCount + params.blobCount; done > 0 && done%50 == 0 {
			log.Printf("%d of %d done (%s elapsed, %.3fs per blob)", done, totalCount-existsCount, time.Since(startTime).String(), time.Since(startTime).Seconds()/float64(done))
		}
	}
}
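
// getPaths returns the path itself if it points at a regular file, or the
// full paths of all non-directory entries if it points at a directory.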
func getPaths(path string) ([]string, error) {
	info, err := os.Stat(path)
	if err != nil {
		return nil, err
	}

	if info.Mode().IsRegular() {
		return []string{path}, nil
	}

	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	files, err := f.Readdir(-1)
	if err != nil {
		return nil, err
	}

	err = f.Close()
	if err != nil {
		return nil, err
	}

	var filenames []string
	for _, file := range files {
		if !file.IsDir() {
			filenames = append(filenames, path+"/"+file.Name())
		}
	}

	return filenames, nil
}