package cmd

import (
	"encoding/json"
	"io/ioutil"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"github.com/lbryio/lbry.go/stop"

	"github.com/lbryio/reflector.go/db"
	"github.com/lbryio/reflector.go/peer"
	"github.com/lbryio/reflector.go/store"

	log "github.com/sirupsen/logrus"
	"github.com/spf13/cobra"
)
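
// workers is the number of concurrent upload goroutines, set with the --workers flag.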
var workers int
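
// Values sent on countChan to tell the count receiver what kind of result a
// worker produced.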
const (
	sdInc   = 1
	blobInc = 2
	errInc  = 3
)
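
// uploaderParams bundles the wait groups, channels, stop group, and counters
// shared between uploadCmd, the upload workers, and the count receiver.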
type uploaderParams struct {
	workerWG  *sync.WaitGroup
	counterWG *sync.WaitGroup
	stopper   *stop.Group

	filenameChan chan string
	countChan    chan int

	sdCount   int
	blobCount int
	errCount  int
}

func init() {
	var cmd = &cobra.Command{
		Use:   "upload DIR",
		Short: "Upload blobs to S3",
		Args:  cobra.ExactArgs(1),
		Run:   uploadCmd,
	}

	cmd.PersistentFlags().IntVar(&workers, "workers", 1, "How many worker threads to run at once")

	rootCmd.AddCommand(cmd)
}
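
// uploadCmd reads blob files from the directory given on the command line,
// skips any the database already knows about, and uploads the rest to S3
// using a pool of worker goroutines.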
func uploadCmd(cmd *cobra.Command, args []string) {
	startTime := time.Now()

	db := new(db.SQL)
	err := db.Connect(globalConfig.DBConn)
	checkErr(err)

	params := uploaderParams{
		workerWG:     &sync.WaitGroup{},
		counterWG:    &sync.WaitGroup{},
		filenameChan: make(chan string),
		countChan:    make(chan int),
		stopper:      stop.New(),
	}

	setInterrupt(params.stopper)

	filenames, err := getFileNames(args[0])
	checkErr(err)
	totalCount := len(filenames)

	log.Println("checking for existing blobs")
	exists, err := db.HasBlobs(filenames)
	checkErr(err)
	existsCount := len(exists)
	log.Printf("%d new blobs to upload", totalCount-existsCount)

	startUploadWorkers(&params, args[0])

	params.counterWG.Add(1)
	go func() {
		defer params.counterWG.Done()
		runCountReceiver(&params, startTime, totalCount, existsCount)
	}()
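
	// Feed filenames to the workers, skipping blobs that already exist. The
	// labeled break lets an interrupt abort the loop from inside the select.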
Upload:
	for _, filename := range filenames {
		if exists[filename] {
			continue
		}

		select {
		case params.filenameChan <- filename:
		case <-params.stopper.Ch():
			log.Warnln("Caught interrupt, quitting at first opportunity...")
			break Upload
		}
	}
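
	// Shut down in order: close filenameChan so the workers drain and exit,
	// wait for them, then close countChan so the count receiver exits.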
	close(params.filenameChan)
	params.workerWG.Wait()
	close(params.countChan)
	params.counterWG.Wait()
	params.stopper.Stop()

	log.Println("SUMMARY")
	log.Printf("%d blobs total", totalCount)
	log.Printf("%d SD blobs uploaded", params.sdCount)
	log.Printf("%d content blobs uploaded", params.blobCount)
	log.Printf("%d blobs already stored", existsCount)
	log.Printf("%d errors encountered", params.errCount)
}
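
// isJSON reports whether data parses as JSON. SD blobs are JSON stream
// descriptors, which is how workers tell them apart from content blobs.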
func isJSON(data []byte) bool {
	var js json.RawMessage
	return json.Unmarshal(data, &js) == nil
}
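
// newBlobStore builds an S3 blob store backed by the tracking database, using
// credentials and bucket settings from the global config.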
func newBlobStore() *store.DBBackedS3Store {
	db := new(db.SQL)
	err := db.Connect(globalConfig.DBConn)
	checkErr(err)

	s3 := store.NewS3BlobStore(globalConfig.AwsID, globalConfig.AwsSecret, globalConfig.BucketRegion, globalConfig.BucketName)
	return store.NewDBBackedS3Store(s3, db)
}
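
// setInterrupt stops the stop group when the process receives SIGINT or SIGTERM.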
func setInterrupt(stopper *stop.Group) {
	interruptChan := make(chan os.Signal, 1)
	signal.Notify(interruptChan, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-interruptChan
		stopper.Stop()
	}()
}
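
// startUploadWorkers launches the configured number of upload goroutines,
// each with its own database-backed blob store connection.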
func startUploadWorkers(params *uploaderParams, dir string) {
	for i := 0; i < workers; i++ {
		params.workerWG.Add(1)
		go func(i int) {
			defer params.workerWG.Done()
			defer log.Printf("worker %d quitting", i)

			blobStore := newBlobStore()
			launchFileUploader(params, blobStore, dir, i)
		}(i)
	}
}
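
// launchFileUploader receives filenames from filenameChan until the channel
// closes or the stopper fires, verifies each file's name matches its blob
// hash, and uploads it as an SD blob (JSON) or a content blob, reporting the
// outcome on countChan.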
func launchFileUploader(params *uploaderParams, blobStore *store.DBBackedS3Store, dir string, worker int) {
	for {
		select {
		case <-params.stopper.Ch():
			return
		case filename, ok := <-params.filenameChan:
			if !ok {
				return
			}

			blob, err := ioutil.ReadFile(dir + "/" + filename)
			checkErr(err)

			hash := peer.GetBlobHash(blob)
			if hash != filename {
				log.Errorf("worker %d: filename does not match hash (%s != %s), skipping", worker, filename, hash)
				select {
				case params.countChan <- errInc:
				case <-params.stopper.Ch():
				}
				continue
			}

			if isJSON(blob) {
				log.Printf("worker %d: PUTTING SD BLOB %s", worker, hash)
				err := blobStore.PutSD(hash, blob)
				if err != nil {
					log.Error("PutSD Error: ", err)
				}
				select {
				case params.countChan <- sdInc:
				case <-params.stopper.Ch():
				}
			} else {
				log.Printf("worker %d: putting %s", worker, hash)
				err = blobStore.Put(hash, blob)
				if err != nil {
					log.Error("Put Blob Error: ", err)
				}
				select {
				case params.countChan <- blobInc:
				case <-params.stopper.Ch():
				}
			}
		}
	}
}
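
// runCountReceiver tallies results from countChan and logs progress every 50
// uploaded blobs.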
func runCountReceiver(params *uploaderParams, startTime time.Time, totalCount int, existsCount int) {
	for {
		select {
		case <-params.stopper.Ch():
			return
		case countType, ok := <-params.countChan:
			if !ok {
				return
			}
			switch countType {
			case sdInc:
				params.sdCount++
			case blobInc:
				params.blobCount++
			case errInc:
				params.errCount++
			}
		}

		// The done > 0 guard avoids dividing by zero (and logging "+Inf per
		// blob") when only errors have been counted so far.
		done := params.sdCount + params.blobCount
		if done > 0 && done%50 == 0 {
			log.Printf("%d of %d done (%s elapsed, %.3fs per blob)", done, totalCount-existsCount,
				time.Since(startTime).String(), time.Since(startTime).Seconds()/float64(done))
		}
	}
}
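
// getFileNames returns the names of all regular files (not subdirectories)
// directly inside dir.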
func getFileNames(dir string) ([]string, error) {
	f, err := os.Open(dir)
	if err != nil {
		return nil, err
	}

	files, err := f.Readdir(-1)
	if err != nil {
		f.Close() // don't leak the directory handle on the error path
		return nil, err
	}

	err = f.Close()
	if err != nil {
		return nil, err
	}

	var filenames []string
	for _, file := range files {
		if !file.IsDir() {
			filenames = append(filenames, file.Name())
		}
	}
	return filenames, nil
}