actual proper fix for db statement issue, added skipExists flag and the ability to upload a single blob

Alex Grintsvayg 2018-08-07 20:19:04 -04:00
parent 0e0b2aaea3
commit 3855d5c281
3 changed files with 66 additions and 60 deletions
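In short: the upload command now takes a PATH that can be either a directory of blobs or a single blob file, a new --skipExistsCheck flag skips the database lookup for already-stored blobs before uploading, and the db helpers now execute statements directly on the transaction instead of preparing, executing, and closing a statement for every query. Illustrative invocations (the paths are placeholders, not from this commit) would be "upload /mnt/blobs --workers 4" for a whole directory and "upload /mnt/blobs/<blob hash> --skipExistsCheck" for a single blob with no existence check.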

View file

@@ -41,6 +41,7 @@ var rootCmd = &cobra.Command{
 		debugLogger.SetLevel(logrus.DebugLevel)
 		if util.InSlice(verboseAll, verbose) {
+			logrus.SetLevel(logrus.DebugLevel)
 			verbose = []string{verboseDHT, verboseNodeFinder}
 		}

View file

@@ -5,20 +5,23 @@ import (
 	"io/ioutil"
 	"os"
 	"os/signal"
+	"path"
 	"sync"
 	"syscall"
 	"time"
 
+	"github.com/lbryio/lbry.go/stop"
+
 	"github.com/lbryio/reflector.go/db"
 	"github.com/lbryio/reflector.go/peer"
 	"github.com/lbryio/reflector.go/store"
 
-	"github.com/lbryio/lbry.go/stop"
-
 	log "github.com/sirupsen/logrus"
 	"github.com/spf13/cobra"
 )
 
-var workers int
+var uploadWorkers int
+var uploadSkipExistsCheck bool
 
 const (
 	sdInc = 1
@@ -27,24 +30,25 @@ const (
 )
 
 type uploaderParams struct {
 	workerWG  *sync.WaitGroup
 	counterWG *sync.WaitGroup
 	stopper   *stop.Group
-	filenameChan chan string
+	pathChan  chan string
 	countChan chan int
 	sdCount   int
 	blobCount int
 	errCount  int
 }
 
 func init() {
 	var cmd = &cobra.Command{
-		Use:   "upload DIR",
+		Use:   "upload PATH",
 		Short: "Upload blobs to S3",
 		Args:  cobra.ExactArgs(1),
 		Run:   uploadCmd,
 	}
-	cmd.PersistentFlags().IntVar(&workers, "workers", 1, "How many worker threads to run at once")
+	cmd.PersistentFlags().IntVar(&uploadWorkers, "workers", 1, "How many worker threads to run at once")
+	cmd.PersistentFlags().BoolVar(&uploadSkipExistsCheck, "skipExistsCheck", false, "Dont check if blobs exist before uploading")
 	rootCmd.AddCommand(cmd)
 }
@@ -55,28 +59,36 @@ func uploadCmd(cmd *cobra.Command, args []string) {
 	checkErr(err)
 
 	params := uploaderParams{
 		workerWG:  &sync.WaitGroup{},
 		counterWG: &sync.WaitGroup{},
-		filenameChan: make(chan string),
+		pathChan:  make(chan string),
 		countChan: make(chan int),
 		stopper:   stop.New()}
 
 	setInterrupt(params.stopper)
 
-	filenames, err := getFileNames(args[0])
+	paths, err := getPaths(args[0])
 	checkErr(err)
 
-	totalCount := len(filenames)
+	totalCount := len(paths)
+
+	hashes := make([]string, len(paths))
+	for i, p := range paths {
+		hashes[i] = path.Base(p)
+	}
 
 	log.Println("checking for existing blobs")
-	exists, err := db.HasBlobs(filenames)
-	checkErr(err)
+	exists := make(map[string]bool)
+	if !uploadSkipExistsCheck {
+		exists, err = db.HasBlobs(hashes)
+		checkErr(err)
+	}
 	existsCount := len(exists)
 
 	log.Printf("%d new blobs to upload", totalCount-existsCount)
 
-	startUploadWorkers(&params, args[0])
+	startUploadWorkers(&params)
 	params.counterWG.Add(1)
 	go func() {
 		defer params.counterWG.Done()
@@ -84,20 +96,20 @@ func uploadCmd(cmd *cobra.Command, args []string) {
 	}()
 
 Upload:
-	for _, filename := range filenames {
-		if exists[filename] {
+	for _, f := range paths {
+		if exists[path.Base(f)] {
 			continue
 		}
 
 		select {
-		case params.filenameChan <- filename:
+		case params.pathChan <- f:
 		case <-params.stopper.Ch():
 			log.Warnln("Caught interrupt, quitting at first opportunity...")
 			break Upload
 		}
 	}
 
-	close(params.filenameChan)
+	close(params.pathChan)
 	params.workerWG.Wait()
 	close(params.countChan)
 	params.counterWG.Wait()
@@ -134,8 +146,8 @@ func setInterrupt(stopper *stop.Group) {
 	}()
 }
 
-func startUploadWorkers(params *uploaderParams, dir string) {
-	for i := 0; i < workers; i++ {
+func startUploadWorkers(params *uploaderParams) {
+	for i := 0; i < uploadWorkers; i++ {
 		params.workerWG.Add(1)
 		go func(i int) {
 			defer params.workerWG.Done()
@@ -144,27 +156,27 @@ func startUploadWorkers(params *uploaderParams, dir string) {
 			}(i)
 
 			blobStore := newBlobStore()
-			launchFileUploader(params, blobStore, dir, i)
+			launchFileUploader(params, blobStore, i)
 		}(i)
 	}
 }
 
-func launchFileUploader(params *uploaderParams, blobStore *store.DBBackedS3Store, dir string, worker int) {
+func launchFileUploader(params *uploaderParams, blobStore *store.DBBackedS3Store, worker int) {
 	for {
 		select {
 		case <-params.stopper.Ch():
 			return
-		case filename, ok := <-params.filenameChan:
+		case filepath, ok := <-params.pathChan:
 			if !ok {
 				return
 			}
 
-			blob, err := ioutil.ReadFile(dir + "/" + filename)
+			blob, err := ioutil.ReadFile(filepath)
 			checkErr(err)
 
 			hash := peer.GetBlobHash(blob)
-			if hash != filename {
-				log.Errorf("worker %d: filename does not match hash (%s != %s), skipping", worker, filename, hash)
+			if hash != path.Base(filepath) {
+				log.Errorf("worker %d: file name does not match hash (%s != %s), skipping", worker, filepath, hash)
 				select {
 				case params.countChan <- errInc:
 				case <-params.stopper.Ch():
@@ -221,8 +233,17 @@ func runCountReceiver(params *uploaderParams, startTime time.Time, totalCount int) {
 	}
 }
 
-func getFileNames(dir string) ([]string, error) {
-	f, err := os.Open(dir)
+func getPaths(path string) ([]string, error) {
+	info, err := os.Stat(path)
+	if err != nil {
+		return nil, err
+	}
+
+	if info.Mode().IsRegular() {
+		return []string{path}, nil
+	}
+
+	f, err := os.Open(path)
 	if err != nil {
 		return nil, err
 	}
@@ -239,7 +260,7 @@ func getFileNames(dir string) ([]string, error) {
 	var filenames []string
 	for _, file := range files {
 		if !file.IsDir() {
-			filenames = append(filenames, file.Name())
+			filenames = append(filenames, path+"/"+file.Name())
 		}
 	}
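Two details of the new upload flow that are easy to miss in the diff above, shown in a minimal standalone Go sketch (the blob directory is a made-up example, not from this commit): path.Base reduces a full blob path to the bare hash that db.HasBlobs and the hash comparison expect, and when --skipExistsCheck is set the exists map is simply left empty, so every lookup returns the zero value false and no blob is skipped.

package main

import (
	"fmt"
	"path"
)

func main() {
	// path.Base strips the directory, leaving just the blob hash,
	// which is what the db lookup and the hash check compare against.
	p := "/mnt/blobs/6363e3ed8d32156aebbbe8c0dd077e7029c7cdaec58e08271aa35baa4250ec531129cb4f7a9ac9b7285dbb7ba375ab11"
	fmt.Println(path.Base(p))

	// With --skipExistsCheck the exists map is never filled in, and a
	// lookup in an empty map returns the zero value (false), so the
	// "if exists[...]" check skips nothing.
	exists := make(map[string]bool)
	fmt.Println(exists[path.Base(p)])
}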

View file

@@ -72,7 +72,7 @@ func addBlob(tx *sql.Tx, hash string, length int, isStored bool) error {
 		return errors.Err("length must be positive")
 	}
 
-	err := execPrepared(tx,
+	err := execTx(tx,
 		"INSERT INTO blob_ (hash, is_stored, length) VALUES (?,?,?) ON DUPLICATE KEY UPDATE is_stored = (is_stored or VALUES(is_stored))",
 		[]interface{}{hash, isStored, length},
 	)
@@ -200,7 +200,7 @@ func (s *SQL) AddSDBlob(sdHash string, sdBlobLength int, sdBlob SdBlob) error {
 	}
 
 	// insert stream
-	err = execPrepared(tx,
+	err = execTx(tx,
 		"INSERT IGNORE INTO stream (hash, sd_hash) VALUES (?,?)",
 		[]interface{}{sdBlob.StreamHash, sdHash},
 	)
@@ -220,7 +220,7 @@ func (s *SQL) AddSDBlob(sdHash string, sdBlobLength int, sdBlob SdBlob) error {
 			return err
 		}
 
-		err = execPrepared(tx,
+		err = execTx(tx,
 			"INSERT IGNORE INTO stream_blob (stream_hash, blob_hash, num) VALUES (?,?,?)",
 			[]interface{}{sdBlob.StreamHash, contentBlob.BlobHash, contentBlob.BlobNum},
 		)
@@ -264,13 +264,8 @@ func (s *SQL) GetStoredHashesInRange(ctx context.Context, start, end bits.Bitmap
 		return
 	}
 
-	//query := "SELECT hash FROM blob_ WHERE hash >= ? AND hash <= ? AND is_stored = 1"
-	//args := []interface{}{start.Hex(), end.Hex()}
-	query := "SELECT hash FROM blob_ WHERE hash IN (?,?) AND is_stored = 1"
-	args := []interface{}{
-		"6363e3ed8d32156aebbbe8c0dd077e7029c7cdaec58e08271aa35baa4250ec531129cb4f7a9ac9b7285dbb7ba375ab11",
-		"89c5c3f9794b0b24a03406e3b74361edb9ae70828e4c133512fc75db0a2d312673cdd4e30eed37892a46692d2fe439f3",
-	}
+	query := "SELECT hash FROM blob_ WHERE hash >= ? AND hash <= ? AND is_stored = 1"
+	args := []interface{}{start.Hex(), end.Hex()}
 
 	logQuery(query, args...)
@@ -352,20 +347,9 @@ func closeRows(rows *sql.Rows) {
 	}
 }
 
-func execPrepared(tx *sql.Tx, query string, args []interface{}) error {
+func execTx(tx *sql.Tx, query string, args []interface{}) error {
 	logQuery(query, args...)
-
-	stmt, err := tx.Prepare(query)
-	if err != nil {
-		return errors.Err(err)
-	}
-
-	_, err = stmt.Exec(args...)
-	if err != nil {
-		return errors.Err(err)
-	}
-
-	err = stmt.Close()
+	_, err := tx.Exec(query, args...)
 	return errors.Err(err)
 }
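On the "db statement issue" part of the commit message: the removed execPrepared helper prepared a fresh statement on the transaction for every query and then had to execute and close it, while the new execTx hands the query and args straight to tx.Exec and lets database/sql handle statement management. A minimal sketch of that pattern under assumptions not in this commit (the MySQL driver choice, DSN, and example values are placeholders):

package main

import (
	"database/sql"
	"log"

	_ "github.com/go-sql-driver/mysql" // driver assumed; the queries above use MySQL syntax
)

// execTx mirrors the shape of the helper in this commit: log the query,
// run it directly on the transaction, and return any error.
func execTx(tx *sql.Tx, query string, args []interface{}) error {
	log.Println(query, args)
	_, err := tx.Exec(query, args...)
	return err
}

func main() {
	// DSN is a placeholder.
	db, err := sql.Open("mysql", "user:pass@tcp(localhost:3306)/reflector")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	tx, err := db.Begin()
	if err != nil {
		log.Fatal(err)
	}

	// One statement per call, no Prepare/Close bookkeeping on the tx.
	err = execTx(tx, "INSERT IGNORE INTO stream (hash, sd_hash) VALUES (?,?)",
		[]interface{}{"streamhash", "sdhash"})
	if err != nil {
		_ = tx.Rollback()
		log.Fatal(err)
	}

	if err := tx.Commit(); err != nil {
		log.Fatal(err)
	}
}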