Actual proper fix for the db statement issue; add a skipExistsCheck flag and the ability to upload a single blob
parent 0e0b2aaea3
commit 3855d5c281

3 changed files with 66 additions and 60 deletions
cmd/root.go

@@ -41,6 +41,7 @@ var rootCmd = &cobra.Command{
 		debugLogger.SetLevel(logrus.DebugLevel)

 		if util.InSlice(verboseAll, verbose) {
+			logrus.SetLevel(logrus.DebugLevel)
 			verbose = []string{verboseDHT, verboseNodeFinder}
 		}

cmd/upload.go

@@ -5,20 +5,23 @@ import (
 	"io/ioutil"
 	"os"
 	"os/signal"
+	"path"
 	"sync"
 	"syscall"
 	"time"

-	"github.com/lbryio/lbry.go/stop"
 	"github.com/lbryio/reflector.go/db"
 	"github.com/lbryio/reflector.go/peer"
 	"github.com/lbryio/reflector.go/store"

+	"github.com/lbryio/lbry.go/stop"
+
 	log "github.com/sirupsen/logrus"
 	"github.com/spf13/cobra"
 )

-var workers int
+var uploadWorkers int
+var uploadSkipExistsCheck bool

 const (
 	sdInc = 1
@@ -30,7 +33,7 @@ type uploaderParams struct {
 	workerWG  *sync.WaitGroup
 	counterWG *sync.WaitGroup
 	stopper   *stop.Group
-	filenameChan chan string
+	pathChan  chan string
 	countChan chan int
 	sdCount   int
 	blobCount int
@@ -39,12 +42,13 @@ type uploaderParams struct {

 func init() {
 	var cmd = &cobra.Command{
-		Use:   "upload DIR",
+		Use:   "upload PATH",
 		Short: "Upload blobs to S3",
 		Args:  cobra.ExactArgs(1),
 		Run:   uploadCmd,
 	}
-	cmd.PersistentFlags().IntVar(&workers, "workers", 1, "How many worker threads to run at once")
+	cmd.PersistentFlags().IntVar(&uploadWorkers, "workers", 1, "How many worker threads to run at once")
+	cmd.PersistentFlags().BoolVar(&uploadSkipExistsCheck, "skipExistsCheck", false, "Dont check if blobs exist before uploading")
 	rootCmd.AddCommand(cmd)
 }

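With the renamed Use line and the new flag, a one-off upload of a single blob that skips the existence check would look something like `<binary> upload /path/to/blob --skipExistsCheck` (hypothetical invocation; only the flag names above come from the diff).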
@@ -57,26 +61,34 @@ func uploadCmd(cmd *cobra.Command, args []string) {
 	params := uploaderParams{
 		workerWG:  &sync.WaitGroup{},
 		counterWG: &sync.WaitGroup{},
-		filenameChan: make(chan string),
+		pathChan:  make(chan string),
 		countChan: make(chan int),
 		stopper:   stop.New()}

 	setInterrupt(params.stopper)

-	filenames, err := getFileNames(args[0])
+	paths, err := getPaths(args[0])
 	checkErr(err)

-	totalCount := len(filenames)
+	totalCount := len(paths)

+	hashes := make([]string, len(paths))
+	for i, p := range paths {
+		hashes[i] = path.Base(p)
+	}
+
 	log.Println("checking for existing blobs")

-	exists, err := db.HasBlobs(filenames)
-	checkErr(err)
+	exists := make(map[string]bool)
+	if !uploadSkipExistsCheck {
+		exists, err = db.HasBlobs(hashes)
+		checkErr(err)
+	}
 	existsCount := len(exists)

 	log.Printf("%d new blobs to upload", totalCount-existsCount)

-	startUploadWorkers(&params, args[0])
+	startUploadWorkers(&params)
 	params.counterWG.Add(1)
 	go func() {
 		defer params.counterWG.Done()
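Because blob files are named after their hash, the hash side of the exists lookup is just the final path element; and since exists starts as an empty map, setting --skipExistsCheck means no blob is ever filtered out. A minimal standalone sketch of that filtering (the names here are illustrative, not from the diff):

	package main

	import (
		"fmt"
		"path"
	)

	func main() {
		paths := []string{"/blobs/aaa", "/blobs/bbb"}
		exists := map[string]bool{"aaa": true} // in the real code this comes from db.HasBlobs

		for _, p := range paths {
			if exists[path.Base(p)] {
				continue // already stored, skip the upload
			}
			fmt.Println("would upload", p)
		}
	}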
@@ -84,20 +96,20 @@ func uploadCmd(cmd *cobra.Command, args []string) {
 	}()

 Upload:
-	for _, filename := range filenames {
-		if exists[filename] {
+	for _, f := range paths {
+		if exists[path.Base(f)] {
 			continue
 		}

 		select {
-		case params.filenameChan <- filename:
+		case params.pathChan <- f:
 		case <-params.stopper.Ch():
 			log.Warnln("Caught interrupt, quitting at first opportunity...")
 			break Upload
 		}
 	}

-	close(params.filenameChan)
+	close(params.pathChan)
 	params.workerWG.Wait()
 	close(params.countChan)
 	params.counterWG.Wait()
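The labeled break matters in that loop: a bare break inside the select would only exit the select statement, not the surrounding range loop. A self-contained illustration of the pattern (hypothetical channels, not code from this commit):

	package main

	import "fmt"

	func main() {
		work := make(chan int)
		stop := make(chan struct{})
		close(stop) // simulate an interrupt arriving immediately

	Loop:
		for {
			select {
			case n := <-work:
				fmt.Println("got", n)
			case <-stop:
				fmt.Println("stopping early")
				break Loop // a bare break here would only exit the select
			}
		}
	}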
@@ -134,8 +146,8 @@ func setInterrupt(stopper *stop.Group) {
 	}()
 }

-func startUploadWorkers(params *uploaderParams, dir string) {
-	for i := 0; i < workers; i++ {
+func startUploadWorkers(params *uploaderParams) {
+	for i := 0; i < uploadWorkers; i++ {
 		params.workerWG.Add(1)
 		go func(i int) {
 			defer params.workerWG.Done()
@@ -144,27 +156,27 @@ func startUploadWorkers(params *uploaderParams, dir string) {
 			}(i)

 			blobStore := newBlobStore()
-			launchFileUploader(params, blobStore, dir, i)
+			launchFileUploader(params, blobStore, i)
 		}(i)
 	}
 }

-func launchFileUploader(params *uploaderParams, blobStore *store.DBBackedS3Store, dir string, worker int) {
+func launchFileUploader(params *uploaderParams, blobStore *store.DBBackedS3Store, worker int) {
 	for {
 		select {
 		case <-params.stopper.Ch():
 			return
-		case filename, ok := <-params.filenameChan:
+		case filepath, ok := <-params.pathChan:
 			if !ok {
 				return
 			}

-			blob, err := ioutil.ReadFile(dir + "/" + filename)
+			blob, err := ioutil.ReadFile(filepath)
 			checkErr(err)

 			hash := peer.GetBlobHash(blob)
-			if hash != filename {
-				log.Errorf("worker %d: filename does not match hash (%s != %s), skipping", worker, filename, hash)
+			if hash != path.Base(filepath) {
+				log.Errorf("worker %d: file name does not match hash (%s != %s), skipping", worker, filepath, hash)
 				select {
 				case params.countChan <- errInc:
 				case <-params.stopper.Ch():
@@ -221,8 +233,17 @@ func runCountReceiver(params *uploaderParams, startTime time.Time, totalCount in
 	}
 }

-func getFileNames(dir string) ([]string, error) {
-	f, err := os.Open(dir)
+func getPaths(path string) ([]string, error) {
+	info, err := os.Stat(path)
+	if err != nil {
+		return nil, err
+	}
+
+	if info.Mode().IsRegular() {
+		return []string{path}, nil
+	}
+
+	f, err := os.Open(path)
 	if err != nil {
 		return nil, err
 	}
@@ -239,7 +260,7 @@ func getFileNames(dir string) ([]string, error) {
 	var filenames []string
 	for _, file := range files {
 		if !file.IsDir() {
-			filenames = append(filenames, file.Name())
+			filenames = append(filenames, path+"/"+file.Name())
 		}
 	}
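Pieced together, the new getPaths treats a regular file as a one-element listing, which is what enables single-blob uploads. A sketch of the full function as it likely reads after this commit (it would sit in cmd/upload.go with the imports shown earlier; the directory-reading middle is outside the hunks above, so the Readdir details are assumptions):

	// getPaths returns the path itself when it points at a regular file,
	// otherwise the full paths of the regular files directly inside the directory.
	func getPaths(path string) ([]string, error) {
		info, err := os.Stat(path)
		if err != nil {
			return nil, err
		}

		if info.Mode().IsRegular() {
			return []string{path}, nil // single blob: upload just this file
		}

		f, err := os.Open(path)
		if err != nil {
			return nil, err
		}
		defer f.Close()

		files, err := f.Readdir(-1) // assumed; this part is not shown in the diff
		if err != nil {
			return nil, err
		}

		var filenames []string
		for _, file := range files {
			if !file.IsDir() {
				filenames = append(filenames, path+"/"+file.Name())
			}
		}
		return filenames, nil
	}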
db/db.go
@@ -72,7 +72,7 @@ func addBlob(tx *sql.Tx, hash string, length int, isStored bool) error {
 		return errors.Err("length must be positive")
 	}

-	err := execPrepared(tx,
+	err := execTx(tx,
 		"INSERT INTO blob_ (hash, is_stored, length) VALUES (?,?,?) ON DUPLICATE KEY UPDATE is_stored = (is_stored or VALUES(is_stored))",
 		[]interface{}{hash, isStored, length},
 	)
@@ -200,7 +200,7 @@ func (s *SQL) AddSDBlob(sdHash string, sdBlobLength int, sdBlob SdBlob) error {
 	}

 	// insert stream
-	err = execPrepared(tx,
+	err = execTx(tx,
 		"INSERT IGNORE INTO stream (hash, sd_hash) VALUES (?,?)",
 		[]interface{}{sdBlob.StreamHash, sdHash},
 	)
@@ -220,7 +220,7 @@ func (s *SQL) AddSDBlob(sdHash string, sdBlobLength int, sdBlob SdBlob) error {
 			return err
 		}

-		err = execPrepared(tx,
+		err = execTx(tx,
 			"INSERT IGNORE INTO stream_blob (stream_hash, blob_hash, num) VALUES (?,?,?)",
 			[]interface{}{sdBlob.StreamHash, contentBlob.BlobHash, contentBlob.BlobNum},
 		)
@@ -264,13 +264,8 @@ func (s *SQL) GetStoredHashesInRange(ctx context.Context, start, end bits.Bitmap
 		return
 	}

-	//query := "SELECT hash FROM blob_ WHERE hash >= ? AND hash <= ? AND is_stored = 1"
-	//args := []interface{}{start.Hex(), end.Hex()}
-	query := "SELECT hash FROM blob_ WHERE hash IN (?,?) AND is_stored = 1"
-	args := []interface{}{
-		"6363e3ed8d32156aebbbe8c0dd077e7029c7cdaec58e08271aa35baa4250ec531129cb4f7a9ac9b7285dbb7ba375ab11",
-		"89c5c3f9794b0b24a03406e3b74361edb9ae70828e4c133512fc75db0a2d312673cdd4e30eed37892a46692d2fe439f3",
-	}
+	query := "SELECT hash FROM blob_ WHERE hash >= ? AND hash <= ? AND is_stored = 1"
+	args := []interface{}{start.Hex(), end.Hex()}

 	logQuery(query, args...)

@@ -352,20 +347,9 @@ func closeRows(rows *sql.Rows) {
 	}
 }

-func execPrepared(tx *sql.Tx, query string, args []interface{}) error {
+func execTx(tx *sql.Tx, query string, args []interface{}) error {
 	logQuery(query, args...)
-
-	stmt, err := tx.Prepare(query)
-	if err != nil {
-		return errors.Err(err)
-	}
-
-	_, err = stmt.Exec(args...)
-	if err != nil {
-		return errors.Err(err)
-	}
-
-	err = stmt.Close()
+	_, err := tx.Exec(query, args...)
 	return errors.Err(err)
 }

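This hunk is the "actual proper fix" from the commit message: the old execPrepared created a new prepared statement for every query and, when stmt.Exec failed, returned without ever closing it, while tx.Exec lets database/sql prepare, execute, and clean up the statement in a single call. The new helper in full, assembled from the hunk above:

	func execTx(tx *sql.Tx, query string, args []interface{}) error {
		logQuery(query, args...)
		_, err := tx.Exec(query, args...)
		return errors.Err(err)
	}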