optimize batch insertions
reduce touch time to every 6 hours
This commit is contained in:
parent
6f95b3395f
commit
74925ebba2
1 changed files with 49 additions and 16 deletions
65
db/db.go
65
db/db.go
|
@ -3,18 +3,22 @@ package db
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/lbryio/lbry.go/v2/dht/bits"
|
"github.com/lbryio/lbry.go/v2/dht/bits"
|
||||||
"github.com/lbryio/lbry.go/v2/extras/errors"
|
"github.com/lbryio/lbry.go/v2/extras/errors"
|
||||||
qt "github.com/lbryio/lbry.go/v2/extras/query"
|
qt "github.com/lbryio/lbry.go/v2/extras/query"
|
||||||
|
"github.com/lbryio/lbry.go/v2/extras/stop"
|
||||||
"github.com/lbryio/lbry.go/v2/stream"
|
"github.com/lbryio/lbry.go/v2/stream"
|
||||||
|
|
||||||
"github.com/go-sql-driver/mysql"
|
"github.com/go-sql-driver/mysql"
|
||||||
_ "github.com/go-sql-driver/mysql" // blank import for db driver ensures its imported even if its not used
|
_ "github.com/go-sql-driver/mysql" // blank import for db driver ensures its imported even if its not used
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"github.com/volatiletech/null"
|
"github.com/volatiletech/null"
|
||||||
|
"go.uber.org/atomic"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SdBlob is a special blob that contains information on the rest of the blobs in the stream
|
// SdBlob is a special blob that contains information on the rest of the blobs in the stream
|
||||||
|
@ -104,33 +108,62 @@ func (s *SQL) AddBlobs(hash []string) error {
|
||||||
}
|
}
|
||||||
// Split the slice into batches of 20 items.
|
// Split the slice into batches of 20 items.
|
||||||
batch := 10000
|
batch := 10000
|
||||||
|
totalBlobs := int64(len(hash))
|
||||||
for i := 0; i < len(hash); i += batch {
|
work := make(chan []string, 1000)
|
||||||
j := i + batch
|
stopper := stop.New()
|
||||||
if j > len(hash) {
|
var totalInserted atomic.Int64
|
||||||
j = len(hash)
|
start := time.Now()
|
||||||
}
|
go func() {
|
||||||
err := s.insertBlobs(hash[i:j]) // Process the batch.
|
for i := 0; i < len(hash); i += batch {
|
||||||
if err != nil {
|
j := i + batch
|
||||||
log.Errorf("error while inserting batch: %s", errors.FullTrace(err))
|
if j > len(hash) {
|
||||||
|
j = len(hash)
|
||||||
|
}
|
||||||
|
work <- hash[i:j]
|
||||||
}
|
}
|
||||||
|
log.Infof("done loading %d hashes in the work queue", len(hash))
|
||||||
|
close(work)
|
||||||
|
}()
|
||||||
|
for i := 0; i < runtime.NumCPU(); i++ {
|
||||||
|
stopper.Add(1)
|
||||||
|
go func(worker int) {
|
||||||
|
log.Infof("starting worker %d", worker)
|
||||||
|
defer stopper.Done()
|
||||||
|
for hashes := range work {
|
||||||
|
inserted := totalInserted.Load()
|
||||||
|
remaining := totalBlobs - inserted
|
||||||
|
if inserted > 0 {
|
||||||
|
timePerBlob := time.Since(start).Microseconds() / inserted
|
||||||
|
remainingTime := time.Duration(remaining*timePerBlob) * time.Microsecond
|
||||||
|
log.Infof("[T%d] processing batch of %d items. ETA: %s", worker, len(hashes), remainingTime.String())
|
||||||
|
}
|
||||||
|
err := s.insertBlobs(hashes) // Process the batch.
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error while inserting batch: %s", errors.FullTrace(err))
|
||||||
|
}
|
||||||
|
totalInserted.Add(int64(len(hashes)))
|
||||||
|
}
|
||||||
|
}(i)
|
||||||
}
|
}
|
||||||
|
stopper.Wait()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SQL) insertBlobs(hashes []string) error {
|
func (s *SQL) insertBlobs(hashes []string) error {
|
||||||
var (
|
var (
|
||||||
q string
|
q string
|
||||||
args []interface{}
|
//args []interface{}
|
||||||
)
|
)
|
||||||
dayAgo := time.Now().AddDate(0, 0, -1)
|
dayAgo := time.Now().AddDate(0, 0, -1).Format("2006-01-02 15:04:05")
|
||||||
q = "insert into blob_ (hash, is_stored, length, last_accessed_at) values "
|
q = "insert into blob_ (hash, is_stored, length, last_accessed_at) values "
|
||||||
for _, hash := range hashes {
|
for _, hash := range hashes {
|
||||||
q += "(?,?,?,?),"
|
// prepared statements slow everything down by a lot due to reflection
|
||||||
args = append(args, hash, true, stream.MaxBlobSize, dayAgo)
|
// for this specific instance we'll go ahead and hardcode the query to make it go faster
|
||||||
|
q += fmt.Sprintf("('%s',1,%d,'%s'),", hash, stream.MaxBlobSize, dayAgo)
|
||||||
|
//args = append(args, hash, true, stream.MaxBlobSize, dayAgo)
|
||||||
}
|
}
|
||||||
q = strings.TrimSuffix(q, ",")
|
q = strings.TrimSuffix(q, ",")
|
||||||
_, err := s.exec(q, args...)
|
_, err := s.exec(q)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -292,7 +325,7 @@ func (s *SQL) hasBlobs(hashes []string) (map[string]bool, []uint64, error) {
|
||||||
var needsTouch []uint64
|
var needsTouch []uint64
|
||||||
exists := make(map[string]bool)
|
exists := make(map[string]bool)
|
||||||
|
|
||||||
touchDeadline := time.Now().AddDate(0, 0, -1) // touch blob if last accessed before this time
|
touchDeadline := time.Now().Add(-6 * time.Hour) // touch blob if last accessed before this time
|
||||||
maxBatchSize := 10000
|
maxBatchSize := 10000
|
||||||
doneIndex := 0
|
doneIndex := 0
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue