reflector.go/cmd/integrity.go
2021-07-24 01:03:51 +02:00

95 lines
2.5 KiB
Go

package cmd
import (
"crypto/sha512"
"encoding/hex"
"io/ioutil"
"os"
"path"
"runtime"
"sync/atomic"
"time"
"github.com/lbryio/reflector.go/meta"
"github.com/lbryio/reflector.go/store/speedwalk"
"github.com/lbryio/lbry.go/v2/extras/errors"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
var threads int
func init() {
var cmd = &cobra.Command{
Use: "check-integrity",
Short: "check blobs integrity for a given path",
Run: integrityCheckCmd,
}
cmd.Flags().StringVar(&diskStorePath, "store-path", "", "path of the store where all blobs are cached")
cmd.Flags().IntVar(&threads, "threads", runtime.NumCPU()-1, "number of concurrent threads to process blobs")
rootCmd.AddCommand(cmd)
}
func integrityCheckCmd(cmd *cobra.Command, args []string) {
log.Printf("reflector %s", meta.VersionString())
if diskStorePath == "" {
log.Fatal("store-path must be defined")
}
blobs, err := speedwalk.AllFiles(diskStorePath, true)
if err != nil {
log.Fatalf("error while reading blobs from disk %s", errors.FullTrace(err))
}
tasks := make(chan string, len(blobs))
done := make(chan bool)
processed := new(int32)
go produce(tasks, blobs)
cpus := runtime.NumCPU()
for i := 0; i < cpus-1; i++ {
go consume(i, tasks, done, len(blobs), processed)
}
<-done
}
func produce(tasks chan<- string, blobs []string) {
for _, b := range blobs {
tasks <- b
}
close(tasks)
}
func consume(worker int, tasks <-chan string, done chan<- bool, totalTasks int, processed *int32) {
start := time.Now()
for b := range tasks {
processedSoFar := atomic.AddInt32(processed, 1)
if worker == 0 {
remaining := int32(totalTasks) - processedSoFar
timePerBlob := time.Since(start).Microseconds() / int64(processedSoFar)
remainingTime := time.Duration(int64(remaining)*timePerBlob) * time.Microsecond
log.Infof("[T%d] %d/%d blobs processed so far. ETA: %s", worker, processedSoFar, totalTasks, remainingTime.String())
}
blobPath := path.Join(diskStorePath, b[:2], b)
blob, err := ioutil.ReadFile(blobPath)
if err != nil {
if os.IsNotExist(err) {
continue
}
log.Errorf("[Worker %d] Error looking up blob %s: %s", worker, b, err.Error())
continue
}
hashBytes := sha512.Sum384(blob)
readHash := hex.EncodeToString(hashBytes[:])
if readHash != b {
log.Infof("[%s] found a broken blob while reading from disk. Actual hash: %s", b, readHash)
err := os.Remove(blobPath)
if err != nil {
log.Errorf("Error while deleting broken blob %s: %s", b, err.Error())
}
}
}
done <- true
}