From 363f33dadf4598cf46838c0a68ac4ed98dacd072 Mon Sep 17 00:00:00 2001 From: Niko Storni Date: Thu, 14 May 2020 03:09:56 +0200 Subject: [PATCH] make it fast! --- main.go | 115 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 39 deletions(-) diff --git a/main.go b/main.go index 46395d6..625b895 100644 --- a/main.go +++ b/main.go @@ -1,9 +1,14 @@ package main import ( + "fmt" + "io/ioutil" "os" + "path/filepath" + "runtime" "sort" "strconv" + "sync" "sync/atomic" "syscall" "time" @@ -38,7 +43,7 @@ func main() { if _, err := os.Stat(blobsDir); os.IsNotExist(err) { panic(errors.Err("directory doesn't exist: %s", blobsDir)) } - used, err := getUsedSpace() + used, _, err := getUsedSpace() if err != nil { logrus.Errorln(err.Error()) return @@ -51,29 +56,29 @@ func main() { logrus.Errorln(err.Error()) return } - used, err := getUsedSpace() + usage, _, err := getUsedSpace() if err != nil { logrus.Errorln(err.Error()) return } - logrus.Infof("disk usage: %.2f%%\n", used*100) + logrus.Infof("disk usage: %.2f%%\n", usage*100) logrus.Infoln("Done cleaning up") } } // GetUsedSpace returns a value between 0 and 1, with 0 being completely empty and 1 being full, for the disk that holds the provided path -func getUsedSpace() (float64, error) { +func getUsedSpace() (usage float64, used uint64, err error) { var stat syscall.Statfs_t - err := syscall.Statfs(blobsDir, &stat) + err = syscall.Statfs(blobsDir, &stat) if err != nil { - return 0, err + return 0, 0, err } // Available blocks * size per block = available space in bytes all := stat.Blocks * uint64(stat.Bsize) free := stat.Bfree * uint64(stat.Bsize) - used := all - free + used = all - free - return float64(used) / float64(all), nil + return float64(used) / float64(all), used, nil } func WipeOldestBlobs() (err error) { @@ -83,47 +88,79 @@ func WipeOldestBlobs() (err error) { } datedFiles := make([]datedFile, 0, 5000) checkedBlobs := int32(0) - err = godirwalk.Walk(blobsDir, &godirwalk.Options{ - Callback: func(osPathname string, de *godirwalk.Dirent) error { - if !de.IsDir() { - i := atomic.AddInt32(&checkedBlobs, 1) - if i%100 == 0 { - logrus.Infof("checked %d blobs", i) - } - if de.IsRegular() { - stat, err := os.Stat(osPathname) - if err != nil { - return err - } - datedFiles = append(datedFiles, datedFile{ - Atime: atime.Atime(stat), - FullPath: osPathname, - }) - } - } - return nil - }, - Unsorted: true, // (optional) set true for faster yet non-deterministic enumeration (see godoc) - }) + dirs, err := ioutil.ReadDir(blobsDir) if err != nil { return err } - + lock := &sync.Mutex{} + wg := &sync.WaitGroup{} + startTime := time.Now() + usage, used, err := getUsedSpace() + if err != nil { + logrus.Errorln(err.Error()) + return + } + for i, dir := range dirs { + if (i+1)%(runtime.NumCPU()-1) == 0 { + wg.Wait() + } + wg.Add(1) + go func(dir string, lock *sync.Mutex, wg *sync.WaitGroup) { + defer wg.Done() + err = godirwalk.Walk(filepath.Join(blobsDir, dir), &godirwalk.Options{ + Callback: func(osPathname string, de *godirwalk.Dirent) error { + if !de.IsDir() { + i := atomic.AddInt32(&checkedBlobs, 1) + if i%100 == 0 { + logrus.Infof("checked %d blobs", i) + } + if i%100 == 0 { + processedSize := float64(i * 2) + speed := processedSize / time.Since(startTime).Seconds() + eta, err := time.ParseDuration(fmt.Sprintf("%ds", int(float64(used)/1024./1024./speed))) + if err != nil { + return err + } + logrus.Infof("%.2f GB checked (speed: %.2f MB/s) ETA: %s", processedSize/1024.0, speed, eta.String()) + } + if de.IsRegular() { + stat, err := os.Stat(osPathname) + if err != nil { + return err + } + lock.Lock() + datedFiles = append(datedFiles, datedFile{ + Atime: atime.Atime(stat), + FullPath: osPathname, + }) + lock.Unlock() + } + } + return nil + }, + Unsorted: true, // (optional) set true for faster yet non-deterministic enumeration (see godoc) + }) + if err != nil { + logrus.Errorf(errors.FullTrace(err)) + } + }(dir.Name(), lock, wg) + } + wg.Wait() sort.Slice(datedFiles, func(i, j int) bool { return datedFiles[i].Atime.Before(datedFiles[j].Atime) }) - //delete the first 5000 blobs + + spaceToFreeUp := ((float64(used) / usage) * 0.1) / 1024. / 1024. + blobsToDelete := int(spaceToFreeUp / 2) + logrus.Infof("spaceToFreeUp: %.2f MB - %d blobs", spaceToFreeUp, blobsToDelete) for i, df := range datedFiles { - if i >= 5000 { + if i >= blobsToDelete { break } - if i%100 == 0 { - logrus.Infof("[%s] would delete %s", df.Atime.String(), df.FullPath) + err = os.Remove(df.FullPath) + if err != nil { + return err } - //err = os.Remove(df.FullPath) - //if err != nil { - // return err - //} } return nil }