From 131fed28d27729b0eade6beb54cb7b0e16e64fdb Mon Sep 17 00:00:00 2001 From: Alex Grintsvayg Date: Fri, 30 Oct 2020 15:01:56 -0400 Subject: [PATCH] add faster file.Walk fn. meant for DiskStore.list() --- go.mod | 1 + go.sum | 2 + store/speedwalk/speedwalk.go | 84 ++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 store/speedwalk/speedwalk.go diff --git a/go.mod b/go.mod index b7de5f7..9df120d 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/hashicorp/serf v0.8.2 github.com/inconshreveable/go-update v0.0.0-20160112193335-8152e7eb6ccf github.com/johntdyer/slackrus v0.0.0-20180518184837-f7aae3243a07 + github.com/karrick/godirwalk v1.16.1 github.com/lbryio/chainquery v1.9.0 github.com/lbryio/lbry.go v1.1.2 // indirect github.com/lbryio/lbry.go/v2 v2.6.1-0.20200901175808-73382bb02128 diff --git a/go.sum b/go.sum index 01d1580..84b207a 100644 --- a/go.sum +++ b/go.sum @@ -188,6 +188,8 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1 h1:PJPDf8OUfOK1bb/NeTKd4f1QXZItOX389VN3B6qC8ro= github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= +github.com/karrick/godirwalk v1.16.1 h1:DynhcF+bztK8gooS0+NDJFrdNZjJ3gzVzC545UNA9iw= +github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/keybase/go-ps v0.0.0-20161005175911-668c8856d999/go.mod h1:hY+WOq6m2FpbvyrI93sMaypsttvaIL5nhVR92dTMUcQ= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4= diff --git a/store/speedwalk/speedwalk.go b/store/speedwalk/speedwalk.go new file mode 100644 index 0000000..1c94ed1 --- /dev/null +++ 
b/store/speedwalk/speedwalk.go
@@ -0,0 +1,109 @@
+package speedwalk
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"runtime"
+	"sync"
+
+	"github.com/lbryio/lbry.go/v2/extras/errors"
+
+	"github.com/karrick/godirwalk"
+	"github.com/sirupsen/logrus"
+)
+
+// AllFiles recursively lists every file in every subdirectory of a given directory.
+//
+// Entries directly inside startDir are reported unless they are directories. Each
+// top-level subdirectory is walked in its own goroutine, at most NumCPU-1 (minimum
+// 1) at a time, and only regular files found there are reported.
+//
+// If basename is true, return the basename of each file. Otherwise return the full
+// path starting at startDir. The order of the returned paths is not specified.
+//
+// An error is returned only when startDir itself cannot be read. Errors hit while
+// walking a subdirectory are logged rather than returned, so the result may be
+// incomplete in that case.
+func AllFiles(startDir string, basename bool) ([]string, error) {
+	items, err := ioutil.ReadDir(startDir)
+	if err != nil {
+		return nil, err
+	}
+
+	pathChan := make(chan string)
+	paths := make([]string, 0, 1000)
+
+	// A single collector goroutine is the only writer of paths while the walkers
+	// run. collectorDone is closed after its final append, so waiting on it makes
+	// that append visible before paths is returned.
+	collectorDone := make(chan struct{})
+	go func() {
+		defer close(collectorDone)
+		for path := range pathChan {
+			paths = append(paths, path)
+		}
+	}()
+
+	// Leave one CPU for the rest of the process, but always allow at least one
+	// walker: with a limit of zero the limiter would hold no tokens and the first
+	// subdirectory would block forever.
+	maxThreads := runtime.NumCPU() - 1
+	if maxThreads < 1 {
+		maxThreads = 1
+	}
+	goroutineLimiter := make(chan struct{}, maxThreads)
+	for i := 0; i < maxThreads; i++ {
+		goroutineLimiter <- struct{}{}
+	}
+
+	wg := &sync.WaitGroup{}
+	for _, item := range items {
+		if !item.IsDir() {
+			if basename {
+				pathChan <- item.Name()
+			} else {
+				pathChan <- filepath.Join(startDir, item.Name())
+			}
+			continue
+		}
+
+		<-goroutineLimiter // take a token; blocks while maxThreads walkers are running
+		wg.Add(1)
+
+		go func(dir string) {
+			defer func() {
+				wg.Done()
+				goroutineLimiter <- struct{}{}
+			}()
+
+			// walkErr is local to this goroutine; assigning to the outer err from
+			// several walkers at once would be a data race.
+			walkErr := godirwalk.Walk(filepath.Join(startDir, dir), &godirwalk.Options{
+				Unsorted: true, // faster this way
+				Callback: func(osPathname string, de *godirwalk.Dirent) error {
+					if de.IsRegular() {
+						if basename {
+							pathChan <- de.Name()
+						} else {
+							// osPathname already begins with the root passed to
+							// Walk, which itself begins with startDir.
+							pathChan <- osPathname
+						}
+					}
+					return nil
+				},
+			})
+			if walkErr != nil {
+				// Error, not Errorf: the trace is a message, not a format string.
+				logrus.Error(errors.FullTrace(walkErr))
+			}
+		}(item.Name())
+	}
+
+	wg.Wait()
+
+	// All senders are done; closing ends the collector's range loop.
+	close(pathChan)
+	<-collectorDone
+	return paths, nil
+}