add faster file.Walk fn. meant for DiskStore.list()

This commit is contained in:
Alex Grintsvayg 2020-10-30 15:01:56 -04:00
parent 72571236ab
commit 131fed28d2
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
3 changed files with 87 additions and 0 deletions

1
go.mod
View file

@ -19,6 +19,7 @@ require (
github.com/hashicorp/serf v0.8.2
github.com/inconshreveable/go-update v0.0.0-20160112193335-8152e7eb6ccf
github.com/johntdyer/slackrus v0.0.0-20180518184837-f7aae3243a07
github.com/karrick/godirwalk v1.16.1
github.com/lbryio/chainquery v1.9.0
github.com/lbryio/lbry.go v1.1.2 // indirect
github.com/lbryio/lbry.go/v2 v2.6.1-0.20200901175808-73382bb02128

2
go.sum
View file

@ -188,6 +188,8 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1 h1:PJPDf8OUfOK1bb/NeTKd4f1QXZItOX389VN3B6qC8ro=
github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/karrick/godirwalk v1.16.1 h1:DynhcF+bztK8gooS0+NDJFrdNZjJ3gzVzC545UNA9iw=
github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
github.com/keybase/go-ps v0.0.0-20161005175911-668c8856d999/go.mod h1:hY+WOq6m2FpbvyrI93sMaypsttvaIL5nhVR92dTMUcQ=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4=

View file

@ -0,0 +1,84 @@
package speedwalk
import (
"io/ioutil"
"path/filepath"
"runtime"
"sync"
"github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/karrick/godirwalk"
"github.com/sirupsen/logrus"
)
// AllFiles recursively lists every file in every subdirectory of a given directory
// If basename is true, retun the basename of each file. Otherwise return the full path starting at startDir.
func AllFiles(startDir string, basename bool) ([]string, error) {
items, err := ioutil.ReadDir(startDir)
if err != nil {
return nil, err
}
pathChan := make(chan string)
paths := make([]string, 0, 1000)
go func() {
for {
path, ok := <-pathChan
if !ok {
return
}
paths = append(paths, path)
}
}()
wg := &sync.WaitGroup{}
maxThreads := runtime.NumCPU() - 1
goroutineLimiter := make(chan struct{}, maxThreads)
for i := 0; i < maxThreads; i++ {
goroutineLimiter <- struct{}{}
}
for _, item := range items {
if !item.IsDir() {
if basename {
pathChan <- item.Name()
} else {
pathChan <- filepath.Join(startDir, item.Name())
}
continue
}
<-goroutineLimiter
wg.Add(1)
go func(dir string) {
defer func() {
wg.Done()
goroutineLimiter <- struct{}{}
}()
err = godirwalk.Walk(filepath.Join(startDir, dir), &godirwalk.Options{
Unsorted: true, // faster this way
Callback: func(osPathname string, de *godirwalk.Dirent) error {
if de.IsRegular() {
if basename {
pathChan <- de.Name()
} else {
pathChan <- filepath.Join(startDir, osPathname)
}
}
return nil
},
})
if err != nil {
logrus.Errorf(errors.FullTrace(err))
}
}(item.Name())
}
wg.Wait()
close(pathChan)
return paths, nil
}