separate singleflight cache wrapper, component names for cache metrics

2020-10-29 12:39:53 -04:00 · 2020-10-29 12:39:53 -04:00 · 560e180e36
commit 560e180e36
parent 070c378dfd
8 changed files with 126 additions and 59 deletions
--- a/cmd/getstream.go
+++ b/cmd/getstream.go
@ -29,6 +29,7 @@ func getStreamCmd(cmd *cobra.Command, args []string) {
 	sdHash := args[1]

 	s := store.NewCachingStore(
+		"getstream",
 		peer.NewStore(peer.StoreOpts{Address: addr}),
 		store.NewDiskStore("/tmp/lbry_downloaded_blobs", 2),
 	)
--- a/cmd/reflector.go
+++ b/cmd/reflector.go
@ -153,13 +153,19 @@ func wrapWithCache(s store.BlobStore) store.BlobStore {
 		if err != nil {
 			log.Fatal(err)
 		}
-		wrapped = store.NewCachingStore(wrapped,
-			store.NewLRUStore(store.NewDiskStore(diskCachePath, 2), diskCacheMaxSize))
+		wrapped = store.NewCachingStore(
+			"reflector",
+			wrapped,
+			store.NewLRUStore("peer_server", store.NewDiskStore(diskCachePath, 2), diskCacheMaxSize),
+		)
 	}

 	if reflectorCmdMemCache > 0 {
-		wrapped = store.NewCachingStore(wrapped,
-			store.NewLRUStore(store.NewMemStore(), reflectorCmdMemCache))
+		wrapped = store.NewCachingStore(
+			"reflector",
+			wrapped,
+			store.NewLRUStore("peer_server", store.NewMemStore(), reflectorCmdMemCache),
+		)
 	}

 	return wrapped
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@ -12,6 +12,7 @@ import (

 	ee "github.com/lbryio/lbry.go/v2/extras/errors"
 	"github.com/lbryio/lbry.go/v2/extras/stop"
+
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
@ -67,6 +68,7 @@ const (
 	DirectionDownload = "download" // from reflector

 	LabelCacheType = "cache_type"
+	LabelComponent = "component"
 	LabelSource    = "source"

 	errConnReset         = "conn_reset"
@ -116,37 +118,42 @@ var (
 		Help:      "Total number of blobs downloaded from reflector through QUIC protocol",
 	})

-	CacheHitCount = promauto.NewCounter(prometheus.CounterOpts{
+	CacheHitCount = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: ns,
 		Subsystem: subsystemCache,
 		Name:      "hit_total",
 		Help:      "Total number of blobs retrieved from the cache storage",
-	})
-	CacheMissCount = promauto.NewCounter(prometheus.CounterOpts{
+	}, []string{LabelCacheType, LabelComponent})
+	CacheMissCount = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: ns,
 		Subsystem: subsystemCache,
 		Name:      "miss_total",
 		Help:      "Total number of blobs retrieved from origin rather than cache storage",
-	})
-	CacheOriginRequestsCount = promauto.NewGauge(prometheus.GaugeOpts{
+	}, []string{LabelCacheType, LabelComponent})
+	CacheOriginRequestsCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: ns,
 		Subsystem: subsystemCache,
 		Name:      "origin_requests_total",
 		Help:      "How many Get requests are in flight from the cache to the origin",
-	})
+	}, []string{LabelCacheType, LabelComponent})
 	// during thundering-herd situations, the metric below should be a lot smaller than the metric above
-	CacheWaitingRequestsCount = promauto.NewGauge(prometheus.GaugeOpts{
+	CacheWaitingRequestsCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: ns,
 		Subsystem: subsystemCache,
 		Name:      "waiting_requests_total",
 		Help:      "How many cache requests are waiting for an in-flight origin request",
-	})
+	}, []string{LabelCacheType, LabelComponent})
 	CacheLRUEvictCount = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: ns,
 		Subsystem: subsystemCache,
 		Name:      "evict_total",
 		Help:      "Count of blobs evicted from cache",
-	}, []string{LabelCacheType})
+	}, []string{LabelCacheType, LabelComponent})
+	CacheRetrievalSpeed = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: ns,
+		Name:      "speed_mbps",
+		Help:      "Speed of blob retrieval from cache or from origin",
+	}, []string{LabelCacheType, LabelComponent, LabelSource})

 	BlobUploadCount = promauto.NewCounter(prometheus.CounterOpts{
 		Namespace: ns,
@ -159,12 +166,6 @@ var (
 		Help:      "Total number of SD blobs (and therefore streams) uploaded to reflector",
 	})

-	RetrieverSpeed = promauto.NewGaugeVec(prometheus.GaugeOpts{
-		Namespace: ns,
-		Name:      "speed_mbps",
-		Help:      "Speed of blob retrieval",
-	}, []string{LabelSource})
-
 	MtrInBytesTcp = promauto.NewCounter(prometheus.CounterOpts{
 		Namespace: ns,
 		Name:      "tcp_in_bytes",
@ -202,6 +203,13 @@ var (
 	})
 )

+func CacheLabels(name, component string) prometheus.Labels {
+	return prometheus.Labels{
+		LabelCacheType: name,
+		LabelComponent: component,
+	}
+}
+
 func TrackError(direction string, e error) (shouldLog bool) { // shouldLog is a hack, but whatever
 	if e == nil {
 		return
--- a/store/caching.go
+++ b/store/caching.go
@ -7,8 +7,6 @@ import (
 	"github.com/lbryio/lbry.go/v2/stream"

 	"github.com/lbryio/reflector.go/internal/metrics"
-
-	"golang.org/x/sync/singleflight"
 )

 // CachingStore combines two stores, typically a local and a remote store, to improve performance.
@ -16,13 +14,16 @@ import (
 // are retrieved from the origin and cached. Puts are cached and also forwarded to the origin.
 type CachingStore struct {
 	origin, cache BlobStore
-
-	sf *singleflight.Group
+	component     string
 }

 // NewCachingStore makes a new caching disk store and returns a pointer to it.
-func NewCachingStore(origin, cache BlobStore) *CachingStore {
-	return &CachingStore{origin: origin, cache: cache, sf: new(singleflight.Group)}
+func NewCachingStore(component string, origin, cache BlobStore) *CachingStore {
+	return &CachingStore{
+		component: component,
+		origin:    WithSingleFlight(component, origin),
+		cache:     cache,
+	}
 }

 const nameCaching = "caching"
@ -45,41 +46,25 @@ func (c *CachingStore) Get(hash string) (stream.Blob, error) {
 	start := time.Now()
 	blob, err := c.cache.Get(hash)
 	if err == nil || !errors.Is(err, ErrBlobNotFound) {
-		metrics.CacheHitCount.Inc()
+		metrics.CacheHitCount.With(metrics.CacheLabels(c.cache.Name(), c.component)).Inc()
 		rate := float64(len(blob)) / 1024 / 1024 / time.Since(start).Seconds()
-		metrics.RetrieverSpeed.With(map[string]string{metrics.LabelSource: "cache"}).Set(rate)
+		metrics.CacheRetrievalSpeed.With(map[string]string{
+			metrics.LabelCacheType: c.cache.Name(),
+			metrics.LabelComponent: c.component,
+			metrics.LabelSource:    "cache",
+		}).Set(rate)
 		return blob, err
 	}

-	metrics.CacheMissCount.Inc()
-	return c.getFromOrigin(hash)
-}
+	metrics.CacheMissCount.With(metrics.CacheLabels(c.cache.Name(), c.component)).Inc()

-// getFromOrigin ensures that only one Get per hash is sent to the origin at a time,
-// thereby protecting against https://en.wikipedia.org/wiki/Thundering_herd_problem
-func (c *CachingStore) getFromOrigin(hash string) (stream.Blob, error) {
-	metrics.CacheWaitingRequestsCount.Inc()
-	defer metrics.CacheWaitingRequestsCount.Dec()
-	originBlob, err, _ := c.sf.Do(hash, func() (interface{}, error) {
-		metrics.CacheOriginRequestsCount.Inc()
-		defer metrics.CacheOriginRequestsCount.Dec()
-
-		start := time.Now()
-		blob, err := c.origin.Get(hash)
+	blob, err = c.origin.Get(hash)
 	if err != nil {
 		return nil, err
 	}

-		rate := float64(len(blob)) / 1024 / 1024 / time.Since(start).Seconds()
-		metrics.RetrieverSpeed.With(map[string]string{metrics.LabelSource: "origin"}).Set(rate)
-
 	err = c.cache.Put(hash, blob)
 	return blob, err
-	})
-	if err != nil {
-		return nil, err
-	}
-	return originBlob.(stream.Blob), nil
 }

 // Put stores the blob in the origin and the cache
--- a/store/caching_test.go
+++ b/store/caching_test.go
@ -12,7 +12,7 @@ import (
 func TestCachingStore_Put(t *testing.T) {
 	origin := NewMemStore()
 	cache := NewMemStore()
-	s := NewCachingStore(origin, cache)
+	s := NewCachingStore("test", origin, cache)

 	b := []byte("this is a blob of stuff")
 	hash := "hash"
@ -42,7 +42,7 @@ func TestCachingStore_Put(t *testing.T) {
 func TestCachingStore_CacheMiss(t *testing.T) {
 	origin := NewMemStore()
 	cache := NewMemStore()
-	s := NewCachingStore(origin, cache)
+	s := NewCachingStore("test", origin, cache)

 	b := []byte("this is a blob of stuff")
 	hash := "hash"
@ -80,7 +80,7 @@ func TestCachingStore_ThunderingHerd(t *testing.T) {
 	storeDelay := 100 * time.Millisecond
 	origin := NewSlowBlobStore(storeDelay)
 	cache := NewMemStore()
-	s := NewCachingStore(origin, cache)
+	s := NewCachingStore("test", origin, cache)

 	b := []byte("this is a blob of stuff")
 	hash := "hash"
--- a/store/lru.go
+++ b/store/lru.go
@ -17,9 +17,9 @@ type LRUStore struct {
 }

 // NewLRUStore initialize a new LRUStore
-func NewLRUStore(store BlobStore, maxItems int) *LRUStore {
+func NewLRUStore(component string, store BlobStore, maxItems int) *LRUStore {
 	lru, err := golru.NewWithEvict(maxItems, func(key interface{}, value interface{}) {
-		metrics.CacheLRUEvictCount.WithLabelValues(store.Name()).Inc()
+		metrics.CacheLRUEvictCount.With(metrics.CacheLabels(store.Name(), component)).Inc()
 		_ = store.Delete(key.(string)) // TODO: log this error. may happen if underlying entry is gone but cache entry still there
 	})
 	if err != nil {
--- a/store/lru_test.go
+++ b/store/lru_test.go
@ -17,7 +17,7 @@ const cacheMaxBlobs = 3
 func getTestLRUStore() (*LRUStore, *DiskStore) {
 	d := NewDiskStore("/", 2)
 	d.fs = afero.NewMemMapFs()
-	return NewLRUStore(d, 3), d
+	return NewLRUStore("test", d, 3), d
 }

 func countOnDisk(t *testing.T, disk *DiskStore) int {
@ -134,7 +134,7 @@ func TestLRUStore_loadExisting(t *testing.T) {
 	require.Equal(t, 1, len(existing), "blob should exist in cache")
 	assert.Equal(t, hash, existing[0])

-	lru := NewLRUStore(d, 3) // lru should load existing blobs when it's created
+	lru := NewLRUStore("test", d, 3) // lru should load existing blobs when it's created
 	has, err := lru.Has(hash)
 	require.NoError(t, err)
 	assert.True(t, has, "hash should be loaded from disk store but it's not")
--- a/store/singleflight.go
+++ b/store/singleflight.go
@ -0,0 +1,67 @@
+package store
+
+import (
+	"time"
+
+	"github.com/lbryio/reflector.go/internal/metrics"
+
+	"github.com/lbryio/lbry.go/v2/stream"
+
+	"golang.org/x/sync/singleflight"
+)
+
+func WithSingleFlight(component string, origin BlobStore) BlobStore {
+	return &singleflightStore{
+		BlobStore: origin,
+		component: component,
+		sf:        new(singleflight.Group),
+	}
+}
+
+type singleflightStore struct {
+	BlobStore
+
+	component string
+	sf        *singleflight.Group
+}
+
+func (s *singleflightStore) Name() string {
+	return "sf_" + s.BlobStore.Name()
+}
+
+// Get ensures that only one request per hash is sent to the origin at a time,
+// thereby protecting against https://en.wikipedia.org/wiki/Thundering_herd_problem
+func (s *singleflightStore) Get(hash string) (stream.Blob, error) {
+	metrics.CacheWaitingRequestsCount.With(metrics.CacheLabels(s.BlobStore.Name(), s.component)).Inc()
+	defer metrics.CacheWaitingRequestsCount.With(metrics.CacheLabels(s.BlobStore.Name(), s.component)).Dec()
+
+	blob, err, _ := s.sf.Do(hash, s.getter(hash))
+	if err != nil {
+		return nil, err
+	}
+	return blob.(stream.Blob), nil
+}
+
+// getter returns a function that gets a blob from the origin
+// only one getter per hash will be executing at a time
+func (s *singleflightStore) getter(hash string) func() (interface{}, error) {
+	return func() (interface{}, error) {
+		metrics.CacheOriginRequestsCount.With(metrics.CacheLabels(s.BlobStore.Name(), s.component)).Inc()
+		defer metrics.CacheOriginRequestsCount.With(metrics.CacheLabels(s.BlobStore.Name(), s.component)).Dec()
+
+		start := time.Now()
+		blob, err := s.BlobStore.Get(hash)
+		if err != nil {
+			return nil, err
+		}
+
+		rate := float64(len(blob)) / 1024 / 1024 / time.Since(start).Seconds()
+		metrics.CacheRetrievalSpeed.With(map[string]string{
+			metrics.LabelCacheType: s.BlobStore.Name(),
+			metrics.LabelComponent: s.component,
+			metrics.LabelSource:    "origin",
+		}).Set(rate)
+
+		return blob, nil
+	}
+}