Extract percentile implementation

2014-07-28 13:25:48 -04:00 · 2014-07-28 13:25:48 -04:00 · dfeda26c16
commit dfeda26c16
parent dda56c4357
3 changed files with 9 additions and 221 deletions
--- a/stats/percentile.go
+++ b/stats/percentile.go
@@ -1,116 +0,0 @@
-// Copyright 2014 The Chihaya Authors. All rights reserved.
-// Use of this source code is governed by the BSD 2-Clause license,
-// which can be found in the LICENSE file.
-
-package stats
-
-import (
-	"encoding/json"
-	"math"
-	"sort"
-	"sync/atomic"
-)
-
-// Percentile implements an efficient percentile calculation of
-// arbitrary float64 samples.
-type Percentile struct {
-	percentile float64
-
-	samples int64
-	offset  int64
-
-	values []float64
-	value  uint64 // These bits are really a float64.
-}
-
-// NewPercentile returns a Percentile with a given threshold.
-func NewPercentile(percentile float64) *Percentile {
-	return &Percentile{
-		percentile: percentile,
-
-		// 256 samples is fast, and accurate for most distributions.
-		values: make([]float64, 0, 256),
-	}
-}
-
-// NewPercentileWithWindow returns a Percentile with a given threshold
-// and window size (accuracy).
-func NewPercentileWithWindow(percentile float64, sampleWindow int) *Percentile {
-	return &Percentile{
-		percentile: percentile,
-		values:     make([]float64, 0, sampleWindow),
-	}
-}
-
-// Value returns the current value at the stored percentile.
-// It is thread-safe, and may be called concurrently with AddSample.
-func (p *Percentile) Value() float64 {
-	bits := atomic.LoadUint64(&p.value)
-	return math.Float64frombits(bits)
-}
-
-// AddSample adds a single float64 sample to the data set.
-// It is not thread-safe, and not be called concurrently.
-func (p *Percentile) AddSample(sample float64) {
-	p.samples++
-
-	if len(p.values) == cap(p.values) {
-		target := float64(p.samples)*p.percentile - float64(cap(p.values))/2
-		offset := round(math.Max(target, 0))
-
-		if sample > p.values[0] {
-			if offset > p.offset {
-				idx := sort.SearchFloat64s(p.values[1:], sample)
-				copy(p.values, p.values[1:idx+1])
-
-				p.values[idx] = sample
-				p.offset++
-			} else if sample < p.values[len(p.values)-1] {
-				idx := sort.SearchFloat64s(p.values, sample)
-				copy(p.values[idx+1:], p.values[idx:])
-
-				p.values[idx] = sample
-			}
-		} else {
-			if offset > p.offset {
-				p.offset++
-			} else {
-				copy(p.values[1:], p.values)
-				p.values[0] = sample
-			}
-		}
-	} else {
-		idx := sort.SearchFloat64s(p.values, sample)
-		p.values = p.values[:len(p.values)+1]
-		copy(p.values[idx+1:], p.values[idx:])
-		p.values[idx] = sample
-	}
-
-	bits := math.Float64bits(p.values[p.index()])
-	atomic.StoreUint64(&p.value, bits)
-}
-
-func (p *Percentile) index() int64 {
-	idx := round(float64(p.samples)*p.percentile - float64(p.offset))
-	last := int64(len(p.values)) - 1
-
-	if idx > last {
-		return last
-	}
-
-	return idx
-}
-
-func (p *Percentile) MarshalJSON() ([]byte, error) {
-	return json.Marshal(p.Value())
-}
-
-func round(value float64) int64 {
-	if value < 0.0 {
-		value -= 0.5
-	} else {
-		value += 0.5
-	}
-
-	return int64(value)
-}
--- a/stats/percentile_test.go
+++ b/stats/percentile_test.go
@@ -1,98 +0,0 @@
-// Copyright 2014 The Chihaya Authors. All rights reserved.
-// Use of this source code is governed by the BSD 2-Clause license,
-// which can be found in the LICENSE file.
-
-package stats
-
-import (
-	"math"
-	"math/rand"
-	"sort"
-	"testing"
-	"time"
-)
-
-func TestPercentiles(t *testing.T) {
-	rand.Seed(time.Now().Unix())
-
-	testSlice(t, uniform(10000, 1), 0.5)
-	testSlice(t, uniform(10000, 1), 0.9)
-	testSlice(t, uniform(10000, 10000), 0.5)
-	testSlice(t, uniform(10000, 10000), 0.9)
-}
-
-func TestLogNormPercentiles(t *testing.T) {
-	rand.Seed(time.Now().Unix())
-
-	testSlice(t, logNorm(10000, 1), 0.5)
-	testSlice(t, logNorm(10000, 1), 0.9)
-}
-
-func uniform(n int, scale float64) sort.Float64Slice {
-	numbers := make(sort.Float64Slice, n)
-
-	for i := 0; i < n; i++ {
-		numbers[i] = rand.Float64() * scale
-	}
-
-	return numbers
-}
-
-func logNorm(n int, scale float64) sort.Float64Slice {
-	numbers := make(sort.Float64Slice, n)
-
-	for i := 0; i < n; i++ {
-		numbers[i] = math.Exp(rand.NormFloat64()) * scale
-	}
-
-	return numbers
-}
-
-func testSlice(t *testing.T, numbers sort.Float64Slice, percentile float64) {
-	p := NewPercentile(percentile)
-
-	for i := 0; i < len(numbers); i++ {
-		p.AddSample(numbers[i])
-	}
-
-	sort.Sort(numbers)
-	got := p.Value()
-	index := round(float64(len(numbers)) * percentile)
-
-	if got != numbers[index] && got != numbers[index-1] && got != numbers[index+1] {
-		t.Errorf("Percentile incorrect\n  actual: %f\nexpected: %f, %f, %f\n", got, numbers[index-1], numbers[index], numbers[index+1])
-	}
-}
-
-func BenchmarkPercentiles64(b *testing.B) {
-	benchmarkSlice(b, uniform(b.N, 1), 64, 0.5)
-}
-
-func BenchmarkPercentiles128(b *testing.B) {
-	benchmarkSlice(b, uniform(b.N, 1), 128, 0.5)
-}
-
-func BenchmarkPercentiles256(b *testing.B) {
-	benchmarkSlice(b, uniform(b.N, 1), 256, 0.5)
-}
-
-func BenchmarkPercentiles512(b *testing.B) {
-	benchmarkSlice(b, uniform(b.N, 1), 512, 0.5)
-}
-
-func BenchmarkLNPercentiles128(b *testing.B) {
-	benchmarkSlice(b, logNorm(b.N, 1), 128, 0.5)
-}
-
-func BenchmarkLNPercentiles256(b *testing.B) {
-	benchmarkSlice(b, logNorm(b.N, 1), 258, 0.5)
-}
-
-func benchmarkSlice(b *testing.B, numbers sort.Float64Slice, window int, percentile float64) {
-	p := NewPercentileWithWindow(percentile, window)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		p.AddSample(numbers[i])
-	}
-}
--- a/stats/stats.go
+++ b/stats/stats.go
@@ -9,8 +9,10 @@ package stats
 import (
 	"time"

-	"github.com/chihaya/chihaya/config"
+	"github.com/pushrax/faststats"
 	"github.com/pushrax/flatjson"
+
+	"github.com/chihaya/chihaya/config"
 )

 const (
@@ -59,9 +61,9 @@ type PeerStats struct {
 }

 type PercentileTimes struct {
-	P50 *Percentile
-	P90 *Percentile
-	P95 *Percentile
+	P50 *faststats.Percentile
+	P90 *faststats.Percentile
+	P95 *faststats.Percentile
 }

 type Stats struct {
@@ -107,9 +109,9 @@ func New(cfg config.StatsConfig) *Stats {
 		responseTimeEvents: make(chan time.Duration, cfg.BufferSize),

 		ResponseTime: PercentileTimes{
-			P50: NewPercentile(0.5),
-			P90: NewPercentile(0.9),
-			P95: NewPercentile(0.95),
+			P50: faststats.NewPercentile(0.5),
+			P90: faststats.NewPercentile(0.9),
+			P95: faststats.NewPercentile(0.95),
 		},
 	}