From dfeda26c164b94beb303d11fa9530ff998ae6ff5 Mon Sep 17 00:00:00 2001 From: Justin Li Date: Mon, 28 Jul 2014 13:25:48 -0400 Subject: [PATCH] Extract percentile implementation --- stats/percentile.go | 116 --------------------------------------- stats/percentile_test.go | 98 --------------------------------- stats/stats.go | 16 +++--- 3 files changed, 9 insertions(+), 221 deletions(-) delete mode 100644 stats/percentile.go delete mode 100644 stats/percentile_test.go diff --git a/stats/percentile.go b/stats/percentile.go deleted file mode 100644 index 5e1c13f..0000000 --- a/stats/percentile.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2014 The Chihaya Authors. All rights reserved. -// Use of this source code is governed by the BSD 2-Clause license, -// which can be found in the LICENSE file. - -package stats - -import ( - "encoding/json" - "math" - "sort" - "sync/atomic" -) - -// Percentile implements an efficient percentile calculation of -// arbitrary float64 samples. -type Percentile struct { - percentile float64 - - samples int64 - offset int64 - - values []float64 - value uint64 // These bits are really a float64. -} - -// NewPercentile returns a Percentile with a given threshold. -func NewPercentile(percentile float64) *Percentile { - return &Percentile{ - percentile: percentile, - - // 256 samples is fast, and accurate for most distributions. - values: make([]float64, 0, 256), - } -} - -// NewPercentileWithWindow returns a Percentile with a given threshold -// and window size (accuracy). -func NewPercentileWithWindow(percentile float64, sampleWindow int) *Percentile { - return &Percentile{ - percentile: percentile, - values: make([]float64, 0, sampleWindow), - } -} - -// Value returns the current value at the stored percentile. -// It is thread-safe, and may be called concurrently with AddSample. -func (p *Percentile) Value() float64 { - bits := atomic.LoadUint64(&p.value) - return math.Float64frombits(bits) -} - -// AddSample adds a single float64 sample to the data set. -// It is not thread-safe, and not be called concurrently. -func (p *Percentile) AddSample(sample float64) { - p.samples++ - - if len(p.values) == cap(p.values) { - target := float64(p.samples)*p.percentile - float64(cap(p.values))/2 - offset := round(math.Max(target, 0)) - - if sample > p.values[0] { - if offset > p.offset { - idx := sort.SearchFloat64s(p.values[1:], sample) - copy(p.values, p.values[1:idx+1]) - - p.values[idx] = sample - p.offset++ - } else if sample < p.values[len(p.values)-1] { - idx := sort.SearchFloat64s(p.values, sample) - copy(p.values[idx+1:], p.values[idx:]) - - p.values[idx] = sample - } - } else { - if offset > p.offset { - p.offset++ - } else { - copy(p.values[1:], p.values) - p.values[0] = sample - } - } - } else { - idx := sort.SearchFloat64s(p.values, sample) - p.values = p.values[:len(p.values)+1] - copy(p.values[idx+1:], p.values[idx:]) - p.values[idx] = sample - } - - bits := math.Float64bits(p.values[p.index()]) - atomic.StoreUint64(&p.value, bits) -} - -func (p *Percentile) index() int64 { - idx := round(float64(p.samples)*p.percentile - float64(p.offset)) - last := int64(len(p.values)) - 1 - - if idx > last { - return last - } - - return idx -} - -func (p *Percentile) MarshalJSON() ([]byte, error) { - return json.Marshal(p.Value()) -} - -func round(value float64) int64 { - if value < 0.0 { - value -= 0.5 - } else { - value += 0.5 - } - - return int64(value) -} diff --git a/stats/percentile_test.go b/stats/percentile_test.go deleted file mode 100644 index 66594fc..0000000 --- a/stats/percentile_test.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2014 The Chihaya Authors. All rights reserved. -// Use of this source code is governed by the BSD 2-Clause license, -// which can be found in the LICENSE file. - -package stats - -import ( - "math" - "math/rand" - "sort" - "testing" - "time" -) - -func TestPercentiles(t *testing.T) { - rand.Seed(time.Now().Unix()) - - testSlice(t, uniform(10000, 1), 0.5) - testSlice(t, uniform(10000, 1), 0.9) - testSlice(t, uniform(10000, 10000), 0.5) - testSlice(t, uniform(10000, 10000), 0.9) -} - -func TestLogNormPercentiles(t *testing.T) { - rand.Seed(time.Now().Unix()) - - testSlice(t, logNorm(10000, 1), 0.5) - testSlice(t, logNorm(10000, 1), 0.9) -} - -func uniform(n int, scale float64) sort.Float64Slice { - numbers := make(sort.Float64Slice, n) - - for i := 0; i < n; i++ { - numbers[i] = rand.Float64() * scale - } - - return numbers -} - -func logNorm(n int, scale float64) sort.Float64Slice { - numbers := make(sort.Float64Slice, n) - - for i := 0; i < n; i++ { - numbers[i] = math.Exp(rand.NormFloat64()) * scale - } - - return numbers -} - -func testSlice(t *testing.T, numbers sort.Float64Slice, percentile float64) { - p := NewPercentile(percentile) - - for i := 0; i < len(numbers); i++ { - p.AddSample(numbers[i]) - } - - sort.Sort(numbers) - got := p.Value() - index := round(float64(len(numbers)) * percentile) - - if got != numbers[index] && got != numbers[index-1] && got != numbers[index+1] { - t.Errorf("Percentile incorrect\n actual: %f\nexpected: %f, %f, %f\n", got, numbers[index-1], numbers[index], numbers[index+1]) - } -} - -func BenchmarkPercentiles64(b *testing.B) { - benchmarkSlice(b, uniform(b.N, 1), 64, 0.5) -} - -func BenchmarkPercentiles128(b *testing.B) { - benchmarkSlice(b, uniform(b.N, 1), 128, 0.5) -} - -func BenchmarkPercentiles256(b *testing.B) { - benchmarkSlice(b, uniform(b.N, 1), 256, 0.5) -} - -func BenchmarkPercentiles512(b *testing.B) { - benchmarkSlice(b, uniform(b.N, 1), 512, 0.5) -} - -func BenchmarkLNPercentiles128(b *testing.B) { - benchmarkSlice(b, logNorm(b.N, 1), 128, 0.5) -} - -func BenchmarkLNPercentiles256(b *testing.B) { - benchmarkSlice(b, logNorm(b.N, 1), 258, 0.5) -} - -func benchmarkSlice(b *testing.B, numbers sort.Float64Slice, window int, percentile float64) { - p := NewPercentileWithWindow(percentile, window) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - p.AddSample(numbers[i]) - } -} diff --git a/stats/stats.go b/stats/stats.go index 51ef1a9..6061fd8 100644 --- a/stats/stats.go +++ b/stats/stats.go @@ -9,8 +9,10 @@ package stats import ( "time" - "github.com/chihaya/chihaya/config" + "github.com/pushrax/faststats" "github.com/pushrax/flatjson" + + "github.com/chihaya/chihaya/config" ) const ( @@ -59,9 +61,9 @@ type PeerStats struct { } type PercentileTimes struct { - P50 *Percentile - P90 *Percentile - P95 *Percentile + P50 *faststats.Percentile + P90 *faststats.Percentile + P95 *faststats.Percentile } type Stats struct { @@ -107,9 +109,9 @@ func New(cfg config.StatsConfig) *Stats { responseTimeEvents: make(chan time.Duration, cfg.BufferSize), ResponseTime: PercentileTimes{ - P50: NewPercentile(0.5), - P90: NewPercentile(0.9), - P95: NewPercentile(0.95), + P50: faststats.NewPercentile(0.5), + P90: faststats.NewPercentile(0.9), + P95: faststats.NewPercentile(0.95), }, }