diff --git a/stats/percentile.go b/stats/percentile.go index 2332af8..9ca3a4a 100644 --- a/stats/percentile.go +++ b/stats/percentile.go @@ -1,25 +1,60 @@ package stats import ( + "math" "sort" ) type Percentile struct { percentile float64 - values sort.Float64Slice - offset int + + samples int64 + offset int64 + values []float64 } func NewPercentile(percentile float64, sampleWindow int) *Percentile { return &Percentile{ percentile: percentile, - values: make([]float64, 0, sampleWindow), + values: make([]float64, 0, sampleWindow), } } +// Not thread safe. func (p *Percentile) AddSample(sample float64) { - p.values = append(p.values, sample) - sort.Sort(p.values) + p.samples++ + + if p.samples > int64(cap(p.values)) { + target := float64(p.samples)*p.percentile - float64(cap(p.values))/2 + offset := round(math.Max(target, 0)) + + if sample > p.values[0] { + if offset > p.offset { + idx := sort.SearchFloat64s(p.values[1:], sample) + copy(p.values, p.values[1:idx+1]) + + p.values[idx] = sample + p.offset++ + } else if sample < p.values[len(p.values)-1] { + idx := sort.SearchFloat64s(p.values, sample) + copy(p.values[idx+1:], p.values[idx:]) + + p.values[idx] = sample + } + } else { + if offset > p.offset { + p.offset++ + } else { + copy(p.values[1:], p.values) + p.values[0] = sample + } + } + } else { + idx := sort.SearchFloat64s(p.values, sample) + p.values = p.values[:len(p.values)+1] + copy(p.values[idx+1:], p.values[idx:]) + p.values[idx] = sample + } } func (p *Percentile) Value() float64 { @@ -27,11 +62,18 @@ func (p *Percentile) Value() float64 { return 0 } - return p.values[round(p.index())] + return p.values[p.index()] } -func (p *Percentile) index() float64 { - return float64(len(p.values)) * p.percentile - float64(p.offset) +func (p *Percentile) index() int64 { + idx := round(float64(p.samples)*p.percentile - float64(p.offset)) + last := int64(len(p.values)) - 1 + + if idx > last { + return last + } + + return idx } func round(value float64) int64 { diff --git a/stats/percentile_test.go b/stats/percentile_test.go index 59addbe..d78795c 100644 --- a/stats/percentile_test.go +++ b/stats/percentile_test.go @@ -1,30 +1,65 @@ package stats import ( - "testing" "math/rand" + "testing" + "time" ) func TestPercentiles(t *testing.T) { - testInRange(t, 1, 0.5) - testInRange(t, 1, 0.9) - testInRange(t, 1, 0.95) - testInRange(t, 10000, 0.5) - testInRange(t, 10000, 0.9) - testInRange(t, 10000, 0.95) + rand.Seed(time.Now().Unix()) + + testUniformRandom(t, 1, 0.5) + testUniformRandom(t, 1, 0.9) + testUniformRandom(t, 1, 0.95) + testUniformRandom(t, 10000, 0.5) + testUniformRandom(t, 10000, 0.9) + testUniformRandom(t, 10000, 0.95) } -func testInRange(t *testing.T, max, percentile float64) { - p := NewPercentile(percentile, 10) +func testUniformRandom(t *testing.T, max, percentile float64) { + p := NewPercentile(percentile, 256) - for i := 0; i < 1000; i++ { + for i := 0; i < 100000; i++ { p.AddSample(rand.Float64() * max) } got := p.Value() expected := percentile * max + maxError := 0.01 - if got < expected * (1 - 0.02) || got > expected * (1 + 0.02) { - t.Errorf("Percentile out of range\n actual: %f\nexpected: %f", got, expected) + if got < expected*(1-maxError) || got > expected*(1+maxError) { + t.Errorf("Percentile out of range\n actual: %f\nexpected: %f\n error: %f%%\n", got, expected, (got-expected)/expected*100) + } +} + +func BenchmarkPercentiles64(b *testing.B) { + benchmarkUniformRandom(b, 64, 0.5) +} + +func BenchmarkPercentiles128(b *testing.B) { + benchmarkUniformRandom(b, 128, 0.5) +} + +func BenchmarkPercentiles256(b *testing.B) { + benchmarkUniformRandom(b, 256, 0.5) +} + +func BenchmarkPercentiles512(b *testing.B) { + benchmarkUniformRandom(b, 512, 0.5) +} + +func benchmarkUniformRandom(b *testing.B, window int, percentile float64) { + p := NewPercentile(percentile, window) + + numbers := make([]float64, b.N) + + for i := 0; i < b.N; i++ { + numbers[i] = rand.Float64() + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p.AddSample(numbers[i]) } }