Fast percentile implementation

2014-07-22 12:26:31 -04:00 · 2014-07-22 12:26:31 -04:00 · a162f38281
commit a162f38281
parent 2f4d0b0f9a
2 changed files with 97 additions and 20 deletions
--- a/stats/percentile.go
+++ b/stats/percentile.go
@@ -1,13 +1,16 @@
 package stats
 import (
 	"math"
 	"sort"
 )
 type Percentile struct {
 	percentile float64
-	values sort.Float64Slice
+
-	offset int
+	samples int64
 	offset  int64
 	values  []float64
 }
 func NewPercentile(percentile float64, sampleWindow int) *Percentile {
@@ -17,9 +20,41 @@ func NewPercentile(percentile float64, sampleWindow int) *Percentile {
 	}
 }
 // Not thread safe.
 func (p *Percentile) AddSample(sample float64) {
-	p.values = append(p.values, sample)
+	p.samples++
-	sort.Sort(p.values)
+
 	if p.samples > int64(cap(p.values)) {
 		target := float64(p.samples)*p.percentile - float64(cap(p.values))/2
 		offset := round(math.Max(target, 0))
 		if sample > p.values[0] {
 			if offset > p.offset {
 				idx := sort.SearchFloat64s(p.values[1:], sample)
 				copy(p.values, p.values[1:idx+1])
 				p.values[idx] = sample
 				p.offset++
 			} else if sample < p.values[len(p.values)-1] {
 				idx := sort.SearchFloat64s(p.values, sample)
 				copy(p.values[idx+1:], p.values[idx:])
 				p.values[idx] = sample
 			}
 		} else {
 			if offset > p.offset {
 				p.offset++
 			} else {
 				copy(p.values[1:], p.values)
 				p.values[0] = sample
 			}
 		}
 	} else {
 		idx := sort.SearchFloat64s(p.values, sample)
 		p.values = p.values[:len(p.values)+1]
 		copy(p.values[idx+1:], p.values[idx:])
 		p.values[idx] = sample
 	}
 }
 func (p *Percentile) Value() float64 {
@@ -27,11 +62,18 @@ func (p *Percentile) Value() float64 {
 		return 0
 	}
-	return p.values[round(p.index())]
+	return p.values[p.index()]
 }
-func (p *Percentile) index() float64 {
+func (p *Percentile) index() int64 {
-	return float64(len(p.values)) * p.percentile - float64(p.offset)
+	idx := round(float64(p.samples)*p.percentile - float64(p.offset))
 	last := int64(len(p.values)) - 1
 	if idx > last {
 		return last
 	}
 	return idx
 }
 func round(value float64) int64 {
--- a/stats/percentile_test.go
+++ b/stats/percentile_test.go
@@ -1,30 +1,65 @@
 package stats
 import (
 	"testing"
 	"math/rand"
 	"testing"
 	"time"
 )
 func TestPercentiles(t *testing.T) {
-	testInRange(t, 1, 0.5)
+	rand.Seed(time.Now().Unix())
-	testInRange(t, 1, 0.9)
+
-	testInRange(t, 1, 0.95)
+	testUniformRandom(t, 1, 0.5)
-	testInRange(t, 10000, 0.5)
+	testUniformRandom(t, 1, 0.9)
-	testInRange(t, 10000, 0.9)
+	testUniformRandom(t, 1, 0.95)
-	testInRange(t, 10000, 0.95)
+	testUniformRandom(t, 10000, 0.5)
 	testUniformRandom(t, 10000, 0.9)
 	testUniformRandom(t, 10000, 0.95)
 }
-func testInRange(t *testing.T, max, percentile float64) {
+func testUniformRandom(t *testing.T, max, percentile float64) {
-	p := NewPercentile(percentile, 10)
+	p := NewPercentile(percentile, 256)
-	for i := 0; i < 1000; i++ {
+	for i := 0; i < 100000; i++ {
 		p.AddSample(rand.Float64() * max)
 	}
 	got := p.Value()
 	expected := percentile * max
 	maxError := 0.01
-	if got < expected * (1 - 0.02) || got > expected * (1 + 0.02) {
+	if got < expected*(1-maxError) || got > expected*(1+maxError) {
-		t.Errorf("Percentile out of range\n  actual: %f\nexpected: %f", got, expected)
+		t.Errorf("Percentile out of range\n  actual: %f\nexpected: %f\n   error: %f%%\n", got, expected, (got-expected)/expected*100)
 	}
 }
 func BenchmarkPercentiles64(b *testing.B) {
 	benchmarkUniformRandom(b, 64, 0.5)
 }
 func BenchmarkPercentiles128(b *testing.B) {
 	benchmarkUniformRandom(b, 128, 0.5)
 }
 func BenchmarkPercentiles256(b *testing.B) {
 	benchmarkUniformRandom(b, 256, 0.5)
 }
 func BenchmarkPercentiles512(b *testing.B) {
 	benchmarkUniformRandom(b, 512, 0.5)
 }
 func benchmarkUniformRandom(b *testing.B, window int, percentile float64) {
 	p := NewPercentile(percentile, window)
 	numbers := make([]float64, b.N)
 	for i := 0; i < b.N; i++ {
 		numbers[i] = rand.Float64()
 	}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		p.AddSample(numbers[i])
 	}
 }