Fast percentile implementation

This commit is contained in:
Justin Li 2014-07-22 12:26:31 -04:00
parent 2f4d0b0f9a
commit a162f38281
2 changed files with 97 additions and 20 deletions

View file

@ -1,13 +1,16 @@
package stats package stats
import ( import (
"math"
"sort" "sort"
) )
// Percentile estimates one percentile of a stream of samples while keeping
// only a bounded, sorted window of the values nearest the target rank.
type Percentile struct {
	// percentile is the target quantile as a fraction (the tests use 0.5,
	// 0.9 and 0.95); it is multiplied by the sample count to obtain a rank.
	percentile float64
	// samples is the number of samples counted by AddSample so far.
	samples int64
	// offset is the number of samples that have been discarded from below
	// the window, i.e. the overall rank of values[0].
	offset int64
	// values is the retained window, kept sorted in ascending order. Its
	// capacity is the window size.
	values []float64
}
func NewPercentile(percentile float64, sampleWindow int) *Percentile { func NewPercentile(percentile float64, sampleWindow int) *Percentile {
@ -17,9 +20,41 @@ func NewPercentile(percentile float64, sampleWindow int) *Percentile {
} }
} }
// Not thread safe.
func (p *Percentile) AddSample(sample float64) { func (p *Percentile) AddSample(sample float64) {
p.values = append(p.values, sample) p.samples++
sort.Sort(p.values)
if p.samples > int64(cap(p.values)) {
target := float64(p.samples)*p.percentile - float64(cap(p.values))/2
offset := round(math.Max(target, 0))
if sample > p.values[0] {
if offset > p.offset {
idx := sort.SearchFloat64s(p.values[1:], sample)
copy(p.values, p.values[1:idx+1])
p.values[idx] = sample
p.offset++
} else if sample < p.values[len(p.values)-1] {
idx := sort.SearchFloat64s(p.values, sample)
copy(p.values[idx+1:], p.values[idx:])
p.values[idx] = sample
}
} else {
if offset > p.offset {
p.offset++
} else {
copy(p.values[1:], p.values)
p.values[0] = sample
}
}
} else {
idx := sort.SearchFloat64s(p.values, sample)
p.values = p.values[:len(p.values)+1]
copy(p.values[idx+1:], p.values[idx:])
p.values[idx] = sample
}
} }
func (p *Percentile) Value() float64 { func (p *Percentile) Value() float64 {
@ -27,11 +62,18 @@ func (p *Percentile) Value() float64 {
return 0 return 0
} }
return p.values[round(p.index())] return p.values[p.index()]
} }
func (p *Percentile) index() float64 { func (p *Percentile) index() int64 {
return float64(len(p.values)) * p.percentile - float64(p.offset) idx := round(float64(p.samples)*p.percentile - float64(p.offset))
last := int64(len(p.values)) - 1
if idx > last {
return last
}
return idx
} }
func round(value float64) int64 { func round(value float64) int64 {

View file

@ -1,30 +1,65 @@
package stats package stats
import ( import (
"testing"
"math/rand" "math/rand"
"testing"
"time"
) )
func TestPercentiles(t *testing.T) { func TestPercentiles(t *testing.T) {
testInRange(t, 1, 0.5) rand.Seed(time.Now().Unix())
testInRange(t, 1, 0.9)
testInRange(t, 1, 0.95) testUniformRandom(t, 1, 0.5)
testInRange(t, 10000, 0.5) testUniformRandom(t, 1, 0.9)
testInRange(t, 10000, 0.9) testUniformRandom(t, 1, 0.95)
testInRange(t, 10000, 0.95) testUniformRandom(t, 10000, 0.5)
testUniformRandom(t, 10000, 0.9)
testUniformRandom(t, 10000, 0.95)
} }
func testInRange(t *testing.T, max, percentile float64) { func testUniformRandom(t *testing.T, max, percentile float64) {
p := NewPercentile(percentile, 10) p := NewPercentile(percentile, 256)
for i := 0; i < 1000; i++ { for i := 0; i < 100000; i++ {
p.AddSample(rand.Float64() * max) p.AddSample(rand.Float64() * max)
} }
got := p.Value() got := p.Value()
expected := percentile * max expected := percentile * max
maxError := 0.01
if got < expected * (1 - 0.02) || got > expected * (1 + 0.02) { if got < expected*(1-maxError) || got > expected*(1+maxError) {
t.Errorf("Percentile out of range\n actual: %f\nexpected: %f", got, expected) t.Errorf("Percentile out of range\n actual: %f\nexpected: %f\n error: %f%%\n", got, expected, (got-expected)/expected*100)
}
}
// BenchmarkPercentiles64 measures AddSample on a median (0.5) estimator
// created with a sample window of 64.
func BenchmarkPercentiles64(b *testing.B) {
benchmarkUniformRandom(b, 64, 0.5)
}
// BenchmarkPercentiles128 measures AddSample on a median (0.5) estimator
// created with a sample window of 128.
func BenchmarkPercentiles128(b *testing.B) {
benchmarkUniformRandom(b, 128, 0.5)
}
// BenchmarkPercentiles256 measures AddSample on a median (0.5) estimator
// created with a sample window of 256.
func BenchmarkPercentiles256(b *testing.B) {
benchmarkUniformRandom(b, 256, 0.5)
}
// BenchmarkPercentiles512 measures AddSample on a median (0.5) estimator
// created with a sample window of 512.
func BenchmarkPercentiles512(b *testing.B) {
benchmarkUniformRandom(b, 512, 0.5)
}
func benchmarkUniformRandom(b *testing.B, window int, percentile float64) {
p := NewPercentile(percentile, window)
numbers := make([]float64, b.N)
for i := 0; i < b.N; i++ {
numbers[i] = rand.Float64()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
p.AddSample(numbers[i])
} }
} }