Fast percentile implementation

This commit is contained in:
Justin Li 2014-07-22 12:26:31 -04:00
parent 2f4d0b0f9a
commit a162f38281
2 changed files with 97 additions and 20 deletions

View file

@ -1,25 +1,60 @@
package stats
import (
"math"
"sort"
)
// Percentile estimates a single percentile of a stream of samples while
// retaining only a bounded, sorted window of the values seen so far.
type Percentile struct {
	// percentile is the target quantile expressed as a fraction (e.g. 0.95).
	percentile float64
	// samples counts every value passed to AddSample, whether or not it is
	// still held in values.
	samples int64
	// offset counts the samples that were discarded from below the window,
	// i.e. how many observed values rank lower than values[0].
	offset int64
	// values is the retained window, kept in ascending order.
	values []float64
}
// NewPercentile returns a Percentile that estimates the given percentile
// (a fraction such as 0.95) while holding at most sampleWindow values.
func NewPercentile(percentile float64, sampleWindow int) *Percentile {
	return &Percentile{
		percentile: percentile,
		// Length 0, capacity sampleWindow: the backing array for the window
		// is allocated once, up front.
		values: make([]float64, 0, sampleWindow),
	}
}
// AddSample records one observation.
//
// Until as many samples as the window's capacity have been seen, each sample
// is inserted at its sorted position. After that the window length is fixed:
// the window either slides up by one (its smallest retained value, or the new
// sample if that is smaller still, is counted in p.offset and dropped) or it
// stays in place and the new sample displaces the largest retained value,
// unless the sample itself lies at or above that value, in which case it is
// discarded.
//
// Not thread safe.
func (p *Percentile) AddSample(sample float64) {
	p.samples++

	window := cap(p.values)
	if window == 0 {
		// Nothing can be retained; reading p.values[0] below would panic.
		return
	}

	if p.samples <= int64(window) {
		// Still filling the window: plain sorted insert without reallocating.
		idx := sort.SearchFloat64s(p.values, sample)
		p.values = p.values[:len(p.values)+1]
		copy(p.values[idx+1:], p.values[idx:])
		p.values[idx] = sample
		return
	}

	// Number of samples that should rank below the window so that the
	// percentile position sits roughly in its middle (never negative).
	target := float64(p.samples)*p.percentile - float64(window)/2
	slideUp := round(math.Max(target, 0)) > p.offset

	if sample > p.values[0] {
		if slideUp {
			// Drop the smallest retained value and insert the sample among
			// the remaining ones.
			idx := sort.SearchFloat64s(p.values[1:], sample)
			copy(p.values, p.values[1:idx+1])
			p.values[idx] = sample
			p.offset++
		} else if sample < p.values[len(p.values)-1] {
			// Window stays put: insert in order, pushing the largest out.
			idx := sort.SearchFloat64s(p.values, sample)
			copy(p.values[idx+1:], p.values[idx:])
			p.values[idx] = sample
		}
		// Otherwise the sample is at or above the window's top and is not
		// retained.
		return
	}

	if slideUp {
		// The sample itself is the value that falls below the window.
		p.offset++
		return
	}

	// Window stays put and the sample becomes its new minimum; the largest
	// retained value is pushed out.
	copy(p.values[1:], p.values)
	p.values[0] = sample
}
func (p *Percentile) Value() float64 {
@ -27,11 +62,18 @@ func (p *Percentile) Value() float64 {
return 0
}
return p.values[round(p.index())]
return p.values[p.index()]
}
func (p *Percentile) index() float64 {
return float64(len(p.values)) * p.percentile - float64(p.offset)
func (p *Percentile) index() int64 {
idx := round(float64(p.samples)*p.percentile - float64(p.offset))
last := int64(len(p.values)) - 1
if idx > last {
return last
}
return idx
}
func round(value float64) int64 {

View file

@ -1,30 +1,65 @@
package stats
import (
"testing"
"math/rand"
"testing"
"time"
)
func TestPercentiles(t *testing.T) {
testInRange(t, 1, 0.5)
testInRange(t, 1, 0.9)
testInRange(t, 1, 0.95)
testInRange(t, 10000, 0.5)
testInRange(t, 10000, 0.9)
testInRange(t, 10000, 0.95)
rand.Seed(time.Now().Unix())
testUniformRandom(t, 1, 0.5)
testUniformRandom(t, 1, 0.9)
testUniformRandom(t, 1, 0.95)
testUniformRandom(t, 10000, 0.5)
testUniformRandom(t, 10000, 0.9)
testUniformRandom(t, 10000, 0.95)
}
func testInRange(t *testing.T, max, percentile float64) {
p := NewPercentile(percentile, 10)
func testUniformRandom(t *testing.T, max, percentile float64) {
p := NewPercentile(percentile, 256)
for i := 0; i < 1000; i++ {
for i := 0; i < 100000; i++ {
p.AddSample(rand.Float64() * max)
}
got := p.Value()
expected := percentile * max
maxError := 0.01
if got < expected * (1 - 0.02) || got > expected * (1 + 0.02) {
t.Errorf("Percentile out of range\n actual: %f\nexpected: %f", got, expected)
if got < expected*(1-maxError) || got > expected*(1+maxError) {
t.Errorf("Percentile out of range\n actual: %f\nexpected: %f\n error: %f%%\n", got, expected, (got-expected)/expected*100)
}
}
// BenchmarkPercentiles64 runs the uniform-random benchmark for the median
// with a 64-value window.
func BenchmarkPercentiles64(b *testing.B) {
	const (
		window     = 64
		percentile = 0.5
	)
	benchmarkUniformRandom(b, window, percentile)
}
// BenchmarkPercentiles128 runs the uniform-random benchmark for the median
// with a 128-value window.
func BenchmarkPercentiles128(b *testing.B) {
	const (
		window     = 128
		percentile = 0.5
	)
	benchmarkUniformRandom(b, window, percentile)
}
// BenchmarkPercentiles256 runs the uniform-random benchmark for the median
// with a 256-value window.
func BenchmarkPercentiles256(b *testing.B) {
	const (
		window     = 256
		percentile = 0.5
	)
	benchmarkUniformRandom(b, window, percentile)
}
// BenchmarkPercentiles512 runs the uniform-random benchmark for the median
// with a 512-value window.
func BenchmarkPercentiles512(b *testing.B) {
	const (
		window     = 512
		percentile = 0.5
	)
	benchmarkUniformRandom(b, window, percentile)
}
// benchmarkUniformRandom times b.N calls to AddSample on a Percentile built
// with the given window and percentile. The random inputs are generated
// before the timer is reset, so producing them is not part of the measurement.
func benchmarkUniformRandom(b *testing.B, window int, percentile float64) {
	estimator := NewPercentile(percentile, window)

	inputs := make([]float64, b.N)
	for i := range inputs {
		inputs[i] = rand.Float64()
	}

	b.ResetTimer()
	for _, v := range inputs {
		estimator.AddSample(v)
	}
}