Fast percentile implementation
This commit is contained in:
parent
2f4d0b0f9a
commit
a162f38281
2 changed files with 97 additions and 20 deletions
|
@ -1,13 +1,16 @@
|
|||
package stats
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Percentile estimates one percentile of a stream of samples while keeping
// only a bounded, sorted window of values around the target rank.
type Percentile struct {
	// percentile is the target quantile expressed as a fraction of the
	// sample count (it is multiplied by samples to obtain a rank).
	percentile float64

	// samples counts every value passed to AddSample, retained or not.
	samples int64
	// offset is the number of samples ranked below the retained window:
	// it grows when the smallest retained value is dropped or when a sample
	// at or below the window's minimum is skipped.
	offset int64
	// values is the retained window, kept in ascending order. Its capacity
	// is used as the window size.
	values []float64
}
|
||||
|
||||
func NewPercentile(percentile float64, sampleWindow int) *Percentile {
|
||||
|
@ -17,9 +20,41 @@ func NewPercentile(percentile float64, sampleWindow int) *Percentile {
|
|||
}
|
||||
}
|
||||
|
||||
// Not thread safe.
|
||||
func (p *Percentile) AddSample(sample float64) {
|
||||
p.values = append(p.values, sample)
|
||||
sort.Sort(p.values)
|
||||
p.samples++
|
||||
|
||||
if p.samples > int64(cap(p.values)) {
|
||||
target := float64(p.samples)*p.percentile - float64(cap(p.values))/2
|
||||
offset := round(math.Max(target, 0))
|
||||
|
||||
if sample > p.values[0] {
|
||||
if offset > p.offset {
|
||||
idx := sort.SearchFloat64s(p.values[1:], sample)
|
||||
copy(p.values, p.values[1:idx+1])
|
||||
|
||||
p.values[idx] = sample
|
||||
p.offset++
|
||||
} else if sample < p.values[len(p.values)-1] {
|
||||
idx := sort.SearchFloat64s(p.values, sample)
|
||||
copy(p.values[idx+1:], p.values[idx:])
|
||||
|
||||
p.values[idx] = sample
|
||||
}
|
||||
} else {
|
||||
if offset > p.offset {
|
||||
p.offset++
|
||||
} else {
|
||||
copy(p.values[1:], p.values)
|
||||
p.values[0] = sample
|
||||
}
|
||||
}
|
||||
} else {
|
||||
idx := sort.SearchFloat64s(p.values, sample)
|
||||
p.values = p.values[:len(p.values)+1]
|
||||
copy(p.values[idx+1:], p.values[idx:])
|
||||
p.values[idx] = sample
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Percentile) Value() float64 {
|
||||
|
@ -27,11 +62,18 @@ func (p *Percentile) Value() float64 {
|
|||
return 0
|
||||
}
|
||||
|
||||
return p.values[round(p.index())]
|
||||
return p.values[p.index()]
|
||||
}
|
||||
|
||||
func (p *Percentile) index() float64 {
|
||||
return float64(len(p.values)) * p.percentile - float64(p.offset)
|
||||
func (p *Percentile) index() int64 {
|
||||
idx := round(float64(p.samples)*p.percentile - float64(p.offset))
|
||||
last := int64(len(p.values)) - 1
|
||||
|
||||
if idx > last {
|
||||
return last
|
||||
}
|
||||
|
||||
return idx
|
||||
}
|
||||
|
||||
func round(value float64) int64 {
|
||||
|
|
|
@ -1,30 +1,65 @@
|
|||
package stats
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPercentiles(t *testing.T) {
|
||||
testInRange(t, 1, 0.5)
|
||||
testInRange(t, 1, 0.9)
|
||||
testInRange(t, 1, 0.95)
|
||||
testInRange(t, 10000, 0.5)
|
||||
testInRange(t, 10000, 0.9)
|
||||
testInRange(t, 10000, 0.95)
|
||||
rand.Seed(time.Now().Unix())
|
||||
|
||||
testUniformRandom(t, 1, 0.5)
|
||||
testUniformRandom(t, 1, 0.9)
|
||||
testUniformRandom(t, 1, 0.95)
|
||||
testUniformRandom(t, 10000, 0.5)
|
||||
testUniformRandom(t, 10000, 0.9)
|
||||
testUniformRandom(t, 10000, 0.95)
|
||||
}
|
||||
|
||||
func testInRange(t *testing.T, max, percentile float64) {
|
||||
p := NewPercentile(percentile, 10)
|
||||
func testUniformRandom(t *testing.T, max, percentile float64) {
|
||||
p := NewPercentile(percentile, 256)
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
for i := 0; i < 100000; i++ {
|
||||
p.AddSample(rand.Float64() * max)
|
||||
}
|
||||
|
||||
got := p.Value()
|
||||
expected := percentile * max
|
||||
maxError := 0.01
|
||||
|
||||
if got < expected * (1 - 0.02) || got > expected * (1 + 0.02) {
|
||||
t.Errorf("Percentile out of range\n actual: %f\nexpected: %f", got, expected)
|
||||
if got < expected*(1-maxError) || got > expected*(1+maxError) {
|
||||
t.Errorf("Percentile out of range\n actual: %f\nexpected: %f\n error: %f%%\n", got, expected, (got-expected)/expected*100)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPercentiles64(b *testing.B) {
|
||||
benchmarkUniformRandom(b, 64, 0.5)
|
||||
}
|
||||
|
||||
func BenchmarkPercentiles128(b *testing.B) {
|
||||
benchmarkUniformRandom(b, 128, 0.5)
|
||||
}
|
||||
|
||||
func BenchmarkPercentiles256(b *testing.B) {
|
||||
benchmarkUniformRandom(b, 256, 0.5)
|
||||
}
|
||||
|
||||
func BenchmarkPercentiles512(b *testing.B) {
|
||||
benchmarkUniformRandom(b, 512, 0.5)
|
||||
}
|
||||
|
||||
func benchmarkUniformRandom(b *testing.B, window int, percentile float64) {
|
||||
p := NewPercentile(percentile, window)
|
||||
|
||||
numbers := make([]float64, b.N)
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
numbers[i] = rand.Float64()
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
p.AddSample(numbers[i])
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue