Extract percentile implementation

This commit is contained in:
Justin Li 2014-07-28 13:25:48 -04:00
parent dda56c4357
commit dfeda26c16
3 changed files with 9 additions and 221 deletions

View file

@ -1,116 +0,0 @@
// Copyright 2014 The Chihaya Authors. All rights reserved.
// Use of this source code is governed by the BSD 2-Clause license,
// which can be found in the LICENSE file.
package stats
import (
"encoding/json"
"math"
"sort"
"sync/atomic"
)
// Percentile implements an efficient percentile calculation of
// arbitrary float64 samples.
type Percentile struct {
	// value holds the bits of the current percentile value (really a
	// float64) and is read and written with sync/atomic. It must stay the
	// first field: on 32-bit platforms only the first word of an allocated
	// struct is guaranteed to be 64-bit aligned, which the 64-bit atomic
	// operations require.
	value uint64

	// percentile is the threshold given to the constructor; it is
	// multiplied by the sample count to obtain the target rank.
	percentile float64

	// samples counts every call to AddSample.
	samples int64

	// offset is how far the retained window has been shifted upwards; it
	// is subtracted from the target rank to index into values.
	offset int64

	// values is the window of retained samples, kept in ascending order.
	// Its capacity is the window size.
	values []float64
}
// NewPercentile builds a Percentile that tracks the given threshold
// using the default sample window.
func NewPercentile(percentile float64) *Percentile {
	// 256 samples is fast, and accurate for most distributions.
	const defaultWindow = 256

	p := new(Percentile)
	p.percentile = percentile
	p.values = make([]float64, 0, defaultWindow)
	return p
}
// NewPercentileWithWindow returns a Percentile with a given threshold
// and window size (accuracy).
//
// sampleWindow is the capacity of the retained sample window. A value
// below 1 cannot hold any sample (a zero-capacity window makes the first
// AddSample index an empty slice, and a negative capacity panics in make),
// so it falls back to 256, the same window NewPercentile uses.
func NewPercentileWithWindow(percentile float64, sampleWindow int) *Percentile {
	if sampleWindow < 1 {
		sampleWindow = 256
	}
	return &Percentile{
		percentile: percentile,
		values:     make([]float64, 0, sampleWindow),
	}
}
// Value reports the most recently published value at the stored
// percentile. The value is read with an atomic load, so it is thread-safe
// and may be called concurrently with AddSample.
func (p *Percentile) Value() float64 {
	return math.Float64frombits(atomic.LoadUint64(&p.value))
}
// AddSample adds a single float64 sample to the data set and publishes
// the updated value at the stored percentile for Value to read.
// It is not thread-safe, and must not be called concurrently.
func (p *Percentile) AddSample(sample float64) {
p.samples++
// The window is full: decide whether it should slide upwards and whether
// the new sample belongs inside it.
if len(p.values) == cap(p.values) {
// target is the window start that would put the percentile rank in the
// middle of the window; offset is that start clamped at zero and rounded.
target := float64(p.samples)*p.percentile - float64(cap(p.values))/2
offset := round(math.Max(target, 0))
if sample > p.values[0] {
// If neither branch below applies, the sample is at or above the largest
// retained value while the window stays put, and it is not stored.
if offset > p.offset {
// Slide the window up by one: shift out the smallest retained value
// and place the sample at its sorted position.
idx := sort.SearchFloat64s(p.values[1:], sample)
copy(p.values, p.values[1:idx+1])
p.values[idx] = sample
p.offset++
} else if sample < p.values[len(p.values)-1] {
// Window stays put: insert at the sorted position; the shift
// overwrites the largest retained value.
idx := sort.SearchFloat64s(p.values, sample)
copy(p.values[idx+1:], p.values[idx:])
p.values[idx] = sample
}
} else {
// The sample is at or below the smallest retained value.
if offset > p.offset {
// Slide the window up by one without storing the sample.
p.offset++
} else {
// Window stays put: shift everything right by one (dropping the
// largest retained value) and store the sample at the front.
copy(p.values[1:], p.values)
p.values[0] = sample
}
}
} else {
// The window still has spare capacity: grow it by one element and
// insert the sample at its sorted position.
idx := sort.SearchFloat64s(p.values, sample)
p.values = p.values[:len(p.values)+1]
copy(p.values[idx+1:], p.values[idx:])
p.values[idx] = sample
}
// Publish the current percentile value as raw bits with an atomic store,
// matching the atomic load in Value.
bits := math.Float64bits(p.values[p.index()])
atomic.StoreUint64(&p.value, bits)
}
// index returns the position within p.values of the current percentile
// value: the rounded target rank relative to the window offset, capped at
// the last retained element.
func (p *Percentile) index() int64 {
	upper := int64(len(p.values)) - 1
	pos := round(float64(p.samples)*p.percentile - float64(p.offset))
	if pos <= upper {
		return pos
	}
	return upper
}
// MarshalJSON encodes the Percentile as the JSON form of its current
// Value, so it serializes as a bare number rather than as an object.
func (p *Percentile) MarshalJSON() ([]byte, error) {
	current := p.Value()
	return json.Marshal(current)
}
// round converts value to the nearest int64, rounding halves away from
// zero. It does so by nudging the value half a unit away from zero and
// letting the integer conversion truncate.
func round(value float64) int64 {
	half := 0.5
	if value < 0.0 {
		half = -half
	}
	return int64(value + half)
}

View file

@ -1,98 +0,0 @@
// Copyright 2014 The Chihaya Authors. All rights reserved.
// Use of this source code is governed by the BSD 2-Clause license,
// which can be found in the LICENSE file.
package stats
import (
"math"
"math/rand"
"sort"
"testing"
"time"
)
// TestPercentiles checks the 50th and 90th percentiles of uniformly
// distributed samples at two different scales.
func TestPercentiles(t *testing.T) {
	rand.Seed(time.Now().Unix())

	for _, scale := range []float64{1, 10000} {
		for _, percentile := range []float64{0.5, 0.9} {
			testSlice(t, uniform(10000, scale), percentile)
		}
	}
}
// TestLogNormPercentiles checks the 50th and 90th percentiles of
// log-normally distributed samples.
func TestLogNormPercentiles(t *testing.T) {
	rand.Seed(time.Now().Unix())

	for _, percentile := range []float64{0.5, 0.9} {
		testSlice(t, logNorm(10000, 1), percentile)
	}
}
// uniform returns n pseudo-random samples, each a rand.Float64 draw
// multiplied by scale.
func uniform(n int, scale float64) sort.Float64Slice {
	samples := make(sort.Float64Slice, n)
	for i := range samples {
		samples[i] = rand.Float64() * scale
	}
	return samples
}
// logNorm returns n pseudo-random samples, each the exponential of a
// rand.NormFloat64 draw multiplied by scale.
func logNorm(n int, scale float64) sort.Float64Slice {
	samples := make(sort.Float64Slice, n)
	for i := range samples {
		samples[i] = math.Exp(rand.NormFloat64()) * scale
	}
	return samples
}
// testSlice feeds every element of numbers into a default Percentile and
// verifies the reported value against the sorted data: the result must
// equal the element at the rounded percentile rank or one of its immediate
// neighbours. numbers is sorted in place as a side effect.
//
// The rank and its neighbours are clamped to the bounds of numbers so that
// short inputs and percentiles near 0 or 1 are compared rather than
// indexing out of range; an empty input fails the test explicitly.
func testSlice(t *testing.T, numbers sort.Float64Slice, percentile float64) {
	if len(numbers) == 0 {
		t.Fatal("no samples to test")
	}

	p := NewPercentile(percentile)
	for _, sample := range numbers {
		p.AddSample(sample)
	}

	sort.Sort(numbers)
	got := p.Value()

	last := int64(len(numbers)) - 1
	index := round(float64(len(numbers)) * percentile)
	if index < 0 {
		index = 0
	} else if index > last {
		index = last
	}

	// Accept an off-by-one rank in either direction, staying in bounds.
	lo, hi := index-1, index+1
	if lo < 0 {
		lo = 0
	}
	if hi > last {
		hi = last
	}

	if got != numbers[index] && got != numbers[lo] && got != numbers[hi] {
		t.Errorf("Percentile incorrect\n actual: %f\nexpected: %f, %f, %f\n", got, numbers[lo], numbers[index], numbers[hi])
	}
}
func BenchmarkPercentiles64(b *testing.B) {
benchmarkSlice(b, uniform(b.N, 1), 64, 0.5)
}
func BenchmarkPercentiles128(b *testing.B) {
benchmarkSlice(b, uniform(b.N, 1), 128, 0.5)
}
func BenchmarkPercentiles256(b *testing.B) {
benchmarkSlice(b, uniform(b.N, 1), 256, 0.5)
}
func BenchmarkPercentiles512(b *testing.B) {
benchmarkSlice(b, uniform(b.N, 1), 512, 0.5)
}
func BenchmarkLNPercentiles128(b *testing.B) {
benchmarkSlice(b, logNorm(b.N, 1), 128, 0.5)
}
// BenchmarkLNPercentiles256 times AddSample on log-normal samples using a
// 256-sample window at the 50th percentile.
func BenchmarkLNPercentiles256(b *testing.B) {
	// The window matches the benchmark's name and the uniform 256 variant;
	// it was previously mistyped as 258.
	benchmarkSlice(b, logNorm(b.N, 1), 256, 0.5)
}
// benchmarkSlice times b.N calls to AddSample on a Percentile built with
// the given window and percentile, feeding it numbers[0] through
// numbers[b.N-1].
func benchmarkSlice(b *testing.B, numbers sort.Float64Slice, window int, percentile float64) {
	estimator := NewPercentileWithWindow(percentile, window)

	// Exclude construction (and the caller's sample generation) from the
	// measured time.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		estimator.AddSample(numbers[n])
	}
}

View file

@ -9,8 +9,10 @@ package stats
import (
"time"
"github.com/chihaya/chihaya/config"
"github.com/pushrax/faststats"
"github.com/pushrax/flatjson"
"github.com/chihaya/chihaya/config"
)
const (
@ -59,9 +61,9 @@ type PeerStats struct {
}
type PercentileTimes struct {
P50 *Percentile
P90 *Percentile
P95 *Percentile
P50 *faststats.Percentile
P90 *faststats.Percentile
P95 *faststats.Percentile
}
type Stats struct {
@ -107,9 +109,9 @@ func New(cfg config.StatsConfig) *Stats {
responseTimeEvents: make(chan time.Duration, cfg.BufferSize),
ResponseTime: PercentileTimes{
P50: NewPercentile(0.5),
P90: NewPercentile(0.9),
P95: NewPercentile(0.95),
P50: faststats.NewPercentile(0.5),
P90: faststats.NewPercentile(0.9),
P95: faststats.NewPercentile(0.95),
},
}