diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 25226ee39c..c5530f7c62 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -31,8 +31,8 @@ import ( "bytes" "encoding/binary" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/metrics" ) // The fields below define the low level database schema prefixing. diff --git a/core/state/metrics.go b/core/state/metrics.go index 547897cf9b..5c07ad6f52 100644 --- a/core/state/metrics.go +++ b/core/state/metrics.go @@ -26,7 +26,7 @@ package state -import "github.com/ethereum/go-ethereum/metrics" +import "github.com/ava-labs/subnet-evm/metrics" var ( accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 156c43b346..554098612d 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -38,10 +38,10 @@ import ( "github.com/VictoriaMetrics/fastcache" "github.com/ava-labs/subnet-evm/core/rawdb" "github.com/ava-labs/subnet-evm/ethdb" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ava-labs/subnet-evm/trie" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" ) const ( diff --git a/core/state/state_object.go b/core/state/state_object.go index e9d596c075..ae2a34e5a5 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -35,9 +35,9 @@ import ( "time" "github.com/ava-labs/subnet-evm/core/types" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" ) diff --git a/core/state/statedb.go b/core/state/statedb.go index 51649ca9f8..75c643bf95 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -37,11 +37,11 @@ import ( "github.com/ava-labs/subnet-evm/core/rawdb" "github.com/ava-labs/subnet-evm/core/state/snapshot" "github.com/ava-labs/subnet-evm/core/types" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ava-labs/subnet-evm/trie" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" ) diff --git a/core/state/trie_prefetcher.go b/core/state/trie_prefetcher.go index f83cd02535..079ede9e45 100644 --- a/core/state/trie_prefetcher.go +++ b/core/state/trie_prefetcher.go @@ -29,15 +29,13 @@ package state import ( "sync" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" ) -var ( - // triePrefetchMetricsPrefix is the prefix under which to publis the metrics. - triePrefetchMetricsPrefix = "trie/prefetch/" -) +// triePrefetchMetricsPrefix is the prefix under which to publis the metrics. +var triePrefetchMetricsPrefix = "trie/prefetch/" // triePrefetcher is an active prefetcher, which receives accounts or storage // items and does trie-loading of them. The goal is to get as much useful content diff --git a/core/tx_pool.go b/core/tx_pool.go index 357e35bf2a..9fee0e5229 100644 --- a/core/tx_pool.go +++ b/core/tx_pool.go @@ -39,13 +39,13 @@ import ( "github.com/ava-labs/subnet-evm/consensus/dummy" "github.com/ava-labs/subnet-evm/core/state" "github.com/ava-labs/subnet-evm/core/types" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ava-labs/subnet-evm/params" "github.com/ava-labs/subnet-evm/precompile" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/prque" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" ) const ( diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go index 406de1eb3b..78aeffb242 100644 --- a/ethdb/leveldb/leveldb.go +++ b/ethdb/leveldb/leveldb.go @@ -38,9 +38,9 @@ import ( "time" "github.com/ava-labs/subnet-evm/ethdb" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" "github.com/syndtr/goleveldb/leveldb" "github.com/syndtr/goleveldb/leveldb/errors" "github.com/syndtr/goleveldb/leveldb/filter" diff --git a/go.mod b/go.mod index f6319509dc..c090af354e 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/prometheus/client_golang v1.12.1 github.com/prometheus/client_model v0.2.0 github.com/rjeczalik/notify v0.9.2 + github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible github.com/spf13/cast v1.4.1 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.10.0 @@ -33,6 +34,7 @@ require ( github.com/tyler-smith/go-bip39 v1.0.2 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c + golang.org/x/sys v0.0.0-20220405052023-b1e9470b6e64 golang.org/x/text v0.3.7 golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac gopkg.in/urfave/cli.v1 v1.20.0 @@ -84,7 +86,6 @@ require ( github.com/prometheus/procfs v0.7.3 // indirect github.com/prometheus/tsdb v0.10.0 // indirect github.com/rs/cors v1.7.0 // indirect - github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible // indirect github.com/spf13/afero v1.6.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/subosito/gotenv v1.2.0 // indirect @@ -94,7 +95,6 @@ require ( go.uber.org/multierr v1.6.0 // indirect go.uber.org/zap v1.21.0 // indirect golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect - golang.org/x/sys v0.0.0-20220405052023-b1e9470b6e64 // indirect golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d // indirect gonum.org/v1/gonum v0.9.1 // indirect google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect diff --git a/internal/debug/flags.go b/internal/debug/flags.go index e0601b2e9a..15e2b25fb3 100644 --- a/internal/debug/flags.go +++ b/internal/debug/flags.go @@ -35,8 +35,6 @@ import ( "runtime" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" - "github.com/ethereum/go-ethereum/metrics/exp" "github.com/fjl/memsize/memsizeui" "github.com/mattn/go-colorable" "github.com/mattn/go-isatty" @@ -189,17 +187,12 @@ func Setup(ctx *cli.Context) error { address := fmt.Sprintf("%s:%d", listenHost, port) // This context value ("metrics.addr") represents the utils.MetricsHTTPFlag.Name. // It cannot be imported because it will cause a cyclical dependency. - StartPProf(address, !ctx.GlobalIsSet("metrics.addr")) + StartPProf(address) } return nil } -func StartPProf(address string, withMetrics bool) { - // Hook go-metrics into expvar on any /debug/metrics request, load all vars - // from the registry into expvar, and execute regular expvar handler. - if withMetrics { - exp.Exp(metrics.DefaultRegistry) - } +func StartPProf(address string) { http.Handle("/memsize/", http.StripPrefix("/memsize", &Memsize)) log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) go func() { diff --git a/metrics/FORK.md b/metrics/FORK.md new file mode 100644 index 0000000000..b19985bf56 --- /dev/null +++ b/metrics/FORK.md @@ -0,0 +1 @@ +This repo has been forked from https://github.com/rcrowley/go-metrics at commit e181e09 diff --git a/metrics/LICENSE b/metrics/LICENSE new file mode 100644 index 0000000000..363fa9ee77 --- /dev/null +++ b/metrics/LICENSE @@ -0,0 +1,29 @@ +Copyright 2012 Richard Crowley. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY RICHARD CROWLEY ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL RICHARD CROWLEY OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation +are those of the authors and should not be interpreted as representing +official policies, either expressed or implied, of Richard Crowley. diff --git a/metrics/README.md b/metrics/README.md new file mode 100644 index 0000000000..0fbaabe4be --- /dev/null +++ b/metrics/README.md @@ -0,0 +1,102 @@ +go-metrics +========== + +![travis build status](https://travis-ci.org/rcrowley/go-metrics.svg?branch=master) + +Go port of Coda Hale's Metrics library: . + +Documentation: . + +Usage +----- + +Create and update metrics: + +```go +c := metrics.NewCounter() +metrics.Register("foo", c) +c.Inc(47) + +g := metrics.NewGauge() +metrics.Register("bar", g) +g.Update(47) + +r := NewRegistry() +g := metrics.NewRegisteredFunctionalGauge("cache-evictions", r, func() int64 { return cache.getEvictionsCount() }) + +s := metrics.NewExpDecaySample(1028, 0.015) // or metrics.NewUniformSample(1028) +h := metrics.NewHistogram(s) +metrics.Register("baz", h) +h.Update(47) + +m := metrics.NewMeter() +metrics.Register("quux", m) +m.Mark(47) + +t := metrics.NewTimer() +metrics.Register("bang", t) +t.Time(func() {}) +t.Update(47) +``` + +Register() is not threadsafe. For threadsafe metric registration use +GetOrRegister: + +```go +t := metrics.GetOrRegisterTimer("account.create.latency", nil) +t.Time(func() {}) +t.Update(47) +``` + +**NOTE:** Be sure to unregister short-lived meters and timers otherwise they will +leak memory: + +```go +// Will call Stop() on the Meter to allow for garbage collection +metrics.Unregister("quux") +// Or similarly for a Timer that embeds a Meter +metrics.Unregister("bang") +``` + +Periodically log every metric in human-readable form to standard error: + +```go +go metrics.Log(metrics.DefaultRegistry, 5 * time.Second, log.New(os.Stderr, "metrics: ", log.Lmicroseconds)) +``` + +Periodically log every metric in slightly-more-parseable form to syslog: + +```go +w, _ := syslog.Dial("unixgram", "/dev/log", syslog.LOG_INFO, "metrics") +go metrics.Syslog(metrics.DefaultRegistry, 60e9, w) +``` + +Periodically emit every metric to Graphite using the [Graphite client](https://github.com/cyberdelia/go-metrics-graphite): + +```go + +import "github.com/cyberdelia/go-metrics-graphite" + +addr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:2003") +go graphite.Graphite(metrics.DefaultRegistry, 10e9, "metrics", addr) +``` + +Installation +------------ + +```sh +go get github.com/rcrowley/go-metrics +``` + +StatHat support additionally requires their Go client: + +```sh +go get github.com/stathat/go +``` + +Publishing Metrics +------------------ + +Clients are available for the following destinations: + +* Prometheus - https://github.com/deathowl/go-metrics-prometheus diff --git a/metrics/config.go b/metrics/config.go new file mode 100644 index 0000000000..a60d96e962 --- /dev/null +++ b/metrics/config.go @@ -0,0 +1,43 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2021 The go-ethereum Authors +// This file is part of go-ethereum. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +// Config contains the configuration for the metric collection. +type Config struct { + Enabled bool `toml:",omitempty"` + EnabledExpensive bool `toml:",omitempty"` + HTTP string `toml:",omitempty"` + Port int `toml:",omitempty"` +} + +// DefaultConfig is the default config for metrics used in go-ethereum. +var DefaultConfig = Config{ + Enabled: false, + EnabledExpensive: false, + HTTP: "127.0.0.1", + Port: 6060, +} diff --git a/metrics/counter.go b/metrics/counter.go new file mode 100644 index 0000000000..2f78c90d5c --- /dev/null +++ b/metrics/counter.go @@ -0,0 +1,144 @@ +package metrics + +import ( + "sync/atomic" +) + +// Counters hold an int64 value that can be incremented and decremented. +type Counter interface { + Clear() + Count() int64 + Dec(int64) + Inc(int64) + Snapshot() Counter +} + +// GetOrRegisterCounter returns an existing Counter or constructs and registers +// a new StandardCounter. +func GetOrRegisterCounter(name string, r Registry) Counter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounter).(Counter) +} + +// GetOrRegisterCounterForced returns an existing Counter or constructs and registers a +// new Counter no matter the global switch is enabled or not. +// Be sure to unregister the counter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterCounterForced(name string, r Registry) Counter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounterForced).(Counter) +} + +// NewCounter constructs a new StandardCounter. +func NewCounter() Counter { + if !Enabled { + return NilCounter{} + } + return &StandardCounter{0} +} + +// NewCounterForced constructs a new StandardCounter and returns it no matter if +// the global switch is enabled or not. +func NewCounterForced() Counter { + return &StandardCounter{0} +} + +// NewRegisteredCounter constructs and registers a new StandardCounter. +func NewRegisteredCounter(name string, r Registry) Counter { + c := NewCounter() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewRegisteredCounterForced constructs and registers a new StandardCounter +// and launches a goroutine no matter the global switch is enabled or not. +// Be sure to unregister the counter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredCounterForced(name string, r Registry) Counter { + c := NewCounterForced() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// CounterSnapshot is a read-only copy of another Counter. +type CounterSnapshot int64 + +// Clear panics. +func (CounterSnapshot) Clear() { + panic("Clear called on a CounterSnapshot") +} + +// Count returns the count at the time the snapshot was taken. +func (c CounterSnapshot) Count() int64 { return int64(c) } + +// Dec panics. +func (CounterSnapshot) Dec(int64) { + panic("Dec called on a CounterSnapshot") +} + +// Inc panics. +func (CounterSnapshot) Inc(int64) { + panic("Inc called on a CounterSnapshot") +} + +// Snapshot returns the snapshot. +func (c CounterSnapshot) Snapshot() Counter { return c } + +// NilCounter is a no-op Counter. +type NilCounter struct{} + +// Clear is a no-op. +func (NilCounter) Clear() {} + +// Count is a no-op. +func (NilCounter) Count() int64 { return 0 } + +// Dec is a no-op. +func (NilCounter) Dec(i int64) {} + +// Inc is a no-op. +func (NilCounter) Inc(i int64) {} + +// Snapshot is a no-op. +func (NilCounter) Snapshot() Counter { return NilCounter{} } + +// StandardCounter is the standard implementation of a Counter and uses the +// sync/atomic package to manage a single int64 value. +type StandardCounter struct { + count int64 +} + +// Clear sets the counter to zero. +func (c *StandardCounter) Clear() { + atomic.StoreInt64(&c.count, 0) +} + +// Count returns the current count. +func (c *StandardCounter) Count() int64 { + return atomic.LoadInt64(&c.count) +} + +// Dec decrements the counter by the given amount. +func (c *StandardCounter) Dec(i int64) { + atomic.AddInt64(&c.count, -i) +} + +// Inc increments the counter by the given amount. +func (c *StandardCounter) Inc(i int64) { + atomic.AddInt64(&c.count, i) +} + +// Snapshot returns a read-only copy of the counter. +func (c *StandardCounter) Snapshot() Counter { + return CounterSnapshot(c.Count()) +} diff --git a/metrics/counter_test.go b/metrics/counter_test.go new file mode 100644 index 0000000000..af26ef1548 --- /dev/null +++ b/metrics/counter_test.go @@ -0,0 +1,77 @@ +package metrics + +import "testing" + +func BenchmarkCounter(b *testing.B) { + c := NewCounter() + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Inc(1) + } +} + +func TestCounterClear(t *testing.T) { + c := NewCounter() + c.Inc(1) + c.Clear() + if count := c.Count(); count != 0 { + t.Errorf("c.Count(): 0 != %v\n", count) + } +} + +func TestCounterDec1(t *testing.T) { + c := NewCounter() + c.Dec(1) + if count := c.Count(); count != -1 { + t.Errorf("c.Count(): -1 != %v\n", count) + } +} + +func TestCounterDec2(t *testing.T) { + c := NewCounter() + c.Dec(2) + if count := c.Count(); count != -2 { + t.Errorf("c.Count(): -2 != %v\n", count) + } +} + +func TestCounterInc1(t *testing.T) { + c := NewCounter() + c.Inc(1) + if count := c.Count(); count != 1 { + t.Errorf("c.Count(): 1 != %v\n", count) + } +} + +func TestCounterInc2(t *testing.T) { + c := NewCounter() + c.Inc(2) + if count := c.Count(); count != 2 { + t.Errorf("c.Count(): 2 != %v\n", count) + } +} + +func TestCounterSnapshot(t *testing.T) { + c := NewCounter() + c.Inc(1) + snapshot := c.Snapshot() + c.Inc(1) + if count := snapshot.Count(); count != 1 { + t.Errorf("c.Count(): 1 != %v\n", count) + } +} + +func TestCounterZero(t *testing.T) { + c := NewCounter() + if count := c.Count(); count != 0 { + t.Errorf("c.Count(): 0 != %v\n", count) + } +} + +func TestGetOrRegisterCounter(t *testing.T) { + r := NewRegistry() + NewRegisteredCounter("foo", r).Inc(47) + if c := GetOrRegisterCounter("foo", r); c.Count() != 47 { + t.Fatal(c) + } +} diff --git a/metrics/cpu.go b/metrics/cpu.go new file mode 100644 index 0000000000..6bd560b328 --- /dev/null +++ b/metrics/cpu.go @@ -0,0 +1,34 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +// CPUStats is the system and process CPU stats. +type CPUStats struct { + GlobalTime int64 // Time spent by the CPU working on all processes + GlobalWait int64 // Time spent by waiting on disk for all processes + LocalTime int64 // Time spent by the CPU working on this process +} diff --git a/metrics/cpu_disabled.go b/metrics/cpu_disabled.go new file mode 100644 index 0000000000..f2c3ead5db --- /dev/null +++ b/metrics/cpu_disabled.go @@ -0,0 +1,34 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build ios || js +// +build ios js + +package metrics + +// ReadCPUStats retrieves the current CPU stats. Internally this uses `gosigar`, +// which is not supported on the platforms in this file. +func ReadCPUStats(stats *CPUStats) {} diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go new file mode 100644 index 0000000000..b04b432f1b --- /dev/null +++ b/metrics/cpu_enabled.go @@ -0,0 +1,54 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build !ios && !js +// +build !ios,!js + +package metrics + +import ( + "github.com/ethereum/go-ethereum/log" + "github.com/shirou/gopsutil/cpu" +) + +// ReadCPUStats retrieves the current CPU stats. +func ReadCPUStats(stats *CPUStats) { + // passing false to request all cpu times + timeStats, err := cpu.Times(false) + if err != nil { + log.Error("Could not read cpu stats", "err", err) + return + } + if len(timeStats) == 0 { + log.Error("Empty cpu stats") + return + } + // requesting all cpu times will always return an array with only one time stats entry + timeStat := timeStats[0] + stats.GlobalTime = int64((timeStat.User + timeStat.Nice + timeStat.System) * cpu.ClocksPerSec) + stats.GlobalWait = int64((timeStat.Iowait) * cpu.ClocksPerSec) + stats.LocalTime = getProcessCPUTime() +} diff --git a/metrics/cputime_nop.go b/metrics/cputime_nop.go new file mode 100644 index 0000000000..995a68fef1 --- /dev/null +++ b/metrics/cputime_nop.go @@ -0,0 +1,36 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build windows || js +// +build windows js + +package metrics + +// getProcessCPUTime returns 0 on Windows as there is no system call to resolve +// the actual process' CPU time. +func getProcessCPUTime() int64 { + return 0 +} diff --git a/metrics/cputime_unix.go b/metrics/cputime_unix.go new file mode 100644 index 0000000000..95f60ed830 --- /dev/null +++ b/metrics/cputime_unix.go @@ -0,0 +1,46 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build !windows && !js +// +build !windows,!js + +package metrics + +import ( + syscall "golang.org/x/sys/unix" + + "github.com/ethereum/go-ethereum/log" +) + +// getProcessCPUTime retrieves the process' CPU time since program startup. +func getProcessCPUTime() int64 { + var usage syscall.Rusage + if err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage); err != nil { + log.Warn("Failed to retrieve CPU time", "err", err) + return 0 + } + return int64(usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert +} diff --git a/metrics/debug.go b/metrics/debug.go new file mode 100644 index 0000000000..de4a2739fe --- /dev/null +++ b/metrics/debug.go @@ -0,0 +1,76 @@ +package metrics + +import ( + "runtime/debug" + "time" +) + +var ( + debugMetrics struct { + GCStats struct { + LastGC Gauge + NumGC Gauge + Pause Histogram + //PauseQuantiles Histogram + PauseTotal Gauge + } + ReadGCStats Timer + } + gcStats debug.GCStats +) + +// Capture new values for the Go garbage collector statistics exported in +// debug.GCStats. This is designed to be called as a goroutine. +func CaptureDebugGCStats(r Registry, d time.Duration) { + for range time.Tick(d) { + CaptureDebugGCStatsOnce(r) + } +} + +// Capture new values for the Go garbage collector statistics exported in +// debug.GCStats. This is designed to be called in a background goroutine. +// Giving a registry which has not been given to RegisterDebugGCStats will +// panic. +// +// Be careful (but much less so) with this because debug.ReadGCStats calls +// the C function runtime·lock(runtime·mheap) which, while not a stop-the-world +// operation, isn't something you want to be doing all the time. +func CaptureDebugGCStatsOnce(r Registry) { + lastGC := gcStats.LastGC + t := time.Now() + debug.ReadGCStats(&gcStats) + debugMetrics.ReadGCStats.UpdateSince(t) + + debugMetrics.GCStats.LastGC.Update(gcStats.LastGC.UnixNano()) + debugMetrics.GCStats.NumGC.Update(gcStats.NumGC) + if lastGC != gcStats.LastGC && 0 < len(gcStats.Pause) { + debugMetrics.GCStats.Pause.Update(int64(gcStats.Pause[0])) + } + //debugMetrics.GCStats.PauseQuantiles.Update(gcStats.PauseQuantiles) + debugMetrics.GCStats.PauseTotal.Update(int64(gcStats.PauseTotal)) +} + +// Register metrics for the Go garbage collector statistics exported in +// debug.GCStats. The metrics are named by their fully-qualified Go symbols, +// i.e. debug.GCStats.PauseTotal. +func RegisterDebugGCStats(r Registry) { + debugMetrics.GCStats.LastGC = NewGauge() + debugMetrics.GCStats.NumGC = NewGauge() + debugMetrics.GCStats.Pause = NewHistogram(NewExpDecaySample(1028, 0.015)) + //debugMetrics.GCStats.PauseQuantiles = NewHistogram(NewExpDecaySample(1028, 0.015)) + debugMetrics.GCStats.PauseTotal = NewGauge() + debugMetrics.ReadGCStats = NewTimer() + + r.Register("debug.GCStats.LastGC", debugMetrics.GCStats.LastGC) + r.Register("debug.GCStats.NumGC", debugMetrics.GCStats.NumGC) + r.Register("debug.GCStats.Pause", debugMetrics.GCStats.Pause) + //r.Register("debug.GCStats.PauseQuantiles", debugMetrics.GCStats.PauseQuantiles) + r.Register("debug.GCStats.PauseTotal", debugMetrics.GCStats.PauseTotal) + r.Register("debug.ReadGCStats", debugMetrics.ReadGCStats) +} + +// Allocate an initial slice for gcStats.Pause to avoid allocations during +// normal operation. +func init() { + gcStats.Pause = make([]time.Duration, 11) +} diff --git a/metrics/debug_test.go b/metrics/debug_test.go new file mode 100644 index 0000000000..07eb867841 --- /dev/null +++ b/metrics/debug_test.go @@ -0,0 +1,48 @@ +package metrics + +import ( + "runtime" + "runtime/debug" + "testing" + "time" +) + +func BenchmarkDebugGCStats(b *testing.B) { + r := NewRegistry() + RegisterDebugGCStats(r) + b.ResetTimer() + for i := 0; i < b.N; i++ { + CaptureDebugGCStatsOnce(r) + } +} + +func TestDebugGCStatsBlocking(t *testing.T) { + if g := runtime.GOMAXPROCS(0); g < 2 { + t.Skipf("skipping TestDebugGCMemStatsBlocking with GOMAXPROCS=%d\n", g) + return + } + ch := make(chan int) + go testDebugGCStatsBlocking(ch) + var gcStats debug.GCStats + t0 := time.Now() + debug.ReadGCStats(&gcStats) + t1 := time.Now() + t.Log("i++ during debug.ReadGCStats:", <-ch) + go testDebugGCStatsBlocking(ch) + d := t1.Sub(t0) + t.Log(d) + time.Sleep(d) + t.Log("i++ during time.Sleep:", <-ch) +} + +func testDebugGCStatsBlocking(ch chan int) { + i := 0 + for { + select { + case ch <- i: + return + default: + i++ + } + } +} diff --git a/metrics/disk.go b/metrics/disk.go new file mode 100644 index 0000000000..1fdd32a4d3 --- /dev/null +++ b/metrics/disk.go @@ -0,0 +1,35 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +// DiskStats is the per process disk io stats. +type DiskStats struct { + ReadCount int64 // Number of read operations executed + ReadBytes int64 // Total number of bytes read + WriteCount int64 // Number of write operations executed + WriteBytes int64 // Total number of byte written +} diff --git a/metrics/disk_linux.go b/metrics/disk_linux.go new file mode 100644 index 0000000000..25341d748a --- /dev/null +++ b/metrics/disk_linux.go @@ -0,0 +1,82 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Contains the Linux implementation of process disk IO counter retrieval. + +package metrics + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" +) + +// ReadDiskStats retrieves the disk IO stats belonging to the current process. +func ReadDiskStats(stats *DiskStats) error { + // Open the process disk IO counter file + inf, err := os.Open(fmt.Sprintf("/proc/%d/io", os.Getpid())) + if err != nil { + return err + } + defer inf.Close() + in := bufio.NewReader(inf) + + // Iterate over the IO counter, and extract what we need + for { + // Read the next line and split to key and value + line, err := in.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return err + } + parts := strings.Split(line, ":") + if len(parts) != 2 { + continue + } + key := strings.TrimSpace(parts[0]) + value, err := strconv.ParseInt(strings.TrimSpace(parts[1]), 10, 64) + if err != nil { + return err + } + + // Update the counter based on the key + switch key { + case "syscr": + stats.ReadCount = value + case "syscw": + stats.WriteCount = value + case "rchar": + stats.ReadBytes = value + case "wchar": + stats.WriteBytes = value + } + } +} diff --git a/metrics/disk_nop.go b/metrics/disk_nop.go new file mode 100644 index 0000000000..f04515571f --- /dev/null +++ b/metrics/disk_nop.go @@ -0,0 +1,37 @@ +// (c) 2022, Ava Labs, Inc. +// +// This file is a derived work, based on the go-ethereum library whose original +// notices appear below. +// +// It is distributed under a license compatible with the licensing terms of the +// original code from which it is derived. +// +// Much love to the original authors for their work. +// ********** +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build !linux +// +build !linux + +package metrics + +import "errors" + +// ReadDiskStats retrieves the disk IO stats belonging to the current process. +func ReadDiskStats(stats *DiskStats) error { + return errors.New("Not implemented") +} diff --git a/metrics/doc.go b/metrics/doc.go new file mode 100644 index 0000000000..13f429c168 --- /dev/null +++ b/metrics/doc.go @@ -0,0 +1,4 @@ +package metrics + +const epsilon = 0.0000000000000001 +const epsilonPercentile = .00000000001 diff --git a/metrics/ewma.go b/metrics/ewma.go new file mode 100644 index 0000000000..039286493e --- /dev/null +++ b/metrics/ewma.go @@ -0,0 +1,116 @@ +package metrics + +import ( + "math" + "sync" + "sync/atomic" + "time" +) + +// EWMAs continuously calculate an exponentially-weighted moving average +// based on an outside source of clock ticks. +type EWMA interface { + Rate() float64 + Snapshot() EWMA + Tick() + Update(int64) +} + +// NewEWMA constructs a new EWMA with the given alpha. +func NewEWMA(alpha float64) EWMA { + return &StandardEWMA{alpha: alpha} +} + +// NewEWMA1 constructs a new EWMA for a one-minute moving average. +func NewEWMA1() EWMA { + return NewEWMA(1 - math.Exp(-5.0/60.0/1)) +} + +// NewEWMA5 constructs a new EWMA for a five-minute moving average. +func NewEWMA5() EWMA { + return NewEWMA(1 - math.Exp(-5.0/60.0/5)) +} + +// NewEWMA15 constructs a new EWMA for a fifteen-minute moving average. +func NewEWMA15() EWMA { + return NewEWMA(1 - math.Exp(-5.0/60.0/15)) +} + +// EWMASnapshot is a read-only copy of another EWMA. +type EWMASnapshot float64 + +// Rate returns the rate of events per second at the time the snapshot was +// taken. +func (a EWMASnapshot) Rate() float64 { return float64(a) } + +// Snapshot returns the snapshot. +func (a EWMASnapshot) Snapshot() EWMA { return a } + +// Tick panics. +func (EWMASnapshot) Tick() { + panic("Tick called on an EWMASnapshot") +} + +// Update panics. +func (EWMASnapshot) Update(int64) { + panic("Update called on an EWMASnapshot") +} + +// NilEWMA is a no-op EWMA. +type NilEWMA struct{} + +// Rate is a no-op. +func (NilEWMA) Rate() float64 { return 0.0 } + +// Snapshot is a no-op. +func (NilEWMA) Snapshot() EWMA { return NilEWMA{} } + +// Tick is a no-op. +func (NilEWMA) Tick() {} + +// Update is a no-op. +func (NilEWMA) Update(n int64) {} + +// StandardEWMA is the standard implementation of an EWMA and tracks the number +// of uncounted events and processes them on each tick. It uses the +// sync/atomic package to manage uncounted events. +type StandardEWMA struct { + uncounted int64 // /!\ this should be the first member to ensure 64-bit alignment + alpha float64 + rate float64 + init bool + mutex sync.Mutex +} + +// Rate returns the moving average rate of events per second. +func (a *StandardEWMA) Rate() float64 { + a.mutex.Lock() + defer a.mutex.Unlock() + return a.rate * float64(time.Second) +} + +// Snapshot returns a read-only copy of the EWMA. +func (a *StandardEWMA) Snapshot() EWMA { + return EWMASnapshot(a.Rate()) +} + +// Tick ticks the clock to update the moving average. It assumes it is called +// every five seconds. +func (a *StandardEWMA) Tick() { + count := atomic.LoadInt64(&a.uncounted) + atomic.AddInt64(&a.uncounted, -count) + instantRate := float64(count) / float64(5*time.Second) + a.mutex.Lock() + defer a.mutex.Unlock() + if a.init { + a.rate += a.alpha * (instantRate - a.rate) + } else { + a.init = true + a.rate = instantRate + } +} + +// Update adds n uncounted events. +func (a *StandardEWMA) Update(n int64) { + atomic.AddInt64(&a.uncounted, n) +} diff --git a/metrics/ewma_test.go b/metrics/ewma_test.go new file mode 100644 index 0000000000..5b24419161 --- /dev/null +++ b/metrics/ewma_test.go @@ -0,0 +1,228 @@ +package metrics + +import ( + "math" + "testing" +) + +func BenchmarkEWMA(b *testing.B) { + a := NewEWMA1() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Update(1) + a.Tick() + } +} + +func TestEWMA1(t *testing.T) { + a := NewEWMA1() + a.Update(3) + a.Tick() + if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { + t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.22072766470286553-rate) > epsilon { + t.Errorf("1 minute a.Rate(): 0.22072766470286553 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.08120116994196772-rate) > epsilon { + t.Errorf("2 minute a.Rate(): 0.08120116994196772 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.029872241020718428-rate) > epsilon { + t.Errorf("3 minute a.Rate(): 0.029872241020718428 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.01098938333324054-rate) > epsilon { + t.Errorf("4 minute a.Rate(): 0.01098938333324054 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.004042768199451294-rate) > epsilon { + t.Errorf("5 minute a.Rate(): 0.004042768199451294 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.0014872513059998212-rate) > epsilon { + t.Errorf("6 minute a.Rate(): 0.0014872513059998212 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.0005471291793327122-rate) > epsilon { + t.Errorf("7 minute a.Rate(): 0.0005471291793327122 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.00020127757674150815-rate) > epsilon { + t.Errorf("8 minute a.Rate(): 0.00020127757674150815 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(7.404588245200814e-05-rate) > epsilon { + t.Errorf("9 minute a.Rate(): 7.404588245200814e-05 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(2.7239957857491083e-05-rate) > epsilon { + t.Errorf("10 minute a.Rate(): 2.7239957857491083e-05 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(1.0021020474147462e-05-rate) > epsilon { + t.Errorf("11 minute a.Rate(): 1.0021020474147462e-05 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(3.6865274119969525e-06-rate) > epsilon { + t.Errorf("12 minute a.Rate(): 3.6865274119969525e-06 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(1.3561976441886433e-06-rate) > epsilon { + t.Errorf("13 minute a.Rate(): 1.3561976441886433e-06 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(4.989172314621449e-07-rate) > epsilon { + t.Errorf("14 minute a.Rate(): 4.989172314621449e-07 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(1.8354139230109722e-07-rate) > epsilon { + t.Errorf("15 minute a.Rate(): 1.8354139230109722e-07 != %v\n", rate) + } +} + +func TestEWMA5(t *testing.T) { + a := NewEWMA5() + a.Update(3) + a.Tick() + if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { + t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.49123845184678905-rate) > epsilon { + t.Errorf("1 minute a.Rate(): 0.49123845184678905 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.4021920276213837-rate) > epsilon { + t.Errorf("2 minute a.Rate(): 0.4021920276213837 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.32928698165641596-rate) > epsilon { + t.Errorf("3 minute a.Rate(): 0.32928698165641596 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.269597378470333-rate) > epsilon { + t.Errorf("4 minute a.Rate(): 0.269597378470333 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.2207276647028654-rate) > epsilon { + t.Errorf("5 minute a.Rate(): 0.2207276647028654 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.18071652714732128-rate) > epsilon { + t.Errorf("6 minute a.Rate(): 0.18071652714732128 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.14795817836496392-rate) > epsilon { + t.Errorf("7 minute a.Rate(): 0.14795817836496392 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.12113791079679326-rate) > epsilon { + t.Errorf("8 minute a.Rate(): 0.12113791079679326 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.09917933293295193-rate) > epsilon { + t.Errorf("9 minute a.Rate(): 0.09917933293295193 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.08120116994196763-rate) > epsilon { + t.Errorf("10 minute a.Rate(): 0.08120116994196763 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.06648189501740036-rate) > epsilon { + t.Errorf("11 minute a.Rate(): 0.06648189501740036 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.05443077197364752-rate) > epsilon { + t.Errorf("12 minute a.Rate(): 0.05443077197364752 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.04456414692860035-rate) > epsilon { + t.Errorf("13 minute a.Rate(): 0.04456414692860035 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.03648603757513079-rate) > epsilon { + t.Errorf("14 minute a.Rate(): 0.03648603757513079 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.0298722410207183831020718428-rate) > epsilon { + t.Errorf("15 minute a.Rate(): 0.0298722410207183831020718428 != %v\n", rate) + } +} + +func TestEWMA15(t *testing.T) { + a := NewEWMA15() + a.Update(3) + a.Tick() + if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { + t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.5613041910189706-rate) > epsilon { + t.Errorf("1 minute a.Rate(): 0.5613041910189706 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.5251039914257684-rate) > epsilon { + t.Errorf("2 minute a.Rate(): 0.5251039914257684 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.4912384518467888184678905-rate) > epsilon { + t.Errorf("3 minute a.Rate(): 0.4912384518467888184678905 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.459557003018789-rate) > epsilon { + t.Errorf("4 minute a.Rate(): 0.459557003018789 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.4299187863442732-rate) > epsilon { + t.Errorf("5 minute a.Rate(): 0.4299187863442732 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.4021920276213831-rate) > epsilon { + t.Errorf("6 minute a.Rate(): 0.4021920276213831 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.37625345116383313-rate) > epsilon { + t.Errorf("7 minute a.Rate(): 0.37625345116383313 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.3519877317060185-rate) > epsilon { + t.Errorf("8 minute a.Rate(): 0.3519877317060185 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.3292869816564153165641596-rate) > epsilon { + t.Errorf("9 minute a.Rate(): 0.3292869816564153165641596 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.3080502714195546-rate) > epsilon { + t.Errorf("10 minute a.Rate(): 0.3080502714195546 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.2881831806538789-rate) > epsilon { + t.Errorf("11 minute a.Rate(): 0.2881831806538789 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.26959737847033216-rate) > epsilon { + t.Errorf("12 minute a.Rate(): 0.26959737847033216 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.2522102307052083-rate) > epsilon { + t.Errorf("13 minute a.Rate(): 0.2522102307052083 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.23594443252115815-rate) > epsilon { + t.Errorf("14 minute a.Rate(): 0.23594443252115815 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); math.Abs(0.2207276647028646247028654470286553-rate) > epsilon { + t.Errorf("15 minute a.Rate(): 0.2207276647028646247028654470286553 != %v\n", rate) + } +} + +func elapseMinute(a EWMA) { + for i := 0; i < 12; i++ { + a.Tick() + } +} diff --git a/metrics/gauge.go b/metrics/gauge.go new file mode 100644 index 0000000000..b6b2758b0d --- /dev/null +++ b/metrics/gauge.go @@ -0,0 +1,158 @@ +package metrics + +import "sync/atomic" + +// Gauges hold an int64 value that can be set arbitrarily. +type Gauge interface { + Snapshot() Gauge + Update(int64) + Dec(int64) + Inc(int64) + Value() int64 +} + +// GetOrRegisterGauge returns an existing Gauge or constructs and registers a +// new StandardGauge. +func GetOrRegisterGauge(name string, r Registry) Gauge { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewGauge).(Gauge) +} + +// NewGauge constructs a new StandardGauge. +func NewGauge() Gauge { + if !Enabled { + return NilGauge{} + } + return &StandardGauge{0} +} + +// NewRegisteredGauge constructs and registers a new StandardGauge. +func NewRegisteredGauge(name string, r Registry) Gauge { + c := NewGauge() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewFunctionalGauge constructs a new FunctionalGauge. +func NewFunctionalGauge(f func() int64) Gauge { + if !Enabled { + return NilGauge{} + } + return &FunctionalGauge{value: f} +} + +// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. +func NewRegisteredFunctionalGauge(name string, r Registry, f func() int64) Gauge { + c := NewFunctionalGauge(f) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// GaugeSnapshot is a read-only copy of another Gauge. +type GaugeSnapshot int64 + +// Snapshot returns the snapshot. +func (g GaugeSnapshot) Snapshot() Gauge { return g } + +// Update panics. +func (GaugeSnapshot) Update(int64) { + panic("Update called on a GaugeSnapshot") +} + +// Dec panics. +func (GaugeSnapshot) Dec(int64) { + panic("Dec called on a GaugeSnapshot") +} + +// Inc panics. +func (GaugeSnapshot) Inc(int64) { + panic("Inc called on a GaugeSnapshot") +} + +// Value returns the value at the time the snapshot was taken. +func (g GaugeSnapshot) Value() int64 { return int64(g) } + +// NilGauge is a no-op Gauge. +type NilGauge struct{} + +// Snapshot is a no-op. +func (NilGauge) Snapshot() Gauge { return NilGauge{} } + +// Update is a no-op. +func (NilGauge) Update(v int64) {} + +// Dec is a no-op. +func (NilGauge) Dec(i int64) {} + +// Inc is a no-op. +func (NilGauge) Inc(i int64) {} + +// Value is a no-op. +func (NilGauge) Value() int64 { return 0 } + +// StandardGauge is the standard implementation of a Gauge and uses the +// sync/atomic package to manage a single int64 value. +type StandardGauge struct { + value int64 +} + +// Snapshot returns a read-only copy of the gauge. +func (g *StandardGauge) Snapshot() Gauge { + return GaugeSnapshot(g.Value()) +} + +// Update updates the gauge's value. +func (g *StandardGauge) Update(v int64) { + atomic.StoreInt64(&g.value, v) +} + +// Value returns the gauge's current value. +func (g *StandardGauge) Value() int64 { + return atomic.LoadInt64(&g.value) +} + +// Dec decrements the gauge's current value by the given amount. +func (g *StandardGauge) Dec(i int64) { + atomic.AddInt64(&g.value, -i) +} + +// Inc increments the gauge's current value by the given amount. +func (g *StandardGauge) Inc(i int64) { + atomic.AddInt64(&g.value, i) +} + +// FunctionalGauge returns value from given function +type FunctionalGauge struct { + value func() int64 +} + +// Value returns the gauge's current value. +func (g FunctionalGauge) Value() int64 { + return g.value() +} + +// Snapshot returns the snapshot. +func (g FunctionalGauge) Snapshot() Gauge { return GaugeSnapshot(g.Value()) } + +// Update panics. +func (FunctionalGauge) Update(int64) { + panic("Update called on a FunctionalGauge") +} + +// Dec panics. +func (FunctionalGauge) Dec(int64) { + panic("Dec called on a FunctionalGauge") +} + +// Inc panics. +func (FunctionalGauge) Inc(int64) { + panic("Inc called on a FunctionalGauge") +} diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go new file mode 100644 index 0000000000..66819c9577 --- /dev/null +++ b/metrics/gauge_float64.go @@ -0,0 +1,127 @@ +package metrics + +import "sync" + +// GaugeFloat64s hold a float64 value that can be set arbitrarily. +type GaugeFloat64 interface { + Snapshot() GaugeFloat64 + Update(float64) + Value() float64 +} + +// GetOrRegisterGaugeFloat64 returns an existing GaugeFloat64 or constructs and registers a +// new StandardGaugeFloat64. +func GetOrRegisterGaugeFloat64(name string, r Registry) GaugeFloat64 { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewGaugeFloat64()).(GaugeFloat64) +} + +// NewGaugeFloat64 constructs a new StandardGaugeFloat64. +func NewGaugeFloat64() GaugeFloat64 { + if !Enabled { + return NilGaugeFloat64{} + } + return &StandardGaugeFloat64{ + value: 0.0, + } +} + +// NewRegisteredGaugeFloat64 constructs and registers a new StandardGaugeFloat64. +func NewRegisteredGaugeFloat64(name string, r Registry) GaugeFloat64 { + c := NewGaugeFloat64() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewFunctionalGauge constructs a new FunctionalGauge. +func NewFunctionalGaugeFloat64(f func() float64) GaugeFloat64 { + if !Enabled { + return NilGaugeFloat64{} + } + return &FunctionalGaugeFloat64{value: f} +} + +// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. +func NewRegisteredFunctionalGaugeFloat64(name string, r Registry, f func() float64) GaugeFloat64 { + c := NewFunctionalGaugeFloat64(f) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// GaugeFloat64Snapshot is a read-only copy of another GaugeFloat64. +type GaugeFloat64Snapshot float64 + +// Snapshot returns the snapshot. +func (g GaugeFloat64Snapshot) Snapshot() GaugeFloat64 { return g } + +// Update panics. +func (GaugeFloat64Snapshot) Update(float64) { + panic("Update called on a GaugeFloat64Snapshot") +} + +// Value returns the value at the time the snapshot was taken. +func (g GaugeFloat64Snapshot) Value() float64 { return float64(g) } + +// NilGauge is a no-op Gauge. +type NilGaugeFloat64 struct{} + +// Snapshot is a no-op. +func (NilGaugeFloat64) Snapshot() GaugeFloat64 { return NilGaugeFloat64{} } + +// Update is a no-op. +func (NilGaugeFloat64) Update(v float64) {} + +// Value is a no-op. +func (NilGaugeFloat64) Value() float64 { return 0.0 } + +// StandardGaugeFloat64 is the standard implementation of a GaugeFloat64 and uses +// sync.Mutex to manage a single float64 value. +type StandardGaugeFloat64 struct { + mutex sync.Mutex + value float64 +} + +// Snapshot returns a read-only copy of the gauge. +func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64 { + return GaugeFloat64Snapshot(g.Value()) +} + +// Update updates the gauge's value. +func (g *StandardGaugeFloat64) Update(v float64) { + g.mutex.Lock() + defer g.mutex.Unlock() + g.value = v +} + +// Value returns the gauge's current value. +func (g *StandardGaugeFloat64) Value() float64 { + g.mutex.Lock() + defer g.mutex.Unlock() + return g.value +} + +// FunctionalGaugeFloat64 returns value from given function +type FunctionalGaugeFloat64 struct { + value func() float64 +} + +// Value returns the gauge's current value. +func (g FunctionalGaugeFloat64) Value() float64 { + return g.value() +} + +// Snapshot returns the snapshot. +func (g FunctionalGaugeFloat64) Snapshot() GaugeFloat64 { return GaugeFloat64Snapshot(g.Value()) } + +// Update panics. +func (FunctionalGaugeFloat64) Update(float64) { + panic("Update called on a FunctionalGaugeFloat64") +} diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go new file mode 100644 index 0000000000..02b75580c4 --- /dev/null +++ b/metrics/gauge_float64_test.go @@ -0,0 +1,59 @@ +package metrics + +import "testing" + +func BenchmarkGuageFloat64(b *testing.B) { + g := NewGaugeFloat64() + b.ResetTimer() + for i := 0; i < b.N; i++ { + g.Update(float64(i)) + } +} + +func TestGaugeFloat64(t *testing.T) { + g := NewGaugeFloat64() + g.Update(47.0) + if v := g.Value(); 47.0 != v { + t.Errorf("g.Value(): 47.0 != %v\n", v) + } +} + +func TestGaugeFloat64Snapshot(t *testing.T) { + g := NewGaugeFloat64() + g.Update(47.0) + snapshot := g.Snapshot() + g.Update(float64(0)) + if v := snapshot.Value(); 47.0 != v { + t.Errorf("g.Value(): 47.0 != %v\n", v) + } +} + +func TestGetOrRegisterGaugeFloat64(t *testing.T) { + r := NewRegistry() + NewRegisteredGaugeFloat64("foo", r).Update(47.0) + t.Logf("registry: %v", r) + if g := GetOrRegisterGaugeFloat64("foo", r); 47.0 != g.Value() { + t.Fatal(g) + } +} + +func TestFunctionalGaugeFloat64(t *testing.T) { + var counter float64 + fg := NewFunctionalGaugeFloat64(func() float64 { + counter++ + return counter + }) + fg.Value() + fg.Value() + if counter != 2 { + t.Error("counter != 2") + } +} + +func TestGetOrRegisterFunctionalGaugeFloat64(t *testing.T) { + r := NewRegistry() + NewRegisteredFunctionalGaugeFloat64("foo", r, func() float64 { return 47 }) + if g := GetOrRegisterGaugeFloat64("foo", r); g.Value() != 47 { + t.Fatal(g) + } +} diff --git a/metrics/gauge_test.go b/metrics/gauge_test.go new file mode 100644 index 0000000000..3aee143455 --- /dev/null +++ b/metrics/gauge_test.go @@ -0,0 +1,68 @@ +package metrics + +import ( + "fmt" + "testing" +) + +func BenchmarkGuage(b *testing.B) { + g := NewGauge() + b.ResetTimer() + for i := 0; i < b.N; i++ { + g.Update(int64(i)) + } +} + +func TestGauge(t *testing.T) { + g := NewGauge() + g.Update(int64(47)) + if v := g.Value(); v != 47 { + t.Errorf("g.Value(): 47 != %v\n", v) + } +} + +func TestGaugeSnapshot(t *testing.T) { + g := NewGauge() + g.Update(int64(47)) + snapshot := g.Snapshot() + g.Update(int64(0)) + if v := snapshot.Value(); v != 47 { + t.Errorf("g.Value(): 47 != %v\n", v) + } +} + +func TestGetOrRegisterGauge(t *testing.T) { + r := NewRegistry() + NewRegisteredGauge("foo", r).Update(47) + if g := GetOrRegisterGauge("foo", r); g.Value() != 47 { + t.Fatal(g) + } +} + +func TestFunctionalGauge(t *testing.T) { + var counter int64 + fg := NewFunctionalGauge(func() int64 { + counter++ + return counter + }) + fg.Value() + fg.Value() + if counter != 2 { + t.Error("counter != 2") + } +} + +func TestGetOrRegisterFunctionalGauge(t *testing.T) { + r := NewRegistry() + NewRegisteredFunctionalGauge("foo", r, func() int64 { return 47 }) + if g := GetOrRegisterGauge("foo", r); g.Value() != 47 { + t.Fatal(g) + } +} + +func ExampleGetOrRegisterGauge() { + m := "server.bytes_sent" + g := GetOrRegisterGauge(m, nil) + g.Update(47) + fmt.Println(g.Value()) // Output: 47 +} diff --git a/metrics/graphite.go b/metrics/graphite.go new file mode 100644 index 0000000000..142eec86be --- /dev/null +++ b/metrics/graphite.go @@ -0,0 +1,113 @@ +package metrics + +import ( + "bufio" + "fmt" + "log" + "net" + "strconv" + "strings" + "time" +) + +// GraphiteConfig provides a container with configuration parameters for +// the Graphite exporter +type GraphiteConfig struct { + Addr *net.TCPAddr // Network address to connect to + Registry Registry // Registry to be exported + FlushInterval time.Duration // Flush interval + DurationUnit time.Duration // Time conversion unit for durations + Prefix string // Prefix to be prepended to metric names + Percentiles []float64 // Percentiles to export from timers and histograms +} + +// Graphite is a blocking exporter function which reports metrics in r +// to a graphite server located at addr, flushing them every d duration +// and prepending metric names with prefix. +func Graphite(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) { + GraphiteWithConfig(GraphiteConfig{ + Addr: addr, + Registry: r, + FlushInterval: d, + DurationUnit: time.Nanosecond, + Prefix: prefix, + Percentiles: []float64{0.5, 0.75, 0.95, 0.99, 0.999}, + }) +} + +// GraphiteWithConfig is a blocking exporter function just like Graphite, +// but it takes a GraphiteConfig instead. +func GraphiteWithConfig(c GraphiteConfig) { + log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") + for range time.Tick(c.FlushInterval) { + if err := graphite(&c); nil != err { + log.Println(err) + } + } +} + +// GraphiteOnce performs a single submission to Graphite, returning a +// non-nil error on failed connections. This can be used in a loop +// similar to GraphiteWithConfig for custom error handling. +func GraphiteOnce(c GraphiteConfig) error { + log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") + return graphite(&c) +} + +func graphite(c *GraphiteConfig) error { + now := time.Now().Unix() + du := float64(c.DurationUnit) + conn, err := net.DialTCP("tcp", nil, c.Addr) + if nil != err { + return err + } + defer conn.Close() + w := bufio.NewWriter(conn) + c.Registry.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Count(), now) + case Gauge: + fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now) + case GaugeFloat64: + fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Value(), now) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles(c.Percentiles) + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, h.Count(), now) + fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, h.Min(), now) + fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, h.Max(), now) + fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, h.Mean(), now) + fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, h.StdDev(), now) + for psIdx, psKey := range c.Percentiles { + key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) + fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) + } + case Meter: + m := metric.Snapshot() + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, m.Count(), now) + fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, m.Rate1(), now) + fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, m.Rate5(), now) + fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, m.Rate15(), now) + fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, m.RateMean(), now) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles(c.Percentiles) + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, t.Count(), now) + fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, t.Min()/int64(du), now) + fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, t.Max()/int64(du), now) + fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, t.Mean()/du, now) + fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, t.StdDev()/du, now) + for psIdx, psKey := range c.Percentiles { + key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) + fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) + } + fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, t.Rate1(), now) + fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, t.Rate5(), now) + fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, t.Rate15(), now) + fmt.Fprintf(w, "%s.%s.mean-rate %.2f %d\n", c.Prefix, name, t.RateMean(), now) + } + w.Flush() + }) + return nil +} diff --git a/metrics/graphite_test.go b/metrics/graphite_test.go new file mode 100644 index 0000000000..c797c781df --- /dev/null +++ b/metrics/graphite_test.go @@ -0,0 +1,22 @@ +package metrics + +import ( + "net" + "time" +) + +func ExampleGraphite() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go Graphite(DefaultRegistry, 1*time.Second, "some.prefix", addr) +} + +func ExampleGraphiteWithConfig() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go GraphiteWithConfig(GraphiteConfig{ + Addr: addr, + Registry: DefaultRegistry, + FlushInterval: 1 * time.Second, + DurationUnit: time.Millisecond, + Percentiles: []float64{0.5, 0.75, 0.99, 0.999}, + }) +} diff --git a/metrics/healthcheck.go b/metrics/healthcheck.go new file mode 100644 index 0000000000..f1ae31e34a --- /dev/null +++ b/metrics/healthcheck.go @@ -0,0 +1,61 @@ +package metrics + +// Healthchecks hold an error value describing an arbitrary up/down status. +type Healthcheck interface { + Check() + Error() error + Healthy() + Unhealthy(error) +} + +// NewHealthcheck constructs a new Healthcheck which will use the given +// function to update its status. +func NewHealthcheck(f func(Healthcheck)) Healthcheck { + if !Enabled { + return NilHealthcheck{} + } + return &StandardHealthcheck{nil, f} +} + +// NilHealthcheck is a no-op. +type NilHealthcheck struct{} + +// Check is a no-op. +func (NilHealthcheck) Check() {} + +// Error is a no-op. +func (NilHealthcheck) Error() error { return nil } + +// Healthy is a no-op. +func (NilHealthcheck) Healthy() {} + +// Unhealthy is a no-op. +func (NilHealthcheck) Unhealthy(error) {} + +// StandardHealthcheck is the standard implementation of a Healthcheck and +// stores the status and a function to call to update the status. +type StandardHealthcheck struct { + err error + f func(Healthcheck) +} + +// Check runs the healthcheck function to update the healthcheck's status. +func (h *StandardHealthcheck) Check() { + h.f(h) +} + +// Error returns the healthcheck's status, which will be nil if it is healthy. +func (h *StandardHealthcheck) Error() error { + return h.err +} + +// Healthy marks the healthcheck as healthy. +func (h *StandardHealthcheck) Healthy() { + h.err = nil +} + +// Unhealthy marks the healthcheck as unhealthy. The error is stored and +// may be retrieved by the Error method. +func (h *StandardHealthcheck) Unhealthy(err error) { + h.err = err +} diff --git a/metrics/histogram.go b/metrics/histogram.go new file mode 100644 index 0000000000..2c54ce8b40 --- /dev/null +++ b/metrics/histogram.go @@ -0,0 +1,211 @@ +package metrics + +// Histograms calculate distribution statistics from a series of int64 values. +type Histogram interface { + Clear() + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Sample() Sample + Snapshot() Histogram + StdDev() float64 + Sum() int64 + Update(int64) + Variance() float64 +} + +// GetOrRegisterHistogram returns an existing Histogram or constructs and +// registers a new StandardHistogram. +func GetOrRegisterHistogram(name string, r Registry, s Sample) Histogram { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, func() Histogram { return NewHistogram(s) }).(Histogram) +} + +// GetOrRegisterHistogramLazy returns an existing Histogram or constructs and +// registers a new StandardHistogram. +func GetOrRegisterHistogramLazy(name string, r Registry, s func() Sample) Histogram { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, func() Histogram { return NewHistogram(s()) }).(Histogram) +} + +// NewHistogram constructs a new StandardHistogram from a Sample. +func NewHistogram(s Sample) Histogram { + if !Enabled { + return NilHistogram{} + } + return &StandardHistogram{sample: s} +} + +// NewRegisteredHistogram constructs and registers a new StandardHistogram from +// a Sample. +func NewRegisteredHistogram(name string, r Registry, s Sample) Histogram { + c := NewHistogram(s) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// HistogramSnapshot is a read-only copy of another Histogram. +type HistogramSnapshot struct { + sample *SampleSnapshot +} + +// Clear panics. +func (*HistogramSnapshot) Clear() { + panic("Clear called on a HistogramSnapshot") +} + +// Count returns the number of samples recorded at the time the snapshot was +// taken. +func (h *HistogramSnapshot) Count() int64 { return h.sample.Count() } + +// Max returns the maximum value in the sample at the time the snapshot was +// taken. +func (h *HistogramSnapshot) Max() int64 { return h.sample.Max() } + +// Mean returns the mean of the values in the sample at the time the snapshot +// was taken. +func (h *HistogramSnapshot) Mean() float64 { return h.sample.Mean() } + +// Min returns the minimum value in the sample at the time the snapshot was +// taken. +func (h *HistogramSnapshot) Min() int64 { return h.sample.Min() } + +// Percentile returns an arbitrary percentile of values in the sample at the +// time the snapshot was taken. +func (h *HistogramSnapshot) Percentile(p float64) float64 { + return h.sample.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of values in the sample +// at the time the snapshot was taken. +func (h *HistogramSnapshot) Percentiles(ps []float64) []float64 { + return h.sample.Percentiles(ps) +} + +// Sample returns the Sample underlying the histogram. +func (h *HistogramSnapshot) Sample() Sample { return h.sample } + +// Snapshot returns the snapshot. +func (h *HistogramSnapshot) Snapshot() Histogram { return h } + +// StdDev returns the standard deviation of the values in the sample at the +// time the snapshot was taken. +func (h *HistogramSnapshot) StdDev() float64 { return h.sample.StdDev() } + +// Sum returns the sum in the sample at the time the snapshot was taken. +func (h *HistogramSnapshot) Sum() int64 { return h.sample.Sum() } + +// Update panics. +func (*HistogramSnapshot) Update(int64) { + panic("Update called on a HistogramSnapshot") +} + +// Variance returns the variance of inputs at the time the snapshot was taken. +func (h *HistogramSnapshot) Variance() float64 { return h.sample.Variance() } + +// NilHistogram is a no-op Histogram. +type NilHistogram struct{} + +// Clear is a no-op. +func (NilHistogram) Clear() {} + +// Count is a no-op. +func (NilHistogram) Count() int64 { return 0 } + +// Max is a no-op. +func (NilHistogram) Max() int64 { return 0 } + +// Mean is a no-op. +func (NilHistogram) Mean() float64 { return 0.0 } + +// Min is a no-op. +func (NilHistogram) Min() int64 { return 0 } + +// Percentile is a no-op. +func (NilHistogram) Percentile(p float64) float64 { return 0.0 } + +// Percentiles is a no-op. +func (NilHistogram) Percentiles(ps []float64) []float64 { + return make([]float64, len(ps)) +} + +// Sample is a no-op. +func (NilHistogram) Sample() Sample { return NilSample{} } + +// Snapshot is a no-op. +func (NilHistogram) Snapshot() Histogram { return NilHistogram{} } + +// StdDev is a no-op. +func (NilHistogram) StdDev() float64 { return 0.0 } + +// Sum is a no-op. +func (NilHistogram) Sum() int64 { return 0 } + +// Update is a no-op. +func (NilHistogram) Update(v int64) {} + +// Variance is a no-op. +func (NilHistogram) Variance() float64 { return 0.0 } + +// StandardHistogram is the standard implementation of a Histogram and uses a +// Sample to bound its memory use. +type StandardHistogram struct { + sample Sample +} + +// Clear clears the histogram and its sample. +func (h *StandardHistogram) Clear() { h.sample.Clear() } + +// Count returns the number of samples recorded since the histogram was last +// cleared. +func (h *StandardHistogram) Count() int64 { return h.sample.Count() } + +// Max returns the maximum value in the sample. +func (h *StandardHistogram) Max() int64 { return h.sample.Max() } + +// Mean returns the mean of the values in the sample. +func (h *StandardHistogram) Mean() float64 { return h.sample.Mean() } + +// Min returns the minimum value in the sample. +func (h *StandardHistogram) Min() int64 { return h.sample.Min() } + +// Percentile returns an arbitrary percentile of the values in the sample. +func (h *StandardHistogram) Percentile(p float64) float64 { + return h.sample.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of the values in the +// sample. +func (h *StandardHistogram) Percentiles(ps []float64) []float64 { + return h.sample.Percentiles(ps) +} + +// Sample returns the Sample underlying the histogram. +func (h *StandardHistogram) Sample() Sample { return h.sample } + +// Snapshot returns a read-only copy of the histogram. +func (h *StandardHistogram) Snapshot() Histogram { + return &HistogramSnapshot{sample: h.sample.Snapshot().(*SampleSnapshot)} +} + +// StdDev returns the standard deviation of the values in the sample. +func (h *StandardHistogram) StdDev() float64 { return h.sample.StdDev() } + +// Sum returns the sum in the sample. +func (h *StandardHistogram) Sum() int64 { return h.sample.Sum() } + +// Update samples a new value. +func (h *StandardHistogram) Update(v int64) { h.sample.Update(v) } + +// Variance returns the variance of the values in the sample. +func (h *StandardHistogram) Variance() float64 { return h.sample.Variance() } diff --git a/metrics/histogram_test.go b/metrics/histogram_test.go new file mode 100644 index 0000000000..7c9f42fcec --- /dev/null +++ b/metrics/histogram_test.go @@ -0,0 +1,95 @@ +package metrics + +import "testing" + +func BenchmarkHistogram(b *testing.B) { + h := NewHistogram(NewUniformSample(100)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h.Update(int64(i)) + } +} + +func TestGetOrRegisterHistogram(t *testing.T) { + r := NewRegistry() + s := NewUniformSample(100) + NewRegisteredHistogram("foo", r, s).Update(47) + if h := GetOrRegisterHistogram("foo", r, s); h.Count() != 1 { + t.Fatal(h) + } +} + +func TestHistogram10000(t *testing.T) { + h := NewHistogram(NewUniformSample(100000)) + for i := 1; i <= 10000; i++ { + h.Update(int64(i)) + } + testHistogram10000(t, h) +} + +func TestHistogramEmpty(t *testing.T) { + h := NewHistogram(NewUniformSample(100)) + if count := h.Count(); count != 0 { + t.Errorf("h.Count(): 0 != %v\n", count) + } + if min := h.Min(); min != 0 { + t.Errorf("h.Min(): 0 != %v\n", min) + } + if max := h.Max(); max != 0 { + t.Errorf("h.Max(): 0 != %v\n", max) + } + if mean := h.Mean(); mean != 0.0 { + t.Errorf("h.Mean(): 0.0 != %v\n", mean) + } + if stdDev := h.StdDev(); stdDev != 0.0 { + t.Errorf("h.StdDev(): 0.0 != %v\n", stdDev) + } + ps := h.Percentiles([]float64{0.5, 0.75, 0.99}) + if ps[0] != 0.0 { + t.Errorf("median: 0.0 != %v\n", ps[0]) + } + if ps[1] != 0.0 { + t.Errorf("75th percentile: 0.0 != %v\n", ps[1]) + } + if ps[2] != 0.0 { + t.Errorf("99th percentile: 0.0 != %v\n", ps[2]) + } +} + +func TestHistogramSnapshot(t *testing.T) { + h := NewHistogram(NewUniformSample(100000)) + for i := 1; i <= 10000; i++ { + h.Update(int64(i)) + } + snapshot := h.Snapshot() + h.Update(0) + testHistogram10000(t, snapshot) +} + +func testHistogram10000(t *testing.T, h Histogram) { + if count := h.Count(); count != 10000 { + t.Errorf("h.Count(): 10000 != %v\n", count) + } + if min := h.Min(); min != 1 { + t.Errorf("h.Min(): 1 != %v\n", min) + } + if max := h.Max(); max != 10000 { + t.Errorf("h.Max(): 10000 != %v\n", max) + } + if mean := h.Mean(); mean != 5000.5 { + t.Errorf("h.Mean(): 5000.5 != %v\n", mean) + } + if stdDev := h.StdDev(); stdDev != 2886.751331514372 { + t.Errorf("h.StdDev(): 2886.751331514372 != %v\n", stdDev) + } + ps := h.Percentiles([]float64{0.5, 0.75, 0.99}) + if ps[0] != 5000.5 { + t.Errorf("median: 5000.5 != %v\n", ps[0]) + } + if ps[1] != 7500.75 { + t.Errorf("75th percentile: 7500.75 != %v\n", ps[1]) + } + if ps[2] != 9900.99 { + t.Errorf("99th percentile: 9900.99 != %v\n", ps[2]) + } +} diff --git a/metrics/json.go b/metrics/json.go new file mode 100644 index 0000000000..2087d8211e --- /dev/null +++ b/metrics/json.go @@ -0,0 +1,31 @@ +package metrics + +import ( + "encoding/json" + "io" + "time" +) + +// MarshalJSON returns a byte slice containing a JSON representation of all +// the metrics in the Registry. +func (r *StandardRegistry) MarshalJSON() ([]byte, error) { + return json.Marshal(r.GetAll()) +} + +// WriteJSON writes metrics from the given registry periodically to the +// specified io.Writer as JSON. +func WriteJSON(r Registry, d time.Duration, w io.Writer) { + for range time.Tick(d) { + WriteJSONOnce(r, w) + } +} + +// WriteJSONOnce writes metrics from the given registry to the specified +// io.Writer as JSON. +func WriteJSONOnce(r Registry, w io.Writer) { + json.NewEncoder(w).Encode(r) +} + +func (p *PrefixedRegistry) MarshalJSON() ([]byte, error) { + return json.Marshal(p.GetAll()) +} diff --git a/metrics/json_test.go b/metrics/json_test.go new file mode 100644 index 0000000000..f91fe8cfa5 --- /dev/null +++ b/metrics/json_test.go @@ -0,0 +1,28 @@ +package metrics + +import ( + "bytes" + "encoding/json" + "testing" +) + +func TestRegistryMarshallJSON(t *testing.T) { + b := &bytes.Buffer{} + enc := json.NewEncoder(b) + r := NewRegistry() + r.Register("counter", NewCounter()) + enc.Encode(r) + if s := b.String(); s != "{\"counter\":{\"count\":0}}\n" { + t.Fatalf(s) + } +} + +func TestRegistryWriteJSONOnce(t *testing.T) { + r := NewRegistry() + r.Register("counter", NewCounter()) + b := &bytes.Buffer{} + WriteJSONOnce(r, b) + if s := b.String(); s != "{\"counter\":{\"count\":0}}\n" { + t.Fail() + } +} diff --git a/metrics/log.go b/metrics/log.go new file mode 100644 index 0000000000..0c8ea7c971 --- /dev/null +++ b/metrics/log.go @@ -0,0 +1,80 @@ +package metrics + +import ( + "time" +) + +type Logger interface { + Printf(format string, v ...interface{}) +} + +func Log(r Registry, freq time.Duration, l Logger) { + LogScaled(r, freq, time.Nanosecond, l) +} + +// Output each metric in the given registry periodically using the given +// logger. Print timings in `scale` units (eg time.Millisecond) rather than nanos. +func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { + du := float64(scale) + duSuffix := scale.String()[1:] + + for range time.Tick(freq) { + r.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + l.Printf("counter %s\n", name) + l.Printf(" count: %9d\n", metric.Count()) + case Gauge: + l.Printf("gauge %s\n", name) + l.Printf(" value: %9d\n", metric.Value()) + case GaugeFloat64: + l.Printf("gauge %s\n", name) + l.Printf(" value: %f\n", metric.Value()) + case Healthcheck: + metric.Check() + l.Printf("healthcheck %s\n", name) + l.Printf(" error: %v\n", metric.Error()) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + l.Printf("histogram %s\n", name) + l.Printf(" count: %9d\n", h.Count()) + l.Printf(" min: %9d\n", h.Min()) + l.Printf(" max: %9d\n", h.Max()) + l.Printf(" mean: %12.2f\n", h.Mean()) + l.Printf(" stddev: %12.2f\n", h.StdDev()) + l.Printf(" median: %12.2f\n", ps[0]) + l.Printf(" 75%%: %12.2f\n", ps[1]) + l.Printf(" 95%%: %12.2f\n", ps[2]) + l.Printf(" 99%%: %12.2f\n", ps[3]) + l.Printf(" 99.9%%: %12.2f\n", ps[4]) + case Meter: + m := metric.Snapshot() + l.Printf("meter %s\n", name) + l.Printf(" count: %9d\n", m.Count()) + l.Printf(" 1-min rate: %12.2f\n", m.Rate1()) + l.Printf(" 5-min rate: %12.2f\n", m.Rate5()) + l.Printf(" 15-min rate: %12.2f\n", m.Rate15()) + l.Printf(" mean rate: %12.2f\n", m.RateMean()) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + l.Printf("timer %s\n", name) + l.Printf(" count: %9d\n", t.Count()) + l.Printf(" min: %12.2f%s\n", float64(t.Min())/du, duSuffix) + l.Printf(" max: %12.2f%s\n", float64(t.Max())/du, duSuffix) + l.Printf(" mean: %12.2f%s\n", t.Mean()/du, duSuffix) + l.Printf(" stddev: %12.2f%s\n", t.StdDev()/du, duSuffix) + l.Printf(" median: %12.2f%s\n", ps[0]/du, duSuffix) + l.Printf(" 75%%: %12.2f%s\n", ps[1]/du, duSuffix) + l.Printf(" 95%%: %12.2f%s\n", ps[2]/du, duSuffix) + l.Printf(" 99%%: %12.2f%s\n", ps[3]/du, duSuffix) + l.Printf(" 99.9%%: %12.2f%s\n", ps[4]/du, duSuffix) + l.Printf(" 1-min rate: %12.2f\n", t.Rate1()) + l.Printf(" 5-min rate: %12.2f\n", t.Rate5()) + l.Printf(" 15-min rate: %12.2f\n", t.Rate15()) + l.Printf(" mean rate: %12.2f\n", t.RateMean()) + } + }) + } +} diff --git a/metrics/memory.md b/metrics/memory.md new file mode 100644 index 0000000000..47454f54b6 --- /dev/null +++ b/metrics/memory.md @@ -0,0 +1,285 @@ +Memory usage +============ + +(Highly unscientific.) + +Command used to gather static memory usage: + +```sh +grep ^Vm "/proc/$(ps fax | grep [m]etrics-bench | awk '{print $1}')/status" +``` + +Program used to gather baseline memory usage: + +```go +package main + +import "time" + +func main() { + time.Sleep(600e9) +} +``` + +Baseline +-------- + +``` +VmPeak: 42604 kB +VmSize: 42604 kB +VmLck: 0 kB +VmHWM: 1120 kB +VmRSS: 1120 kB +VmData: 35460 kB +VmStk: 136 kB +VmExe: 1020 kB +VmLib: 1848 kB +VmPTE: 36 kB +VmSwap: 0 kB +``` + +Program used to gather metric memory usage (with other metrics being similar): + +```go +package main + +import ( + "fmt" + "metrics" + "time" +) + +func main() { + fmt.Sprintf("foo") + metrics.NewRegistry() + time.Sleep(600e9) +} +``` + +1000 counters registered +------------------------ + +``` +VmPeak: 44016 kB +VmSize: 44016 kB +VmLck: 0 kB +VmHWM: 1928 kB +VmRSS: 1928 kB +VmData: 36868 kB +VmStk: 136 kB +VmExe: 1024 kB +VmLib: 1848 kB +VmPTE: 40 kB +VmSwap: 0 kB +``` + +**1.412 kB virtual, TODO 0.808 kB resident per counter.** + +100000 counters registered +-------------------------- + +``` +VmPeak: 55024 kB +VmSize: 55024 kB +VmLck: 0 kB +VmHWM: 12440 kB +VmRSS: 12440 kB +VmData: 47876 kB +VmStk: 136 kB +VmExe: 1024 kB +VmLib: 1848 kB +VmPTE: 64 kB +VmSwap: 0 kB +``` + +**0.1242 kB virtual, 0.1132 kB resident per counter.** + +1000 gauges registered +---------------------- + +``` +VmPeak: 44012 kB +VmSize: 44012 kB +VmLck: 0 kB +VmHWM: 1928 kB +VmRSS: 1928 kB +VmData: 36868 kB +VmStk: 136 kB +VmExe: 1020 kB +VmLib: 1848 kB +VmPTE: 40 kB +VmSwap: 0 kB +``` + +**1.408 kB virtual, 0.808 kB resident per counter.** + +100000 gauges registered +------------------------ + +``` +VmPeak: 55020 kB +VmSize: 55020 kB +VmLck: 0 kB +VmHWM: 12432 kB +VmRSS: 12432 kB +VmData: 47876 kB +VmStk: 136 kB +VmExe: 1020 kB +VmLib: 1848 kB +VmPTE: 60 kB +VmSwap: 0 kB +``` + +**0.12416 kB virtual, 0.11312 resident per gauge.** + +1000 histograms with a uniform sample size of 1028 +-------------------------------------------------- + +``` +VmPeak: 72272 kB +VmSize: 72272 kB +VmLck: 0 kB +VmHWM: 16204 kB +VmRSS: 16204 kB +VmData: 65100 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 80 kB +VmSwap: 0 kB +``` + +**29.668 kB virtual, TODO 15.084 resident per histogram.** + +10000 histograms with a uniform sample size of 1028 +--------------------------------------------------- + +``` +VmPeak: 256912 kB +VmSize: 256912 kB +VmLck: 0 kB +VmHWM: 146204 kB +VmRSS: 146204 kB +VmData: 249740 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 448 kB +VmSwap: 0 kB +``` + +**21.4308 kB virtual, 14.5084 kB resident per histogram.** + +50000 histograms with a uniform sample size of 1028 +--------------------------------------------------- + +``` +VmPeak: 908112 kB +VmSize: 908112 kB +VmLck: 0 kB +VmHWM: 645832 kB +VmRSS: 645588 kB +VmData: 900940 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 1716 kB +VmSwap: 1544 kB +``` + +**17.31016 kB virtual, 12.88936 kB resident per histogram.** + +1000 histograms with an exponentially-decaying sample size of 1028 and alpha of 0.015 +------------------------------------------------------------------------------------- + +``` +VmPeak: 62480 kB +VmSize: 62480 kB +VmLck: 0 kB +VmHWM: 11572 kB +VmRSS: 11572 kB +VmData: 55308 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 64 kB +VmSwap: 0 kB +``` + +**19.876 kB virtual, 10.452 kB resident per histogram.** + +10000 histograms with an exponentially-decaying sample size of 1028 and alpha of 0.015 +-------------------------------------------------------------------------------------- + +``` +VmPeak: 153296 kB +VmSize: 153296 kB +VmLck: 0 kB +VmHWM: 101176 kB +VmRSS: 101176 kB +VmData: 146124 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 240 kB +VmSwap: 0 kB +``` + +**11.0692 kB virtual, 10.0056 kB resident per histogram.** + +50000 histograms with an exponentially-decaying sample size of 1028 and alpha of 0.015 +-------------------------------------------------------------------------------------- + +``` +VmPeak: 557264 kB +VmSize: 557264 kB +VmLck: 0 kB +VmHWM: 501056 kB +VmRSS: 501056 kB +VmData: 550092 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 1032 kB +VmSwap: 0 kB +``` + +**10.2932 kB virtual, 9.99872 kB resident per histogram.** + +1000 meters +----------- + +``` +VmPeak: 74504 kB +VmSize: 74504 kB +VmLck: 0 kB +VmHWM: 24124 kB +VmRSS: 24124 kB +VmData: 67340 kB +VmStk: 136 kB +VmExe: 1040 kB +VmLib: 1848 kB +VmPTE: 92 kB +VmSwap: 0 kB +``` + +**31.9 kB virtual, 23.004 kB resident per meter.** + +10000 meters +------------ + +``` +VmPeak: 278920 kB +VmSize: 278920 kB +VmLck: 0 kB +VmHWM: 227300 kB +VmRSS: 227300 kB +VmData: 271756 kB +VmStk: 136 kB +VmExe: 1040 kB +VmLib: 1848 kB +VmPTE: 488 kB +VmSwap: 0 kB +``` + +**23.6316 kB virtual, 22.618 kB resident per meter.** diff --git a/metrics/meter.go b/metrics/meter.go new file mode 100644 index 0000000000..60ae919d04 --- /dev/null +++ b/metrics/meter.go @@ -0,0 +1,301 @@ +package metrics + +import ( + "sync" + "sync/atomic" + "time" +) + +// Meters count events to produce exponentially-weighted moving average rates +// at one-, five-, and fifteen-minutes and a mean rate. +type Meter interface { + Count() int64 + Mark(int64) + Rate1() float64 + Rate5() float64 + Rate15() float64 + RateMean() float64 + Snapshot() Meter + Stop() +} + +// GetOrRegisterMeter returns an existing Meter or constructs and registers a +// new StandardMeter. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterMeter(name string, r Registry) Meter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewMeter).(Meter) +} + +// GetOrRegisterMeterForced returns an existing Meter or constructs and registers a +// new StandardMeter no matter the global switch is enabled or not. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterMeterForced(name string, r Registry) Meter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewMeterForced).(Meter) +} + +// NewMeter constructs a new StandardMeter and launches a goroutine. +// Be sure to call Stop() once the meter is of no use to allow for garbage collection. +func NewMeter() Meter { + if !Enabled { + return NilMeter{} + } + m := newStandardMeter() + arbiter.Lock() + defer arbiter.Unlock() + arbiter.meters[m] = struct{}{} + if !arbiter.started { + arbiter.started = true + go arbiter.tick() + } + return m +} + +// NewMeterForced constructs a new StandardMeter and launches a goroutine no matter +// the global switch is enabled or not. +// Be sure to call Stop() once the meter is of no use to allow for garbage collection. +func NewMeterForced() Meter { + m := newStandardMeter() + arbiter.Lock() + defer arbiter.Unlock() + arbiter.meters[m] = struct{}{} + if !arbiter.started { + arbiter.started = true + go arbiter.tick() + } + return m +} + +// NewRegisteredMeter constructs and registers a new StandardMeter +// and launches a goroutine. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredMeter(name string, r Registry) Meter { + c := NewMeter() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewRegisteredMeterForced constructs and registers a new StandardMeter +// and launches a goroutine no matter the global switch is enabled or not. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredMeterForced(name string, r Registry) Meter { + c := NewMeterForced() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// MeterSnapshot is a read-only copy of another Meter. +type MeterSnapshot struct { + // WARNING: The `temp` field is accessed atomically. + // On 32 bit platforms, only 64-bit aligned fields can be atomic. The struct is + // guaranteed to be so aligned, so take advantage of that. For more information, + // see https://golang.org/pkg/sync/atomic/#pkg-note-BUG. + temp int64 + count int64 + rate1, rate5, rate15, rateMean float64 +} + +// Count returns the count of events at the time the snapshot was taken. +func (m *MeterSnapshot) Count() int64 { return m.count } + +// Mark panics. +func (*MeterSnapshot) Mark(n int64) { + panic("Mark called on a MeterSnapshot") +} + +// Rate1 returns the one-minute moving average rate of events per second at the +// time the snapshot was taken. +func (m *MeterSnapshot) Rate1() float64 { return m.rate1 } + +// Rate5 returns the five-minute moving average rate of events per second at +// the time the snapshot was taken. +func (m *MeterSnapshot) Rate5() float64 { return m.rate5 } + +// Rate15 returns the fifteen-minute moving average rate of events per second +// at the time the snapshot was taken. +func (m *MeterSnapshot) Rate15() float64 { return m.rate15 } + +// RateMean returns the meter's mean rate of events per second at the time the +// snapshot was taken. +func (m *MeterSnapshot) RateMean() float64 { return m.rateMean } + +// Snapshot returns the snapshot. +func (m *MeterSnapshot) Snapshot() Meter { return m } + +// Stop is a no-op. +func (m *MeterSnapshot) Stop() {} + +// NilMeter is a no-op Meter. +type NilMeter struct{} + +// Count is a no-op. +func (NilMeter) Count() int64 { return 0 } + +// Mark is a no-op. +func (NilMeter) Mark(n int64) {} + +// Rate1 is a no-op. +func (NilMeter) Rate1() float64 { return 0.0 } + +// Rate5 is a no-op. +func (NilMeter) Rate5() float64 { return 0.0 } + +// Rate15 is a no-op. +func (NilMeter) Rate15() float64 { return 0.0 } + +// RateMean is a no-op. +func (NilMeter) RateMean() float64 { return 0.0 } + +// Snapshot is a no-op. +func (NilMeter) Snapshot() Meter { return NilMeter{} } + +// Stop is a no-op. +func (NilMeter) Stop() {} + +// StandardMeter is the standard implementation of a Meter. +type StandardMeter struct { + lock sync.RWMutex + snapshot *MeterSnapshot + a1, a5, a15 EWMA + startTime time.Time + stopped uint32 +} + +func newStandardMeter() *StandardMeter { + return &StandardMeter{ + snapshot: &MeterSnapshot{}, + a1: NewEWMA1(), + a5: NewEWMA5(), + a15: NewEWMA15(), + startTime: time.Now(), + } +} + +// Stop stops the meter, Mark() will be a no-op if you use it after being stopped. +func (m *StandardMeter) Stop() { + stopped := atomic.SwapUint32(&m.stopped, 1) + if stopped != 1 { + arbiter.Lock() + delete(arbiter.meters, m) + arbiter.Unlock() + } +} + +// Count returns the number of events recorded. +// It updates the meter to be as accurate as possible +func (m *StandardMeter) Count() int64 { + m.lock.Lock() + defer m.lock.Unlock() + m.updateMeter() + return m.snapshot.count +} + +// Mark records the occurrence of n events. +func (m *StandardMeter) Mark(n int64) { + atomic.AddInt64(&m.snapshot.temp, n) +} + +// Rate1 returns the one-minute moving average rate of events per second. +func (m *StandardMeter) Rate1() float64 { + m.lock.RLock() + defer m.lock.RUnlock() + return m.snapshot.rate1 +} + +// Rate5 returns the five-minute moving average rate of events per second. +func (m *StandardMeter) Rate5() float64 { + m.lock.RLock() + defer m.lock.RUnlock() + return m.snapshot.rate5 +} + +// Rate15 returns the fifteen-minute moving average rate of events per second. +func (m *StandardMeter) Rate15() float64 { + m.lock.RLock() + defer m.lock.RUnlock() + return m.snapshot.rate15 +} + +// RateMean returns the meter's mean rate of events per second. +func (m *StandardMeter) RateMean() float64 { + m.lock.RLock() + defer m.lock.RUnlock() + return m.snapshot.rateMean +} + +// Snapshot returns a read-only copy of the meter. +func (m *StandardMeter) Snapshot() Meter { + m.lock.RLock() + snapshot := *m.snapshot + m.lock.RUnlock() + return &snapshot +} + +func (m *StandardMeter) updateSnapshot() { + // should run with write lock held on m.lock + snapshot := m.snapshot + snapshot.rate1 = m.a1.Rate() + snapshot.rate5 = m.a5.Rate() + snapshot.rate15 = m.a15.Rate() + snapshot.rateMean = float64(snapshot.count) / time.Since(m.startTime).Seconds() +} + +func (m *StandardMeter) updateMeter() { + // should only run with write lock held on m.lock + n := atomic.SwapInt64(&m.snapshot.temp, 0) + m.snapshot.count += n + m.a1.Update(n) + m.a5.Update(n) + m.a15.Update(n) +} + +func (m *StandardMeter) tick() { + m.lock.Lock() + defer m.lock.Unlock() + m.updateMeter() + m.a1.Tick() + m.a5.Tick() + m.a15.Tick() + m.updateSnapshot() +} + +// meterArbiter ticks meters every 5s from a single goroutine. +// meters are references in a set for future stopping. +type meterArbiter struct { + sync.RWMutex + started bool + meters map[*StandardMeter]struct{} + ticker *time.Ticker +} + +var arbiter = meterArbiter{ticker: time.NewTicker(5 * time.Second), meters: make(map[*StandardMeter]struct{})} + +// Ticks meters on the scheduled interval +func (ma *meterArbiter) tick() { + for range ma.ticker.C { + ma.tickMeters() + } +} + +func (ma *meterArbiter) tickMeters() { + ma.RLock() + defer ma.RUnlock() + for meter := range ma.meters { + meter.tick() + } +} diff --git a/metrics/meter_test.go b/metrics/meter_test.go new file mode 100644 index 0000000000..b3f6cb8c0c --- /dev/null +++ b/metrics/meter_test.go @@ -0,0 +1,91 @@ +package metrics + +import ( + "testing" + "time" +) + +func BenchmarkMeter(b *testing.B) { + m := NewMeter() + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.Mark(1) + } +} + +func TestGetOrRegisterMeter(t *testing.T) { + r := NewRegistry() + NewRegisteredMeter("foo", r).Mark(47) + if m := GetOrRegisterMeter("foo", r); m.Count() != 47 { + t.Fatal(m.Count()) + } +} + +func TestMeterDecay(t *testing.T) { + ma := meterArbiter{ + ticker: time.NewTicker(time.Millisecond), + meters: make(map[*StandardMeter]struct{}), + } + defer ma.ticker.Stop() + m := newStandardMeter() + ma.meters[m] = struct{}{} + m.Mark(1) + ma.tickMeters() + rateMean := m.RateMean() + time.Sleep(100 * time.Millisecond) + ma.tickMeters() + if m.RateMean() >= rateMean { + t.Error("m.RateMean() didn't decrease") + } +} + +func TestMeterNonzero(t *testing.T) { + m := NewMeter() + m.Mark(3) + if count := m.Count(); count != 3 { + t.Errorf("m.Count(): 3 != %v\n", count) + } +} + +func TestMeterStop(t *testing.T) { + l := len(arbiter.meters) + m := NewMeter() + if l+1 != len(arbiter.meters) { + t.Errorf("arbiter.meters: %d != %d\n", l+1, len(arbiter.meters)) + } + m.Stop() + if l != len(arbiter.meters) { + t.Errorf("arbiter.meters: %d != %d\n", l, len(arbiter.meters)) + } +} + +func TestMeterSnapshot(t *testing.T) { + m := NewMeter() + m.Mark(1) + if snapshot := m.Snapshot(); m.RateMean() != snapshot.RateMean() { + t.Fatal(snapshot) + } +} + +func TestMeterZero(t *testing.T) { + m := NewMeter() + if count := m.Count(); count != 0 { + t.Errorf("m.Count(): 0 != %v\n", count) + } +} + +func TestMeterRepeat(t *testing.T) { + m := NewMeter() + for i := 0; i < 101; i++ { + m.Mark(int64(i)) + } + if count := m.Count(); count != 5050 { + t.Errorf("m.Count(): 5050 != %v\n", count) + } + for i := 0; i < 101; i++ { + m.Mark(int64(i)) + } + if count := m.Count(); count != 10100 { + t.Errorf("m.Count(): 10100 != %v\n", count) + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 0000000000..e01beef68e --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,18 @@ +// Go port of Coda Hale's Metrics library +// +// +// +// Coda Hale's original work: +package metrics + +// Enabled is checked by the constructor functions for all of the +// standard metrics. If it is true, the metric returned is a stub. +// +// This global kill-switch helps quantify the observer effect and makes +// for less cluttered pprof profiles. +var Enabled = true + +// EnabledExpensive is a soft-flag meant for external packages to check if costly +// metrics gathering is allowed or not. The goal is to separate standard metrics +// for health monitoring and debug metrics that might impact runtime performance. +var EnabledExpensive = false diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go new file mode 100644 index 0000000000..df36da0ade --- /dev/null +++ b/metrics/metrics_test.go @@ -0,0 +1,125 @@ +package metrics + +import ( + "fmt" + "io/ioutil" + "log" + "sync" + "testing" + "time" +) + +const FANOUT = 128 + +// Stop the compiler from complaining during debugging. +var ( + _ = ioutil.Discard + _ = log.LstdFlags +) + +func BenchmarkMetrics(b *testing.B) { + r := NewRegistry() + c := NewRegisteredCounter("counter", r) + g := NewRegisteredGauge("gauge", r) + gf := NewRegisteredGaugeFloat64("gaugefloat64", r) + h := NewRegisteredHistogram("histogram", r, NewUniformSample(100)) + m := NewRegisteredMeter("meter", r) + t := NewRegisteredTimer("timer", r) + RegisterDebugGCStats(r) + RegisterRuntimeMemStats(r) + b.ResetTimer() + ch := make(chan bool) + + wgD := &sync.WaitGroup{} + /* + wgD.Add(1) + go func() { + defer wgD.Done() + //log.Println("go CaptureDebugGCStats") + for { + select { + case <-ch: + //log.Println("done CaptureDebugGCStats") + return + default: + CaptureDebugGCStatsOnce(r) + } + } + }() + //*/ + + wgR := &sync.WaitGroup{} + //* + wgR.Add(1) + go func() { + defer wgR.Done() + //log.Println("go CaptureRuntimeMemStats") + for { + select { + case <-ch: + //log.Println("done CaptureRuntimeMemStats") + return + default: + CaptureRuntimeMemStatsOnce(r) + } + } + }() + //*/ + + wgW := &sync.WaitGroup{} + /* + wgW.Add(1) + go func() { + defer wgW.Done() + //log.Println("go Write") + for { + select { + case <-ch: + //log.Println("done Write") + return + default: + WriteOnce(r, ioutil.Discard) + } + } + }() + //*/ + + wg := &sync.WaitGroup{} + wg.Add(FANOUT) + for i := 0; i < FANOUT; i++ { + go func(i int) { + defer wg.Done() + //log.Println("go", i) + for i := 0; i < b.N; i++ { + c.Inc(1) + g.Update(int64(i)) + gf.Update(float64(i)) + h.Update(int64(i)) + m.Mark(1) + t.Update(1) + } + //log.Println("done", i) + }(i) + } + wg.Wait() + close(ch) + wgD.Wait() + wgR.Wait() + wgW.Wait() +} + +func Example() { + c := NewCounter() + Register("money", c) + c.Inc(17) + + // Threadsafe registration + t := GetOrRegisterTimer("db.get.latency", nil) + t.Time(func() { time.Sleep(10 * time.Millisecond) }) + t.Update(1) + + fmt.Println(c.Count()) + fmt.Println(t.Min()) + // Output: 17 + // 1 +} diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go new file mode 100644 index 0000000000..3fde55454b --- /dev/null +++ b/metrics/opentsdb.go @@ -0,0 +1,119 @@ +package metrics + +import ( + "bufio" + "fmt" + "log" + "net" + "os" + "strings" + "time" +) + +var shortHostName = "" + +// OpenTSDBConfig provides a container with configuration parameters for +// the OpenTSDB exporter +type OpenTSDBConfig struct { + Addr *net.TCPAddr // Network address to connect to + Registry Registry // Registry to be exported + FlushInterval time.Duration // Flush interval + DurationUnit time.Duration // Time conversion unit for durations + Prefix string // Prefix to be prepended to metric names +} + +// OpenTSDB is a blocking exporter function which reports metrics in r +// to a TSDB server located at addr, flushing them every d duration +// and prepending metric names with prefix. +func OpenTSDB(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) { + OpenTSDBWithConfig(OpenTSDBConfig{ + Addr: addr, + Registry: r, + FlushInterval: d, + DurationUnit: time.Nanosecond, + Prefix: prefix, + }) +} + +// OpenTSDBWithConfig is a blocking exporter function just like OpenTSDB, +// but it takes a OpenTSDBConfig instead. +func OpenTSDBWithConfig(c OpenTSDBConfig) { + for range time.Tick(c.FlushInterval) { + if err := openTSDB(&c); nil != err { + log.Println(err) + } + } +} + +func getShortHostname() string { + if shortHostName == "" { + host, _ := os.Hostname() + if index := strings.Index(host, "."); index > 0 { + shortHostName = host[:index] + } else { + shortHostName = host + } + } + return shortHostName +} + +func openTSDB(c *OpenTSDBConfig) error { + shortHostname := getShortHostname() + now := time.Now().Unix() + du := float64(c.DurationUnit) + conn, err := net.DialTCP("tcp", nil, c.Addr) + if nil != err { + return err + } + defer conn.Close() + w := bufio.NewWriter(conn) + c.Registry.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Count(), shortHostname) + case Gauge: + fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + case GaugeFloat64: + fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, h.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, h.Min(), shortHostname) + fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, h.Max(), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, h.Mean(), shortHostname) + fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, h.StdDev(), shortHostname) + fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0], shortHostname) + fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1], shortHostname) + fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2], shortHostname) + fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3], shortHostname) + fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4], shortHostname) + case Meter: + m := metric.Snapshot() + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, m.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate1(), shortHostname) + fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate5(), shortHostname) + fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate15(), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, m.RateMean(), shortHostname) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, t.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, t.Min()/int64(du), shortHostname) + fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, t.Max()/int64(du), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, t.Mean()/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, t.StdDev()/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate1(), shortHostname) + fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate5(), shortHostname) + fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate15(), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean-rate %d %.2f host=%s\n", c.Prefix, name, now, t.RateMean(), shortHostname) + } + w.Flush() + }) + return nil +} diff --git a/metrics/opentsdb_test.go b/metrics/opentsdb_test.go new file mode 100644 index 0000000000..c43728960e --- /dev/null +++ b/metrics/opentsdb_test.go @@ -0,0 +1,21 @@ +package metrics + +import ( + "net" + "time" +) + +func ExampleOpenTSDB() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go OpenTSDB(DefaultRegistry, 1*time.Second, "some.prefix", addr) +} + +func ExampleOpenTSDBWithConfig() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go OpenTSDBWithConfig(OpenTSDBConfig{ + Addr: addr, + Registry: DefaultRegistry, + FlushInterval: 1 * time.Second, + DurationUnit: time.Millisecond, + }) +} diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go index ae00356336..99abe03a4e 100644 --- a/metrics/prometheus/prometheus.go +++ b/metrics/prometheus/prometheus.go @@ -7,7 +7,7 @@ import ( "sort" "strings" - "github.com/ethereum/go-ethereum/metrics" + "github.com/ava-labs/subnet-evm/metrics" "github.com/prometheus/client_golang/prometheus" diff --git a/metrics/prometheus/prometheus_test.go b/metrics/prometheus/prometheus_test.go index 5d603257da..58d0b550ec 100644 --- a/metrics/prometheus/prometheus_test.go +++ b/metrics/prometheus/prometheus_test.go @@ -9,13 +9,9 @@ import ( "github.com/stretchr/testify/assert" - "github.com/ethereum/go-ethereum/metrics" + "github.com/ava-labs/subnet-evm/metrics" ) -func init() { - metrics.Enabled = true -} - func TestGatherer(t *testing.T) { registry := metrics.NewRegistry() diff --git a/metrics/registry.go b/metrics/registry.go new file mode 100644 index 0000000000..c5435adf24 --- /dev/null +++ b/metrics/registry.go @@ -0,0 +1,358 @@ +package metrics + +import ( + "fmt" + "reflect" + "strings" + "sync" +) + +// DuplicateMetric is the error returned by Registry.Register when a metric +// already exists. If you mean to Register that metric you must first +// Unregister the existing metric. +type DuplicateMetric string + +func (err DuplicateMetric) Error() string { + return fmt.Sprintf("duplicate metric: %s", string(err)) +} + +// A Registry holds references to a set of metrics by name and can iterate +// over them, calling callback functions provided by the user. +// +// This is an interface so as to encourage other structs to implement +// the Registry API as appropriate. +type Registry interface { + + // Call the given function for each registered metric. + Each(func(string, interface{})) + + // Get the metric by the given name or nil if none is registered. + Get(string) interface{} + + // GetAll metrics in the Registry. + GetAll() map[string]map[string]interface{} + + // Gets an existing metric or registers the given one. + // The interface can be the metric to register if not found in registry, + // or a function returning the metric for lazy instantiation. + GetOrRegister(string, interface{}) interface{} + + // Register the given metric under the given name. + Register(string, interface{}) error + + // Run all registered healthchecks. + RunHealthchecks() + + // Unregister the metric with the given name. + Unregister(string) + + // Unregister all metrics. (Mostly for testing.) + UnregisterAll() +} + +// The standard implementation of a Registry is a mutex-protected map +// of names to metrics. +type StandardRegistry struct { + metrics map[string]interface{} + mutex sync.Mutex +} + +// Create a new registry. +func NewRegistry() Registry { + return &StandardRegistry{metrics: make(map[string]interface{})} +} + +// Call the given function for each registered metric. +func (r *StandardRegistry) Each(f func(string, interface{})) { + for name, i := range r.registered() { + f(name, i) + } +} + +// Get the metric by the given name or nil if none is registered. +func (r *StandardRegistry) Get(name string) interface{} { + r.mutex.Lock() + defer r.mutex.Unlock() + return r.metrics[name] +} + +// Gets an existing metric or creates and registers a new one. Threadsafe +// alternative to calling Get and Register on failure. +// The interface can be the metric to register if not found in registry, +// or a function returning the metric for lazy instantiation. +func (r *StandardRegistry) GetOrRegister(name string, i interface{}) interface{} { + r.mutex.Lock() + defer r.mutex.Unlock() + if metric, ok := r.metrics[name]; ok { + return metric + } + if v := reflect.ValueOf(i); v.Kind() == reflect.Func { + i = v.Call(nil)[0].Interface() + } + r.register(name, i) + return i +} + +// Register the given metric under the given name. Returns a DuplicateMetric +// if a metric by the given name is already registered. +func (r *StandardRegistry) Register(name string, i interface{}) error { + r.mutex.Lock() + defer r.mutex.Unlock() + return r.register(name, i) +} + +// Run all registered healthchecks. +func (r *StandardRegistry) RunHealthchecks() { + r.mutex.Lock() + defer r.mutex.Unlock() + for _, i := range r.metrics { + if h, ok := i.(Healthcheck); ok { + h.Check() + } + } +} + +// GetAll metrics in the Registry +func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { + data := make(map[string]map[string]interface{}) + r.Each(func(name string, i interface{}) { + values := make(map[string]interface{}) + switch metric := i.(type) { + case Counter: + values["count"] = metric.Count() + case Gauge: + values["value"] = metric.Value() + case GaugeFloat64: + values["value"] = metric.Value() + case Healthcheck: + values["error"] = nil + metric.Check() + if err := metric.Error(); nil != err { + values["error"] = metric.Error().Error() + } + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + values["count"] = h.Count() + values["min"] = h.Min() + values["max"] = h.Max() + values["mean"] = h.Mean() + values["stddev"] = h.StdDev() + values["median"] = ps[0] + values["75%"] = ps[1] + values["95%"] = ps[2] + values["99%"] = ps[3] + values["99.9%"] = ps[4] + case Meter: + m := metric.Snapshot() + values["count"] = m.Count() + values["1m.rate"] = m.Rate1() + values["5m.rate"] = m.Rate5() + values["15m.rate"] = m.Rate15() + values["mean.rate"] = m.RateMean() + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + values["count"] = t.Count() + values["min"] = t.Min() + values["max"] = t.Max() + values["mean"] = t.Mean() + values["stddev"] = t.StdDev() + values["median"] = ps[0] + values["75%"] = ps[1] + values["95%"] = ps[2] + values["99%"] = ps[3] + values["99.9%"] = ps[4] + values["1m.rate"] = t.Rate1() + values["5m.rate"] = t.Rate5() + values["15m.rate"] = t.Rate15() + values["mean.rate"] = t.RateMean() + } + data[name] = values + }) + return data +} + +// Unregister the metric with the given name. +func (r *StandardRegistry) Unregister(name string) { + r.mutex.Lock() + defer r.mutex.Unlock() + r.stop(name) + delete(r.metrics, name) +} + +// Unregister all metrics. (Mostly for testing.) +func (r *StandardRegistry) UnregisterAll() { + r.mutex.Lock() + defer r.mutex.Unlock() + for name := range r.metrics { + r.stop(name) + delete(r.metrics, name) + } +} + +func (r *StandardRegistry) register(name string, i interface{}) error { + if _, ok := r.metrics[name]; ok { + return DuplicateMetric(name) + } + switch i.(type) { + case Counter, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: + r.metrics[name] = i + } + return nil +} + +func (r *StandardRegistry) registered() map[string]interface{} { + r.mutex.Lock() + defer r.mutex.Unlock() + metrics := make(map[string]interface{}, len(r.metrics)) + for name, i := range r.metrics { + metrics[name] = i + } + return metrics +} + +func (r *StandardRegistry) stop(name string) { + if i, ok := r.metrics[name]; ok { + if s, ok := i.(Stoppable); ok { + s.Stop() + } + } +} + +// Stoppable defines the metrics which has to be stopped. +type Stoppable interface { + Stop() +} + +type PrefixedRegistry struct { + underlying Registry + prefix string +} + +func NewPrefixedRegistry(prefix string) Registry { + return &PrefixedRegistry{ + underlying: NewRegistry(), + prefix: prefix, + } +} + +func NewPrefixedChildRegistry(parent Registry, prefix string) Registry { + return &PrefixedRegistry{ + underlying: parent, + prefix: prefix, + } +} + +// Call the given function for each registered metric. +func (r *PrefixedRegistry) Each(fn func(string, interface{})) { + wrappedFn := func(prefix string) func(string, interface{}) { + return func(name string, iface interface{}) { + if strings.HasPrefix(name, prefix) { + fn(name, iface) + } else { + return + } + } + } + + baseRegistry, prefix := findPrefix(r, "") + baseRegistry.Each(wrappedFn(prefix)) +} + +func findPrefix(registry Registry, prefix string) (Registry, string) { + switch r := registry.(type) { + case *PrefixedRegistry: + return findPrefix(r.underlying, r.prefix+prefix) + case *StandardRegistry: + return r, prefix + } + return nil, "" +} + +// Get the metric by the given name or nil if none is registered. +func (r *PrefixedRegistry) Get(name string) interface{} { + realName := r.prefix + name + return r.underlying.Get(realName) +} + +// Gets an existing metric or registers the given one. +// The interface can be the metric to register if not found in registry, +// or a function returning the metric for lazy instantiation. +func (r *PrefixedRegistry) GetOrRegister(name string, metric interface{}) interface{} { + realName := r.prefix + name + return r.underlying.GetOrRegister(realName, metric) +} + +// Register the given metric under the given name. The name will be prefixed. +func (r *PrefixedRegistry) Register(name string, metric interface{}) error { + realName := r.prefix + name + return r.underlying.Register(realName, metric) +} + +// Run all registered healthchecks. +func (r *PrefixedRegistry) RunHealthchecks() { + r.underlying.RunHealthchecks() +} + +// GetAll metrics in the Registry +func (r *PrefixedRegistry) GetAll() map[string]map[string]interface{} { + return r.underlying.GetAll() +} + +// Unregister the metric with the given name. The name will be prefixed. +func (r *PrefixedRegistry) Unregister(name string) { + realName := r.prefix + name + r.underlying.Unregister(realName) +} + +// Unregister all metrics. (Mostly for testing.) +func (r *PrefixedRegistry) UnregisterAll() { + r.underlying.UnregisterAll() +} + +var ( + DefaultRegistry = NewRegistry() + EphemeralRegistry = NewRegistry() + AccountingRegistry = NewRegistry() // registry used in swarm +) + +// Call the given function for each registered metric. +func Each(f func(string, interface{})) { + DefaultRegistry.Each(f) +} + +// Get the metric by the given name or nil if none is registered. +func Get(name string) interface{} { + return DefaultRegistry.Get(name) +} + +// Gets an existing metric or creates and registers a new one. Threadsafe +// alternative to calling Get and Register on failure. +func GetOrRegister(name string, i interface{}) interface{} { + return DefaultRegistry.GetOrRegister(name, i) +} + +// Register the given metric under the given name. Returns a DuplicateMetric +// if a metric by the given name is already registered. +func Register(name string, i interface{}) error { + return DefaultRegistry.Register(name, i) +} + +// Register the given metric under the given name. Panics if a metric by the +// given name is already registered. +func MustRegister(name string, i interface{}) { + if err := Register(name, i); err != nil { + panic(err) + } +} + +// Run all registered healthchecks. +func RunHealthchecks() { + DefaultRegistry.RunHealthchecks() +} + +// Unregister the metric with the given name. +func Unregister(name string) { + DefaultRegistry.Unregister(name) +} diff --git a/metrics/registry_test.go b/metrics/registry_test.go new file mode 100644 index 0000000000..6cfedfd88f --- /dev/null +++ b/metrics/registry_test.go @@ -0,0 +1,311 @@ +package metrics + +import ( + "testing" +) + +func BenchmarkRegistry(b *testing.B) { + r := NewRegistry() + r.Register("foo", NewCounter()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + r.Each(func(string, interface{}) {}) + } +} + +func TestRegistry(t *testing.T) { + r := NewRegistry() + r.Register("foo", NewCounter()) + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if name != "foo" { + t.Fatal(name) + } + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if i != 1 { + t.Fatal(i) + } + r.Unregister("foo") + i = 0 + r.Each(func(string, interface{}) { i++ }) + if i != 0 { + t.Fatal(i) + } +} + +func TestRegistryDuplicate(t *testing.T) { + r := NewRegistry() + if err := r.Register("foo", NewCounter()); nil != err { + t.Fatal(err) + } + if err := r.Register("foo", NewGauge()); nil == err { + t.Fatal(err) + } + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestRegistryGet(t *testing.T) { + r := NewRegistry() + r.Register("foo", NewCounter()) + if count := r.Get("foo").(Counter).Count(); count != 0 { + t.Fatal(count) + } + r.Get("foo").(Counter).Inc(1) + if count := r.Get("foo").(Counter).Count(); count != 1 { + t.Fatal(count) + } +} + +func TestRegistryGetOrRegister(t *testing.T) { + r := NewRegistry() + + // First metric wins with GetOrRegister + _ = r.GetOrRegister("foo", NewCounter()) + m := r.GetOrRegister("foo", NewGauge()) + if _, ok := m.(Counter); !ok { + t.Fatal(m) + } + + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if name != "foo" { + t.Fatal(name) + } + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestRegistryGetOrRegisterWithLazyInstantiation(t *testing.T) { + r := NewRegistry() + + // First metric wins with GetOrRegister + _ = r.GetOrRegister("foo", NewCounter) + m := r.GetOrRegister("foo", NewGauge) + if _, ok := m.(Counter); !ok { + t.Fatal(m) + } + + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if name != "foo" { + t.Fatal(name) + } + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestRegistryUnregister(t *testing.T) { + l := len(arbiter.meters) + r := NewRegistry() + r.Register("foo", NewCounter()) + r.Register("bar", NewMeter()) + r.Register("baz", NewTimer()) + if len(arbiter.meters) != l+2 { + t.Errorf("arbiter.meters: %d != %d\n", l+2, len(arbiter.meters)) + } + r.Unregister("foo") + r.Unregister("bar") + r.Unregister("baz") + if len(arbiter.meters) != l { + t.Errorf("arbiter.meters: %d != %d\n", l+2, len(arbiter.meters)) + } +} + +func TestPrefixedChildRegistryGetOrRegister(t *testing.T) { + r := NewRegistry() + pr := NewPrefixedChildRegistry(r, "prefix.") + + _ = pr.GetOrRegister("foo", NewCounter()) + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryGetOrRegister(t *testing.T) { + r := NewPrefixedRegistry("prefix.") + + _ = r.GetOrRegister("foo", NewCounter()) + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryRegister(t *testing.T) { + r := NewPrefixedRegistry("prefix.") + err := r.Register("foo", NewCounter()) + c := NewCounter() + Register("bar", c) + if err != nil { + t.Fatal(err.Error()) + } + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryUnregister(t *testing.T) { + r := NewPrefixedRegistry("prefix.") + + _ = r.Register("foo", NewCounter()) + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } + + r.Unregister("foo") + + i = 0 + r.Each(func(name string, m interface{}) { + i++ + }) + + if i != 0 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryGet(t *testing.T) { + pr := NewPrefixedRegistry("prefix.") + name := "foo" + pr.Register(name, NewCounter()) + + fooCounter := pr.Get(name) + if fooCounter == nil { + t.Fatal(name) + } +} + +func TestPrefixedChildRegistryGet(t *testing.T) { + r := NewRegistry() + pr := NewPrefixedChildRegistry(r, "prefix.") + name := "foo" + pr.Register(name, NewCounter()) + fooCounter := pr.Get(name) + if fooCounter == nil { + t.Fatal(name) + } +} + +func TestChildPrefixedRegistryRegister(t *testing.T) { + r := NewPrefixedChildRegistry(DefaultRegistry, "prefix.") + err := r.Register("foo", NewCounter()) + c := NewCounter() + Register("bar", c) + if err != nil { + t.Fatal(err.Error()) + } + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestChildPrefixedRegistryOfChildRegister(t *testing.T) { + r := NewPrefixedChildRegistry(NewRegistry(), "prefix.") + r2 := NewPrefixedChildRegistry(r, "prefix2.") + err := r.Register("foo2", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } + err = r2.Register("baz", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } + c := NewCounter() + Register("bars", c) + + i := 0 + r2.Each(func(name string, m interface{}) { + i++ + if name != "prefix.prefix2.baz" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestWalkRegistries(t *testing.T) { + r := NewPrefixedChildRegistry(NewRegistry(), "prefix.") + r2 := NewPrefixedChildRegistry(r, "prefix2.") + err := r.Register("foo2", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } + err = r2.Register("baz", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } + c := NewCounter() + Register("bars", c) + + _, prefix := findPrefix(r2, "") + if prefix != "prefix.prefix2." { + t.Fatal(prefix) + } + +} diff --git a/metrics/resetting_sample.go b/metrics/resetting_sample.go new file mode 100644 index 0000000000..43c1129cd0 --- /dev/null +++ b/metrics/resetting_sample.go @@ -0,0 +1,24 @@ +package metrics + +// ResettingSample converts an ordinary sample into one that resets whenever its +// snapshot is retrieved. This will break for multi-monitor systems, but when only +// a single metric is being pushed out, this ensure that low-frequency events don't +// skew th charts indefinitely. +func ResettingSample(sample Sample) Sample { + return &resettingSample{ + Sample: sample, + } +} + +// resettingSample is a simple wrapper around a sample that resets it upon the +// snapshot retrieval. +type resettingSample struct { + Sample +} + +// Snapshot returns a read-only copy of the sample with the original reset. +func (rs *resettingSample) Snapshot() Sample { + s := rs.Sample.Snapshot() + rs.Sample.Clear() + return s +} diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go new file mode 100644 index 0000000000..e5327d3bd3 --- /dev/null +++ b/metrics/resetting_timer.go @@ -0,0 +1,241 @@ +package metrics + +import ( + "math" + "sort" + "sync" + "time" +) + +// Initial slice capacity for the values stored in a ResettingTimer +const InitialResettingTimerSliceCap = 10 + +// ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. +type ResettingTimer interface { + Values() []int64 + Snapshot() ResettingTimer + Percentiles([]float64) []int64 + Mean() float64 + Time(func()) + Update(time.Duration) + UpdateSince(time.Time) +} + +// GetOrRegisterResettingTimer returns an existing ResettingTimer or constructs and registers a +// new StandardResettingTimer. +func GetOrRegisterResettingTimer(name string, r Registry) ResettingTimer { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewResettingTimer).(ResettingTimer) +} + +// NewRegisteredResettingTimer constructs and registers a new StandardResettingTimer. +func NewRegisteredResettingTimer(name string, r Registry) ResettingTimer { + c := NewResettingTimer() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewResettingTimer constructs a new StandardResettingTimer +func NewResettingTimer() ResettingTimer { + if !Enabled { + return NilResettingTimer{} + } + return &StandardResettingTimer{ + values: make([]int64, 0, InitialResettingTimerSliceCap), + } +} + +// NilResettingTimer is a no-op ResettingTimer. +type NilResettingTimer struct { +} + +// Values is a no-op. +func (NilResettingTimer) Values() []int64 { return nil } + +// Snapshot is a no-op. +func (NilResettingTimer) Snapshot() ResettingTimer { + return &ResettingTimerSnapshot{ + values: []int64{}, + } +} + +// Time is a no-op. +func (NilResettingTimer) Time(func()) {} + +// Update is a no-op. +func (NilResettingTimer) Update(time.Duration) {} + +// Percentiles panics. +func (NilResettingTimer) Percentiles([]float64) []int64 { + panic("Percentiles called on a NilResettingTimer") +} + +// Mean panics. +func (NilResettingTimer) Mean() float64 { + panic("Mean called on a NilResettingTimer") +} + +// UpdateSince is a no-op. +func (NilResettingTimer) UpdateSince(time.Time) {} + +// StandardResettingTimer is the standard implementation of a ResettingTimer. +// and Meter. +type StandardResettingTimer struct { + values []int64 + mutex sync.Mutex +} + +// Values returns a slice with all measurements. +func (t *StandardResettingTimer) Values() []int64 { + return t.values +} + +// Snapshot resets the timer and returns a read-only copy of its contents. +func (t *StandardResettingTimer) Snapshot() ResettingTimer { + t.mutex.Lock() + defer t.mutex.Unlock() + currentValues := t.values + t.values = make([]int64, 0, InitialResettingTimerSliceCap) + + return &ResettingTimerSnapshot{ + values: currentValues, + } +} + +// Percentiles panics. +func (t *StandardResettingTimer) Percentiles([]float64) []int64 { + panic("Percentiles called on a StandardResettingTimer") +} + +// Mean panics. +func (t *StandardResettingTimer) Mean() float64 { + panic("Mean called on a StandardResettingTimer") +} + +// Record the duration of the execution of the given function. +func (t *StandardResettingTimer) Time(f func()) { + ts := time.Now() + f() + t.Update(time.Since(ts)) +} + +// Record the duration of an event. +func (t *StandardResettingTimer) Update(d time.Duration) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.values = append(t.values, int64(d)) +} + +// Record the duration of an event that started at a time and ends now. +func (t *StandardResettingTimer) UpdateSince(ts time.Time) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.values = append(t.values, int64(time.Since(ts))) +} + +// ResettingTimerSnapshot is a point-in-time copy of another ResettingTimer. +type ResettingTimerSnapshot struct { + values []int64 + mean float64 + thresholdBoundaries []int64 + calculated bool +} + +// Snapshot returns the snapshot. +func (t *ResettingTimerSnapshot) Snapshot() ResettingTimer { return t } + +// Time panics. +func (*ResettingTimerSnapshot) Time(func()) { + panic("Time called on a ResettingTimerSnapshot") +} + +// Update panics. +func (*ResettingTimerSnapshot) Update(time.Duration) { + panic("Update called on a ResettingTimerSnapshot") +} + +// UpdateSince panics. +func (*ResettingTimerSnapshot) UpdateSince(time.Time) { + panic("UpdateSince called on a ResettingTimerSnapshot") +} + +// Values returns all values from snapshot. +func (t *ResettingTimerSnapshot) Values() []int64 { + return t.values +} + +// Percentiles returns the boundaries for the input percentiles. +func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []int64 { + t.calc(percentiles) + + return t.thresholdBoundaries +} + +// Mean returns the mean of the snapshotted values +func (t *ResettingTimerSnapshot) Mean() float64 { + if !t.calculated { + t.calc([]float64{}) + } + + return t.mean +} + +func (t *ResettingTimerSnapshot) calc(percentiles []float64) { + sort.Sort(Int64Slice(t.values)) + + count := len(t.values) + if count > 0 { + min := t.values[0] + max := t.values[count-1] + + cumulativeValues := make([]int64, count) + cumulativeValues[0] = min + for i := 1; i < count; i++ { + cumulativeValues[i] = t.values[i] + cumulativeValues[i-1] + } + + t.thresholdBoundaries = make([]int64, len(percentiles)) + + thresholdBoundary := max + + for i, pct := range percentiles { + if count > 1 { + var abs float64 + if pct >= 0 { + abs = pct + } else { + abs = 100 + pct + } + // poor man's math.Round(x): + // math.Floor(x + 0.5) + indexOfPerc := int(math.Floor(((abs / 100.0) * float64(count)) + 0.5)) + if pct >= 0 && indexOfPerc > 0 { + indexOfPerc -= 1 // index offset=0 + } + thresholdBoundary = t.values[indexOfPerc] + } + + t.thresholdBoundaries[i] = thresholdBoundary + } + + sum := cumulativeValues[count-1] + t.mean = float64(sum) / float64(count) + } else { + t.thresholdBoundaries = make([]int64, len(percentiles)) + t.mean = 0 + } + + t.calculated = true +} + +// Int64Slice attaches the methods of sort.Interface to []int64, sorting in increasing order. +type Int64Slice []int64 + +func (s Int64Slice) Len() int { return len(s) } +func (s Int64Slice) Less(i, j int) bool { return s[i] < s[j] } +func (s Int64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/metrics/resetting_timer_test.go b/metrics/resetting_timer_test.go new file mode 100644 index 0000000000..77c49dc386 --- /dev/null +++ b/metrics/resetting_timer_test.go @@ -0,0 +1,216 @@ +package metrics + +import ( + "testing" + "time" +) + +func TestResettingTimer(t *testing.T) { + tests := []struct { + values []int64 + start int + end int + wantP50 int64 + wantP95 int64 + wantP99 int64 + wantMean float64 + wantMin int64 + wantMax int64 + }{ + { + values: []int64{}, + start: 1, + end: 11, + wantP50: 5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + { + values: []int64{}, + start: 1, + end: 101, + wantP50: 50, wantP95: 95, wantP99: 99, + wantMin: 1, wantMax: 100, wantMean: 50.5, + }, + { + values: []int64{1}, + start: 0, + end: 0, + wantP50: 1, wantP95: 1, wantP99: 1, + wantMin: 1, wantMax: 1, wantMean: 1, + }, + { + values: []int64{0}, + start: 0, + end: 0, + wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{}, + start: 0, + end: 0, + wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{1, 10}, + start: 0, + end: 0, + wantP50: 1, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + } + for ind, tt := range tests { + timer := NewResettingTimer() + + for i := tt.start; i < tt.end; i++ { + tt.values = append(tt.values, int64(i)) + } + + for _, v := range tt.values { + timer.Update(time.Duration(v)) + } + + snap := timer.Snapshot() + + ps := snap.Percentiles([]float64{50, 95, 99}) + + val := snap.Values() + + if len(val) > 0 { + if tt.wantMin != val[0] { + t.Fatalf("%d: min: got %d, want %d", ind, val[0], tt.wantMin) + } + + if tt.wantMax != val[len(val)-1] { + t.Fatalf("%d: max: got %d, want %d", ind, val[len(val)-1], tt.wantMax) + } + } + + if tt.wantMean != snap.Mean() { + t.Fatalf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + } + + if tt.wantP50 != ps[0] { + t.Fatalf("%d: p50: got %d, want %d", ind, ps[0], tt.wantP50) + } + + if tt.wantP95 != ps[1] { + t.Fatalf("%d: p95: got %d, want %d", ind, ps[1], tt.wantP95) + } + + if tt.wantP99 != ps[2] { + t.Fatalf("%d: p99: got %d, want %d", ind, ps[2], tt.wantP99) + } + } +} + +func TestResettingTimerWithFivePercentiles(t *testing.T) { + tests := []struct { + values []int64 + start int + end int + wantP05 int64 + wantP20 int64 + wantP50 int64 + wantP95 int64 + wantP99 int64 + wantMean float64 + wantMin int64 + wantMax int64 + }{ + { + values: []int64{}, + start: 1, + end: 11, + wantP05: 1, wantP20: 2, wantP50: 5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + { + values: []int64{}, + start: 1, + end: 101, + wantP05: 5, wantP20: 20, wantP50: 50, wantP95: 95, wantP99: 99, + wantMin: 1, wantMax: 100, wantMean: 50.5, + }, + { + values: []int64{1}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 1, wantP95: 1, wantP99: 1, + wantMin: 1, wantMax: 1, wantMean: 1, + }, + { + values: []int64{0}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{1, 10}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 1, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + } + for ind, tt := range tests { + timer := NewResettingTimer() + + for i := tt.start; i < tt.end; i++ { + tt.values = append(tt.values, int64(i)) + } + + for _, v := range tt.values { + timer.Update(time.Duration(v)) + } + + snap := timer.Snapshot() + + ps := snap.Percentiles([]float64{5, 20, 50, 95, 99}) + + val := snap.Values() + + if len(val) > 0 { + if tt.wantMin != val[0] { + t.Fatalf("%d: min: got %d, want %d", ind, val[0], tt.wantMin) + } + + if tt.wantMax != val[len(val)-1] { + t.Fatalf("%d: max: got %d, want %d", ind, val[len(val)-1], tt.wantMax) + } + } + + if tt.wantMean != snap.Mean() { + t.Fatalf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + } + + if tt.wantP05 != ps[0] { + t.Fatalf("%d: p05: got %d, want %d", ind, ps[0], tt.wantP05) + } + + if tt.wantP20 != ps[1] { + t.Fatalf("%d: p20: got %d, want %d", ind, ps[1], tt.wantP20) + } + + if tt.wantP50 != ps[2] { + t.Fatalf("%d: p50: got %d, want %d", ind, ps[2], tt.wantP50) + } + + if tt.wantP95 != ps[3] { + t.Fatalf("%d: p95: got %d, want %d", ind, ps[3], tt.wantP95) + } + + if tt.wantP99 != ps[4] { + t.Fatalf("%d: p99: got %d, want %d", ind, ps[4], tt.wantP99) + } + } +} diff --git a/metrics/runtime.go b/metrics/runtime.go new file mode 100644 index 0000000000..9450c479ba --- /dev/null +++ b/metrics/runtime.go @@ -0,0 +1,212 @@ +package metrics + +import ( + "runtime" + "runtime/pprof" + "time" +) + +var ( + memStats runtime.MemStats + runtimeMetrics struct { + MemStats struct { + Alloc Gauge + BuckHashSys Gauge + DebugGC Gauge + EnableGC Gauge + Frees Gauge + HeapAlloc Gauge + HeapIdle Gauge + HeapInuse Gauge + HeapObjects Gauge + HeapReleased Gauge + HeapSys Gauge + LastGC Gauge + Lookups Gauge + Mallocs Gauge + MCacheInuse Gauge + MCacheSys Gauge + MSpanInuse Gauge + MSpanSys Gauge + NextGC Gauge + NumGC Gauge + GCCPUFraction GaugeFloat64 + PauseNs Histogram + PauseTotalNs Gauge + StackInuse Gauge + StackSys Gauge + Sys Gauge + TotalAlloc Gauge + } + NumCgoCall Gauge + NumGoroutine Gauge + NumThread Gauge + ReadMemStats Timer + } + frees uint64 + lookups uint64 + mallocs uint64 + numGC uint32 + numCgoCalls int64 + + threadCreateProfile = pprof.Lookup("threadcreate") +) + +// Capture new values for the Go runtime statistics exported in +// runtime.MemStats. This is designed to be called as a goroutine. +func CaptureRuntimeMemStats(r Registry, d time.Duration) { + for range time.Tick(d) { + CaptureRuntimeMemStatsOnce(r) + } +} + +// Capture new values for the Go runtime statistics exported in +// runtime.MemStats. This is designed to be called in a background +// goroutine. Giving a registry which has not been given to +// RegisterRuntimeMemStats will panic. +// +// Be very careful with this because runtime.ReadMemStats calls the C +// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld() +// and that last one does what it says on the tin. +func CaptureRuntimeMemStatsOnce(r Registry) { + t := time.Now() + runtime.ReadMemStats(&memStats) // This takes 50-200us. + runtimeMetrics.ReadMemStats.UpdateSince(t) + + runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc)) + runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys)) + if memStats.DebugGC { + runtimeMetrics.MemStats.DebugGC.Update(1) + } else { + runtimeMetrics.MemStats.DebugGC.Update(0) + } + if memStats.EnableGC { + runtimeMetrics.MemStats.EnableGC.Update(1) + } else { + runtimeMetrics.MemStats.EnableGC.Update(0) + } + + runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees)) + runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc)) + runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle)) + runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse)) + runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects)) + runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased)) + runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys)) + runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC)) + runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups)) + runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs)) + runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse)) + runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys)) + runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse)) + runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys)) + runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC)) + runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC)) + runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats)) + + // + i := numGC % uint32(len(memStats.PauseNs)) + ii := memStats.NumGC % uint32(len(memStats.PauseNs)) + if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) { + for i = 0; i < uint32(len(memStats.PauseNs)); i++ { + runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) + } + } else { + if i > ii { + for ; i < uint32(len(memStats.PauseNs)); i++ { + runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) + } + i = 0 + } + for ; i < ii; i++ { + runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) + } + } + frees = memStats.Frees + lookups = memStats.Lookups + mallocs = memStats.Mallocs + numGC = memStats.NumGC + + runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs)) + runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse)) + runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys)) + runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys)) + runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc)) + + currentNumCgoCalls := numCgoCall() + runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls) + numCgoCalls = currentNumCgoCalls + + runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine())) + + runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count())) +} + +// Register runtimeMetrics for the Go runtime statistics exported in runtime and +// specifically runtime.MemStats. The runtimeMetrics are named by their +// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc. +func RegisterRuntimeMemStats(r Registry) { + runtimeMetrics.MemStats.Alloc = NewGauge() + runtimeMetrics.MemStats.BuckHashSys = NewGauge() + runtimeMetrics.MemStats.DebugGC = NewGauge() + runtimeMetrics.MemStats.EnableGC = NewGauge() + runtimeMetrics.MemStats.Frees = NewGauge() + runtimeMetrics.MemStats.HeapAlloc = NewGauge() + runtimeMetrics.MemStats.HeapIdle = NewGauge() + runtimeMetrics.MemStats.HeapInuse = NewGauge() + runtimeMetrics.MemStats.HeapObjects = NewGauge() + runtimeMetrics.MemStats.HeapReleased = NewGauge() + runtimeMetrics.MemStats.HeapSys = NewGauge() + runtimeMetrics.MemStats.LastGC = NewGauge() + runtimeMetrics.MemStats.Lookups = NewGauge() + runtimeMetrics.MemStats.Mallocs = NewGauge() + runtimeMetrics.MemStats.MCacheInuse = NewGauge() + runtimeMetrics.MemStats.MCacheSys = NewGauge() + runtimeMetrics.MemStats.MSpanInuse = NewGauge() + runtimeMetrics.MemStats.MSpanSys = NewGauge() + runtimeMetrics.MemStats.NextGC = NewGauge() + runtimeMetrics.MemStats.NumGC = NewGauge() + runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64() + runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015)) + runtimeMetrics.MemStats.PauseTotalNs = NewGauge() + runtimeMetrics.MemStats.StackInuse = NewGauge() + runtimeMetrics.MemStats.StackSys = NewGauge() + runtimeMetrics.MemStats.Sys = NewGauge() + runtimeMetrics.MemStats.TotalAlloc = NewGauge() + runtimeMetrics.NumCgoCall = NewGauge() + runtimeMetrics.NumGoroutine = NewGauge() + runtimeMetrics.NumThread = NewGauge() + runtimeMetrics.ReadMemStats = NewTimer() + + r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc) + r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys) + r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC) + r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC) + r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees) + r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc) + r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle) + r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse) + r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects) + r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased) + r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys) + r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC) + r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups) + r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs) + r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse) + r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys) + r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse) + r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys) + r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC) + r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC) + r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction) + r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs) + r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs) + r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse) + r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys) + r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys) + r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc) + r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall) + r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine) + r.Register("runtime.NumThread", runtimeMetrics.NumThread) + r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats) +} diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go new file mode 100644 index 0000000000..4307ebdba6 --- /dev/null +++ b/metrics/runtime_cgo.go @@ -0,0 +1,10 @@ +//go:build cgo && !appengine && !js +// +build cgo,!appengine,!js + +package metrics + +import "runtime" + +func numCgoCall() int64 { + return runtime.NumCgoCall() +} diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go new file mode 100644 index 0000000000..28cd44752b --- /dev/null +++ b/metrics/runtime_gccpufraction.go @@ -0,0 +1,10 @@ +//go:build go1.5 +// +build go1.5 + +package metrics + +import "runtime" + +func gcCPUFraction(memStats *runtime.MemStats) float64 { + return memStats.GCCPUFraction +} diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go new file mode 100644 index 0000000000..1799bef63b --- /dev/null +++ b/metrics/runtime_no_cgo.go @@ -0,0 +1,8 @@ +//go:build !cgo || appengine || js +// +build !cgo appengine js + +package metrics + +func numCgoCall() int64 { + return 0 +} diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go new file mode 100644 index 0000000000..af1a4b63c8 --- /dev/null +++ b/metrics/runtime_no_gccpufraction.go @@ -0,0 +1,10 @@ +//go:build !go1.5 +// +build !go1.5 + +package metrics + +import "runtime" + +func gcCPUFraction(memStats *runtime.MemStats) float64 { + return 0 +} diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go new file mode 100644 index 0000000000..f85f7868f7 --- /dev/null +++ b/metrics/runtime_test.go @@ -0,0 +1,88 @@ +package metrics + +import ( + "runtime" + "testing" + "time" +) + +func BenchmarkRuntimeMemStats(b *testing.B) { + r := NewRegistry() + RegisterRuntimeMemStats(r) + b.ResetTimer() + for i := 0; i < b.N; i++ { + CaptureRuntimeMemStatsOnce(r) + } +} + +func TestRuntimeMemStats(t *testing.T) { + r := NewRegistry() + RegisterRuntimeMemStats(r) + CaptureRuntimeMemStatsOnce(r) + zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests. + runtime.GC() + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 { + t.Fatal(count - zero) + } + runtime.GC() + runtime.GC() + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 { + t.Fatal(count - zero) + } + for i := 0; i < 256; i++ { + runtime.GC() + } + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 { + t.Fatal(count - zero) + } + for i := 0; i < 257; i++ { + runtime.GC() + } + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures. + t.Fatal(count - zero) + } +} + +func TestRuntimeMemStatsNumThread(t *testing.T) { + r := NewRegistry() + RegisterRuntimeMemStats(r) + CaptureRuntimeMemStatsOnce(r) + + if value := runtimeMetrics.NumThread.Value(); value < 1 { + t.Fatalf("got NumThread: %d, wanted at least 1", value) + } +} + +func TestRuntimeMemStatsBlocking(t *testing.T) { + if g := runtime.GOMAXPROCS(0); g < 2 { + t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g) + } + ch := make(chan int) + go testRuntimeMemStatsBlocking(ch) + var memStats runtime.MemStats + t0 := time.Now() + runtime.ReadMemStats(&memStats) + t1 := time.Now() + t.Log("i++ during runtime.ReadMemStats:", <-ch) + go testRuntimeMemStatsBlocking(ch) + d := t1.Sub(t0) + t.Log(d) + time.Sleep(d) + t.Log("i++ during time.Sleep:", <-ch) +} + +func testRuntimeMemStatsBlocking(ch chan int) { + i := 0 + for { + select { + case ch <- i: + return + default: + i++ + } + } +} diff --git a/metrics/sample.go b/metrics/sample.go new file mode 100644 index 0000000000..fa2bfb274e --- /dev/null +++ b/metrics/sample.go @@ -0,0 +1,616 @@ +package metrics + +import ( + "math" + "math/rand" + "sort" + "sync" + "time" +) + +const rescaleThreshold = time.Hour + +// Samples maintain a statistically-significant selection of values from +// a stream. +type Sample interface { + Clear() + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Size() int + Snapshot() Sample + StdDev() float64 + Sum() int64 + Update(int64) + Values() []int64 + Variance() float64 +} + +// ExpDecaySample is an exponentially-decaying sample using a forward-decaying +// priority reservoir. See Cormode et al's "Forward Decay: A Practical Time +// Decay Model for Streaming Systems". +// +// +type ExpDecaySample struct { + alpha float64 + count int64 + mutex sync.Mutex + reservoirSize int + t0, t1 time.Time + values *expDecaySampleHeap +} + +// NewExpDecaySample constructs a new exponentially-decaying sample with the +// given reservoir size and alpha. +func NewExpDecaySample(reservoirSize int, alpha float64) Sample { + if !Enabled { + return NilSample{} + } + s := &ExpDecaySample{ + alpha: alpha, + reservoirSize: reservoirSize, + t0: time.Now(), + values: newExpDecaySampleHeap(reservoirSize), + } + s.t1 = s.t0.Add(rescaleThreshold) + return s +} + +// Clear clears all samples. +func (s *ExpDecaySample) Clear() { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count = 0 + s.t0 = time.Now() + s.t1 = s.t0.Add(rescaleThreshold) + s.values.Clear() +} + +// Count returns the number of samples recorded, which may exceed the +// reservoir size. +func (s *ExpDecaySample) Count() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.count +} + +// Max returns the maximum value in the sample, which may not be the maximum +// value ever to be part of the sample. +func (s *ExpDecaySample) Max() int64 { + return SampleMax(s.Values()) +} + +// Mean returns the mean of the values in the sample. +func (s *ExpDecaySample) Mean() float64 { + return SampleMean(s.Values()) +} + +// Min returns the minimum value in the sample, which may not be the minimum +// value ever to be part of the sample. +func (s *ExpDecaySample) Min() int64 { + return SampleMin(s.Values()) +} + +// Percentile returns an arbitrary percentile of values in the sample. +func (s *ExpDecaySample) Percentile(p float64) float64 { + return SamplePercentile(s.Values(), p) +} + +// Percentiles returns a slice of arbitrary percentiles of values in the +// sample. +func (s *ExpDecaySample) Percentiles(ps []float64) []float64 { + return SamplePercentiles(s.Values(), ps) +} + +// Size returns the size of the sample, which is at most the reservoir size. +func (s *ExpDecaySample) Size() int { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.values.Size() +} + +// Snapshot returns a read-only copy of the sample. +func (s *ExpDecaySample) Snapshot() Sample { + s.mutex.Lock() + defer s.mutex.Unlock() + vals := s.values.Values() + values := make([]int64, len(vals)) + for i, v := range vals { + values[i] = v.v + } + return &SampleSnapshot{ + count: s.count, + values: values, + } +} + +// StdDev returns the standard deviation of the values in the sample. +func (s *ExpDecaySample) StdDev() float64 { + return SampleStdDev(s.Values()) +} + +// Sum returns the sum of the values in the sample. +func (s *ExpDecaySample) Sum() int64 { + return SampleSum(s.Values()) +} + +// Update samples a new value. +func (s *ExpDecaySample) Update(v int64) { + s.update(time.Now(), v) +} + +// Values returns a copy of the values in the sample. +func (s *ExpDecaySample) Values() []int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + vals := s.values.Values() + values := make([]int64, len(vals)) + for i, v := range vals { + values[i] = v.v + } + return values +} + +// Variance returns the variance of the values in the sample. +func (s *ExpDecaySample) Variance() float64 { + return SampleVariance(s.Values()) +} + +// update samples a new value at a particular timestamp. This is a method all +// its own to facilitate testing. +func (s *ExpDecaySample) update(t time.Time, v int64) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count++ + if s.values.Size() == s.reservoirSize { + s.values.Pop() + } + s.values.Push(expDecaySample{ + k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(), + v: v, + }) + if t.After(s.t1) { + values := s.values.Values() + t0 := s.t0 + s.values.Clear() + s.t0 = t + s.t1 = s.t0.Add(rescaleThreshold) + for _, v := range values { + v.k = v.k * math.Exp(-s.alpha*s.t0.Sub(t0).Seconds()) + s.values.Push(v) + } + } +} + +// NilSample is a no-op Sample. +type NilSample struct{} + +// Clear is a no-op. +func (NilSample) Clear() {} + +// Count is a no-op. +func (NilSample) Count() int64 { return 0 } + +// Max is a no-op. +func (NilSample) Max() int64 { return 0 } + +// Mean is a no-op. +func (NilSample) Mean() float64 { return 0.0 } + +// Min is a no-op. +func (NilSample) Min() int64 { return 0 } + +// Percentile is a no-op. +func (NilSample) Percentile(p float64) float64 { return 0.0 } + +// Percentiles is a no-op. +func (NilSample) Percentiles(ps []float64) []float64 { + return make([]float64, len(ps)) +} + +// Size is a no-op. +func (NilSample) Size() int { return 0 } + +// Sample is a no-op. +func (NilSample) Snapshot() Sample { return NilSample{} } + +// StdDev is a no-op. +func (NilSample) StdDev() float64 { return 0.0 } + +// Sum is a no-op. +func (NilSample) Sum() int64 { return 0 } + +// Update is a no-op. +func (NilSample) Update(v int64) {} + +// Values is a no-op. +func (NilSample) Values() []int64 { return []int64{} } + +// Variance is a no-op. +func (NilSample) Variance() float64 { return 0.0 } + +// SampleMax returns the maximum value of the slice of int64. +func SampleMax(values []int64) int64 { + if len(values) == 0 { + return 0 + } + var max int64 = math.MinInt64 + for _, v := range values { + if max < v { + max = v + } + } + return max +} + +// SampleMean returns the mean value of the slice of int64. +func SampleMean(values []int64) float64 { + if len(values) == 0 { + return 0.0 + } + return float64(SampleSum(values)) / float64(len(values)) +} + +// SampleMin returns the minimum value of the slice of int64. +func SampleMin(values []int64) int64 { + if len(values) == 0 { + return 0 + } + var min int64 = math.MaxInt64 + for _, v := range values { + if min > v { + min = v + } + } + return min +} + +// SamplePercentiles returns an arbitrary percentile of the slice of int64. +func SamplePercentile(values int64Slice, p float64) float64 { + return SamplePercentiles(values, []float64{p})[0] +} + +// SamplePercentiles returns a slice of arbitrary percentiles of the slice of +// int64. +func SamplePercentiles(values int64Slice, ps []float64) []float64 { + scores := make([]float64, len(ps)) + size := len(values) + if size > 0 { + sort.Sort(values) + for i, p := range ps { + pos := p * float64(size+1) + if pos < 1.0 { + scores[i] = float64(values[0]) + } else if pos >= float64(size) { + scores[i] = float64(values[size-1]) + } else { + lower := float64(values[int(pos)-1]) + upper := float64(values[int(pos)]) + scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) + } + } + } + return scores +} + +// SampleSnapshot is a read-only copy of another Sample. +type SampleSnapshot struct { + count int64 + values []int64 +} + +func NewSampleSnapshot(count int64, values []int64) *SampleSnapshot { + return &SampleSnapshot{ + count: count, + values: values, + } +} + +// Clear panics. +func (*SampleSnapshot) Clear() { + panic("Clear called on a SampleSnapshot") +} + +// Count returns the count of inputs at the time the snapshot was taken. +func (s *SampleSnapshot) Count() int64 { return s.count } + +// Max returns the maximal value at the time the snapshot was taken. +func (s *SampleSnapshot) Max() int64 { return SampleMax(s.values) } + +// Mean returns the mean value at the time the snapshot was taken. +func (s *SampleSnapshot) Mean() float64 { return SampleMean(s.values) } + +// Min returns the minimal value at the time the snapshot was taken. +func (s *SampleSnapshot) Min() int64 { return SampleMin(s.values) } + +// Percentile returns an arbitrary percentile of values at the time the +// snapshot was taken. +func (s *SampleSnapshot) Percentile(p float64) float64 { + return SamplePercentile(s.values, p) +} + +// Percentiles returns a slice of arbitrary percentiles of values at the time +// the snapshot was taken. +func (s *SampleSnapshot) Percentiles(ps []float64) []float64 { + return SamplePercentiles(s.values, ps) +} + +// Size returns the size of the sample at the time the snapshot was taken. +func (s *SampleSnapshot) Size() int { return len(s.values) } + +// Snapshot returns the snapshot. +func (s *SampleSnapshot) Snapshot() Sample { return s } + +// StdDev returns the standard deviation of values at the time the snapshot was +// taken. +func (s *SampleSnapshot) StdDev() float64 { return SampleStdDev(s.values) } + +// Sum returns the sum of values at the time the snapshot was taken. +func (s *SampleSnapshot) Sum() int64 { return SampleSum(s.values) } + +// Update panics. +func (*SampleSnapshot) Update(int64) { + panic("Update called on a SampleSnapshot") +} + +// Values returns a copy of the values in the sample. +func (s *SampleSnapshot) Values() []int64 { + values := make([]int64, len(s.values)) + copy(values, s.values) + return values +} + +// Variance returns the variance of values at the time the snapshot was taken. +func (s *SampleSnapshot) Variance() float64 { return SampleVariance(s.values) } + +// SampleStdDev returns the standard deviation of the slice of int64. +func SampleStdDev(values []int64) float64 { + return math.Sqrt(SampleVariance(values)) +} + +// SampleSum returns the sum of the slice of int64. +func SampleSum(values []int64) int64 { + var sum int64 + for _, v := range values { + sum += v + } + return sum +} + +// SampleVariance returns the variance of the slice of int64. +func SampleVariance(values []int64) float64 { + if len(values) == 0 { + return 0.0 + } + m := SampleMean(values) + var sum float64 + for _, v := range values { + d := float64(v) - m + sum += d * d + } + return sum / float64(len(values)) +} + +// A uniform sample using Vitter's Algorithm R. +// +// +type UniformSample struct { + count int64 + mutex sync.Mutex + reservoirSize int + values []int64 +} + +// NewUniformSample constructs a new uniform sample with the given reservoir +// size. +func NewUniformSample(reservoirSize int) Sample { + if !Enabled { + return NilSample{} + } + return &UniformSample{ + reservoirSize: reservoirSize, + values: make([]int64, 0, reservoirSize), + } +} + +// Clear clears all samples. +func (s *UniformSample) Clear() { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count = 0 + s.values = make([]int64, 0, s.reservoirSize) +} + +// Count returns the number of samples recorded, which may exceed the +// reservoir size. +func (s *UniformSample) Count() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.count +} + +// Max returns the maximum value in the sample, which may not be the maximum +// value ever to be part of the sample. +func (s *UniformSample) Max() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleMax(s.values) +} + +// Mean returns the mean of the values in the sample. +func (s *UniformSample) Mean() float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleMean(s.values) +} + +// Min returns the minimum value in the sample, which may not be the minimum +// value ever to be part of the sample. +func (s *UniformSample) Min() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleMin(s.values) +} + +// Percentile returns an arbitrary percentile of values in the sample. +func (s *UniformSample) Percentile(p float64) float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SamplePercentile(s.values, p) +} + +// Percentiles returns a slice of arbitrary percentiles of values in the +// sample. +func (s *UniformSample) Percentiles(ps []float64) []float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SamplePercentiles(s.values, ps) +} + +// Size returns the size of the sample, which is at most the reservoir size. +func (s *UniformSample) Size() int { + s.mutex.Lock() + defer s.mutex.Unlock() + return len(s.values) +} + +// Snapshot returns a read-only copy of the sample. +func (s *UniformSample) Snapshot() Sample { + s.mutex.Lock() + defer s.mutex.Unlock() + values := make([]int64, len(s.values)) + copy(values, s.values) + return &SampleSnapshot{ + count: s.count, + values: values, + } +} + +// StdDev returns the standard deviation of the values in the sample. +func (s *UniformSample) StdDev() float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleStdDev(s.values) +} + +// Sum returns the sum of the values in the sample. +func (s *UniformSample) Sum() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleSum(s.values) +} + +// Update samples a new value. +func (s *UniformSample) Update(v int64) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count++ + if len(s.values) < s.reservoirSize { + s.values = append(s.values, v) + } else { + r := rand.Int63n(s.count) + if r < int64(len(s.values)) { + s.values[int(r)] = v + } + } +} + +// Values returns a copy of the values in the sample. +func (s *UniformSample) Values() []int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + values := make([]int64, len(s.values)) + copy(values, s.values) + return values +} + +// Variance returns the variance of the values in the sample. +func (s *UniformSample) Variance() float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleVariance(s.values) +} + +// expDecaySample represents an individual sample in a heap. +type expDecaySample struct { + k float64 + v int64 +} + +func newExpDecaySampleHeap(reservoirSize int) *expDecaySampleHeap { + return &expDecaySampleHeap{make([]expDecaySample, 0, reservoirSize)} +} + +// expDecaySampleHeap is a min-heap of expDecaySamples. +// The internal implementation is copied from the standard library's container/heap +type expDecaySampleHeap struct { + s []expDecaySample +} + +func (h *expDecaySampleHeap) Clear() { + h.s = h.s[:0] +} + +func (h *expDecaySampleHeap) Push(s expDecaySample) { + n := len(h.s) + h.s = h.s[0 : n+1] + h.s[n] = s + h.up(n) +} + +func (h *expDecaySampleHeap) Pop() expDecaySample { + n := len(h.s) - 1 + h.s[0], h.s[n] = h.s[n], h.s[0] + h.down(0, n) + + n = len(h.s) + s := h.s[n-1] + h.s = h.s[0 : n-1] + return s +} + +func (h *expDecaySampleHeap) Size() int { + return len(h.s) +} + +func (h *expDecaySampleHeap) Values() []expDecaySample { + return h.s +} + +func (h *expDecaySampleHeap) up(j int) { + for { + i := (j - 1) / 2 // parent + if i == j || !(h.s[j].k < h.s[i].k) { + break + } + h.s[i], h.s[j] = h.s[j], h.s[i] + j = i + } +} + +func (h *expDecaySampleHeap) down(i, n int) { + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && !(h.s[j1].k < h.s[j2].k) { + j = j2 // = 2*i + 2 // right child + } + if !(h.s[j].k < h.s[i].k) { + break + } + h.s[i], h.s[j] = h.s[j], h.s[i] + i = j + } +} + +type int64Slice []int64 + +func (p int64Slice) Len() int { return len(p) } +func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } +func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } diff --git a/metrics/sample_test.go b/metrics/sample_test.go new file mode 100644 index 0000000000..c9168d3e82 --- /dev/null +++ b/metrics/sample_test.go @@ -0,0 +1,365 @@ +package metrics + +import ( + "math" + "math/rand" + "runtime" + "testing" + "time" +) + +// Benchmark{Compute,Copy}{1000,1000000} demonstrate that, even for relatively +// expensive computations like Variance, the cost of copying the Sample, as +// approximated by a make and copy, is much greater than the cost of the +// computation for small samples and only slightly less for large samples. +func BenchmarkCompute1000(b *testing.B) { + s := make([]int64, 1000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + SampleVariance(s) + } +} +func BenchmarkCompute1000000(b *testing.B) { + s := make([]int64, 1000000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + SampleVariance(s) + } +} +func BenchmarkCopy1000(b *testing.B) { + s := make([]int64, 1000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + sCopy := make([]int64, len(s)) + copy(sCopy, s) + } +} +func BenchmarkCopy1000000(b *testing.B) { + s := make([]int64, 1000000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + sCopy := make([]int64, len(s)) + copy(sCopy, s) + } +} + +func BenchmarkExpDecaySample257(b *testing.B) { + benchmarkSample(b, NewExpDecaySample(257, 0.015)) +} + +func BenchmarkExpDecaySample514(b *testing.B) { + benchmarkSample(b, NewExpDecaySample(514, 0.015)) +} + +func BenchmarkExpDecaySample1028(b *testing.B) { + benchmarkSample(b, NewExpDecaySample(1028, 0.015)) +} + +func BenchmarkUniformSample257(b *testing.B) { + benchmarkSample(b, NewUniformSample(257)) +} + +func BenchmarkUniformSample514(b *testing.B) { + benchmarkSample(b, NewUniformSample(514)) +} + +func BenchmarkUniformSample1028(b *testing.B) { + benchmarkSample(b, NewUniformSample(1028)) +} + +func TestExpDecaySample10(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 0; i < 10; i++ { + s.Update(int64(i)) + } + if size := s.Count(); size != 10 { + t.Errorf("s.Count(): 10 != %v\n", size) + } + if size := s.Size(); size != 10 { + t.Errorf("s.Size(): 10 != %v\n", size) + } + if l := len(s.Values()); l != 10 { + t.Errorf("len(s.Values()): 10 != %v\n", l) + } + for _, v := range s.Values() { + if v > 10 || v < 0 { + t.Errorf("out of range [0, 10): %v\n", v) + } + } +} + +func TestExpDecaySample100(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(1000, 0.01) + for i := 0; i < 100; i++ { + s.Update(int64(i)) + } + if size := s.Count(); size != 100 { + t.Errorf("s.Count(): 100 != %v\n", size) + } + if size := s.Size(); size != 100 { + t.Errorf("s.Size(): 100 != %v\n", size) + } + if l := len(s.Values()); l != 100 { + t.Errorf("len(s.Values()): 100 != %v\n", l) + } + for _, v := range s.Values() { + if v > 100 || v < 0 { + t.Errorf("out of range [0, 100): %v\n", v) + } + } +} + +func TestExpDecaySample1000(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 0; i < 1000; i++ { + s.Update(int64(i)) + } + if size := s.Count(); size != 1000 { + t.Errorf("s.Count(): 1000 != %v\n", size) + } + if size := s.Size(); size != 100 { + t.Errorf("s.Size(): 100 != %v\n", size) + } + if l := len(s.Values()); l != 100 { + t.Errorf("len(s.Values()): 100 != %v\n", l) + } + for _, v := range s.Values() { + if v > 1000 || v < 0 { + t.Errorf("out of range [0, 1000): %v\n", v) + } + } +} + +// This test makes sure that the sample's priority is not amplified by using +// nanosecond duration since start rather than second duration since start. +// The priority becomes +Inf quickly after starting if this is done, +// effectively freezing the set of samples until a rescale step happens. +func TestExpDecaySampleNanosecondRegression(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 0; i < 100; i++ { + s.Update(10) + } + time.Sleep(1 * time.Millisecond) + for i := 0; i < 100; i++ { + s.Update(20) + } + v := s.Values() + avg := float64(0) + for i := 0; i < len(v); i++ { + avg += float64(v[i]) + } + avg /= float64(len(v)) + if avg > 16 || avg < 14 { + t.Errorf("out of range [14, 16]: %v\n", avg) + } +} + +func TestExpDecaySampleRescale(t *testing.T) { + s := NewExpDecaySample(2, 0.001).(*ExpDecaySample) + s.update(time.Now(), 1) + s.update(time.Now().Add(time.Hour+time.Microsecond), 1) + for _, v := range s.values.Values() { + if v.k == 0.0 { + t.Fatal("v.k == 0.0") + } + } +} + +func TestExpDecaySampleSnapshot(t *testing.T) { + now := time.Now() + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 1; i <= 10000; i++ { + s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) + } + snapshot := s.Snapshot() + s.Update(1) + testExpDecaySampleStatistics(t, snapshot) +} + +func TestExpDecaySampleStatistics(t *testing.T) { + now := time.Now() + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 1; i <= 10000; i++ { + s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) + } + testExpDecaySampleStatistics(t, s) +} + +func TestUniformSample(t *testing.T) { + rand.Seed(1) + s := NewUniformSample(100) + for i := 0; i < 1000; i++ { + s.Update(int64(i)) + } + if size := s.Count(); size != 1000 { + t.Errorf("s.Count(): 1000 != %v\n", size) + } + if size := s.Size(); size != 100 { + t.Errorf("s.Size(): 100 != %v\n", size) + } + if l := len(s.Values()); l != 100 { + t.Errorf("len(s.Values()): 100 != %v\n", l) + } + for _, v := range s.Values() { + if v > 1000 || v < 0 { + t.Errorf("out of range [0, 100): %v\n", v) + } + } +} + +func TestUniformSampleIncludesTail(t *testing.T) { + rand.Seed(1) + s := NewUniformSample(100) + max := 100 + for i := 0; i < max; i++ { + s.Update(int64(i)) + } + v := s.Values() + sum := 0 + exp := (max - 1) * max / 2 + for i := 0; i < len(v); i++ { + sum += int(v[i]) + } + if exp != sum { + t.Errorf("sum: %v != %v\n", exp, sum) + } +} + +func TestUniformSampleSnapshot(t *testing.T) { + s := NewUniformSample(100) + for i := 1; i <= 10000; i++ { + s.Update(int64(i)) + } + snapshot := s.Snapshot() + s.Update(1) + testUniformSampleStatistics(t, snapshot) +} + +func TestUniformSampleStatistics(t *testing.T) { + rand.Seed(1) + s := NewUniformSample(100) + for i := 1; i <= 10000; i++ { + s.Update(int64(i)) + } + testUniformSampleStatistics(t, s) +} + +func benchmarkSample(b *testing.B, s Sample) { + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + pauseTotalNs := memStats.PauseTotalNs + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Update(1) + } + b.StopTimer() + runtime.GC() + runtime.ReadMemStats(&memStats) + b.Logf("GC cost: %d ns/op", int(memStats.PauseTotalNs-pauseTotalNs)/b.N) +} + +func testExpDecaySampleStatistics(t *testing.T, s Sample) { + if count := s.Count(); count != 10000 { + t.Errorf("s.Count(): 10000 != %v\n", count) + } + if min := s.Min(); min != 107 { + t.Errorf("s.Min(): 107 != %v\n", min) + } + if max := s.Max(); max != 10000 { + t.Errorf("s.Max(): 10000 != %v\n", max) + } + if mean := s.Mean(); mean != 4965.98 { + t.Errorf("s.Mean(): 4965.98 != %v\n", mean) + } + if stdDev := s.StdDev(); stdDev != 2959.825156930727 { + t.Errorf("s.StdDev(): 2959.825156930727 != %v\n", stdDev) + } + ps := s.Percentiles([]float64{0.5, 0.75, 0.99}) + if ps[0] != 4615 { + t.Errorf("median: 4615 != %v\n", ps[0]) + } + if ps[1] != 7672 { + t.Errorf("75th percentile: 7672 != %v\n", ps[1]) + } + if ps[2] != 9998.99 { + t.Errorf("99th percentile: 9998.99 != %v\n", ps[2]) + } +} + +func testUniformSampleStatistics(t *testing.T, s Sample) { + if count := s.Count(); count != 10000 { + t.Errorf("s.Count(): 10000 != %v\n", count) + } + if min := s.Min(); min != 37 { + t.Errorf("s.Min(): 37 != %v\n", min) + } + if max := s.Max(); max != 9989 { + t.Errorf("s.Max(): 9989 != %v\n", max) + } + if mean := s.Mean(); mean != 4748.14 { + t.Errorf("s.Mean(): 4748.14 != %v\n", mean) + } + if stdDev := s.StdDev(); stdDev != 2826.684117548333 { + t.Errorf("s.StdDev(): 2826.684117548333 != %v\n", stdDev) + } + ps := s.Percentiles([]float64{0.5, 0.75, 0.99}) + if ps[0] != 4599 { + t.Errorf("median: 4599 != %v\n", ps[0]) + } + if ps[1] != 7380.5 { + t.Errorf("75th percentile: 7380.5 != %v\n", ps[1]) + } + if math.Abs(9986.429999999998-ps[2]) > epsilonPercentile { + t.Errorf("99th percentile: 9986.429999999998 != %v\n", ps[2]) + } +} + +// TestUniformSampleConcurrentUpdateCount would expose data race problems with +// concurrent Update and Count calls on Sample when test is called with -race +// argument +func TestUniformSampleConcurrentUpdateCount(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + s := NewUniformSample(100) + for i := 0; i < 100; i++ { + s.Update(int64(i)) + } + quit := make(chan struct{}) + go func() { + t := time.NewTicker(10 * time.Millisecond) + defer t.Stop() + for { + select { + case <-t.C: + s.Update(rand.Int63()) + case <-quit: + t.Stop() + return + } + } + }() + for i := 0; i < 1000; i++ { + s.Count() + time.Sleep(5 * time.Millisecond) + } + quit <- struct{}{} +} diff --git a/metrics/syslog.go b/metrics/syslog.go new file mode 100644 index 0000000000..551a2bd0f0 --- /dev/null +++ b/metrics/syslog.go @@ -0,0 +1,79 @@ +//go:build !windows +// +build !windows + +package metrics + +import ( + "fmt" + "log/syslog" + "time" +) + +// Output each metric in the given registry to syslog periodically using +// the given syslogger. +func Syslog(r Registry, d time.Duration, w *syslog.Writer) { + for range time.Tick(d) { + r.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Count())) + case Gauge: + w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Value())) + case GaugeFloat64: + w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Value())) + case Healthcheck: + metric.Check() + w.Info(fmt.Sprintf("healthcheck %s: error: %v", name, metric.Error())) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + w.Info(fmt.Sprintf( + "histogram %s: count: %d min: %d max: %d mean: %.2f stddev: %.2f median: %.2f 75%%: %.2f 95%%: %.2f 99%%: %.2f 99.9%%: %.2f", + name, + h.Count(), + h.Min(), + h.Max(), + h.Mean(), + h.StdDev(), + ps[0], + ps[1], + ps[2], + ps[3], + ps[4], + )) + case Meter: + m := metric.Snapshot() + w.Info(fmt.Sprintf( + "meter %s: count: %d 1-min: %.2f 5-min: %.2f 15-min: %.2f mean: %.2f", + name, + m.Count(), + m.Rate1(), + m.Rate5(), + m.Rate15(), + m.RateMean(), + )) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + w.Info(fmt.Sprintf( + "timer %s: count: %d min: %d max: %d mean: %.2f stddev: %.2f median: %.2f 75%%: %.2f 95%%: %.2f 99%%: %.2f 99.9%%: %.2f 1-min: %.2f 5-min: %.2f 15-min: %.2f mean-rate: %.2f", + name, + t.Count(), + t.Min(), + t.Max(), + t.Mean(), + t.StdDev(), + ps[0], + ps[1], + ps[2], + ps[3], + ps[4], + t.Rate1(), + t.Rate5(), + t.Rate15(), + t.RateMean(), + )) + } + }) + } +} diff --git a/metrics/timer.go b/metrics/timer.go new file mode 100644 index 0000000000..a63c9dfb6c --- /dev/null +++ b/metrics/timer.go @@ -0,0 +1,326 @@ +package metrics + +import ( + "sync" + "time" +) + +// Timers capture the duration and rate of events. +type Timer interface { + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Rate1() float64 + Rate5() float64 + Rate15() float64 + RateMean() float64 + Snapshot() Timer + StdDev() float64 + Stop() + Sum() int64 + Time(func()) + Update(time.Duration) + UpdateSince(time.Time) + Variance() float64 +} + +// GetOrRegisterTimer returns an existing Timer or constructs and registers a +// new StandardTimer. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterTimer(name string, r Registry) Timer { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewTimer).(Timer) +} + +// NewCustomTimer constructs a new StandardTimer from a Histogram and a Meter. +// Be sure to call Stop() once the timer is of no use to allow for garbage collection. +func NewCustomTimer(h Histogram, m Meter) Timer { + if !Enabled { + return NilTimer{} + } + return &StandardTimer{ + histogram: h, + meter: m, + } +} + +// NewRegisteredTimer constructs and registers a new StandardTimer. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredTimer(name string, r Registry) Timer { + c := NewTimer() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewTimer constructs a new StandardTimer using an exponentially-decaying +// sample with the same reservoir size and alpha as UNIX load averages. +// Be sure to call Stop() once the timer is of no use to allow for garbage collection. +func NewTimer() Timer { + if !Enabled { + return NilTimer{} + } + return &StandardTimer{ + histogram: NewHistogram(NewExpDecaySample(1028, 0.015)), + meter: NewMeter(), + } +} + +// NilTimer is a no-op Timer. +type NilTimer struct{} + +// Count is a no-op. +func (NilTimer) Count() int64 { return 0 } + +// Max is a no-op. +func (NilTimer) Max() int64 { return 0 } + +// Mean is a no-op. +func (NilTimer) Mean() float64 { return 0.0 } + +// Min is a no-op. +func (NilTimer) Min() int64 { return 0 } + +// Percentile is a no-op. +func (NilTimer) Percentile(p float64) float64 { return 0.0 } + +// Percentiles is a no-op. +func (NilTimer) Percentiles(ps []float64) []float64 { + return make([]float64, len(ps)) +} + +// Rate1 is a no-op. +func (NilTimer) Rate1() float64 { return 0.0 } + +// Rate5 is a no-op. +func (NilTimer) Rate5() float64 { return 0.0 } + +// Rate15 is a no-op. +func (NilTimer) Rate15() float64 { return 0.0 } + +// RateMean is a no-op. +func (NilTimer) RateMean() float64 { return 0.0 } + +// Snapshot is a no-op. +func (NilTimer) Snapshot() Timer { return NilTimer{} } + +// StdDev is a no-op. +func (NilTimer) StdDev() float64 { return 0.0 } + +// Stop is a no-op. +func (NilTimer) Stop() {} + +// Sum is a no-op. +func (NilTimer) Sum() int64 { return 0 } + +// Time is a no-op. +func (NilTimer) Time(func()) {} + +// Update is a no-op. +func (NilTimer) Update(time.Duration) {} + +// UpdateSince is a no-op. +func (NilTimer) UpdateSince(time.Time) {} + +// Variance is a no-op. +func (NilTimer) Variance() float64 { return 0.0 } + +// StandardTimer is the standard implementation of a Timer and uses a Histogram +// and Meter. +type StandardTimer struct { + histogram Histogram + meter Meter + mutex sync.Mutex +} + +// Count returns the number of events recorded. +func (t *StandardTimer) Count() int64 { + return t.histogram.Count() +} + +// Max returns the maximum value in the sample. +func (t *StandardTimer) Max() int64 { + return t.histogram.Max() +} + +// Mean returns the mean of the values in the sample. +func (t *StandardTimer) Mean() float64 { + return t.histogram.Mean() +} + +// Min returns the minimum value in the sample. +func (t *StandardTimer) Min() int64 { + return t.histogram.Min() +} + +// Percentile returns an arbitrary percentile of the values in the sample. +func (t *StandardTimer) Percentile(p float64) float64 { + return t.histogram.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of the values in the +// sample. +func (t *StandardTimer) Percentiles(ps []float64) []float64 { + return t.histogram.Percentiles(ps) +} + +// Rate1 returns the one-minute moving average rate of events per second. +func (t *StandardTimer) Rate1() float64 { + return t.meter.Rate1() +} + +// Rate5 returns the five-minute moving average rate of events per second. +func (t *StandardTimer) Rate5() float64 { + return t.meter.Rate5() +} + +// Rate15 returns the fifteen-minute moving average rate of events per second. +func (t *StandardTimer) Rate15() float64 { + return t.meter.Rate15() +} + +// RateMean returns the meter's mean rate of events per second. +func (t *StandardTimer) RateMean() float64 { + return t.meter.RateMean() +} + +// Snapshot returns a read-only copy of the timer. +func (t *StandardTimer) Snapshot() Timer { + t.mutex.Lock() + defer t.mutex.Unlock() + return &TimerSnapshot{ + histogram: t.histogram.Snapshot().(*HistogramSnapshot), + meter: t.meter.Snapshot().(*MeterSnapshot), + } +} + +// StdDev returns the standard deviation of the values in the sample. +func (t *StandardTimer) StdDev() float64 { + return t.histogram.StdDev() +} + +// Stop stops the meter. +func (t *StandardTimer) Stop() { + t.meter.Stop() +} + +// Sum returns the sum in the sample. +func (t *StandardTimer) Sum() int64 { + return t.histogram.Sum() +} + +// Record the duration of the execution of the given function. +func (t *StandardTimer) Time(f func()) { + ts := time.Now() + f() + t.Update(time.Since(ts)) +} + +// Record the duration of an event. +func (t *StandardTimer) Update(d time.Duration) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.histogram.Update(int64(d)) + t.meter.Mark(1) +} + +// Record the duration of an event that started at a time and ends now. +func (t *StandardTimer) UpdateSince(ts time.Time) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.histogram.Update(int64(time.Since(ts))) + t.meter.Mark(1) +} + +// Variance returns the variance of the values in the sample. +func (t *StandardTimer) Variance() float64 { + return t.histogram.Variance() +} + +// TimerSnapshot is a read-only copy of another Timer. +type TimerSnapshot struct { + histogram *HistogramSnapshot + meter *MeterSnapshot +} + +// Count returns the number of events recorded at the time the snapshot was +// taken. +func (t *TimerSnapshot) Count() int64 { return t.histogram.Count() } + +// Max returns the maximum value at the time the snapshot was taken. +func (t *TimerSnapshot) Max() int64 { return t.histogram.Max() } + +// Mean returns the mean value at the time the snapshot was taken. +func (t *TimerSnapshot) Mean() float64 { return t.histogram.Mean() } + +// Min returns the minimum value at the time the snapshot was taken. +func (t *TimerSnapshot) Min() int64 { return t.histogram.Min() } + +// Percentile returns an arbitrary percentile of sampled values at the time the +// snapshot was taken. +func (t *TimerSnapshot) Percentile(p float64) float64 { + return t.histogram.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of sampled values at +// the time the snapshot was taken. +func (t *TimerSnapshot) Percentiles(ps []float64) []float64 { + return t.histogram.Percentiles(ps) +} + +// Rate1 returns the one-minute moving average rate of events per second at the +// time the snapshot was taken. +func (t *TimerSnapshot) Rate1() float64 { return t.meter.Rate1() } + +// Rate5 returns the five-minute moving average rate of events per second at +// the time the snapshot was taken. +func (t *TimerSnapshot) Rate5() float64 { return t.meter.Rate5() } + +// Rate15 returns the fifteen-minute moving average rate of events per second +// at the time the snapshot was taken. +func (t *TimerSnapshot) Rate15() float64 { return t.meter.Rate15() } + +// RateMean returns the meter's mean rate of events per second at the time the +// snapshot was taken. +func (t *TimerSnapshot) RateMean() float64 { return t.meter.RateMean() } + +// Snapshot returns the snapshot. +func (t *TimerSnapshot) Snapshot() Timer { return t } + +// StdDev returns the standard deviation of the values at the time the snapshot +// was taken. +func (t *TimerSnapshot) StdDev() float64 { return t.histogram.StdDev() } + +// Stop is a no-op. +func (t *TimerSnapshot) Stop() {} + +// Sum returns the sum at the time the snapshot was taken. +func (t *TimerSnapshot) Sum() int64 { return t.histogram.Sum() } + +// Time panics. +func (*TimerSnapshot) Time(func()) { + panic("Time called on a TimerSnapshot") +} + +// Update panics. +func (*TimerSnapshot) Update(time.Duration) { + panic("Update called on a TimerSnapshot") +} + +// UpdateSince panics. +func (*TimerSnapshot) UpdateSince(time.Time) { + panic("UpdateSince called on a TimerSnapshot") +} + +// Variance returns the variance of the values at the time the snapshot was +// taken. +func (t *TimerSnapshot) Variance() float64 { return t.histogram.Variance() } diff --git a/metrics/timer_test.go b/metrics/timer_test.go new file mode 100644 index 0000000000..903e8e8d49 --- /dev/null +++ b/metrics/timer_test.go @@ -0,0 +1,114 @@ +package metrics + +import ( + "fmt" + "math" + "testing" + "time" +) + +func BenchmarkTimer(b *testing.B) { + tm := NewTimer() + b.ResetTimer() + for i := 0; i < b.N; i++ { + tm.Update(1) + } +} + +func TestGetOrRegisterTimer(t *testing.T) { + r := NewRegistry() + NewRegisteredTimer("foo", r).Update(47) + if tm := GetOrRegisterTimer("foo", r); tm.Count() != 1 { + t.Fatal(tm) + } +} + +func TestTimerExtremes(t *testing.T) { + tm := NewTimer() + tm.Update(math.MaxInt64) + tm.Update(0) + if stdDev := tm.StdDev(); stdDev != 4.611686018427388e+18 { + t.Errorf("tm.StdDev(): 4.611686018427388e+18 != %v\n", stdDev) + } +} + +func TestTimerStop(t *testing.T) { + l := len(arbiter.meters) + tm := NewTimer() + if l+1 != len(arbiter.meters) { + t.Errorf("arbiter.meters: %d != %d\n", l+1, len(arbiter.meters)) + } + tm.Stop() + if l != len(arbiter.meters) { + t.Errorf("arbiter.meters: %d != %d\n", l, len(arbiter.meters)) + } +} + +func TestTimerFunc(t *testing.T) { + var ( + tm = NewTimer() + testStart = time.Now() + actualTime time.Duration + ) + tm.Time(func() { + time.Sleep(50 * time.Millisecond) + actualTime = time.Since(testStart) + }) + var ( + drift = time.Millisecond * 2 + measured = time.Duration(tm.Max()) + ceil = actualTime + drift + floor = actualTime - drift + ) + if measured > ceil || measured < floor { + t.Errorf("tm.Max(): %v > %v || %v > %v\n", measured, ceil, measured, floor) + } +} + +func TestTimerZero(t *testing.T) { + tm := NewTimer() + if count := tm.Count(); count != 0 { + t.Errorf("tm.Count(): 0 != %v\n", count) + } + if min := tm.Min(); min != 0 { + t.Errorf("tm.Min(): 0 != %v\n", min) + } + if max := tm.Max(); max != 0 { + t.Errorf("tm.Max(): 0 != %v\n", max) + } + if mean := tm.Mean(); mean != 0.0 { + t.Errorf("tm.Mean(): 0.0 != %v\n", mean) + } + if stdDev := tm.StdDev(); stdDev != 0.0 { + t.Errorf("tm.StdDev(): 0.0 != %v\n", stdDev) + } + ps := tm.Percentiles([]float64{0.5, 0.75, 0.99}) + if ps[0] != 0.0 { + t.Errorf("median: 0.0 != %v\n", ps[0]) + } + if ps[1] != 0.0 { + t.Errorf("75th percentile: 0.0 != %v\n", ps[1]) + } + if ps[2] != 0.0 { + t.Errorf("99th percentile: 0.0 != %v\n", ps[2]) + } + if rate1 := tm.Rate1(); rate1 != 0.0 { + t.Errorf("tm.Rate1(): 0.0 != %v\n", rate1) + } + if rate5 := tm.Rate5(); rate5 != 0.0 { + t.Errorf("tm.Rate5(): 0.0 != %v\n", rate5) + } + if rate15 := tm.Rate15(); rate15 != 0.0 { + t.Errorf("tm.Rate15(): 0.0 != %v\n", rate15) + } + if rateMean := tm.RateMean(); rateMean != 0.0 { + t.Errorf("tm.RateMean(): 0.0 != %v\n", rateMean) + } +} + +func ExampleGetOrRegisterTimer() { + m := "account.create.latency" + t := GetOrRegisterTimer(m, nil) + t.Update(47) + fmt.Println(t.Max()) // Output: 47 +} diff --git a/metrics/validate.sh b/metrics/validate.sh new file mode 100755 index 0000000000..c4ae91e642 --- /dev/null +++ b/metrics/validate.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +# check there are no formatting issues +GOFMT_LINES=`gofmt -l . | wc -l | xargs` +test $GOFMT_LINES -eq 0 || echo "gofmt needs to be run, ${GOFMT_LINES} files have issues" + +# run the tests for the root package +go test -race . diff --git a/metrics/writer.go b/metrics/writer.go new file mode 100644 index 0000000000..88521a80d9 --- /dev/null +++ b/metrics/writer.go @@ -0,0 +1,100 @@ +package metrics + +import ( + "fmt" + "io" + "sort" + "time" +) + +// Write sorts writes each metric in the given registry periodically to the +// given io.Writer. +func Write(r Registry, d time.Duration, w io.Writer) { + for range time.Tick(d) { + WriteOnce(r, w) + } +} + +// WriteOnce sorts and writes metrics in the given registry to the given +// io.Writer. +func WriteOnce(r Registry, w io.Writer) { + var namedMetrics namedMetricSlice + r.Each(func(name string, i interface{}) { + namedMetrics = append(namedMetrics, namedMetric{name, i}) + }) + + sort.Sort(namedMetrics) + for _, namedMetric := range namedMetrics { + switch metric := namedMetric.m.(type) { + case Counter: + fmt.Fprintf(w, "counter %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", metric.Count()) + case Gauge: + fmt.Fprintf(w, "gauge %s\n", namedMetric.name) + fmt.Fprintf(w, " value: %9d\n", metric.Value()) + case GaugeFloat64: + fmt.Fprintf(w, "gauge %s\n", namedMetric.name) + fmt.Fprintf(w, " value: %f\n", metric.Value()) + case Healthcheck: + metric.Check() + fmt.Fprintf(w, "healthcheck %s\n", namedMetric.name) + fmt.Fprintf(w, " error: %v\n", metric.Error()) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "histogram %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", h.Count()) + fmt.Fprintf(w, " min: %9d\n", h.Min()) + fmt.Fprintf(w, " max: %9d\n", h.Max()) + fmt.Fprintf(w, " mean: %12.2f\n", h.Mean()) + fmt.Fprintf(w, " stddev: %12.2f\n", h.StdDev()) + fmt.Fprintf(w, " median: %12.2f\n", ps[0]) + fmt.Fprintf(w, " 75%%: %12.2f\n", ps[1]) + fmt.Fprintf(w, " 95%%: %12.2f\n", ps[2]) + fmt.Fprintf(w, " 99%%: %12.2f\n", ps[3]) + fmt.Fprintf(w, " 99.9%%: %12.2f\n", ps[4]) + case Meter: + m := metric.Snapshot() + fmt.Fprintf(w, "meter %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", m.Count()) + fmt.Fprintf(w, " 1-min rate: %12.2f\n", m.Rate1()) + fmt.Fprintf(w, " 5-min rate: %12.2f\n", m.Rate5()) + fmt.Fprintf(w, " 15-min rate: %12.2f\n", m.Rate15()) + fmt.Fprintf(w, " mean rate: %12.2f\n", m.RateMean()) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "timer %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", t.Count()) + fmt.Fprintf(w, " min: %9d\n", t.Min()) + fmt.Fprintf(w, " max: %9d\n", t.Max()) + fmt.Fprintf(w, " mean: %12.2f\n", t.Mean()) + fmt.Fprintf(w, " stddev: %12.2f\n", t.StdDev()) + fmt.Fprintf(w, " median: %12.2f\n", ps[0]) + fmt.Fprintf(w, " 75%%: %12.2f\n", ps[1]) + fmt.Fprintf(w, " 95%%: %12.2f\n", ps[2]) + fmt.Fprintf(w, " 99%%: %12.2f\n", ps[3]) + fmt.Fprintf(w, " 99.9%%: %12.2f\n", ps[4]) + fmt.Fprintf(w, " 1-min rate: %12.2f\n", t.Rate1()) + fmt.Fprintf(w, " 5-min rate: %12.2f\n", t.Rate5()) + fmt.Fprintf(w, " 15-min rate: %12.2f\n", t.Rate15()) + fmt.Fprintf(w, " mean rate: %12.2f\n", t.RateMean()) + } + } +} + +type namedMetric struct { + name string + m interface{} +} + +// namedMetricSlice is a slice of namedMetrics that implements sort.Interface. +type namedMetricSlice []namedMetric + +func (nms namedMetricSlice) Len() int { return len(nms) } + +func (nms namedMetricSlice) Swap(i, j int) { nms[i], nms[j] = nms[j], nms[i] } + +func (nms namedMetricSlice) Less(i, j int) bool { + return nms[i].name < nms[j].name +} diff --git a/metrics/writer_test.go b/metrics/writer_test.go new file mode 100644 index 0000000000..1aacc28712 --- /dev/null +++ b/metrics/writer_test.go @@ -0,0 +1,22 @@ +package metrics + +import ( + "sort" + "testing" +) + +func TestMetricsSorting(t *testing.T) { + var namedMetrics = namedMetricSlice{ + {name: "zzz"}, + {name: "bbb"}, + {name: "fff"}, + {name: "ggg"}, + } + + sort.Sort(namedMetrics) + for i, name := range []string{"bbb", "fff", "ggg", "zzz"} { + if namedMetrics[i].name != name { + t.Fail() + } + } +} diff --git a/peer/network.go b/peer/network.go index 731219b4e7..26dc332b45 100644 --- a/peer/network.go +++ b/peer/network.go @@ -86,6 +86,7 @@ func NewNetwork(appSender common.AppSender, codec codec.Manager, self ids.NodeID outstandingResponseHandlerMap: make(map[uint32]message.ResponseHandler), peers: make(map[ids.NodeID]version.Application), activeRequests: semaphore.NewWeighted(maxActiveRequests), + gossipHandler: message.NoopMempoolGossipHandler{}, } } diff --git a/plugin/evm/config.go b/plugin/evm/config.go index 9fb10f2bbd..c7fc731ea6 100644 --- a/plugin/evm/config.go +++ b/plugin/evm/config.go @@ -83,7 +83,7 @@ type Config struct { PopulateMissingTriesParallelism int `json:"populate-missing-tries-parallelism"` // Number of concurrent readers to use when re-populating missing tries on startup. // Metric Settings - MetricsExpensiveEnabled bool `json:"metrics-expensive-enabled"` + MetricsExpensiveEnabled bool `json:"metrics-expensive-enabled"` // Debug-level metrics that might impact runtime performance // API Settings LocalTxsEnabled bool `json:"local-txs-enabled"` diff --git a/plugin/evm/gossiper.go b/plugin/evm/gossiper.go index c983d22dcd..9d96a82398 100644 --- a/plugin/evm/gossiper.go +++ b/plugin/evm/gossiper.go @@ -68,9 +68,12 @@ type pushGossiper struct { signer types.Signer } -// newPushGossiper constructs and returns a pushGossiper -// assumes vm.chainConfig.SubnetEVMTimestamp is set -func (vm *VM) newPushGossiper() Gossiper { +// createGossipper constructs and returns a pushGossiper or noopGossiper +// based on whether vm.chainConfig.SubnetEVMTimestamp is set +func (vm *VM) createGossipper() Gossiper { + if vm.chainConfig.SubnetEVMTimestamp == nil { + return &noopGossiper{} + } net := &pushGossiper{ ctx: vm.ctx, gossipActivationTime: time.Unix(vm.chainConfig.SubnetEVMTimestamp.Int64(), 0), diff --git a/plugin/evm/vm.go b/plugin/evm/vm.go index b29f13f444..b60a2b01aa 100644 --- a/plugin/evm/vm.go +++ b/plugin/evm/vm.go @@ -14,17 +14,22 @@ import ( "sync" "time" + avalanchegoMetrics "github.com/ava-labs/avalanchego/api/metrics" + subnetEVM "github.com/ava-labs/subnet-evm/chain" "github.com/ava-labs/subnet-evm/constants" "github.com/ava-labs/subnet-evm/core" "github.com/ava-labs/subnet-evm/core/types" "github.com/ava-labs/subnet-evm/eth/ethconfig" - "github.com/ava-labs/subnet-evm/metrics/prometheus" + "github.com/ava-labs/subnet-evm/metrics" + subnetEVMPrometheus "github.com/ava-labs/subnet-evm/metrics/prometheus" "github.com/ava-labs/subnet-evm/node" "github.com/ava-labs/subnet-evm/params" "github.com/ava-labs/subnet-evm/peer" "github.com/ava-labs/subnet-evm/plugin/evm/message" + "github.com/prometheus/client_golang/prometheus" + // Force-load tracer engine to trigger registration // // We must import this package (not referenced elsewhere) so that the native "callTracer" @@ -34,7 +39,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" avalancheRPC "github.com/gorilla/rpc/v2" @@ -73,6 +77,10 @@ const ( decidedCacheSize = 100 missingCacheSize = 50 unverifiedCacheSize = 50 + + // Prefixes for metrics gatherers + ethMetricsPrefix = "eth" + chainStateMetricsPrefix = "chain_state" ) // Define the API endpoints for the VM @@ -127,6 +135,7 @@ type VM struct { genesisHash common.Hash chain *subnetEVM.ETHChain chainConfig *params.ChainConfig + // [db] is the VM's current database managed by ChainState db *versiondb.Database // [chaindb] is the database supplied to the Ethereum backend @@ -135,6 +144,8 @@ type VM struct { // block. acceptedBlockDB database.Database + toEngine chan<- commonEng.Message + builder *blockBuilder gossiper Gossiper @@ -151,6 +162,9 @@ type VM struct { client peer.Client networkCodec codec.Manager + // Metrics + multiGatherer avalanchegoMetrics.MultiGatherer + bootstrapped bool } @@ -220,12 +234,13 @@ func (vm *VM) Initialize( return err } - vm.ctx = ctx // Set log level logLevel, err := log.LvlFromString(vm.config.LogLevel) if err != nil { return fmt.Errorf("failed to initialize logger due to: %w ", err) } + + vm.ctx = ctx vm.setLogLevel(logLevel) if b, err := json.Marshal(vm.config); err == nil { log.Info("Initializing Subnet EVM VM", "Version", Version, "Config", string(b)) @@ -238,8 +253,10 @@ func (vm *VM) Initialize( return errUnsupportedFXs } + // Enable debug-level metrics that might impact runtime performance metrics.EnabledExpensive = vm.config.MetricsExpensiveEnabled + vm.toEngine = toEngine vm.shutdownChan = make(chan struct{}, 1) baseDB := dbManager.Current().Database // Use NewNested rather than New so that the structure of the database @@ -247,7 +264,6 @@ func (vm *VM) Initialize( vm.chaindb = Database{prefixdb.NewNested(ethDBPrefix, baseDB)} vm.db = versiondb.New(baseDB) vm.acceptedBlockDB = prefixdb.New(acceptedPrefix, vm.db) - g := new(core.Genesis) if err := json.Unmarshal(genesisBytes, g); err != nil { return err @@ -262,9 +278,9 @@ func (vm *VM) Initialize( } ethConfig := ethconfig.NewDefaultConfig() - // change network ID - ethConfig.NetworkId = g.Config.ChainID.Uint64() ethConfig.Genesis = g + ethConfig.NetworkId = g.Config.ChainID.Uint64() + // Set minimum price for mining and default gas price oracle value to the min // gas price to prevent so transactions and blocks all use the correct fees ethConfig.RPCGasCap = vm.config.RPCGasCap @@ -308,20 +324,56 @@ func (vm *VM) Initialize( log.Warn("Chain enabled `AllowFeeRecipients`, but chain config has not specified any coinbase address. Defaulting to the blackhole address.") } - // Handle offline pruning - ethConfig.OfflinePruning = vm.config.OfflinePruning - ethConfig.OfflinePruningBloomFilterSize = vm.config.OfflinePruningBloomFilterSize - ethConfig.OfflinePruningDataDirectory = vm.config.OfflinePruningDataDirectory - if len(ethConfig.OfflinePruningDataDirectory) != 0 { - if err := os.MkdirAll(ethConfig.OfflinePruningDataDirectory, perms.ReadWriteExecute); err != nil { - log.Error("failed to create offline pruning data directory", "error", err) - return err - } - } + vm.genesisHash = ethConfig.Genesis.ToBlock(nil).Hash() vm.chainConfig = g.Config vm.networkID = ethConfig.NetworkId + lastAcceptedHash, err := vm.readLastAccepted() + if err != nil { + return err + } + log.Info("reading accepted block db", "lastAcceptedHash", lastAcceptedHash) + + if err := vm.initializeMetrics(); err != nil { + return err + } + + vm.networkCodec, err = message.BuildCodec() + if err != nil { + return err + } + + // initialize peer network + vm.Network = peer.NewNetwork(appSender, vm.networkCodec, ctx.NodeID, vm.config.MaxOutboundActiveRequests) + vm.client = peer.NewClient(vm.Network) + + if err := vm.initializeChain(lastAcceptedHash, ethConfig); err != nil { + return err + } + + go vm.ctx.Log.RecoverAndPanic(vm.startContinuousProfiler) + + return nil +} + +func (vm *VM) initializeMetrics() error { + vm.multiGatherer = avalanchegoMetrics.NewMultiGatherer() + // If metrics are enabled, register the default metrics regitry + if metrics.Enabled { + gatherer := subnetEVMPrometheus.Gatherer(metrics.DefaultRegistry) + if err := vm.multiGatherer.Register(ethMetricsPrefix, gatherer); err != nil { + return err + } + // Register [multiGatherer] after registerers have been registered to it + if err := vm.ctx.Metrics.Register(vm.multiGatherer); err != nil { + return err + } + } + return nil +} + +func (vm *VM) initializeChain(lastAcceptedHash common.Hash, ethConfig ethconfig.Config) error { nodecfg := node.Config{ SubnetEVMVersion: Version, KeyStoreDir: vm.config.KeystoreDirectory, @@ -329,89 +381,58 @@ func (vm *VM) Initialize( InsecureUnlockAllowed: vm.config.KeystoreInsecureUnlockAllowed, } - // Attempt to load last accepted block to determine if it is necessary to - // initialize state with the genesis block. - lastAcceptedBytes, lastAcceptedErr := vm.acceptedBlockDB.Get(lastAcceptedKey) - var lastAcceptedHash common.Hash - switch { - case lastAcceptedErr == database.ErrNotFound: - // Set [lastAcceptedHash] to the genesis block hash. - lastAcceptedHash = ethConfig.Genesis.ToBlock(nil).Hash() - case lastAcceptedErr != nil: - return fmt.Errorf("failed to get last accepted block ID due to: %w", lastAcceptedErr) - case len(lastAcceptedBytes) != common.HashLength: - return fmt.Errorf("last accepted bytes should have been length %d, but found %d", common.HashLength, len(lastAcceptedBytes)) - default: - lastAcceptedHash = common.BytesToHash(lastAcceptedBytes) - } ethChain, err := subnetEVM.NewETHChain(ðConfig, &nodecfg, vm.chaindb, vm.config.EthBackendSettings(), lastAcceptedHash, &vm.clock) if err != nil { return err } vm.chain = ethChain - lastAccepted := vm.chain.LastAcceptedBlock() // start goroutines to update the tx pool gas minimum gas price when upgrades go into effect vm.handleGasPriceUpdates() - vm.networkCodec, err = message.BuildCodec() - if err != nil { - return err - } - - // initialize peer network - vm.Network = peer.NewNetwork(appSender, vm.networkCodec, ctx.NodeID, vm.config.MaxOutboundActiveRequests) - vm.client = peer.NewClient(vm.Network) - vm.initGossipHandling() - // start goroutines to manage block building // - // NOTE: gossip network must be initialized first otherwie ETH tx gossip will + // NOTE: gossip network must be initialized first otherwise ETH tx gossip will // not work. - vm.builder = vm.NewBlockBuilder(toEngine) + vm.gossiper = vm.createGossipper() + vm.builder = vm.NewBlockBuilder(vm.toEngine) + vm.builder.awaitSubmittedTxs() vm.chain.Start() + return vm.initChainState(vm.chain.LastAcceptedBlock()) +} - vm.genesisHash = vm.chain.GetGenesisBlock().Hash() - log.Info(fmt.Sprintf("lastAccepted = %s", lastAccepted.Hash().Hex())) - - vm.State = chain.NewState(&chain.Config{ +func (vm *VM) initChainState(lastAcceptedBlock *types.Block) error { + config := &chain.Config{ DecidedCacheSize: decidedCacheSize, MissingCacheSize: missingCacheSize, UnverifiedCacheSize: unverifiedCacheSize, + GetBlockIDAtHeight: vm.GetBlockIDAtHeight, + GetBlock: vm.getBlock, + UnmarshalBlock: vm.parseBlock, + BuildBlock: vm.buildBlock, LastAcceptedBlock: &Block{ - id: ids.ID(lastAccepted.Hash()), - ethBlock: lastAccepted, + id: ids.ID(lastAcceptedBlock.Hash()), + ethBlock: lastAcceptedBlock, vm: vm, status: choices.Accepted, }, - GetBlockIDAtHeight: vm.GetBlockIDAtHeight, - GetBlock: vm.getBlock, - UnmarshalBlock: vm.parseBlock, - BuildBlock: vm.buildBlock, - }) - - vm.builder.awaitSubmittedTxs() - go vm.ctx.Log.RecoverAndPanic(vm.startContinuousProfiler) + } - // Only provide metrics if they are being populated. - if metrics.Enabled { - gatherer := prometheus.Gatherer(metrics.DefaultRegistry) - if err := ctx.Metrics.Register(gatherer); err != nil { - return err - } + // Register chain state metrics + chainStateRegisterer := prometheus.NewRegistry() + state, err := chain.NewMeteredState(chainStateRegisterer, config) + if err != nil { + return fmt.Errorf("could not create metered state: %w", err) } + vm.State = state - return nil + return vm.multiGatherer.Register(chainStateMetricsPrefix, chainStateRegisterer) } func (vm *VM) initGossipHandling() { if vm.chainConfig.SubnetEVMTimestamp != nil { - vm.gossiper = vm.newPushGossiper() vm.Network.SetGossipHandler(NewGossipHandler(vm)) - } else { - vm.gossiper = &noopGossiper{} - vm.Network.SetGossipHandler(message.NoopMempoolGossipHandler{}) } } @@ -421,6 +442,7 @@ func (vm *VM) SetState(state snow.State) error { vm.bootstrapped = false return nil case snow.NormalOp: + vm.initGossipHandling() vm.bootstrapped = true return nil default: @@ -693,3 +715,25 @@ func (vm *VM) startContinuousProfiler() { // Wait for shutdownChan to be closed <-vm.shutdownChan } + +// readLastAccepted reads the last accepted hash from [acceptedBlockDB] and returns the +// last accepted block hash and height by reading directly from [vm.chaindb] instead of relying +// on [chain]. +// Note: assumes chaindb, ethConfig, and genesisHash have been initialized. +func (vm *VM) readLastAccepted() (common.Hash, error) { + // Attempt to load last accepted block to determine if it is necessary to + // initialize state with the genesis block. + lastAcceptedBytes, lastAcceptedErr := vm.acceptedBlockDB.Get(lastAcceptedKey) + switch { + case lastAcceptedErr == database.ErrNotFound: + // If there is nothing in the database, return the genesis block hash and height + return vm.genesisHash, nil + case lastAcceptedErr != nil: + return common.Hash{}, fmt.Errorf("failed to get last accepted block ID due to: %w", lastAcceptedErr) + case len(lastAcceptedBytes) != common.HashLength: + return common.Hash{}, fmt.Errorf("last accepted bytes should have been length %d, but found %d", common.HashLength, len(lastAcceptedBytes)) + default: + lastAcceptedHash := common.BytesToHash(lastAcceptedBytes) + return lastAcceptedHash, nil + } +} diff --git a/rpc/handler.go b/rpc/handler.go index 25285a84c0..78f8a43364 100644 --- a/rpc/handler.go +++ b/rpc/handler.go @@ -35,8 +35,8 @@ import ( "sync" "time" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" "golang.org/x/time/rate" ) diff --git a/rpc/metrics.go b/rpc/metrics.go index 2e2ad81560..fec45e9dbf 100644 --- a/rpc/metrics.go +++ b/rpc/metrics.go @@ -29,7 +29,7 @@ package rpc import ( "fmt" - "github.com/ethereum/go-ethereum/metrics" + "github.com/ava-labs/subnet-evm/metrics" ) var ( diff --git a/statesync/handlers/stats/stats.go b/statesync/handlers/stats/stats.go index bb94453421..2810634e66 100644 --- a/statesync/handlers/stats/stats.go +++ b/statesync/handlers/stats/stats.go @@ -6,7 +6,7 @@ package stats import ( "time" - "github.com/ethereum/go-ethereum/metrics" + "github.com/ava-labs/subnet-evm/metrics" ) // HandlerStats reports prometheus metrics for the state sync handlers diff --git a/trie/database.go b/trie/database.go index e956efbddb..c3cdd1ea16 100644 --- a/trie/database.go +++ b/trie/database.go @@ -38,9 +38,9 @@ import ( "github.com/VictoriaMetrics/fastcache" "github.com/ava-labs/subnet-evm/core/rawdb" "github.com/ava-labs/subnet-evm/ethdb" + "github.com/ava-labs/subnet-evm/metrics" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" )