From 5e7789fd8909aa826a0d598907741df39a3d846e Mon Sep 17 00:00:00 2001 From: Matt Johnstone Date: Thu, 3 Oct 2024 11:24:23 +0200 Subject: [PATCH 1/2] added configurable address list to track solana_leader_slots_by_epoch for --- cmd/solana_exporter/exporter.go | 52 ++++++++++++++++++---------- cmd/solana_exporter/exporter_test.go | 4 +-- cmd/solana_exporter/slots.go | 19 +++++----- cmd/solana_exporter/slots_test.go | 8 ++--- 4 files changed, 49 insertions(+), 34 deletions(-) diff --git a/cmd/solana_exporter/exporter.go b/cmd/solana_exporter/exporter.go index 7173eb0..1c74b71 100644 --- a/cmd/solana_exporter/exporter.go +++ b/cmd/solana_exporter/exporter.go @@ -14,12 +14,17 @@ import ( ) var ( - httpTimeout = 60 * time.Second - rpcAddr = flag.String("rpcURI", "", "Solana RPC URI (including protocol and path)") - addr = flag.String("addr", ":8080", "Listen address") - votePubkey = flag.String("votepubkey", "", "Validator vote address (will only return results of this address)") - httpTimeoutSecs = flag.Int("http_timeout", 60, "HTTP timeout in seconds") - balanceAddresses = flag.String("balance-addresses", "", "Comma-separated list of addresses to monitor balances") + httpTimeout = 60 * time.Second + rpcAddr = flag.String("rpcURI", "", "Solana RPC URI (including protocol and path)") + addr = flag.String("addr", ":8080", "Listen address") + votePubkey = flag.String("votepubkey", "", "Validator vote address (will only return results of this address)") + httpTimeoutSecs = flag.Int("http_timeout", 60, "HTTP timeout in seconds") + balanceAddresses = flag.String("balance-addresses", "", "Comma-separated list of addresses to monitor balances") + leaderSlotAddresses = flag.String( + "leader-slot-addresses", + "", + "Comma-separated list of addresses to monitor leader slots by epoch for, leave nil to track by epoch for all validators.", + ) ) func init() { @@ -27,9 +32,10 @@ func init() { } type solanaCollector struct { - rpcClient rpc.Provider - slotPace time.Duration - balanceAddresses []string + rpcClient rpc.Provider + slotPace time.Duration + balanceAddresses []string + leaderSlotAddresses []string totalValidatorsDesc *prometheus.Desc validatorActivatedStake *prometheus.Desc @@ -40,11 +46,14 @@ type solanaCollector struct { balances *prometheus.Desc } -func createSolanaCollector(provider rpc.Provider, slotPace time.Duration, balanceAddresses []string) *solanaCollector { +func createSolanaCollector( + provider rpc.Provider, slotPace time.Duration, balanceAddresses []string, leaderSlotAddresses []string, +) *solanaCollector { return &solanaCollector{ - rpcClient: provider, - slotPace: slotPace, - balanceAddresses: balanceAddresses, + rpcClient: provider, + slotPace: slotPace, + balanceAddresses: balanceAddresses, + leaderSlotAddresses: leaderSlotAddresses, totalValidatorsDesc: prometheus.NewDesc( "solana_active_validators", "Total number of active validators by state", @@ -90,8 +99,8 @@ func createSolanaCollector(provider rpc.Provider, slotPace time.Duration, balanc } } -func NewSolanaCollector(rpcAddr string, balanceAddresses []string) *solanaCollector { - return createSolanaCollector(rpc.NewRPCClient(rpcAddr), slotPacerSchedule, balanceAddresses) +func NewSolanaCollector(rpcAddr string, balanceAddresses []string, leaderSlotAddresses []string) *solanaCollector { + return createSolanaCollector(rpc.NewRPCClient(rpcAddr), slotPacerSchedule, balanceAddresses, leaderSlotAddresses) } func (c *solanaCollector) Describe(ch chan<- *prometheus.Desc) { @@ -216,11 +225,18 @@ func main() { httpTimeout = time.Duration(*httpTimeoutSecs) * time.Second - var monitoredAddresses []string + var ( + balAddresses []string + lsAddresses []string + ) if *balanceAddresses != "" { - monitoredAddresses = strings.Split(*balanceAddresses, ",") + balAddresses = strings.Split(*balanceAddresses, ",") } - collector := NewSolanaCollector(*rpcAddr, monitoredAddresses) + if *leaderSlotAddresses != "" { + lsAddresses = strings.Split(*leaderSlotAddresses, ",") + } + + collector := NewSolanaCollector(*rpcAddr, balAddresses, lsAddresses) go collector.WatchSlots(context.Background()) diff --git a/cmd/solana_exporter/exporter_test.go b/cmd/solana_exporter/exporter_test.go index 7b8a965..0c57063 100644 --- a/cmd/solana_exporter/exporter_test.go +++ b/cmd/solana_exporter/exporter_test.go @@ -351,7 +351,7 @@ func runCollectionTests(t *testing.T, collector prometheus.Collector, testCases } func TestSolanaCollector_Collect_Static(t *testing.T) { - collector := createSolanaCollector(&staticRPCClient{}, slotPacerSchedule, identities) + collector := createSolanaCollector(&staticRPCClient{}, slotPacerSchedule, identities, []string{}) prometheus.NewPedanticRegistry().MustRegister(collector) testCases := []collectionTest{ @@ -429,7 +429,7 @@ solana_validator_delinquent{nodekey="ccc",pubkey="CCC"} 0 func TestSolanaCollector_Collect_Dynamic(t *testing.T) { client := newDynamicRPCClient() - collector := createSolanaCollector(client, slotPacerSchedule, identities) + collector := createSolanaCollector(client, slotPacerSchedule, identities, []string{}) prometheus.NewPedanticRegistry().MustRegister(collector) // start off by testing initial state: diff --git a/cmd/solana_exporter/slots.go b/cmd/solana_exporter/slots.go index 4dbdd81..2d36e94 100644 --- a/cmd/solana_exporter/slots.go +++ b/cmd/solana_exporter/slots.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "slices" "time" "github.com/asymmetric-research/solana_exporter/pkg/rpc" @@ -87,7 +88,7 @@ func (c *solanaCollector) WatchSlots(ctx context.Context) { epochLastSlot.Set(float64(lastSlot)) klog.Infof("Starting at slot %d in epoch %d (%d-%d)", firstSlot, currentEpoch, firstSlot, lastSlot) - _, err = updateCounters(c.rpcClient, currentEpoch, watermark, &lastSlot) + _, err = c.updateCounters(currentEpoch, watermark, &lastSlot) if err != nil { klog.Error(err) } @@ -126,7 +127,7 @@ func (c *solanaCollector) WatchSlots(ctx context.Context) { lastSlot, ) - last, err := updateCounters(c.rpcClient, currentEpoch, watermark, &lastSlot) + last, err := c.updateCounters(currentEpoch, watermark, &lastSlot) if err != nil { klog.Error(err) continue @@ -153,7 +154,7 @@ func (c *solanaCollector) WatchSlots(ctx context.Context) { totalTransactionsTotal.Set(float64(info.TransactionCount)) confirmedSlotHeight.Set(float64(info.AbsoluteSlot)) - last, err := updateCounters(c.rpcClient, currentEpoch, watermark, nil) + last, err := c.updateCounters(currentEpoch, watermark, nil) if err != nil { klog.Info(err) continue @@ -183,7 +184,7 @@ func getEpochBounds(info *rpc.EpochInfo) (int64, int64, int64) { return info.Epoch, firstSlot, lastSlot } -func updateCounters(c rpc.Provider, epoch, firstSlot int64, lastSlotOpt *int64) (int64, error) { +func (c *solanaCollector) updateCounters(epoch, firstSlot int64, lastSlotOpt *int64) (int64, error) { ctx, cancel := context.WithTimeout(context.Background(), httpTimeout) defer cancel() @@ -191,7 +192,7 @@ func updateCounters(c rpc.Provider, epoch, firstSlot int64, lastSlotOpt *int64) var err error if lastSlotOpt == nil { - lastSlot, err = c.GetSlot(ctx) + lastSlot, err = c.rpcClient.GetSlot(ctx) if err != nil { return 0, fmt.Errorf("error while getting the last slot: %v", err) @@ -214,7 +215,7 @@ func updateCounters(c rpc.Provider, epoch, firstSlot int64, lastSlotOpt *int64) ctx, cancel = context.WithTimeout(context.Background(), httpTimeout) defer cancel() - blockProduction, err := c.GetBlockProduction(ctx, &firstSlot, &lastSlot) + blockProduction, err := c.rpcClient.GetBlockProduction(ctx, &firstSlot, &lastSlot) if err != nil { return 0, fmt.Errorf("failed to fetch block production, retrying: %v", err) } @@ -228,8 +229,10 @@ func updateCounters(c rpc.Provider, epoch, firstSlot int64, lastSlotOpt *int64) leaderSlotsTotal.WithLabelValues("valid", host).Add(valid) leaderSlotsTotal.WithLabelValues("skipped", host).Add(skipped) - leaderSlotsByEpoch.WithLabelValues("valid", host, epochStr).Add(valid) - leaderSlotsByEpoch.WithLabelValues("skipped", host, epochStr).Add(skipped) + if len(c.leaderSlotAddresses) == 0 || slices.Contains(c.leaderSlotAddresses, host) { + leaderSlotsByEpoch.WithLabelValues("valid", host, epochStr).Add(valid) + leaderSlotsByEpoch.WithLabelValues("skipped", host, epochStr).Add(skipped) + } klog.V(1).Infof( "Epoch %s, slots %d-%d, node %s: Added %d valid and %d skipped slots", diff --git a/cmd/solana_exporter/slots_test.go b/cmd/solana_exporter/slots_test.go index 7872b1d..d8d8110 100644 --- a/cmd/solana_exporter/slots_test.go +++ b/cmd/solana_exporter/slots_test.go @@ -91,11 +91,7 @@ func TestSolanaCollector_WatchSlots_Static(t *testing.T) { leaderSlotsTotal.Reset() leaderSlotsByEpoch.Reset() - collector := createSolanaCollector( - &staticRPCClient{}, - 100*time.Millisecond, - identities, - ) + collector := createSolanaCollector(&staticRPCClient{}, 100*time.Millisecond, identities, []string{}) prometheus.NewPedanticRegistry().MustRegister(collector) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -149,7 +145,7 @@ func TestSolanaCollector_WatchSlots_Dynamic(t *testing.T) { // create clients: client := newDynamicRPCClient() - collector := createSolanaCollector(client, 300*time.Millisecond, identities) + collector := createSolanaCollector(client, 300*time.Millisecond, identities, []string{}) prometheus.NewPedanticRegistry().MustRegister(collector) // start client/collector and wait a bit: From 40044946d2a1129864aa69d8a917518c5aee4d15 Mon Sep 17 00:00:00 2001 From: Matt Johnstone Date: Fri, 4 Oct 2024 12:24:44 +0200 Subject: [PATCH 2/2] added leader-slot-addresses warnings --- cmd/solana_exporter/exporter.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cmd/solana_exporter/exporter.go b/cmd/solana_exporter/exporter.go index 1c74b71..a147622 100644 --- a/cmd/solana_exporter/exporter.go +++ b/cmd/solana_exporter/exporter.go @@ -23,7 +23,7 @@ var ( leaderSlotAddresses = flag.String( "leader-slot-addresses", "", - "Comma-separated list of addresses to monitor leader slots by epoch for, leave nil to track by epoch for all validators.", + "Comma-separated list of addresses to monitor leader slots by epoch for, leave nil to track by epoch for all validators (this creates a lot of Prometheus metrics with every new epoch).", ) ) @@ -223,6 +223,13 @@ func main() { klog.Fatal("Please specify -rpcURI") } + if *leaderSlotAddresses == "" { + klog.Warning( + "Not specifying leader-slot-addresses will lead to potentially thousands of new " + + "Prometheus metrics being created every epoch.", + ) + } + httpTimeout = time.Duration(*httpTimeoutSecs) * time.Second var (