From 88fe924c3c73016aaa4d94f96666bc1696c561d2 Mon Sep 17 00:00:00 2001 From: Brian Ginsburg <7957636+bgins@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:47:50 -0700 Subject: [PATCH] feat: Add solver HTTP tracing (#388) * chore: Add solver tracer * feat: Add solver HTTP tracing * chore: Add solver disable telemetry envvar * chore: Change print error message to warn log --- cmd/lilypad/resource-provider.go | 5 ++--- cmd/lilypad/solver.go | 20 +++++++++++++------- docker/docker-compose.dev.yml | 1 + go.mod | 7 ++++--- go.sum | 8 ++++++++ pkg/options/solver.go | 17 ++++++++++++++--- pkg/solver/controller.go | 4 ++++ pkg/solver/server.go | 5 ++++- pkg/solver/solver.go | 16 ++++++++++------ 9 files changed, 60 insertions(+), 23 deletions(-) diff --git a/cmd/lilypad/resource-provider.go b/cmd/lilypad/resource-provider.go index e7f36765..437cdd91 100644 --- a/cmd/lilypad/resource-provider.go +++ b/cmd/lilypad/resource-provider.go @@ -1,13 +1,12 @@ package lilypad import ( - "fmt" - "github.com/lilypad-tech/lilypad/pkg/executor/bacalhau" optionsfactory "github.com/lilypad-tech/lilypad/pkg/options" "github.com/lilypad-tech/lilypad/pkg/resourceprovider" "github.com/lilypad-tech/lilypad/pkg/system" "github.com/lilypad-tech/lilypad/pkg/web3" + "github.com/rs/zerolog/log" "github.com/spf13/cobra" ) @@ -41,7 +40,7 @@ func runResourceProvider(cmd *cobra.Command, options resourceprovider.ResourcePr telemetry, err := configureTelemetry(commandCtx.Ctx, system.ResourceProviderService, network, options.Telemetry, options.Web3) if err != nil { - fmt.Printf("failed to setup opentelemetry: %s", err) + log.Warn().Msgf("failed to setup opentelemetry: %s", err) } commandCtx.Cm.RegisterCallbackWithContext(telemetry.Shutdown) tracer := telemetry.TracerProvider.Tracer(system.GetOTelServiceName(system.ResourceProviderService)) diff --git a/cmd/lilypad/solver.go b/cmd/lilypad/solver.go index ebe8e4cd..6c6e0704 100644 --- a/cmd/lilypad/solver.go +++ b/cmd/lilypad/solver.go @@ -6,8 +6,8 @@ import ( memorystore "github.com/lilypad-tech/lilypad/pkg/solver/store/memory" "github.com/lilypad-tech/lilypad/pkg/system" "github.com/lilypad-tech/lilypad/pkg/web3" + "github.com/rs/zerolog/log" "github.com/spf13/cobra" - "go.opentelemetry.io/otel/trace/noop" ) func newSolverCmd() *cobra.Command { @@ -25,7 +25,7 @@ func newSolverCmd() *cobra.Command { if err != nil { return err } - return runSolver(cmd, options) + return runSolver(cmd, options, network) }, } @@ -34,12 +34,18 @@ func newSolverCmd() *cobra.Command { return solverCmd } -func runSolver(cmd *cobra.Command, options solver.SolverOptions) error { +func runSolver(cmd *cobra.Command, options solver.SolverOptions, network string) error { commandCtx := system.NewCommandContext(cmd) defer commandCtx.Cleanup() - noopTracer := noop.NewTracerProvider().Tracer(system.GetOTelServiceName(system.SolverService)) - web3SDK, err := web3.NewContractSDK(commandCtx.Ctx, options.Web3, noopTracer) + telemetry, err := configureTelemetry(commandCtx.Ctx, system.SolverService, network, options.Telemetry, options.Web3) + if err != nil { + log.Warn().Msgf("failed to setup opentelemetry: %s", err) + } + commandCtx.Cm.RegisterCallbackWithContext(telemetry.Shutdown) + tracer := telemetry.TracerProvider.Tracer(system.GetOTelServiceName(system.SolverService)) + + web3SDK, err := web3.NewContractSDK(commandCtx.Ctx, options.Web3, tracer) if err != nil { return err } @@ -49,12 +55,12 @@ func runSolver(cmd *cobra.Command, options solver.SolverOptions) error { return err } - solverService, err := solver.NewSolver(options, solverStore, web3SDK) + solverService, err := solver.NewSolver(options, solverStore, web3SDK, tracer) if err != nil { return err } - solverErrors := solverService.Start(commandCtx.Ctx, commandCtx.Cm) + solverErrors := solverService.Start(commandCtx.Ctx, commandCtx.Cm, telemetry.TracerProvider) for { select { diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 97e5fa93..ebd74726 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -54,6 +54,7 @@ services: - SERVER_PORT=${SERVER_PORT} - SERVER_URL=${SERVER_URL} - WEB3_RPC_URL=${WEB3_RPC_URL} + - DISABLE_TELEMETRY=${DISABLE_TELEMETRY} ports: - 8080:8080 healthcheck: diff --git a/go.mod b/go.mod index cef06d7f..96a9fbeb 100644 --- a/go.mod +++ b/go.mod @@ -21,11 +21,11 @@ require ( github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.9.0 github.com/theckman/yacspin v0.13.12 - go.opentelemetry.io/otel v1.28.0 + go.opentelemetry.io/otel v1.30.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 go.opentelemetry.io/otel/sdk v1.28.0 - go.opentelemetry.io/otel/trace v1.28.0 + go.opentelemetry.io/otel/trace v1.30.0 gorgonia.org/cu v0.9.7-0.20240623234718-3cd40db700e9 k8s.io/apimachinery v0.29.0 ) @@ -268,12 +268,13 @@ require ( github.com/yashtewari/glob-intersection v0.2.0 // indirect go.etcd.io/bbolt v1.3.8 // indirect go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/github.com/gorilla/mux/otelmux v0.55.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.40.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.40.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0 // indirect - go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/metric v1.30.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.24.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.ptx.dk/multierrgroup v0.0.3 // indirect diff --git a/go.sum b/go.sum index 2cd6cf49..d49d274d 100644 --- a/go.sum +++ b/go.sum @@ -910,10 +910,14 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/github.com/gorilla/mux/otelmux v0.55.0 h1:lRMfRoJnAaDWadgR58z6xyMIaH5UQ5SzFOhA+LfmUkA= +go.opentelemetry.io/contrib/instrumentation/github.com/gorilla/mux/otelmux v0.55.0/go.mod h1:7kJADjE+s91WUhMkzN7qTnDf2aUJZPzeD7RKw/uB5Xg= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0 h1:sv9kVfal0MK0wBMCOGr+HeJm9v803BkJxGrk2au7j08= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0/go.mod h1:SK2UL73Zy1quvRPonmOmRDiWk1KBV3LyIeeIxcEApWw= go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= +go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.40.0 h1:MZbjiZeMmn5wFMORhozpouGKDxj9POHTuU5UA8msBQk= go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.40.0/go.mod h1:C7tOYVCJmrDTCwxNny0MuUtnDIR3032vFHYke0F2ZrU= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.40.0 h1:q3FNPi8FLQVjLlmV+WWHQfH9ZCCtQIS0O/+dn1+4cJ4= @@ -928,12 +932,16 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 h1:j9+03 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0/go.mod h1:Y5+XiUG4Emn1hTfciPzGPJaSI+RpDts6BnCIir0SLqk= go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= +go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= go.opentelemetry.io/otel/sdk/metric v1.24.0 h1:yyMQrPzF+k88/DbH7o4FMAs80puqd+9osbiBrJrz/w8= go.opentelemetry.io/otel/sdk/metric v1.24.0/go.mod h1:I6Y5FjH6rvEnTTAYQz3Mmv2kl6Ek5IIrmwTLqMrrOE0= go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= +go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.ptx.dk/multierrgroup v0.0.3 h1:HNaevFVERSZ7/DyCAnOICWF86B8s+76QwdwOlgCPvQM= diff --git a/pkg/options/solver.go b/pkg/options/solver.go index 09595eb2..8f963ff8 100644 --- a/pkg/options/solver.go +++ b/pkg/options/solver.go @@ -8,9 +8,10 @@ import ( func NewSolverOptions() solver.SolverOptions { options := solver.SolverOptions{ - Server: GetDefaultServerOptions(), - Web3: GetDefaultWeb3Options(), - Services: GetDefaultServicesOptions(), + Server: GetDefaultServerOptions(), + Web3: GetDefaultWeb3Options(), + Services: GetDefaultServicesOptions(), + Telemetry: GetDefaultTelemetryOptions(), } options.Web3.Service = system.SolverService return options @@ -20,6 +21,7 @@ func AddSolverCliFlags(cmd *cobra.Command, options *solver.SolverOptions) { AddWeb3CliFlags(cmd, &options.Web3) AddServerCliFlags(cmd, &options.Server) AddServicesCliFlags(cmd, &options.Services) + AddTelemetryCliFlags(cmd, &options.Telemetry) } func CheckSolverOptions(options solver.SolverOptions) error { @@ -31,6 +33,10 @@ func CheckSolverOptions(options solver.SolverOptions) error { if err != nil { return err } + err = CheckTelemetryOptions(options.Telemetry) + if err != nil { + return err + } return nil } @@ -40,5 +46,10 @@ func ProcessSolverOptions(options solver.SolverOptions, network string) (solver. return options, err } options.Web3 = newWeb3Options + newTelemetryOptions, err := ProcessTelemetryOptions(options.Telemetry, network) + if err != nil { + return options, err + } + options.Telemetry = newTelemetryOptions return options, CheckSolverOptions(options) } diff --git a/pkg/solver/controller.go b/pkg/solver/controller.go index f0a1934b..30447eef 100644 --- a/pkg/solver/controller.go +++ b/pkg/solver/controller.go @@ -14,6 +14,7 @@ import ( "github.com/lilypad-tech/lilypad/pkg/web3/bindings/mediation" "github.com/lilypad-tech/lilypad/pkg/web3/bindings/storage" "github.com/rs/zerolog/log" + "go.opentelemetry.io/otel/trace" ) // add an enum for various types of event @@ -48,6 +49,7 @@ type SolverController struct { solverEventSubs []func(SolverEvent) options SolverOptions log *system.ServiceLogger + tracer trace.Tracer } // the background "even if we have not heard of an event" loop @@ -60,6 +62,7 @@ func NewSolverController( web3SDK *web3.Web3SDK, store store.SolverStore, options SolverOptions, + tracer trace.Tracer, ) (*SolverController, error) { controller := &SolverController{ web3SDK: web3SDK, @@ -67,6 +70,7 @@ func NewSolverController( store: store, options: options, log: system.NewServiceLogger(system.SolverService), + tracer: tracer, } return controller, nil } diff --git a/pkg/solver/server.go b/pkg/solver/server.go index 16147b38..7e52ab70 100644 --- a/pkg/solver/server.go +++ b/pkg/solver/server.go @@ -18,6 +18,8 @@ import ( "github.com/lilypad-tech/lilypad/pkg/solver/store" "github.com/lilypad-tech/lilypad/pkg/system" "github.com/rs/zerolog/log" + "go.opentelemetry.io/contrib/instrumentation/github.com/gorilla/mux/otelmux" + "go.opentelemetry.io/otel/sdk/trace" ) type solverServer struct { @@ -56,12 +58,13 @@ func NewSolverServer( * */ -func (solverServer *solverServer) ListenAndServe(ctx context.Context, cm *system.CleanupManager) error { +func (solverServer *solverServer) ListenAndServe(ctx context.Context, cm *system.CleanupManager, tracerProvider *trace.TracerProvider) error { router := mux.NewRouter() subrouter := router.PathPrefix(http.API_SUB_PATH).Subrouter() subrouter.Use(http.CorsMiddleware) + subrouter.Use(otelmux.Middleware("solver", otelmux.WithTracerProvider(tracerProvider))) subrouter.HandleFunc("/job_offers", http.GetHandler(solverServer.getJobOffers)).Methods("GET") subrouter.HandleFunc("/job_offers", http.PostHandler(solverServer.addJobOffer)).Methods("POST") diff --git a/pkg/solver/solver.go b/pkg/solver/solver.go index fedd5efc..32e5b2fe 100644 --- a/pkg/solver/solver.go +++ b/pkg/solver/solver.go @@ -9,12 +9,15 @@ import ( "github.com/lilypad-tech/lilypad/pkg/system" "github.com/lilypad-tech/lilypad/pkg/web3" "github.com/rs/zerolog/log" + sdkTrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" ) type SolverOptions struct { - Web3 web3.Web3Options - Server http.ServerOptions - Services data.ServiceConfig + Web3 web3.Web3Options + Server http.ServerOptions + Services data.ServiceConfig + Telemetry system.TelemetryOptions } type Solver struct { @@ -29,8 +32,9 @@ func NewSolver( options SolverOptions, store store.SolverStore, web3SDK *web3.Web3SDK, + tracer trace.Tracer, ) (*Solver, error) { - controller, err := NewSolverController(web3SDK, store, options) + controller, err := NewSolverController(web3SDK, store, options, tracer) if err != nil { return nil, err } @@ -48,11 +52,11 @@ func NewSolver( return solver, nil } -func (solver *Solver) Start(ctx context.Context, cm *system.CleanupManager) chan error { +func (solver *Solver) Start(ctx context.Context, cm *system.CleanupManager, tracerProvider *sdkTrace.TracerProvider) chan error { errorChan := solver.controller.Start(ctx, cm) log.Debug().Msgf("solver.server.ListenAndServe") go func() { - err := solver.server.ListenAndServe(ctx, cm) + err := solver.server.ListenAndServe(ctx, cm, tracerProvider) if err != nil { errorChan <- err }