From a33841772a8234e19638b18abf0f31e5ea9123ad Mon Sep 17 00:00:00 2001 From: Aleksandar Petrov <8142643+aleks-p@users.noreply.github.com> Date: Mon, 11 Dec 2023 09:59:09 -0400 Subject: [PATCH] Improve language detection performance (#2823) * Improve runtime performance for the profile language detection * Get the profile language from the pyroscope_spy label, if set * Use pyroscope_spy constant in more places * Add report memory allocations in GetLanguage benchmark --- pkg/distributor/distributor.go | 4 +++ pkg/distributor/model/push.go | 35 +++++++++++++++++++ pkg/distributor/model/push_test.go | 29 +++++++++++++++ pkg/ingester/pyroscope/ingest_adapter.go | 2 +- pkg/ingester/pyroscope/ingest_handler_test.go | 2 +- pkg/model/labels.go | 19 +++++----- pkg/og/convert/jfr/parser_suite_test.go | 6 ++-- pkg/og/convert/jfr/pprof.go | 2 +- pkg/og/convert/pprof/profile.go | 2 +- pkg/pprof/pprof.go | 28 ++++++++------- pkg/pprof/pprof_test.go | 29 +++++++++++++++ 11 files changed, 129 insertions(+), 29 deletions(-) create mode 100644 pkg/distributor/model/push_test.go diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index c67694b471..418ba7ddf7 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -239,6 +239,10 @@ func (d *Distributor) GetProfileLanguage(series *distributormodel.ProfileSeries) if len(series.Samples) == 0 { return "unknown" } + lang := series.GetLanguage() + if lang != "" { + return lang + } return pprof.GetLanguage(series.Samples[0].Profile, d.logger) } diff --git a/pkg/distributor/model/push.go b/pkg/distributor/model/push.go index cda79fea1d..67ce520693 100644 --- a/pkg/distributor/model/push.go +++ b/pkg/distributor/model/push.go @@ -2,6 +2,7 @@ package model import ( v1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" + phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/pprof" ) @@ -30,3 +31,37 @@ type ProfileSeries struct { Labels []*v1.LabelPair Samples []*ProfileSample } + +func (p *ProfileSeries) GetLanguage() string { + spyName := phlaremodel.Labels(p.Labels).Get(phlaremodel.LabelNamePyroscopeSpy) + if spyName != "" { + lang := getProfileLanguageFromSpy(spyName) + if lang != "" { + return lang + } + } + return "" +} + +func getProfileLanguageFromSpy(spyName string) string { + switch spyName { + default: + return "" + case "dotnetspy": + return "dotnet" + case "gospy": + return "go" + case "phpspy": + return "php" + case "pyspy": + return "python" + case "rbspy": + return "ruby" + case "nodespy": + return "nodejs" + case "javaspy": + return "java" + case "pyroscope-rs": + return "rust" + } +} diff --git a/pkg/distributor/model/push_test.go b/pkg/distributor/model/push_test.go new file mode 100644 index 0000000000..b7e60de548 --- /dev/null +++ b/pkg/distributor/model/push_test.go @@ -0,0 +1,29 @@ +package model + +import ( + "testing" + + typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" +) + +func TestProfileSeries_GetLanguage(t *testing.T) { + tests := []struct { + labels []*typesv1.LabelPair + want string + }{ + {labels: []*typesv1.LabelPair{{Name: "pyroscope_spy", Value: "gospy"}}, want: "go"}, + {labels: []*typesv1.LabelPair{{Name: "pyroscope_spy", Value: "javaspy"}}, want: "java"}, + {labels: []*typesv1.LabelPair{{Name: "pyroscope_spy", Value: "dotnetspy"}}, want: "dotnet"}, + {labels: []*typesv1.LabelPair{{Name: "pyroscope_spy", Value: ""}}, want: ""}, + } + for _, tt := range tests { + t.Run("", func(t *testing.T) { + p := &ProfileSeries{ + Labels: tt.labels, + } + if got := p.GetLanguage(); got != tt.want { + t.Errorf("GetLanguage() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/ingester/pyroscope/ingest_adapter.go b/pkg/ingester/pyroscope/ingest_adapter.go index 22d77fb3c8..6d05e51e41 100644 --- a/pkg/ingester/pyroscope/ingest_adapter.go +++ b/pkg/ingester/pyroscope/ingest_adapter.go @@ -118,7 +118,7 @@ func (p *pyroscopeIngesterAdapter) Put(ctx context.Context, pi *storage.PutInput }) if pi.SpyName != "" { series.Labels = append(series.Labels, &typesv1.LabelPair{ - Name: "pyroscope_spy", + Name: phlaremodel.LabelNamePyroscopeSpy, Value: pi.SpyName, }) } diff --git a/pkg/ingester/pyroscope/ingest_handler_test.go b/pkg/ingester/pyroscope/ingest_handler_test.go index ada45340a2..f8d2eaa0c6 100644 --- a/pkg/ingester/pyroscope/ingest_handler_test.go +++ b/pkg/ingester/pyroscope/ingest_handler_test.go @@ -395,7 +395,7 @@ func TestIngestPPROFFixtures(t *testing.T) { ls := phlaremodel.Labels(actualReq.Labels) require.Equal(t, testdatum.expectMetric, ls.Get(labels.MetricName)) require.Equal(t, "asd", ls.Get("qwe")) - require.Equal(t, spyName, ls.Get("pyroscope_spy")) + require.Equal(t, spyName, ls.Get(phlaremodel.LabelNamePyroscopeSpy)) require.Equal(t, "pprof.test", ls.Get("service_name")) require.Equal(t, "false", ls.Get("__delta__")) require.Equal(t, profile, actualReq.RawProfile) diff --git a/pkg/model/labels.go b/pkg/model/labels.go index 1d069a3a7b..764cb8a5e0 100644 --- a/pkg/model/labels.go +++ b/pkg/model/labels.go @@ -22,15 +22,16 @@ import ( var seps = []byte{'\xff'} const ( - LabelNameProfileType = "__profile_type__" - LabelNameType = "__type__" - LabelNameUnit = "__unit__" - LabelNamePeriodType = "__period_type__" - LabelNamePeriodUnit = "__period_unit__" - LabelNameDelta = "__delta__" - LabelNameProfileName = pmodel.MetricNameLabel - LabelNameServiceName = "service_name" - LabelNameSessionID = "__session_id__" + LabelNameProfileType = "__profile_type__" + LabelNameType = "__type__" + LabelNameUnit = "__unit__" + LabelNamePeriodType = "__period_type__" + LabelNamePeriodUnit = "__period_unit__" + LabelNameDelta = "__delta__" + LabelNameProfileName = pmodel.MetricNameLabel + LabelNameServiceName = "service_name" + LabelNamePyroscopeSpy = "pyroscope_spy" + LabelNameSessionID = "__session_id__" LabelNameServiceNameK8s = "__meta_kubernetes_pod_annotation_pyroscope_io_service_name" diff --git a/pkg/og/convert/jfr/parser_suite_test.go b/pkg/og/convert/jfr/parser_suite_test.go index b50fe814b9..165c8db73d 100644 --- a/pkg/og/convert/jfr/parser_suite_test.go +++ b/pkg/og/convert/jfr/parser_suite_test.go @@ -3,7 +3,6 @@ package jfr import ( "encoding/json" "fmt" - model2 "github.com/grafana/pyroscope/pkg/distributor/model" "os" "strings" "testing" @@ -11,6 +10,7 @@ import ( profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" v1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" + distributormodel "github.com/grafana/pyroscope/pkg/distributor/model" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/og/convert/pprof/bench" "github.com/grafana/pyroscope/pkg/og/storage" @@ -84,7 +84,7 @@ func TestParseCompareExpectedData(t *testing.T) { } } -func compareWithJson(t *testing.T, req *model2.PushRequest, file string) error { +func compareWithJson(t *testing.T, req *distributormodel.PushRequest, file string) error { type flatProfileSeries struct { Labels []*v1.LabelPair Profile *profilev1.Profile @@ -181,7 +181,7 @@ func compareWithJson(t *testing.T, req *model2.PushRequest, file string) error { return err } for _, label := range profile.Labels { - if strings.HasPrefix(label.Name, "__") || label.Name == "service_name" || label.Name == "jfr_event" || label.Name == "pyroscope_spy" { + if strings.HasPrefix(label.Name, "__") || label.Name == phlaremodel.LabelNameServiceName || label.Name == "jfr_event" || label.Name == phlaremodel.LabelNamePyroscopeSpy { continue } parseKey.Add(label.Name, label.Value) diff --git a/pkg/og/convert/jfr/pprof.go b/pkg/og/convert/jfr/pprof.go index 59fd4bba6a..b07043c9a1 100644 --- a/pkg/og/convert/jfr/pprof.go +++ b/pkg/og/convert/jfr/pprof.go @@ -227,7 +227,7 @@ func (b *jfrPprofBuilders) build(event string) *model.PushRequest { Name: "jfr_event", Value: event, }, &v1.LabelPair{ - Name: "pyroscope_spy", + Name: phlaremodel.LabelNamePyroscopeSpy, Value: b.spyName, }) for _, v := range b.labels { diff --git a/pkg/og/convert/pprof/profile.go b/pkg/og/convert/pprof/profile.go index 2f960ea580..7eb1d86a23 100644 --- a/pkg/og/convert/pprof/profile.go +++ b/pkg/og/convert/pprof/profile.go @@ -211,7 +211,7 @@ func (p *RawProfile) createLabels(profile *pprof.Profile, md ingestion.Metadata) Name: "service_name", Value: md.Key.AppName(), }, &v1.LabelPair{ - Name: "pyroscope_spy", + Name: phlaremodel.LabelNamePyroscopeSpy, Value: md.SpyName, }) for k, v := range md.Key.Labels() { diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index 66b03c4c2b..161b5e75b5 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -7,8 +7,8 @@ import ( "io" "os" "reflect" - "regexp" "sort" + "strings" "sync" "time" "unsafe" @@ -1109,22 +1109,24 @@ func ZeroLabelStrings(p *profilev1.Profile) { } } -var languageMatchers = map[string]*regexp.Regexp{ - "java": regexp.MustCompile(`^java/|^jdk/|libjvm`), - "go": regexp.MustCompile(`/usr/local/go/|\.go$`), - "python": regexp.MustCompile(`\.py$`), - "ruby": regexp.MustCompile(`^gems/|\.rb$`), - "dotnet": regexp.MustCompile(`^Microsoft\.|^System\.`), - "nodejs": regexp.MustCompile(`\.jsx?:?|/node_modules/`), - "rust": regexp.MustCompile(`\.rs(:\d+)?`), +var languageMatchers = map[string][]string{ + "go": {".go", "/usr/local/go/"}, + "java": {"java/", "sun/"}, + "ruby": {".rb", "gems/"}, + "nodejs": {"./node_modules/", ".js"}, + "dotnet": {"System.", "Microsoft."}, + "python": {".py"}, + "rust": {"main.rs", "core.rs"}, } func GetLanguage(profile *Profile, logger log.Logger) string { for _, symbol := range profile.StringTable { - for lang, matcher := range languageMatchers { - if matcher.MatchString(symbol) { - level.Debug(logger).Log("msg", "found profile language", "lang", lang, "symbol", symbol) - return lang + for lang, matcherPatterns := range languageMatchers { + for _, pattern := range matcherPatterns { + if strings.HasPrefix(symbol, pattern) || strings.HasSuffix(symbol, pattern) { + level.Debug(logger).Log("msg", "found profile language", "lang", lang, "symbol", symbol) + return lang + } } } } diff --git a/pkg/pprof/pprof_test.go b/pkg/pprof/pprof_test.go index 978fbafc3c..758d82c86f 100644 --- a/pkg/pprof/pprof_test.go +++ b/pkg/pprof/pprof_test.go @@ -1101,3 +1101,32 @@ func Test_GetProfileLanguage_rust_profile(t *testing.T) { language := GetLanguage(p, log.NewNopLogger()) assert.Equal(t, "rust", language) } + +func Benchmark_GetProfileLanguage(b *testing.B) { + tests := []string{ + "testdata/go.cpu.labels.pprof", + "testdata/heap", + "testdata/dotnet.labels.pprof", + "testdata/profile_java", + "testdata/profile_nodejs", + "testdata/profile_python", + "testdata/profile_ruby", + "testdata/profile_rust", + } + + for _, testdata := range tests { + f := testdata + b.Run(testdata, func(b *testing.B) { + p, err := OpenFile(f) + require.NoError(b, err) + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + language := GetLanguage(p, log.NewNopLogger()) + if language == "unknown" { + b.Fatal() + } + } + }) + } +}