From 16b12d1bb90c71fa403a0e4df69e318488ccb6ec Mon Sep 17 00:00:00 2001 From: sgangopadhyay Date: Wed, 13 Aug 2025 14:29:23 +0530 Subject: [PATCH 1/4] GATEWAYS-4306: exporting metrics for conntrack per zone --- go.mod | 33 +++++++--- go.sum | 96 ++++++++--------------------- internal/ovsexporter/conntrack.go | 75 ++++++++++++++++++++++ internal/ovsexporter/ovsexporter.go | 29 +++++++-- 4 files changed, 152 insertions(+), 81 deletions(-) create mode 100644 internal/ovsexporter/conntrack.go diff --git a/go.mod b/go.mod index 20f9632..fb8e7af 100644 --- a/go.mod +++ b/go.mod @@ -1,18 +1,35 @@ module github.com/digitalocean/openvswitch_exporter -go 1.15 +go 1.23.0 + +toolchain go1.24.2 require ( github.com/digitalocean/go-openvswitch v0.0.0-20201214180534-ce0f183468d8 - github.com/google/go-cmp v0.5.4 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect - github.com/mdlayher/netlink v1.3.2 // indirect github.com/prometheus/client_golang v1.9.0 + github.com/prometheus/prometheus v2.2.1-0.20180315085919-58e2a31db8de+incompatible +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.1.1 // indirect + github.com/golang/protobuf v1.4.3 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/josharian/native v1.1.0 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect + github.com/mdlayher/genetlink v1.0.0 // indirect + github.com/mdlayher/netlink v1.7.2 // indirect + github.com/mdlayher/socket v0.5.1 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.17.0 // indirect github.com/prometheus/procfs v0.6.0 // indirect - github.com/prometheus/prometheus v2.2.1-0.20180315085919-58e2a31db8de+incompatible - golang.org/x/net v0.0.0-20210222171744-9060382bd457 // indirect - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect - golang.org/x/sys v0.0.0-20210223095934-7937bea0104d // indirect + github.com/ti-mo/conntrack v0.5.2 // indirect + github.com/ti-mo/netfilter v0.5.3 // indirect + golang.org/x/net v0.39.0 // indirect + golang.org/x/sync v0.14.0 // indirect + golang.org/x/sys v0.34.0 // indirect google.golang.org/protobuf v1.25.0 // indirect ) +replace github.com/digitalocean/go-openvswitch => /Users/sgangopadhyay/dev/digitalocean/go-openvswitch + diff --git a/go.sum b/go.sum index 1d1c526..b2e238f 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,6 @@ github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6l github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= -github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a h1:BtpsbiV638WQZwhA98cEZw2BsbnQJrbd0BI7tsy0W1c= -github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -42,12 +40,9 @@ github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfc github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/digitalocean/go-openvswitch v0.0.0-20180412190941-6a4a47d93e43 h1:WbVAw/VDkXvaFyMOkJRzKBE6bf9PY7PAfrsOY3RHnIE= -github.com/digitalocean/go-openvswitch v0.0.0-20180412190941-6a4a47d93e43/go.mod h1:MpzfscrezUxa94/T4sy2tDaxB+hQ6w0EmRBPv+xHWEs= -github.com/digitalocean/go-openvswitch v0.0.0-20201214180534-ce0f183468d8 h1:RQAD2flP6n+U5sAudMpru+EuLJ6VQduu6yenl6LwM5E= -github.com/digitalocean/go-openvswitch v0.0.0-20201214180534-ce0f183468d8/go.mod h1:MpzfscrezUxa94/T4sy2tDaxB+hQ6w0EmRBPv+xHWEs= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= @@ -77,8 +72,6 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v0.0.0-20171021043952-1643683e1b54 h1:nRNJXiJvemchkOTn0V4U11TZkvacB94gTzbTZbSA7Rw= -github.com/golang/protobuf v0.0.0-20171021043952-1643683e1b54/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -99,9 +92,9 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -139,16 +132,10 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/josharian/native v0.0.0-20200817173448-b6b71def0850 h1:uhL5Gw7BINiiPAo24A2sxkcDI0Jt/sqp1v5xQCniEFA= -github.com/josharian/native v0.0.0-20200817173448-b6b71def0850/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= +github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= +github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/jsimonetti/rtnetlink v0.0.0-20190606172950-9527aa82566a/go.mod h1:Oz+70psSo5OFh8DBl0Zv2ACw7Esh6pPUphlvZG9x7uw= -github.com/jsimonetti/rtnetlink v0.0.0-20200117123717-f846d4f6c1f4/go.mod h1:WGuG/smIU4J/54PblvSbh+xvCZmpJnFgr3ds6Z55XMQ= -github.com/jsimonetti/rtnetlink v0.0.0-20201009170750-9c6f07d100c1/go.mod h1:hqoO/u39cqLeBLebZ8fWdE96O7FxrAsRYhnVOdgHxok= -github.com/jsimonetti/rtnetlink v0.0.0-20201216134343-bde56ed16391/go.mod h1:cR77jAZG3Y3bsb8hF6fHJbFoyFukLFOkQ98S0pQz3xw= -github.com/jsimonetti/rtnetlink v0.0.0-20201220180245-69540ac93943/go.mod h1:z4c53zj6Eex712ROyh8WI0ihysb5j2ROyV42iNogmAs= -github.com/jsimonetti/rtnetlink v0.0.0-20210122163228-8d122574c736/go.mod h1:ZXpIyOK59ZnN7J0BV99cZUPmsqDRZ3eq5X+st7u/oSA= -github.com/jsimonetti/rtnetlink v0.0.0-20210212075122-66c871082f2b/go.mod h1:8w9Rh8m+aHZIG69YPGGem1i5VzoyRC8nw2kA8B+ik5U= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -173,23 +160,14 @@ github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNx github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/mdlayher/ethtool v0.0.0-20210210192532-2b88debcdd43/go.mod h1:+t7E0lkKfbBsebllff1xdTmyJt8lH37niI6kwFk9OTo= -github.com/mdlayher/genetlink v0.0.0-20170901181924-76fecce4c787 h1:Tbivh+kRjFJUTZmMic7LcmuzfEF/HV42ZRMY0LiQ2dU= -github.com/mdlayher/genetlink v0.0.0-20170901181924-76fecce4c787/go.mod h1:EOrmeik1bDMaRduo2B+uAYe1HmTq6yF2IMDmJi1GoWk= github.com/mdlayher/genetlink v1.0.0 h1:OoHN1OdyEIkScEmRgxLEe2M9U8ClMytqA5niynLtfj0= github.com/mdlayher/genetlink v1.0.0/go.mod h1:0rJ0h4itni50A86M2kHcgS85ttZazNt7a8H2a2cw0Gc= -github.com/mdlayher/netlink v0.0.0-20180326144912-dc216978b479 h1:MF+m/B1wWGiOBY92ORRiv6hGcRBX4KHqNoYIO+y2Owo= -github.com/mdlayher/netlink v0.0.0-20180326144912-dc216978b479/go.mod h1:a3TlQHkJH2m32RF224Z7LhD5N4mpyR8eUbCoYHywrwg= github.com/mdlayher/netlink v0.0.0-20190409211403-11939a169225/go.mod h1:eQB3mZE4aiYnlUsyGGCOpPETfdQq4Jhsgf1fk3cwQaA= github.com/mdlayher/netlink v1.0.0/go.mod h1:KxeJAFOFLG6AjpyDkQ/iIhxygIUKD+vcwqcnu43w/+M= -github.com/mdlayher/netlink v1.1.0/go.mod h1:H4WCitaheIsdF9yOYu8CFmCgQthAPIWZmcKp9uZHgmY= -github.com/mdlayher/netlink v1.1.1/go.mod h1:WTYpFb/WTvlRJAyKhZL5/uy69TDDpHHu2VZmb2XgV7o= -github.com/mdlayher/netlink v1.2.0/go.mod h1:kwVW1io0AZy9A1E2YYgaD4Cj+C+GPkU6klXCMzIJ9p8= -github.com/mdlayher/netlink v1.2.1/go.mod h1:bacnNlfhqHqqLo4WsYeXSqfyXkInQ9JneWI68v1KwSU= -github.com/mdlayher/netlink v1.2.2-0.20210123213345-5cc92139ae3e/go.mod h1:bacnNlfhqHqqLo4WsYeXSqfyXkInQ9JneWI68v1KwSU= -github.com/mdlayher/netlink v1.3.0/go.mod h1:xK/BssKuwcRXHrtN04UBkwQ6dY9VviGGuriDdoPSWys= -github.com/mdlayher/netlink v1.3.2 h1:fMZOU2/M7PRMzGM3br5l1N2fu6bPSHtRytmQ338a9iA= -github.com/mdlayher/netlink v1.3.2/go.mod h1:dRJi5IABcZpBD2A3D0Mv/AiX8I9uDEu5oGkAVrekmf8= +github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g= +github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw= +github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos= +github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= @@ -234,12 +212,12 @@ github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0 github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/prometheus/client_golang v0.9.0-pre1.0.20171005112915-5cec1d0429b0 h1:eIVGl4K1clOaKdGaS+KSUEOwF+g2g2aIEsmikqXqRgY= -github.com/prometheus/client_golang v0.9.0-pre1.0.20171005112915-5cec1d0429b0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= @@ -247,8 +225,6 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.9.0 h1:Rrch9mh17XcxvEu9D9DEpb4isxjGBtcevQjKvxPRQIU= github.com/prometheus/client_golang v1.9.0/go.mod h1:FqZLKOZnGdFAhOK4nqGHa7D66IdsO+O441Eve7ptJDU= -github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612 h1:13pIdM2tpaDi4OVe24fgoIS7ZTqMt0QI+bwQsX5hq+g= -github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -256,8 +232,6 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.0.0-20171006141418-1bab55dd05db h1:PmL7nSW2mvuotGlJKuvUcSI/eE86zwYUcIAGoB6eHBk= -github.com/prometheus/common v0.0.0-20171006141418-1bab55dd05db/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= @@ -265,8 +239,6 @@ github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB8 github.com/prometheus/common v0.15.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= github.com/prometheus/common v0.17.0 h1:kDIZLI74SS+3tedSvEkykgBkD7txMxaJAPj8DtJUKYA= github.com/prometheus/common v0.17.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= -github.com/prometheus/procfs v0.0.0-20171226183907-b15cd069a834 h1:HRxr4uZnx/S86wVQsfXcKhadpzdceXn2qCzCtagcI6w= -github.com/prometheus/procfs v0.0.0-20171226183907-b15cd069a834/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= @@ -302,9 +274,17 @@ github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/ti-mo/conntrack v0.5.2 h1:PQ7MCdFjniEiTJT+qsAysREUsT5iH62/VNyhkB06HOI= +github.com/ti-mo/conntrack v0.5.2/go.mod h1:4HZrFQQLOSuBzgQNid3H/wYyyp1kfGXUYxueXjIGibo= +github.com/ti-mo/netfilter v0.5.3 h1:ikzduvnaUMwre5bhbNwWOd6bjqLMVb33vv0XXbK0xGQ= +github.com/ti-mo/netfilter v0.5.3/go.mod h1:08SyBCg6hu1qyQk4s3DjjJKNrm3RTb32nm6AzyT972E= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= @@ -333,8 +313,6 @@ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/net v0.0.0-20170614204310-ddf80d097059 h1:gMF+Wxxy27FCUvSZhKB22yNezu60IyLC37MHpj45QXs= -golang.org/x/net v0.0.0-20170614204310-ddf80d097059/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -353,15 +331,9 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191007182048-72f939374954/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201216054612-986b41b23924/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210222171744-9060382bd457 h1:hMm9lBjyNLe/c9C6bElQxp4wsrleaJn1vXMZIQkNN44= -golang.org/x/net v0.0.0-20210222171744-9060382bd457/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= +golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -371,10 +343,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180420145319-79b0c6888797 h1:ux9vYny+vlzqIcwoO6gRu+voPvKJA10ZceuJwWf2J88= -golang.org/x/sys v0.0.0-20180420145319-79b0c6888797/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= +golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -394,26 +364,15 @@ golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201118182958-a01c418693c7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201214210602-f9fddec55a1e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201218084310-7d0127a74742/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210110051926-789bb1bd4061/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210123111255-9b0068b26619/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210216163648-f7da38b97c65/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210223095934-7937bea0104d h1:u0GOGnBJ3EKE/tNqREhhGiCzE9jFXydDo2lf7hOwGuc= -golang.org/x/sys v0.0.0-20210223095934-7937bea0104d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -431,9 +390,7 @@ golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -460,7 +417,6 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= @@ -482,6 +438,8 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/internal/ovsexporter/conntrack.go b/internal/ovsexporter/conntrack.go new file mode 100644 index 0000000..b949127 --- /dev/null +++ b/internal/ovsexporter/conntrack.go @@ -0,0 +1,75 @@ +package ovsexporter + +import ( + "fmt" + "log" + + "github.com/digitalocean/go-openvswitch/ovsnl" + "github.com/prometheus/client_golang/prometheus" +) + +type conntrackCollector struct { + Count *prometheus.Desc + listConntrackEntries func() ([]ovsnl.ConntrackEntry, error) +} + +func newConntrackCollector(fn func() ([]ovsnl.ConntrackEntry, error)) prometheus.Collector { + return &conntrackCollector{ + Count: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "conntrack", "count"), + "Number of conntrack entries by zone, state, and mark", + []string{"zone", "state", "mark"}, nil, + ), + listConntrackEntries: fn, + } +} + +func (c *conntrackCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.Count +} + +func (c *conntrackCollector) Collect(ch chan<- prometheus.Metric) { + entries, err := c.listConntrackEntries() + if err != nil { + log.Printf("Failed to collect conntrack entries: %v", err) + // Return a zero metric to indicate the collector is working but no data + ch <- prometheus.MustNewConstMetric( + c.Count, + prometheus.GaugeValue, + 0.0, + "unknown", "unknown", "0", + ) + return + } + + // Log the number of entries found for debugging + log.Printf("Found %d conntrack entries", len(entries)) + + // Aggregate counts + counts := make(map[string]map[string]map[string]int) + for _, e := range entries { + zone := fmt.Sprintf("%d", e.Zone) + state := e.State + mark := fmt.Sprintf("%d", e.Mark) + if counts[zone] == nil { + counts[zone] = make(map[string]map[string]int) + } + if counts[zone][state] == nil { + counts[zone][state] = make(map[string]int) + } + counts[zone][state][mark]++ + } + + for zone, stateMap := range counts { + for state, markMap := range stateMap { + for mark, count := range markMap { + ch <- prometheus.MustNewConstMetric( + c.Count, + prometheus.GaugeValue, + float64(count), + zone, state, mark, + ) + } + } + } +} diff --git a/internal/ovsexporter/ovsexporter.go b/internal/ovsexporter/ovsexporter.go index 9ff3284..8d19838 100644 --- a/internal/ovsexporter/ovsexporter.go +++ b/internal/ovsexporter/ovsexporter.go @@ -6,6 +6,8 @@ package ovsexporter import ( + "context" + "log" "sync" "github.com/digitalocean/go-openvswitch/ovsnl" @@ -27,11 +29,30 @@ var _ prometheus.Collector = &collector{} // New creates a new Prometheus collector which collects metrics using the // input Open vSwitch generic netlink client. func New(c *ovsnl.Client) prometheus.Collector { + collectors := []prometheus.Collector{ + newDatapathCollector(c.Datapath.List), + } + + // Try to add conntrack collector, but don't fail if it's not available + conntrackCollector := newConntrackCollector(func() ([]ovsnl.ConntrackEntry, error) { + svc, err := ovsnl.NewConntrackService() + if err != nil { + return nil, err + } + defer svc.Close() + return svc.List(context.Background()) + }) + + // Test if conntrack service can be created + if _, err := ovsnl.NewConntrackService(); err != nil { + log.Printf("Warning: Conntrack service not available: %v. Conntrack metrics will be disabled.", err) + } else { + collectors = append(collectors, conntrackCollector) + log.Printf("Conntrack collector enabled") + } + return &collector{ - cs: []prometheus.Collector{ - // Additional generic netlink family collectors can be added here. - newDatapathCollector(c.Datapath.List), - }, + cs: collectors, } } From f33d105bb5b097b27cc3101fd1358207cc13d8ac Mon Sep 17 00:00:00 2001 From: sgangopadhyay Date: Thu, 28 Aug 2025 21:40:43 +0530 Subject: [PATCH 2/4] GATEWAYS-4306: scaling with even driven approach --- go.mod | 10 +- go.sum | 22 +- internal/ovsexporter/conntrack.go | 340 +++++++++++++++++++++++++--- internal/ovsexporter/ovsexporter.go | 60 +++-- vendor/modules.txt | 113 +++++++++ 5 files changed, 475 insertions(+), 70 deletions(-) create mode 100644 vendor/modules.txt diff --git a/go.mod b/go.mod index fb8e7af..49f73b0 100644 --- a/go.mod +++ b/go.mod @@ -22,14 +22,14 @@ require ( github.com/mdlayher/socket v0.5.1 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_model v0.2.0 // indirect - github.com/prometheus/common v0.17.0 // indirect - github.com/prometheus/procfs v0.6.0 // indirect + github.com/prometheus/common v0.15.0 // indirect + github.com/prometheus/procfs v0.2.0 // indirect github.com/ti-mo/conntrack v0.5.2 // indirect github.com/ti-mo/netfilter v0.5.3 // indirect golang.org/x/net v0.39.0 // indirect golang.org/x/sync v0.14.0 // indirect - golang.org/x/sys v0.34.0 // indirect - google.golang.org/protobuf v1.25.0 // indirect + golang.org/x/sys v0.35.0 // indirect + google.golang.org/protobuf v1.23.0 // indirect ) -replace github.com/digitalocean/go-openvswitch => /Users/sgangopadhyay/dev/digitalocean/go-openvswitch +replace github.com/digitalocean/go-openvswitch => ../go-openvswitch diff --git a/go.sum b/go.sum index b2e238f..d03edde 100644 --- a/go.sum +++ b/go.sum @@ -80,7 +80,6 @@ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:x github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= @@ -91,8 +90,6 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -236,17 +233,15 @@ github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.15.0 h1:4fgOnadei3EZvgRwxJ7RMpG1k1pOZth5Pc13tyspaKM= github.com/prometheus/common v0.15.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= -github.com/prometheus/common v0.17.0 h1:kDIZLI74SS+3tedSvEkykgBkD7txMxaJAPj8DtJUKYA= -github.com/prometheus/common v0.17.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.2.0 h1:wH4vA7pcjKuZzjF7lM8awk4fnuJO6idemZXoKnULUx4= github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/prometheus v2.2.1-0.20180315085919-58e2a31db8de+incompatible h1:jgW1I0kWFlDOqNLlYBcxVfpRGSOL3n6lXn1BykdEG30= github.com/prometheus/prometheus v2.2.1-0.20180315085919-58e2a31db8de+incompatible/go.mod h1:oAIUtOny2rjMX0OWN5vPR5/q/twIROJvdqnQKDdil/s= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= @@ -342,7 +337,6 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -368,9 +362,8 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201214210602-f9fddec55a1e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= -golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -400,7 +393,6 @@ google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= @@ -410,17 +402,13 @@ google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/ovsexporter/conntrack.go b/internal/ovsexporter/conntrack.go index b949127..e0f4390 100644 --- a/internal/ovsexporter/conntrack.go +++ b/internal/ovsexporter/conntrack.go @@ -1,37 +1,307 @@ package ovsexporter import ( + "context" "fmt" "log" + "math/rand" + "runtime" + "sync" + "time" "github.com/digitalocean/go-openvswitch/ovsnl" "github.com/prometheus/client_golang/prometheus" ) -type conntrackCollector struct { - Count *prometheus.Desc - listConntrackEntries func() ([]ovsnl.ConntrackEntry, error) +const ( + zoneThreshold = 50000 // Configure threshold for zone alerts (reduced for 2M test) + // Memory management for large conntrack tables + maxEntriesPerZone = 100 // Drastically reduced maximum entries to collect per zone to prevent OOM + largeZoneThreshold = 100000 // Use streaming approach for zones with >100k entries + // Memory pressure thresholds + memoryPressureThreshold = 0.8 // Trigger memory pressure handling when 80% of memory is used + // CPU time limits + maxCPUTimePerCollection = 60 * time.Second // Maximum CPU time per collection cycle + // Sampling configuration for large zones + sampleRateForLargeZones = 0.01 // Sample 1% of entries for zones > 1M entries + // Timeout configuration + conntrackTimeout = 30 * time.Second // Reduced timeout to prevent getting stuck + // Memory pressure logging cooldown + memoryPressureLogCooldown = 30 * time.Second // Prevent log spam + // Memory cleanup thresholds + memoryCleanupThreshold = 0.7 // Trigger aggressive cleanup at 70% usage + // Circuit breaker for performance regression + maxConsecutiveTimeouts = 3 // Stop processing after 3 consecutive timeouts +) + +var ( + lastMemoryPressureLog time.Time + consecutiveTimeouts int + lastTimeoutTime time.Time +) + +type ConntrackCollector struct { + Count *prometheus.Desc + Performance *prometheus.Desc + listZoneStats func(context.Context, int) (map[uint16]*ovsnl.ZoneStats, error) + getStats func() (*ovsnl.ConntrackPerformanceStats, error) } -func newConntrackCollector(fn func() ([]ovsnl.ConntrackEntry, error)) prometheus.Collector { - return &conntrackCollector{ +// ConntrackCollectorWithAggAccessor wraps the existing collector with access to the aggregator snapshot +type ConntrackCollectorWithAggAccessor struct { + *ConntrackCollector + SnapshotFunc func() map[uint16]map[uint32]int +} + +func newConntrackCollector(fn func(context.Context, int) (map[uint16]*ovsnl.ZoneStats, error), statsFn func() (*ovsnl.ConntrackPerformanceStats, error)) prometheus.Collector { + return &ConntrackCollector{ Count: prometheus.NewDesc( prometheus.BuildFQName(namespace, "conntrack", "count"), "Number of conntrack entries by zone, state, and mark", []string{"zone", "state", "mark"}, nil, ), - listConntrackEntries: fn, + Performance: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "conntrack", "performance"), + "Conntrack performance counters", + []string{"counter"}, nil, + ), + listZoneStats: fn, + getStats: statsFn, + } +} + +// checkCircuitBreaker checks if we should stop processing due to too many timeouts +func checkCircuitBreaker() bool { + now := time.Now() + + // Reset counter if more than 5 minutes have passed since last timeout + if now.Sub(lastTimeoutTime) > 5*time.Minute { + consecutiveTimeouts = 0 + return false + } + + // If we've had too many consecutive timeouts, stop processing + if consecutiveTimeouts >= maxConsecutiveTimeouts { + log.Printf("Circuit breaker triggered: %d consecutive timeouts, stopping conntrack collection", consecutiveTimeouts) + return true + } + + return false +} + +// checkMemoryPressure checks if we're under memory pressure and triggers GC if needed +func checkMemoryPressure() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + // Calculate memory usage percentage + memoryUsage := float64(m.Alloc) / float64(m.Sys) + + if memoryUsage > memoryPressureThreshold { + // Only log if enough time has passed since last log + if time.Since(lastMemoryPressureLog) > memoryPressureLogCooldown { + log.Printf("Memory pressure detected: %.2f%% usage, triggering GC", memoryUsage*100) + lastMemoryPressureLog = time.Now() + } + runtime.GC() + } else if memoryUsage > memoryCleanupThreshold { + // Aggressive cleanup at 70% usage + runtime.GC() + } +} + +// shouldSampleEntry determines if we should sample an entry based on zone size +func shouldSampleEntry(zoneTotalCount int, entryIndex int) bool { + if zoneTotalCount <= maxEntriesPerZone { + // For small zones, collect all entries + return true + } + + if zoneTotalCount > 1000000 { + // For very large zones (>1M), use statistical sampling + return rand.Float64() < sampleRateForLargeZones + } + + // For medium zones, collect first maxEntriesPerZone entries + return entryIndex < maxEntriesPerZone +} + +// checkCPUTime checks if we're exceeding CPU time limits +func checkCPUTime(startTime time.Time) bool { + elapsed := time.Since(startTime) + if elapsed > maxCPUTimePerCollection { + log.Printf("CPU time limit exceeded: %v elapsed, continuing with sampling", elapsed) + return true + } + return false +} + +// collectConntrackWithTimeout safely collects conntrack data with timeout protection +func (c *ConntrackCollector) collectConntrackWithTimeout(ctx context.Context, threshold int) (map[uint16]*ovsnl.ZoneStats, error) { + // Check circuit breaker first + if checkCircuitBreaker() { + log.Printf("Circuit breaker active, skipping conntrack collection") + return make(map[uint16]*ovsnl.ZoneStats), nil + } + + var result map[uint16]*ovsnl.ZoneStats + var err error + var mu sync.Mutex + var wg sync.WaitGroup + + // Create a timeout context + timeoutCtx, cancel := context.WithTimeout(ctx, conntrackTimeout) + defer cancel() + + // Start collection in a goroutine + wg.Add(1) + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + log.Printf("Panic in conntrack collection: %v", r) + err = fmt.Errorf("panic in conntrack collection: %v", r) + } + }() + + // Try streaming first, fallback to regular + if c.listZoneStats != nil { + result, err = c.listZoneStats(timeoutCtx, threshold) + } else { + // This case should ideally not be reached if listZoneStats is always set + err = fmt.Errorf("no listZoneStats function available") + } + + mu.Lock() + defer mu.Unlock() + }() + + // Wait for completion or timeout + done := make(chan struct{}) + go func() { + wg.Wait() + close(done) + }() + + select { + case <-done: + mu.Lock() + defer mu.Unlock() + // Reset timeout counter on success + consecutiveTimeouts = 0 + return result, err + case <-timeoutCtx.Done(): + // Track timeout + consecutiveTimeouts++ + lastTimeoutTime = time.Now() + log.Printf("Conntrack collection timed out after %v (timeout #%d), returning partial results", conntrackTimeout, consecutiveTimeouts) + // Force cleanup before returning + runtime.GC() + // Return empty result instead of error to prevent metric collection failure + return make(map[uint16]*ovsnl.ZoneStats), nil } } -func (c *conntrackCollector) Describe(ch chan<- *prometheus.Desc) { +func (c *ConntrackCollector) Describe(ch chan<- *prometheus.Desc) { ch <- c.Count + ch <- c.Performance } -func (c *conntrackCollector) Collect(ch chan<- prometheus.Metric) { - entries, err := c.listConntrackEntries() +func (c *ConntrackCollector) Collect(ch chan<- prometheus.Metric) { + startTime := time.Now() + ctx := context.Background() + + // Check memory pressure before starting + checkMemoryPressure() + + // Emergency shutdown if memory pressure is too high + var m runtime.MemStats + runtime.ReadMemStats(&m) + memoryUsage := float64(m.Alloc) / float64(m.Sys) + if memoryUsage > 0.85 { // 85% threshold for emergency shutdown + log.Printf("Emergency shutdown: memory usage %.2f%% too high, skipping conntrack collection", memoryUsage*100) + // Return basic metrics only + ch <- prometheus.MustNewConstMetric( + c.Count, + prometheus.GaugeValue, + 0.0, + "emergency", "shutdown", "0", + ) + return + } + + // Collect performance stats first (lightweight operation) + if c.getStats != nil { + if stats, err := c.getStats(); err == nil { + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalFound), + "found", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalInvalid), + "invalid", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalIgnore), + "ignore", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalInsert), + "insert", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalInsertFailed), + "insert_failed", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalDrop), + "drop", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalEarlyDrop), + "early_drop", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalError), + "error", + ) + ch <- prometheus.MustNewConstMetric( + c.Performance, + prometheus.GaugeValue, + float64(stats.TotalSearchRestart), + "search_restart", + ) + } else { + log.Printf("Failed to collect conntrack performance stats: %v", err) + } + } + + // Check memory pressure again before heavy operation + checkMemoryPressure() + + // Collect zone statistics with timeout protection + stats, err := c.collectConntrackWithTimeout(ctx, zoneThreshold) + if err != nil { log.Printf("Failed to collect conntrack entries: %v", err) + // Force cleanup on error + runtime.GC() // Return a zero metric to indicate the collector is working but no data ch <- prometheus.MustNewConstMetric( c.Count, @@ -42,34 +312,44 @@ func (c *conntrackCollector) Collect(ch chan<- prometheus.Metric) { return } - // Log the number of entries found for debugging - log.Printf("Found %d conntrack entries", len(entries)) - - // Aggregate counts - counts := make(map[string]map[string]map[string]int) - for _, e := range entries { - zone := fmt.Sprintf("%d", e.Zone) - state := e.State - mark := fmt.Sprintf("%d", e.Mark) - if counts[zone] == nil { - counts[zone] = make(map[string]map[string]int) - } - if counts[zone][state] == nil { - counts[zone][state] = make(map[string]int) - } - counts[zone][state][mark]++ + // Process zones using event-driven aggregator data + // This is much more efficient than the old sampling approach + for zone, zoneStats := range stats { + // Always emit total count for the zone (this is critical!) + ch <- prometheus.MustNewConstMetric( + c.Count, + prometheus.GaugeValue, + float64(zoneStats.TotalCount), + fmt.Sprint(zone), + "total", + "0", + ) } - for zone, stateMap := range counts { - for state, markMap := range stateMap { - for mark, count := range markMap { + // OPTIONAL: emit per-mark counts using the aggregator directly. + // This avoids storing per-entry slices and stays O(unique marks). + if aggClient, ok := any(c).(*ConntrackCollectorWithAggAccessor); ok { + zm := aggClient.SnapshotFunc() // <- we'll show how to plumb this accessor next + // To avoid high-cardinality explosion, you can cap marks per zone: + const maxMarksPerZone = 2000 // tune for your environment + for zone, markMap := range zm { + emitted := 0 + for mark, cnt := range markMap { + if emitted >= maxMarksPerZone { + break + } ch <- prometheus.MustNewConstMetric( c.Count, prometheus.GaugeValue, - float64(count), - zone, state, mark, + float64(cnt), + fmt.Sprint(zone), "total", fmt.Sprint(mark), ) + emitted++ } } } + + // Log collection time + elapsed := time.Since(startTime) + log.Printf("Conntrack collection completed in %v", elapsed) } diff --git a/internal/ovsexporter/ovsexporter.go b/internal/ovsexporter/ovsexporter.go index 8d19838..2ae1a4f 100644 --- a/internal/ovsexporter/ovsexporter.go +++ b/internal/ovsexporter/ovsexporter.go @@ -20,8 +20,9 @@ const ( // A collector aggregates Open vSwitch Prometheus collectors. type collector struct { - mu sync.Mutex - cs []prometheus.Collector + mu sync.Mutex + cs []prometheus.Collector + conntrackEnabled bool } var _ prometheus.Collector = &collector{} @@ -33,27 +34,50 @@ func New(c *ovsnl.Client) prometheus.Collector { newDatapathCollector(c.Datapath.List), } - // Try to add conntrack collector, but don't fail if it's not available - conntrackCollector := newConntrackCollector(func() ([]ovsnl.ConntrackEntry, error) { - svc, err := ovsnl.NewConntrackService() - if err != nil { - return nil, err - } - defer svc.Close() - return svc.List(context.Background()) - }) + // When you build the collector in New(...): + var snapshot func() map[uint16]map[uint32]int + if c.Agg != nil { + snapshot = c.Agg.Snapshot + } + base := newConntrackCollector( + // listZoneStats: + func(ctx context.Context, threshold int) (map[uint16]*ovsnl.ZoneStats, error) { + if c.Agg == nil { + return map[uint16]*ovsnl.ZoneStats{}, nil + } + zm := c.Agg.Snapshot() + + out := make(map[uint16]*ovsnl.ZoneStats, len(zm)) + for zone, marks := range zm { + total := 0 + for _, cnt := range marks { + total += cnt + } + // Always include the zone (so "total" time series is complete). + zs := &ovsnl.ZoneStats{TotalCount: total} + // No per-entry slice to avoid memory. + // If you still want per-mark metrics, do it in Collect directly using zm. + out[zone] = zs + _ = threshold // threshold is not used here; you can still filter if desired. + } + return out, nil + }, + // getStats: Disabled due to multicast connection issues + nil, // This will skip stats collection entirely + ) + conntrackCollector := &ConntrackCollectorWithAggAccessor{ + ConntrackCollector: base.(*ConntrackCollector), + SnapshotFunc: snapshot, + } - // Test if conntrack service can be created - if _, err := ovsnl.NewConntrackService(); err != nil { - log.Printf("Warning: Conntrack service not available: %v. Conntrack metrics will be disabled.", err) + if c.Conntrack == nil { + log.Printf("Warning: Conntrack service not available; metrics disabled.") } else { collectors = append(collectors, conntrackCollector) - log.Printf("Conntrack collector enabled") + log.Printf("Conntrack collector enabled (event-driven)") } - return &collector{ - cs: collectors, - } + return &collector{cs: collectors, conntrackEnabled: true} } // Describe implements prometheus.Collector. diff --git a/vendor/modules.txt b/vendor/modules.txt new file mode 100644 index 0000000..d76209f --- /dev/null +++ b/vendor/modules.txt @@ -0,0 +1,113 @@ +# github.com/beorn7/perks v1.0.1 +## explicit; go 1.11 +github.com/beorn7/perks/quantile +# github.com/cespare/xxhash/v2 v2.1.1 +## explicit; go 1.11 +github.com/cespare/xxhash/v2 +# github.com/digitalocean/go-openvswitch v0.0.0-20201214180534-ce0f183468d8 => ../go-openvswitch +## explicit; go 1.23.0 +github.com/digitalocean/go-openvswitch/ovsnl +github.com/digitalocean/go-openvswitch/ovsnl/internal/ovsh +# github.com/golang/protobuf v1.4.3 +## explicit; go 1.9 +github.com/golang/protobuf/proto +github.com/golang/protobuf/ptypes +github.com/golang/protobuf/ptypes/any +github.com/golang/protobuf/ptypes/duration +github.com/golang/protobuf/ptypes/timestamp +# github.com/google/go-cmp v0.7.0 +## explicit; go 1.21 +github.com/google/go-cmp/cmp +github.com/google/go-cmp/cmp/internal/diff +github.com/google/go-cmp/cmp/internal/flags +github.com/google/go-cmp/cmp/internal/function +github.com/google/go-cmp/cmp/internal/value +# github.com/josharian/native v1.1.0 +## explicit; go 1.13 +github.com/josharian/native +# github.com/matttproud/golang_protobuf_extensions v1.0.1 +## explicit +github.com/matttproud/golang_protobuf_extensions/pbutil +# github.com/mdlayher/genetlink v1.0.0 +## explicit; go 1.13 +github.com/mdlayher/genetlink +# github.com/mdlayher/netlink v1.7.2 +## explicit; go 1.18 +github.com/mdlayher/netlink +github.com/mdlayher/netlink/nlenc +# github.com/mdlayher/socket v0.5.1 +## explicit; go 1.20 +github.com/mdlayher/socket +# github.com/pkg/errors v0.9.1 +## explicit +github.com/pkg/errors +# github.com/prometheus/client_golang v1.9.0 +## explicit; go 1.11 +github.com/prometheus/client_golang/prometheus +github.com/prometheus/client_golang/prometheus/internal +github.com/prometheus/client_golang/prometheus/promhttp +# github.com/prometheus/client_model v0.2.0 +## explicit; go 1.9 +github.com/prometheus/client_model/go +# github.com/prometheus/common v0.15.0 +## explicit; go 1.11 +github.com/prometheus/common/expfmt +github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg +github.com/prometheus/common/model +# github.com/prometheus/procfs v0.2.0 +## explicit; go 1.12 +github.com/prometheus/procfs +github.com/prometheus/procfs/internal/fs +github.com/prometheus/procfs/internal/util +# github.com/prometheus/prometheus v2.2.1-0.20180315085919-58e2a31db8de+incompatible +## explicit +github.com/prometheus/prometheus/util/promlint +# github.com/ti-mo/conntrack v0.5.2 +## explicit; go 1.23.0 +github.com/ti-mo/conntrack +# github.com/ti-mo/netfilter v0.5.3 +## explicit; go 1.23.0 +github.com/ti-mo/netfilter +# golang.org/x/net v0.39.0 +## explicit; go 1.23.0 +golang.org/x/net/bpf +# golang.org/x/sync v0.14.0 +## explicit; go 1.23.0 +golang.org/x/sync/errgroup +# golang.org/x/sys v0.35.0 +## explicit; go 1.23.0 +golang.org/x/sys/unix +golang.org/x/sys/windows +# google.golang.org/protobuf v1.23.0 +## explicit; go 1.9 +google.golang.org/protobuf/encoding/prototext +google.golang.org/protobuf/encoding/protowire +google.golang.org/protobuf/internal/descfmt +google.golang.org/protobuf/internal/descopts +google.golang.org/protobuf/internal/detrand +google.golang.org/protobuf/internal/encoding/defval +google.golang.org/protobuf/internal/encoding/messageset +google.golang.org/protobuf/internal/encoding/tag +google.golang.org/protobuf/internal/encoding/text +google.golang.org/protobuf/internal/errors +google.golang.org/protobuf/internal/fieldnum +google.golang.org/protobuf/internal/fieldsort +google.golang.org/protobuf/internal/filedesc +google.golang.org/protobuf/internal/filetype +google.golang.org/protobuf/internal/flags +google.golang.org/protobuf/internal/genname +google.golang.org/protobuf/internal/impl +google.golang.org/protobuf/internal/mapsort +google.golang.org/protobuf/internal/pragma +google.golang.org/protobuf/internal/set +google.golang.org/protobuf/internal/strs +google.golang.org/protobuf/internal/version +google.golang.org/protobuf/proto +google.golang.org/protobuf/reflect/protoreflect +google.golang.org/protobuf/reflect/protoregistry +google.golang.org/protobuf/runtime/protoiface +google.golang.org/protobuf/runtime/protoimpl +google.golang.org/protobuf/types/known/anypb +google.golang.org/protobuf/types/known/durationpb +google.golang.org/protobuf/types/known/timestamppb +# github.com/digitalocean/go-openvswitch => ../go-openvswitch From 30b6d5819256111694d62c13a72d0e70fbea0e16 Mon Sep 17 00:00:00 2001 From: sgangopadhyay Date: Wed, 3 Sep 2025 01:28:14 +0530 Subject: [PATCH 3/4] code cleanup --- internal/ovsexporter/conntrack.go | 345 +++------------------------- internal/ovsexporter/ovsexporter.go | 65 +++--- vendor/modules.txt | 113 --------- 3 files changed, 52 insertions(+), 471 deletions(-) delete mode 100644 vendor/modules.txt diff --git a/internal/ovsexporter/conntrack.go b/internal/ovsexporter/conntrack.go index e0f4390..f6f4938 100644 --- a/internal/ovsexporter/conntrack.go +++ b/internal/ovsexporter/conntrack.go @@ -1,355 +1,62 @@ package ovsexporter import ( - "context" "fmt" "log" - "math/rand" - "runtime" - "sync" - "time" "github.com/digitalocean/go-openvswitch/ovsnl" "github.com/prometheus/client_golang/prometheus" ) -const ( - zoneThreshold = 50000 // Configure threshold for zone alerts (reduced for 2M test) - // Memory management for large conntrack tables - maxEntriesPerZone = 100 // Drastically reduced maximum entries to collect per zone to prevent OOM - largeZoneThreshold = 100000 // Use streaming approach for zones with >100k entries - // Memory pressure thresholds - memoryPressureThreshold = 0.8 // Trigger memory pressure handling when 80% of memory is used - // CPU time limits - maxCPUTimePerCollection = 60 * time.Second // Maximum CPU time per collection cycle - // Sampling configuration for large zones - sampleRateForLargeZones = 0.01 // Sample 1% of entries for zones > 1M entries - // Timeout configuration - conntrackTimeout = 30 * time.Second // Reduced timeout to prevent getting stuck - // Memory pressure logging cooldown - memoryPressureLogCooldown = 30 * time.Second // Prevent log spam - // Memory cleanup thresholds - memoryCleanupThreshold = 0.7 // Trigger aggressive cleanup at 70% usage - // Circuit breaker for performance regression - maxConsecutiveTimeouts = 3 // Stop processing after 3 consecutive timeouts -) - -var ( - lastMemoryPressureLog time.Time - consecutiveTimeouts int - lastTimeoutTime time.Time -) - -type ConntrackCollector struct { - Count *prometheus.Desc - Performance *prometheus.Desc - listZoneStats func(context.Context, int) (map[uint16]*ovsnl.ZoneStats, error) - getStats func() (*ovsnl.ConntrackPerformanceStats, error) +type conntrackCollector struct { + desc *prometheus.Desc + agg *ovsnl.ZoneMarkAggregator } // ConntrackCollectorWithAggAccessor wraps the existing collector with access to the aggregator snapshot type ConntrackCollectorWithAggAccessor struct { - *ConntrackCollector + *conntrackCollector SnapshotFunc func() map[uint16]map[uint32]int } -func newConntrackCollector(fn func(context.Context, int) (map[uint16]*ovsnl.ZoneStats, error), statsFn func() (*ovsnl.ConntrackPerformanceStats, error)) prometheus.Collector { - return &ConntrackCollector{ - Count: prometheus.NewDesc( +func newConntrackCollector(agg *ovsnl.ZoneMarkAggregator) prometheus.Collector { + return &conntrackCollector{ + desc: prometheus.NewDesc( prometheus.BuildFQName(namespace, "conntrack", "count"), - "Number of conntrack entries by zone, state, and mark", - []string{"zone", "state", "mark"}, nil, - ), - Performance: prometheus.NewDesc( - prometheus.BuildFQName(namespace, "conntrack", "performance"), - "Conntrack performance counters", - []string{"counter"}, nil, + "Number of conntrack entries by zone and mark", + []string{"zone", "mark"}, + nil, ), - listZoneStats: fn, - getStats: statsFn, + agg: agg, } } -// checkCircuitBreaker checks if we should stop processing due to too many timeouts -func checkCircuitBreaker() bool { - now := time.Now() - - // Reset counter if more than 5 minutes have passed since last timeout - if now.Sub(lastTimeoutTime) > 5*time.Minute { - consecutiveTimeouts = 0 - return false - } - - // If we've had too many consecutive timeouts, stop processing - if consecutiveTimeouts >= maxConsecutiveTimeouts { - log.Printf("Circuit breaker triggered: %d consecutive timeouts, stopping conntrack collection", consecutiveTimeouts) - return true - } - - return false +func (c *conntrackCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.desc } -// checkMemoryPressure checks if we're under memory pressure and triggers GC if needed -func checkMemoryPressure() { - var m runtime.MemStats - runtime.ReadMemStats(&m) - - // Calculate memory usage percentage - memoryUsage := float64(m.Alloc) / float64(m.Sys) - - if memoryUsage > memoryPressureThreshold { - // Only log if enough time has passed since last log - if time.Since(lastMemoryPressureLog) > memoryPressureLogCooldown { - log.Printf("Memory pressure detected: %.2f%% usage, triggering GC", memoryUsage*100) - lastMemoryPressureLog = time.Now() - } - runtime.GC() - } else if memoryUsage > memoryCleanupThreshold { - // Aggressive cleanup at 70% usage - runtime.GC() - } -} - -// shouldSampleEntry determines if we should sample an entry based on zone size -func shouldSampleEntry(zoneTotalCount int, entryIndex int) bool { - if zoneTotalCount <= maxEntriesPerZone { - // For small zones, collect all entries - return true - } - - if zoneTotalCount > 1000000 { - // For very large zones (>1M), use statistical sampling - return rand.Float64() < sampleRateForLargeZones - } - - // For medium zones, collect first maxEntriesPerZone entries - return entryIndex < maxEntriesPerZone -} - -// checkCPUTime checks if we're exceeding CPU time limits -func checkCPUTime(startTime time.Time) bool { - elapsed := time.Since(startTime) - if elapsed > maxCPUTimePerCollection { - log.Printf("CPU time limit exceeded: %v elapsed, continuing with sampling", elapsed) - return true - } - return false -} - -// collectConntrackWithTimeout safely collects conntrack data with timeout protection -func (c *ConntrackCollector) collectConntrackWithTimeout(ctx context.Context, threshold int) (map[uint16]*ovsnl.ZoneStats, error) { - // Check circuit breaker first - if checkCircuitBreaker() { - log.Printf("Circuit breaker active, skipping conntrack collection") - return make(map[uint16]*ovsnl.ZoneStats), nil - } - - var result map[uint16]*ovsnl.ZoneStats - var err error - var mu sync.Mutex - var wg sync.WaitGroup - - // Create a timeout context - timeoutCtx, cancel := context.WithTimeout(ctx, conntrackTimeout) - defer cancel() - - // Start collection in a goroutine - wg.Add(1) - go func() { - defer wg.Done() - defer func() { - if r := recover(); r != nil { - log.Printf("Panic in conntrack collection: %v", r) - err = fmt.Errorf("panic in conntrack collection: %v", r) - } - }() - - // Try streaming first, fallback to regular - if c.listZoneStats != nil { - result, err = c.listZoneStats(timeoutCtx, threshold) - } else { - // This case should ideally not be reached if listZoneStats is always set - err = fmt.Errorf("no listZoneStats function available") - } - - mu.Lock() - defer mu.Unlock() - }() - - // Wait for completion or timeout - done := make(chan struct{}) - go func() { - wg.Wait() - close(done) - }() - - select { - case <-done: - mu.Lock() - defer mu.Unlock() - // Reset timeout counter on success - consecutiveTimeouts = 0 - return result, err - case <-timeoutCtx.Done(): - // Track timeout - consecutiveTimeouts++ - lastTimeoutTime = time.Now() - log.Printf("Conntrack collection timed out after %v (timeout #%d), returning partial results", conntrackTimeout, consecutiveTimeouts) - // Force cleanup before returning - runtime.GC() - // Return empty result instead of error to prevent metric collection failure - return make(map[uint16]*ovsnl.ZoneStats), nil - } -} - -func (c *ConntrackCollector) Describe(ch chan<- *prometheus.Desc) { - ch <- c.Count - ch <- c.Performance -} - -func (c *ConntrackCollector) Collect(ch chan<- prometheus.Metric) { - startTime := time.Now() - ctx := context.Background() - - // Check memory pressure before starting - checkMemoryPressure() - - // Emergency shutdown if memory pressure is too high - var m runtime.MemStats - runtime.ReadMemStats(&m) - memoryUsage := float64(m.Alloc) / float64(m.Sys) - if memoryUsage > 0.85 { // 85% threshold for emergency shutdown - log.Printf("Emergency shutdown: memory usage %.2f%% too high, skipping conntrack collection", memoryUsage*100) - // Return basic metrics only +func (c *conntrackCollector) Collect(ch chan<- prometheus.Metric) { + if c.agg == nil { + log.Printf("No aggregator available, emitting zero metric") ch <- prometheus.MustNewConstMetric( - c.Count, + c.desc, prometheus.GaugeValue, - 0.0, - "emergency", "shutdown", "0", + 0, + "unknown", "unknown", ) return } - // Collect performance stats first (lightweight operation) - if c.getStats != nil { - if stats, err := c.getStats(); err == nil { + snapshot := c.agg.Snapshot() + for zone, marks := range snapshot { + for mark, count := range marks { ch <- prometheus.MustNewConstMetric( - c.Performance, + c.desc, prometheus.GaugeValue, - float64(stats.TotalFound), - "found", + float64(count), + fmt.Sprintf("%d", zone), + fmt.Sprintf("%d", mark), ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalInvalid), - "invalid", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalIgnore), - "ignore", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalInsert), - "insert", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalInsertFailed), - "insert_failed", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalDrop), - "drop", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalEarlyDrop), - "early_drop", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalError), - "error", - ) - ch <- prometheus.MustNewConstMetric( - c.Performance, - prometheus.GaugeValue, - float64(stats.TotalSearchRestart), - "search_restart", - ) - } else { - log.Printf("Failed to collect conntrack performance stats: %v", err) - } - } - - // Check memory pressure again before heavy operation - checkMemoryPressure() - - // Collect zone statistics with timeout protection - stats, err := c.collectConntrackWithTimeout(ctx, zoneThreshold) - - if err != nil { - log.Printf("Failed to collect conntrack entries: %v", err) - // Force cleanup on error - runtime.GC() - // Return a zero metric to indicate the collector is working but no data - ch <- prometheus.MustNewConstMetric( - c.Count, - prometheus.GaugeValue, - 0.0, - "unknown", "unknown", "0", - ) - return - } - - // Process zones using event-driven aggregator data - // This is much more efficient than the old sampling approach - for zone, zoneStats := range stats { - // Always emit total count for the zone (this is critical!) - ch <- prometheus.MustNewConstMetric( - c.Count, - prometheus.GaugeValue, - float64(zoneStats.TotalCount), - fmt.Sprint(zone), - "total", - "0", - ) - } - - // OPTIONAL: emit per-mark counts using the aggregator directly. - // This avoids storing per-entry slices and stays O(unique marks). - if aggClient, ok := any(c).(*ConntrackCollectorWithAggAccessor); ok { - zm := aggClient.SnapshotFunc() // <- we'll show how to plumb this accessor next - // To avoid high-cardinality explosion, you can cap marks per zone: - const maxMarksPerZone = 2000 // tune for your environment - for zone, markMap := range zm { - emitted := 0 - for mark, cnt := range markMap { - if emitted >= maxMarksPerZone { - break - } - ch <- prometheus.MustNewConstMetric( - c.Count, - prometheus.GaugeValue, - float64(cnt), - fmt.Sprint(zone), "total", fmt.Sprint(mark), - ) - emitted++ - } } } - - // Log collection time - elapsed := time.Since(startTime) - log.Printf("Conntrack collection completed in %v", elapsed) } diff --git a/internal/ovsexporter/ovsexporter.go b/internal/ovsexporter/ovsexporter.go index 2ae1a4f..d1531c9 100644 --- a/internal/ovsexporter/ovsexporter.go +++ b/internal/ovsexporter/ovsexporter.go @@ -6,10 +6,11 @@ package ovsexporter import ( - "context" "log" "sync" + // "time" + "github.com/digitalocean/go-openvswitch/ovsnl" "github.com/prometheus/client_golang/prometheus" ) @@ -25,6 +26,7 @@ type collector struct { conntrackEnabled bool } +// Make sure collector implements prometheus.Collector var _ prometheus.Collector = &collector{} // New creates a new Prometheus collector which collects metrics using the @@ -34,50 +36,35 @@ func New(c *ovsnl.Client) prometheus.Collector { newDatapathCollector(c.Datapath.List), } - // When you build the collector in New(...): - var snapshot func() map[uint16]map[uint32]int - if c.Agg != nil { - snapshot = c.Agg.Snapshot + // Start zone/mark aggregator + svc, err := ovsnl.NewConntrackService() + if err != nil { + log.Printf("Warning: Conntrack service not available: %v", err) + return &collector{cs: collectors} } - base := newConntrackCollector( - // listZoneStats: - func(ctx context.Context, threshold int) (map[uint16]*ovsnl.ZoneStats, error) { - if c.Agg == nil { - return map[uint16]*ovsnl.ZoneStats{}, nil - } - zm := c.Agg.Snapshot() - - out := make(map[uint16]*ovsnl.ZoneStats, len(zm)) - for zone, marks := range zm { - total := 0 - for _, cnt := range marks { - total += cnt - } - // Always include the zone (so "total" time series is complete). - zs := &ovsnl.ZoneStats{TotalCount: total} - // No per-entry slice to avoid memory. - // If you still want per-mark metrics, do it in Collect directly using zm. - out[zone] = zs - _ = threshold // threshold is not used here; you can still filter if desired. - } - return out, nil - }, - // getStats: Disabled due to multicast connection issues - nil, // This will skip stats collection entirely - ) - conntrackCollector := &ConntrackCollectorWithAggAccessor{ - ConntrackCollector: base.(*ConntrackCollector), - SnapshotFunc: snapshot, + + agg, err := ovsnl.NewZoneMarkAggregator(svc) + if err != nil { + log.Printf("Warning: Failed to create zone/mark aggregator: %v", err) + return &collector{cs: collectors} } + //TODO : To confirm if we absolutely need this, can omit if eventual consistency is ok - if c.Conntrack == nil { - log.Printf("Warning: Conntrack service not available; metrics disabled.") + // if err := agg.PrimeSnapshot(context.Background(), 0); err != nil { + // log.Printf("Warning: Failed to prime snapshot: %v", err) + // } + if err := agg.Start(); err != nil { + log.Printf("Warning: Failed to start zone/mark aggregator: %v", err) } else { - collectors = append(collectors, conntrackCollector) - log.Printf("Conntrack collector enabled (event-driven)") + log.Printf("Conntrack zone/mark aggregator started") } - return &collector{cs: collectors, conntrackEnabled: true} + collectors = append(collectors, newConntrackCollector(agg)) + + return &collector{ + cs: collectors, + conntrackEnabled: true, + } } // Describe implements prometheus.Collector. diff --git a/vendor/modules.txt b/vendor/modules.txt deleted file mode 100644 index d76209f..0000000 --- a/vendor/modules.txt +++ /dev/null @@ -1,113 +0,0 @@ -# github.com/beorn7/perks v1.0.1 -## explicit; go 1.11 -github.com/beorn7/perks/quantile -# github.com/cespare/xxhash/v2 v2.1.1 -## explicit; go 1.11 -github.com/cespare/xxhash/v2 -# github.com/digitalocean/go-openvswitch v0.0.0-20201214180534-ce0f183468d8 => ../go-openvswitch -## explicit; go 1.23.0 -github.com/digitalocean/go-openvswitch/ovsnl -github.com/digitalocean/go-openvswitch/ovsnl/internal/ovsh -# github.com/golang/protobuf v1.4.3 -## explicit; go 1.9 -github.com/golang/protobuf/proto -github.com/golang/protobuf/ptypes -github.com/golang/protobuf/ptypes/any -github.com/golang/protobuf/ptypes/duration -github.com/golang/protobuf/ptypes/timestamp -# github.com/google/go-cmp v0.7.0 -## explicit; go 1.21 -github.com/google/go-cmp/cmp -github.com/google/go-cmp/cmp/internal/diff -github.com/google/go-cmp/cmp/internal/flags -github.com/google/go-cmp/cmp/internal/function -github.com/google/go-cmp/cmp/internal/value -# github.com/josharian/native v1.1.0 -## explicit; go 1.13 -github.com/josharian/native -# github.com/matttproud/golang_protobuf_extensions v1.0.1 -## explicit -github.com/matttproud/golang_protobuf_extensions/pbutil -# github.com/mdlayher/genetlink v1.0.0 -## explicit; go 1.13 -github.com/mdlayher/genetlink -# github.com/mdlayher/netlink v1.7.2 -## explicit; go 1.18 -github.com/mdlayher/netlink -github.com/mdlayher/netlink/nlenc -# github.com/mdlayher/socket v0.5.1 -## explicit; go 1.20 -github.com/mdlayher/socket -# github.com/pkg/errors v0.9.1 -## explicit -github.com/pkg/errors -# github.com/prometheus/client_golang v1.9.0 -## explicit; go 1.11 -github.com/prometheus/client_golang/prometheus -github.com/prometheus/client_golang/prometheus/internal -github.com/prometheus/client_golang/prometheus/promhttp -# github.com/prometheus/client_model v0.2.0 -## explicit; go 1.9 -github.com/prometheus/client_model/go -# github.com/prometheus/common v0.15.0 -## explicit; go 1.11 -github.com/prometheus/common/expfmt -github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg -github.com/prometheus/common/model -# github.com/prometheus/procfs v0.2.0 -## explicit; go 1.12 -github.com/prometheus/procfs -github.com/prometheus/procfs/internal/fs -github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v2.2.1-0.20180315085919-58e2a31db8de+incompatible -## explicit -github.com/prometheus/prometheus/util/promlint -# github.com/ti-mo/conntrack v0.5.2 -## explicit; go 1.23.0 -github.com/ti-mo/conntrack -# github.com/ti-mo/netfilter v0.5.3 -## explicit; go 1.23.0 -github.com/ti-mo/netfilter -# golang.org/x/net v0.39.0 -## explicit; go 1.23.0 -golang.org/x/net/bpf -# golang.org/x/sync v0.14.0 -## explicit; go 1.23.0 -golang.org/x/sync/errgroup -# golang.org/x/sys v0.35.0 -## explicit; go 1.23.0 -golang.org/x/sys/unix -golang.org/x/sys/windows -# google.golang.org/protobuf v1.23.0 -## explicit; go 1.9 -google.golang.org/protobuf/encoding/prototext -google.golang.org/protobuf/encoding/protowire -google.golang.org/protobuf/internal/descfmt -google.golang.org/protobuf/internal/descopts -google.golang.org/protobuf/internal/detrand -google.golang.org/protobuf/internal/encoding/defval -google.golang.org/protobuf/internal/encoding/messageset -google.golang.org/protobuf/internal/encoding/tag -google.golang.org/protobuf/internal/encoding/text -google.golang.org/protobuf/internal/errors -google.golang.org/protobuf/internal/fieldnum -google.golang.org/protobuf/internal/fieldsort -google.golang.org/protobuf/internal/filedesc -google.golang.org/protobuf/internal/filetype -google.golang.org/protobuf/internal/flags -google.golang.org/protobuf/internal/genname -google.golang.org/protobuf/internal/impl -google.golang.org/protobuf/internal/mapsort -google.golang.org/protobuf/internal/pragma -google.golang.org/protobuf/internal/set -google.golang.org/protobuf/internal/strs -google.golang.org/protobuf/internal/version -google.golang.org/protobuf/proto -google.golang.org/protobuf/reflect/protoreflect -google.golang.org/protobuf/reflect/protoregistry -google.golang.org/protobuf/runtime/protoiface -google.golang.org/protobuf/runtime/protoimpl -google.golang.org/protobuf/types/known/anypb -google.golang.org/protobuf/types/known/durationpb -google.golang.org/protobuf/types/known/timestamppb -# github.com/digitalocean/go-openvswitch => ../go-openvswitch From 79cd030ed3724080247650a31d3913808658dbd3 Mon Sep 17 00:00:00 2001 From: sgangopadhyay Date: Mon, 22 Sep 2025 20:43:44 +0530 Subject: [PATCH 4/4] conntrack destroy rate limiting part 1 --- internal/ovsexporter/ovsexporter.go | 33 +++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/internal/ovsexporter/ovsexporter.go b/internal/ovsexporter/ovsexporter.go index d1531c9..5cfa9c9 100644 --- a/internal/ovsexporter/ovsexporter.go +++ b/internal/ovsexporter/ovsexporter.go @@ -24,6 +24,7 @@ type collector struct { mu sync.Mutex cs []prometheus.Collector conntrackEnabled bool + agg *ovsnl.ZoneMarkAggregator } // Make sure collector implements prometheus.Collector @@ -36,34 +37,32 @@ func New(c *ovsnl.Client) prometheus.Collector { newDatapathCollector(c.Datapath.List), } - // Start zone/mark aggregator - svc, err := ovsnl.NewConntrackService() - if err != nil { - log.Printf("Warning: Conntrack service not available: %v", err) + // Create the aggregator using the client's ConntrackService + if c.Conntrack == nil { + log.Printf("Warning: Conntrack service not available in client") return &collector{cs: collectors} } - agg, err := ovsnl.NewZoneMarkAggregator(svc) + agg, err := ovsnl.NewZoneMarkAggregator(c.Conntrack) if err != nil { log.Printf("Warning: Failed to create zone/mark aggregator: %v", err) return &collector{cs: collectors} } - //TODO : To confirm if we absolutely need this, can omit if eventual consistency is ok - // if err := agg.PrimeSnapshot(context.Background(), 0); err != nil { - // log.Printf("Warning: Failed to prime snapshot: %v", err) - // } + // Start the aggregator if err := agg.Start(); err != nil { log.Printf("Warning: Failed to start zone/mark aggregator: %v", err) - } else { - log.Printf("Conntrack zone/mark aggregator started") + return &collector{cs: collectors} } + log.Printf("Enhanced conntrack zone/mark aggregator started with adaptive sync") + collectors = append(collectors, newConntrackCollector(agg)) return &collector{ cs: collectors, conntrackEnabled: true, + agg: agg, } } @@ -86,3 +85,15 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { cc.Collect(ch) } } + +// Close cleans up resources +func (c *collector) Close() { + c.mu.Lock() + defer c.mu.Unlock() + + if c.agg != nil { + log.Printf("Stopping conntrack aggregator...") + c.agg.Stop() + c.agg = nil + } +}