From 9adefdceeb6a6467bda2222f124e72f6a8081e36 Mon Sep 17 00:00:00 2001 From: Tatenda Zifudzi Date: Sat, 3 Jun 2023 16:22:17 -0700 Subject: [PATCH] fix: Resolve missing container network metrics by querying from HNS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fix ensures that network stats for containerd on Windows are successfully collected. Before this change, other container stats such as CPU and memory were successfully collected, but network stats were failing for containerd. The root cause is that the code for collecting network stats was originally written to work with docker which relies on v1 schema. After dockershim removal as Kubernetes's container runtime, containerd adoption has increased and this error is more frequently encountered when using containerd as the runtime. containerd uses v2 schema whereby the network stats need to be queried from the HNS component. Signed-off-by: Tatenda Zifudzi --- collector/container.go | 110 +++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 42 deletions(-) diff --git a/collector/container.go b/collector/container.go index 27967045f..5f559ea5c 100644 --- a/collector/container.go +++ b/collector/container.go @@ -4,6 +4,9 @@ package collector import ( + "fmt" + "strings" + "github.com/Microsoft/hcsshim" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -199,6 +202,8 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome return nil, nil } + containerPrefixes := make(map[string]string) + for _, containerDetails := range containers { container, err := hcsshim.OpenContainer(containerDetails.ID) if container != nil { @@ -214,7 +219,9 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome _ = level.Error(c.logger).Log("msg", "err in fetching container Statistics", "containerId", containerDetails.ID, "err", err) continue } + containerIdWithPrefix := 
getContainerIdWithPrefix(containerDetails) + containerPrefixes[containerDetails.ID] = containerIdWithPrefix ch <- prometheus.MustNewConstMetric( c.ContainerAvailable, @@ -258,78 +265,97 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome float64(cstats.Processor.RuntimeKernel100ns)*ticksToSecondsScaleFactor, containerIdWithPrefix, ) + ch <- prometheus.MustNewConstMetric( + c.ReadCountNormalized, + prometheus.CounterValue, + float64(cstats.Storage.ReadCountNormalized), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.ReadSizeBytes, + prometheus.CounterValue, + float64(cstats.Storage.ReadSizeBytes), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.WriteCountNormalized, + prometheus.CounterValue, + float64(cstats.Storage.WriteCountNormalized), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.WriteSizeBytes, + prometheus.CounterValue, + float64(cstats.Storage.WriteSizeBytes), + containerIdWithPrefix, + ) + } + + hnsEndpoints, err := hcsshim.HNSListEndpointRequest() + if err != nil { + _ = level.Warn(c.logger).Log("msg", "Failed to collect network stats for containers") + return nil, nil + } - if len(cstats.Network) == 0 { - _ = level.Info(c.logger).Log("msg", "No Network Stats for container", "containerId", containerDetails.ID) + if len(hnsEndpoints) == 0 { + _ = level.Info(c.logger).Log("msg", fmt.Sprintf("No network stats for containers to collect")) + return nil, nil + } + + for _, endpoint := range hnsEndpoints { + endpointStats, err := hcsshim.GetHNSEndpointStats(endpoint.Id) + if err != nil { + _ = level.Warn(c.logger).Log("msg", fmt.Sprintf("Failed to collect network stats for interface %s", endpoint.Id), "err", err) continue } - networkStats := cstats.Network + for _, containerId := range endpoint.SharedContainers { + containerIdWithPrefix, ok := containerPrefixes[containerId] + endpointId := strings.ToUpper(endpoint.Id) + + if !ok { + _ = 
level.Warn(c.logger).Log("msg", fmt.Sprintf("Failed to collect network stats for container %s", containerId)) + continue + } - for _, networkInterface := range networkStats { ch <- prometheus.MustNewConstMetric( c.BytesReceived, prometheus.CounterValue, - float64(networkInterface.BytesReceived), - containerIdWithPrefix, networkInterface.EndpointId, + float64(endpointStats.BytesReceived), + containerIdWithPrefix, endpointId, ) + ch <- prometheus.MustNewConstMetric( c.BytesSent, prometheus.CounterValue, - float64(networkInterface.BytesSent), - containerIdWithPrefix, networkInterface.EndpointId, + float64(endpointStats.BytesSent), + containerIdWithPrefix, endpointId, ) ch <- prometheus.MustNewConstMetric( c.PacketsReceived, prometheus.CounterValue, - float64(networkInterface.PacketsReceived), - containerIdWithPrefix, networkInterface.EndpointId, + float64(endpointStats.PacketsReceived), + containerIdWithPrefix, endpointId, ) ch <- prometheus.MustNewConstMetric( c.PacketsSent, prometheus.CounterValue, - float64(networkInterface.PacketsSent), - containerIdWithPrefix, networkInterface.EndpointId, + float64(endpointStats.PacketsSent), + containerIdWithPrefix, endpointId, ) ch <- prometheus.MustNewConstMetric( c.DroppedPacketsIncoming, prometheus.CounterValue, - float64(networkInterface.DroppedPacketsIncoming), - containerIdWithPrefix, networkInterface.EndpointId, + float64(endpointStats.DroppedPacketsIncoming), + containerIdWithPrefix, endpointId, ) ch <- prometheus.MustNewConstMetric( c.DroppedPacketsOutgoing, prometheus.CounterValue, - float64(networkInterface.DroppedPacketsOutgoing), - containerIdWithPrefix, networkInterface.EndpointId, + float64(endpointStats.DroppedPacketsOutgoing), + containerIdWithPrefix, endpointId, ) - break } - - ch <- prometheus.MustNewConstMetric( - c.ReadCountNormalized, - prometheus.CounterValue, - float64(cstats.Storage.ReadCountNormalized), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.ReadSizeBytes, - 
prometheus.CounterValue, - float64(cstats.Storage.ReadSizeBytes), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.WriteCountNormalized, - prometheus.CounterValue, - float64(cstats.Storage.WriteCountNormalized), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.WriteSizeBytes, - prometheus.CounterValue, - float64(cstats.Storage.WriteSizeBytes), - containerIdWithPrefix, - ) } return nil, nil