Skip to content

Commit

Permalink
fix: Resolve missing container network metrics by querying from HNS
Browse files Browse the repository at this point in the history
This fix ensures that network stats for containerd on Windows are successfully collected. Before this change, other container stats such as CPU and memory were collected successfully, but network stats collection was failing for containerd.

The root cause is that the code for collecting network stats was originally written to work with Docker, which relies on the v1 schema. Since the removal of dockershim as a Kubernetes container runtime, containerd adoption has increased, and this error is encountered more frequently when containerd is used as the runtime. containerd uses the v2 schema, under which the network stats need to be queried from the HNS component.

Signed-off-by: Tatenda Zifudzi <tzifudzi@amazon.com>
  • Loading branch information
tzifudzi committed Jul 20, 2023
1 parent 89cb543 commit 9adefdc
Showing 1 changed file with 68 additions and 42 deletions.
110 changes: 68 additions & 42 deletions collector/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
package collector

import (
"fmt"
"strings"

"github.com/Microsoft/hcsshim"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
Expand Down Expand Up @@ -199,6 +202,8 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome
return nil, nil
}

containerPrefixes := make(map[string]string)

for _, containerDetails := range containers {
container, err := hcsshim.OpenContainer(containerDetails.ID)
if container != nil {
Expand All @@ -214,7 +219,9 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome
_ = level.Error(c.logger).Log("msg", "err in fetching container Statistics", "containerId", containerDetails.ID, "err", err)
continue
}

containerIdWithPrefix := getContainerIdWithPrefix(containerDetails)
containerPrefixes[containerDetails.ID] = containerIdWithPrefix

ch <- prometheus.MustNewConstMetric(
c.ContainerAvailable,
Expand Down Expand Up @@ -258,78 +265,97 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome
float64(cstats.Processor.RuntimeKernel100ns)*ticksToSecondsScaleFactor,
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.ReadCountNormalized,
prometheus.CounterValue,
float64(cstats.Storage.ReadCountNormalized),
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.ReadSizeBytes,
prometheus.CounterValue,
float64(cstats.Storage.ReadSizeBytes),
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.WriteCountNormalized,
prometheus.CounterValue,
float64(cstats.Storage.WriteCountNormalized),
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.WriteSizeBytes,
prometheus.CounterValue,
float64(cstats.Storage.WriteSizeBytes),
containerIdWithPrefix,
)
}

hnsEndpoints, err := hcsshim.HNSListEndpointRequest()
if err != nil {
_ = level.Warn(c.logger).Log("msg", "Failed to collect network stats for containers")
return nil, nil
}

if len(cstats.Network) == 0 {
_ = level.Info(c.logger).Log("msg", "No Network Stats for container", "containerId", containerDetails.ID)
if len(hnsEndpoints) == 0 {
_ = level.Info(c.logger).Log("msg", fmt.Sprintf("No network stats for containers to collect"))
return nil, nil
}

for _, endpoint := range hnsEndpoints {
endpointStats, err := hcsshim.GetHNSEndpointStats(endpoint.Id)
if err != nil {
_ = level.Warn(c.logger).Log("msg", fmt.Sprintf("Failed to collect network stats for interface %s", endpoint.Id), "err", err)
continue
}

networkStats := cstats.Network
for _, containerId := range endpoint.SharedContainers {
containerIdWithPrefix, ok := containerPrefixes[containerId]
endpointId := strings.ToUpper(endpoint.Id)

if !ok {
_ = level.Warn(c.logger).Log("msg", fmt.Sprintf("Failed to collect network stats for container %s", containerId))
continue
}

for _, networkInterface := range networkStats {
ch <- prometheus.MustNewConstMetric(
c.BytesReceived,
prometheus.CounterValue,
float64(networkInterface.BytesReceived),
containerIdWithPrefix, networkInterface.EndpointId,
float64(endpointStats.BytesReceived),
containerIdWithPrefix, endpointId,
)

ch <- prometheus.MustNewConstMetric(
c.BytesSent,
prometheus.CounterValue,
float64(networkInterface.BytesSent),
containerIdWithPrefix, networkInterface.EndpointId,
float64(endpointStats.BytesSent),
containerIdWithPrefix, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsReceived,
prometheus.CounterValue,
float64(networkInterface.PacketsReceived),
containerIdWithPrefix, networkInterface.EndpointId,
float64(endpointStats.PacketsReceived),
containerIdWithPrefix, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsSent,
prometheus.CounterValue,
float64(networkInterface.PacketsSent),
containerIdWithPrefix, networkInterface.EndpointId,
float64(endpointStats.PacketsSent),
containerIdWithPrefix, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.DroppedPacketsIncoming,
prometheus.CounterValue,
float64(networkInterface.DroppedPacketsIncoming),
containerIdWithPrefix, networkInterface.EndpointId,
float64(endpointStats.DroppedPacketsIncoming),
containerIdWithPrefix, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.DroppedPacketsOutgoing,
prometheus.CounterValue,
float64(networkInterface.DroppedPacketsOutgoing),
containerIdWithPrefix, networkInterface.EndpointId,
float64(endpointStats.DroppedPacketsOutgoing),
containerIdWithPrefix, endpointId,
)
break
}

ch <- prometheus.MustNewConstMetric(
c.ReadCountNormalized,
prometheus.CounterValue,
float64(cstats.Storage.ReadCountNormalized),
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.ReadSizeBytes,
prometheus.CounterValue,
float64(cstats.Storage.ReadSizeBytes),
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.WriteCountNormalized,
prometheus.CounterValue,
float64(cstats.Storage.WriteCountNormalized),
containerIdWithPrefix,
)
ch <- prometheus.MustNewConstMetric(
c.WriteSizeBytes,
prometheus.CounterValue,
float64(cstats.Storage.WriteSizeBytes),
containerIdWithPrefix,
)
}

return nil, nil
Expand Down

0 comments on commit 9adefdc

Please sign in to comment.