diff --git a/README.md b/README.md index abe561f..b772936 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,55 @@ Sometimes using a `kubectl` command is much faster than running a bunch of Prome ## What does it look like ``` -$ kubectl pod-restarts --help +This command prints a table with all the restarting pods inside +your cluster and the lookup can be restricted to a specific namespace, based +on a minimum threshold for restarts or just count containers restarts too. + +The purpose of this is to have a glance at what has been failing and since +when, as age and start times are included in the result table. The alternative to +that would be to run multiple shell commands with complex parsing or plot N graphs +with Prometheus or other tool. Usage: pod-restarts [flags] Examples: + +Cluster-wide listing +$ kubectl pod-restarts + +Restricts listing to a namespace (faster in big clusters) +$ kubectl pod-restarts -n production + +Ignores pods below a specific threshold (10 restarts) +$ kubectl pod-restarts -t 10 + +Also lists all the containers restarting inside the pods +$ kubectl pod-restarts -c + +Flags: + -c, --containers Also lists containers restarts, ignoring thresholds + -h, --help help for pod-restarts + -n, --namespace string If present, the namespace scope for this CLI request + -t, --threshold int32 Only list restarts above the given threshold ``` + +``` +$ kubectl pod-restarts -c -n istio-system +NAMESPACE RESTARTS NAME AGE START +istio-system 4 istio-policy-86978d4c49-7wvdj/mixer 35s 2020-01-22 12:29:09 -0300 -03 +istio-system 4 istio-policy-86978d4c49-v7fxb/mixer 15d 2020-01-22 12:29:12 -0300 -03 +istio-system 5 istio-telemetry-7c5b6c9975-cj2vq/mixer 120d 2020-01-22 12:29:10 -0300 -03 +istio-system 5 istio-telemetry-7c5b6c9975-h2c6s/mixer 120d 2020-01-22 12:29:15 -0300 -03 +``` + +``` +$ kubectl pod-restarts -n kafka +NAMESPACE RESTARTS NAME AGE START +kafka 7 kafka-operator-entity-operator-66d6d5965-zbwmq 72h 2020-01-22 12:29:11 -0300 -03 +kafka 2 strimzi-topic-operator-6fc5484b85-996sx 90m 2020-01-22 12:29:11 -0300 -03 + +$ kubectl pod-restarts -n kafka -t 5 +NAMESPACE RESTARTS NAME AGE START +kafka 7 kafka-operator-entity-operator-66d6d5965-zbwmq 72h 2020-01-22 12:29:11 -0300 -03 +``` \ No newline at end of file diff --git a/cmd/plugin/cli/root.go b/cmd/plugin/cli/root.go index 40b704b..378e0d9 100644 --- a/cmd/plugin/cli/root.go +++ b/cmd/plugin/cli/root.go @@ -19,22 +19,27 @@ var ( func RootCmd() *cobra.Command { cmd := &cobra.Command{ Use: "pod-restarts", - Short: "Sorted table of all pods with restarts and their last start time.", - Long: `Dives into a node after the desired pod and returns data associated -with the pod no matter where it is running, such as its origin workload, -namespace, the node where it is running and its node pod siblings, as -well basic health status of it all. - -The purpose is to have meaningful pod info at a glance without needing to -run multiple kubectl commands to see what else is running next to your -pod in a given node inside a huge cluster, because sometimes all -you've got from an alert is the pod name.`, + Short: "Sorted table of all pods with restarts and their age, start time.", + Long: `This command prints a table with all the restarting pods inside +your cluster and the lookup can be restricted to a specific namespace, based +on a minimum threshold for restarts or just count containers restarts too. + +The purpose of this is to have a glance at what has been failing and since +when, as age and start times are included in the result table. The alternative to +that would be to run multiple shell commands with complex parsing or plot N graphs +with Prometheus or other tool.`, Example: ` Cluster-wide listing $ kubectl pod-restarts Restricts listing to a namespace (faster in big clusters) -$ kubectl pod-restarts -n production`, +$ kubectl pod-restarts -n production + +Ignores pods below a specific threshold (10 restarts) +$ kubectl pod-restarts -t 10 + +Also lists all the containers restarting inside the pods +$ kubectl pod-restarts -c`, SilenceErrors: true, SilenceUsage: false, PreRun: func(cmd *cobra.Command, args []string) { @@ -55,8 +60,8 @@ $ kubectl pod-restarts -n production`, KubernetesConfigFlags.AddFlags(cmd.Flags()) // extra flags to our plugin - cmd.Flags().BoolP("containers", "c", false, "Lists containers and their restarts instead.") - cmd.Flags().Int32P("threshold", "t", 0, "Only list restarts above this threshold.") + cmd.Flags().BoolP("containers", "c", false, "Also lists containers restarts, ignoring thresholds") + cmd.Flags().Int32P("threshold", "t", 0, "Only list restarts above the given threshold") // hide common flags supported by any kubectl command to declutter -h/--help // most people would only (if ever) miss kubeconfig, context or cluster diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go index 5f2afa6..be97c97 100644 --- a/pkg/plugin/plugin.go +++ b/pkg/plugin/plugin.go @@ -43,20 +43,22 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error { // we will seek the whole cluster if namespace is not passed as a flag (it will be a "" string) podFind, err := pd.Clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{}) + if err != nil || len(podFind.Items) == 0 { + fmt.Println("Failed to get pods data: check your parameters, set a context or verify API server.") + return nil + } + // is there a more correct way to + // grab flags anywhere inside the code? v := viper.GetViper() listContainers := v.GetBool("containers") listThreshold := v.GetInt32("threshold") - if err != nil || len(podFind.Items) == 0 { - return errors.New("Failed to get pods data: check your parameters, set a context or verify API server.") - } - tbl.AddRow("NAMESPACE", "RESTARTS", "NAME", "AGE", "START") var allRestarts int32 = 0 for _, pod := range podFind.Items { - // RestartCount are all int32 + // restarts in the API are all int32 var totalRestarts int32 = 0 // just so we can have pretty printing of ages @@ -80,7 +82,11 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error { containersCount := containerStatuses.RestartCount if containersCount != int32(0) { if listContainers { - tbl.AddRow(pod.GetNamespace(), containersCount, pod.GetName()+"/"+containerStatuses.Name, startTimePretty, pod.Status.StartTime) + tbl.AddRow( + pod.GetNamespace(), + containersCount, + pod.GetName()+"/"+containerStatuses.Name, + startTimePretty, pod.Status.StartTime) } totalRestarts += containersCount } @@ -90,7 +96,12 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error { initContainersCount := initContainerStatuses.RestartCount if initContainersCount != int32(0) { if listContainers { - tbl.AddRow(pod.GetNamespace(), initContainersCount, pod.GetName()+"/"+initContainerStatuses.Name, startTimePretty, pod.Status.StartTime) + tbl.AddRow( + pod.GetNamespace(), + initContainersCount, + pod.GetName()+"/"+initContainerStatuses.Name, + startTimePretty, + pod.Status.StartTime) } totalRestarts += initContainersCount } @@ -99,11 +110,21 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error { if totalRestarts != int32(0) { if listThreshold != int32(0) { if totalRestarts > listThreshold { - tbl.AddRow(pod.GetNamespace(), totalRestarts, pod.GetName(), startTimePretty, pod.Status.StartTime) + tbl.AddRow( + pod.GetNamespace(), + totalRestarts, + pod.GetName(), + startTimePretty, + pod.Status.StartTime) } } else { if !listContainers { - tbl.AddRow(pod.GetNamespace(), totalRestarts, pod.GetName(), startTimePretty, pod.Status.StartTime) + tbl.AddRow( + pod.GetNamespace(), + totalRestarts, + pod.GetName(), + startTimePretty, + pod.Status.StartTime) } } allRestarts += totalRestarts