Skip to content

Commit

Permalink
documentation and prettifying
Browse files Browse the repository at this point in the history
  • Loading branch information
Caio Begotti committed Feb 6, 2020
1 parent 3585ca8 commit e104d8a
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 23 deletions.
47 changes: 46 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,55 @@ Sometimes using a `kubectl` command is much faster than running a bunch of Prome
## What does it look like

```
$ kubectl pod-restarts --help
This command prints a table with all the restarting pods inside
your cluster and the lookup can be restricted to a specific namespace, based
on a minimum threshold for restarts or just count containers restarts too.
The purpose of this is to have a glance at what has been failing and since
when, as age and start times are included in the result table. The alternative to
that would be to run multiple shell commands with complex parsing or plot N graphs
with Prometheus or other tool.
Usage:
pod-restarts [flags]
Examples:
Cluster-wide listing
$ kubectl pod-restarts
Restricts listing to a namespace (faster in big clusters)
$ kubectl pod-restarts -n production
Ignores pods below a specific threshold (10 restarts)
$ kubectl pod-restarts -t 10
Also lists all the containers restarting inside the pods
$ kubectl pod-restarts -c
Flags:
-c, --containers Also lists containers restarts, ignoring thresholds
-h, --help help for pod-restarts
-n, --namespace string If present, the namespace scope for this CLI request
-t, --threshold int32 Only list restarts above the given threshold
```

```
$ kubectl pod-restarts -c -n istio-system
NAMESPACE RESTARTS NAME AGE START
istio-system 4 istio-policy-86978d4c49-7wvdj/mixer 35s 2020-01-22 12:29:09 -0300 -03
istio-system 4 istio-policy-86978d4c49-v7fxb/mixer 15d 2020-01-22 12:29:12 -0300 -03
istio-system 5 istio-telemetry-7c5b6c9975-cj2vq/mixer 120d 2020-01-22 12:29:10 -0300 -03
istio-system 5 istio-telemetry-7c5b6c9975-h2c6s/mixer 120d 2020-01-22 12:29:15 -0300 -03
```

```
$ kubectl pod-restarts -n kafka
NAMESPACE RESTARTS NAME AGE START
kafka 7 kafka-operator-entity-operator-66d6d5965-zbwmq 72h 2020-01-22 12:29:11 -0300 -03
kafka 2 strimzi-topic-operator-6fc5484b85-996sx 90m 2020-01-22 12:29:11 -0300 -03
$ kubectl pod-restarts -n kafka -t 5
NAMESPACE RESTARTS NAME AGE START
kafka 7 kafka-operator-entity-operator-66d6d5965-zbwmq 72h 2020-01-22 12:29:11 -0300 -03
```
31 changes: 18 additions & 13 deletions cmd/plugin/cli/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,27 @@ var (
func RootCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "pod-restarts",
Short: "Sorted table of all pods with restarts and their last start time.",
Long: `Dives into a node after the desired pod and returns data associated
with the pod no matter where it is running, such as its origin workload,
namespace, the node where it is running and its node pod siblings, as
well basic health status of it all.
The purpose is to have meaningful pod info at a glance without needing to
run multiple kubectl commands to see what else is running next to your
pod in a given node inside a huge cluster, because sometimes all
you've got from an alert is the pod name.`,
Short: "Sorted table of all pods with restarts and their age, start time.",
Long: `This command prints a table with all the restarting pods inside
your cluster and the lookup can be restricted to a specific namespace, based
on a minimum threshold for restarts or just count containers restarts too.
The purpose of this is to have a glance at what has been failing and since
when, as age and start times are included in the result table. The alternative to
that would be to run multiple shell commands with complex parsing or plot N graphs
with Prometheus or other tool.`,
Example: `
Cluster-wide listing
$ kubectl pod-restarts
Restricts listing to a namespace (faster in big clusters)
$ kubectl pod-restarts -n production`,
$ kubectl pod-restarts -n production
Ignores pods below a specific threshold (10 restarts)
$ kubectl pod-restarts -t 10
Also lists all the containers restarting inside the pods
$ kubectl pod-restarts -c`,
SilenceErrors: true,
SilenceUsage: false,
PreRun: func(cmd *cobra.Command, args []string) {
Expand All @@ -55,8 +60,8 @@ $ kubectl pod-restarts -n production`,
KubernetesConfigFlags.AddFlags(cmd.Flags())

// extra flags to our plugin
cmd.Flags().BoolP("containers", "c", false, "Lists containers and their restarts instead.")
cmd.Flags().Int32P("threshold", "t", 0, "Only list restarts above this threshold.")
cmd.Flags().BoolP("containers", "c", false, "Also lists containers restarts, ignoring thresholds")
cmd.Flags().Int32P("threshold", "t", 0, "Only list restarts above the given threshold")

// hide common flags supported by any kubectl command to declutter -h/--help
// most people would only (if ever) miss kubeconfig, context or cluster
Expand Down
39 changes: 30 additions & 9 deletions pkg/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,22 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error {

// when no namespace flag is passed, namespace is the empty string "" and the whole cluster is searched
podFind, err := pd.Clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{})
if err != nil || len(podFind.Items) == 0 {
fmt.Println("Failed to get pods data: check your parameters, set a context or verify API server.")
return nil
}

// TODO: find a cleaner way to read flag values here than
// going through the global viper instance
v := viper.GetViper()
listContainers := v.GetBool("containers")
listThreshold := v.GetInt32("threshold")

if err != nil || len(podFind.Items) == 0 {
return errors.New("Failed to get pods data: check your parameters, set a context or verify API server.")
}

tbl.AddRow("NAMESPACE", "RESTARTS", "NAME", "AGE", "START")

var allRestarts int32 = 0
for _, pod := range podFind.Items {
// RestartCount are all int32
// restarts in the API are all int32
var totalRestarts int32 = 0

// just so we can have pretty printing of ages
Expand All @@ -80,7 +82,11 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error {
containersCount := containerStatuses.RestartCount
if containersCount != int32(0) {
if listContainers {
tbl.AddRow(pod.GetNamespace(), containersCount, pod.GetName()+"/"+containerStatuses.Name, startTimePretty, pod.Status.StartTime)
tbl.AddRow(
pod.GetNamespace(),
containersCount,
pod.GetName()+"/"+containerStatuses.Name,
startTimePretty, pod.Status.StartTime)
}
totalRestarts += containersCount
}
Expand All @@ -90,7 +96,12 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error {
initContainersCount := initContainerStatuses.RestartCount
if initContainersCount != int32(0) {
if listContainers {
tbl.AddRow(pod.GetNamespace(), initContainersCount, pod.GetName()+"/"+initContainerStatuses.Name, startTimePretty, pod.Status.StartTime)
tbl.AddRow(
pod.GetNamespace(),
initContainersCount,
pod.GetName()+"/"+initContainerStatuses.Name,
startTimePretty,
pod.Status.StartTime)
}
totalRestarts += initContainersCount
}
Expand All @@ -99,11 +110,21 @@ func (pd *PodRestartsPlugin) findPodByPodName(namespace string) error {
if totalRestarts != int32(0) {
if listThreshold != int32(0) {
if totalRestarts > listThreshold {
tbl.AddRow(pod.GetNamespace(), totalRestarts, pod.GetName(), startTimePretty, pod.Status.StartTime)
tbl.AddRow(
pod.GetNamespace(),
totalRestarts,
pod.GetName(),
startTimePretty,
pod.Status.StartTime)
}
} else {
if !listContainers {
tbl.AddRow(pod.GetNamespace(), totalRestarts, pod.GetName(), startTimePretty, pod.Status.StartTime)
tbl.AddRow(
pod.GetNamespace(),
totalRestarts,
pod.GetName(),
startTimePretty,
pod.Status.StartTime)
}
}
allRestarts += totalRestarts
Expand Down

0 comments on commit e104d8a

Please sign in to comment.