
Commit 463063a

add support for project-number and workload-config arguments in output-parsers
1 parent df8c014

File tree: 3 files changed, +63 −13 lines

perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py

Lines changed: 29 additions & 2 deletions
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import argparse
 import json, os, pprint, subprocess
 import sys
 import dlio_workload
@@ -64,6 +65,26 @@ def downloadDlioOutputs(dlioWorkloads):
 
 
 if __name__ == "__main__":
+  parser = argparse.ArgumentParser(
+      prog="DLIO Unet3d test output parser",
+      description=(
+          "This program takes in a json test-config file and parses it for"
+          " output buckets. From each output bucket, it downloads all the dlio"
+          " output logs from gs://<bucket>/logs/ locally to"
+          f" {LOCAL_LOGS_LOCATION} and parses them for dlio test runs and their"
+          " output metrics."
+      ),
+  )
+  parser.add_argument("--workload-config")
+  parser.add_argument(
+      "--project-number",
+      help=(
+          "project-number (e.g. 93817472919) is needed to fetch the cpu/memory"
+          " utilization data from GCP."
+      ),
+  )
+  args = parser.parse_args()
+
   try:
     os.makedirs(LOCAL_LOGS_LOCATION)
   except FileExistsError:
@@ -155,10 +176,16 @@ def downloadDlioOutputs(dlioWorkloads):
       r["end"] = standard_timestamp(per_epoch_stats_data[str(i + 1)]["end"])
       if r["scenario"] != "local-ssd" and mash_installed:
         r["lowest_memory"], r["highest_memory"] = get_memory(
-            r["pod_name"], r["start"], r["end"]
+            r["pod_name"],
+            r["start"],
+            r["end"],
+            project_number=args.project_number,
         )
         r["lowest_cpu"], r["highest_cpu"] = get_cpu(
-            r["pod_name"], r["start"], r["end"]
+            r["pod_name"],
+            r["start"],
+            r["end"],
+            project_number=args.project_number,
         )
       pass
 

perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py

Lines changed: 30 additions & 7 deletions
@@ -18,10 +18,6 @@
 import argparse
 from collections.abc import Sequence
 import json, os, pprint, subprocess
-import json
-import os
-import subprocess
-import subprocess
 import sys
 from absl import app
 import fio_workload
@@ -59,7 +55,8 @@ def downloadFioOutputs(fioWorkloads):
       os.makedirs(LOCAL_LOGS_LOCATION + "/" + fioWorkload.fileSize)
     except FileExistsError:
       pass
-    print(f"Download FIO output from the folder {fioWorkload.bucket}...")
+
+    print(f"Downloading FIO outputs from {fioWorkload.bucket}...")
     result = subprocess.run(
         [
             "gsutil",
@@ -78,6 +75,26 @@ def downloadFioOutputs(fioWorkloads):
 
 
 if __name__ == "__main__":
+  parser = argparse.ArgumentParser(
+      prog="FIO test output parser",
+      description=(
+          "This program takes in a json test-config file and parses it for"
+          " output buckets. From each output bucket, it downloads all the fio"
+          " output logs from gs://<bucket>/logs/ locally to"
+          f" {LOCAL_LOGS_LOCATION} and parses them for fio test runs and their"
+          " output metrics."
+      ),
+  )
+  parser.add_argument("--workload-config")
+  parser.add_argument(
+      "--project-number",
+      help=(
+          "project-number (e.g. 93817472919) is needed to fetch the cpu/memory"
+          " utilization data from GCP."
+      ),
+  )
+  args = parser.parse_args()
+
   try:
     os.makedirs(LOCAL_LOGS_LOCATION)
   except FileExistsError:
@@ -168,10 +185,16 @@ def downloadFioOutputs(fioWorkloads):
       r["end"] = unix_to_timestamp(per_epoch_output_data["timestamp_ms"])
       if r["scenario"] != "local-ssd" and mash_installed:
         r["lowest_memory"], r["highest_memory"] = get_memory(
-            r["pod_name"], r["start"], r["end"]
+            r["pod_name"],
+            r["start"],
+            r["end"],
+            project_number=args.project_number,
         )
         r["lowest_cpu"], r["highest_cpu"] = get_cpu(
-            r["pod_name"], r["start"], r["end"]
+            r["pod_name"],
+            r["start"],
+            r["end"],
+            project_number=args.project_number,
         )
       pass
       r["gcsfuse_mount_options"] = gcsfuse_mount_options

perfmetrics/scripts/testing_on_gke/examples/utils/utils.py

Lines changed: 4 additions & 4 deletions
@@ -25,10 +25,10 @@ def is_mash_installed() -> bool:
   except subprocess.CalledProcessError:
     return False
 
-def get_memory(pod_name: str, start: str, end: str) -> Tuple[int, int]:
+def get_memory(pod_name: str, start: str, end: str, project_number: int) -> Tuple[int, int]:
   # for some reason, the mash filter does not always work, so we fetch all the metrics for all the pods and filter later.
   result = subprocess.run(["mash", "--namespace=cloud_prod", "--output=csv",
-                          f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/memory/used_bytes'), {{'project': '927584127901', 'metric:memory_type': 'non-evictable'}})| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
+                          f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/memory/used_bytes'), {{'project': '{project_number}', 'metric:memory_type': 'non-evictable'}})| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
                           capture_output=True, text=True)
 
   data_points_int = []
@@ -48,10 +48,10 @@ def get_memory(pod_name: str, start: str, end: str) -> Tuple[int, int]:
 
   return int(min(data_points_int) / 1024 ** 2), int(max(data_points_int) / 1024 ** 2)
 
-def get_cpu(pod_name: str, start: str, end: str) -> Tuple[float, float]:
+def get_cpu(pod_name: str, start: str, end: str, project_number: int) -> Tuple[float, float]:
   # for some reason, the mash filter does not always work, so we fetch all the metrics for all the pods and filter later.
   result = subprocess.run(["mash", "--namespace=cloud_prod", "--output=csv",
-                          f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/cpu/core_usage_time'), {{'project': '927584127901'}})| Window(Rate('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
+                          f"Query(Fetch(Raw('cloud.kubernetes.K8sContainer', 'kubernetes.io/container/cpu/core_usage_time'), {{'project': '{project_number}'}})| Window(Rate('10m'))| GroupBy(['pod_name', 'container_name'], Max()), TimeInterval('{start}', '{end}'), '5s')"],
                           capture_output=True, text=True)
 
   data_points_float = []
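The queries above embed a literal brace-delimited filter map inside an f-string, hence the doubled braces; a small sketch of what the memory query expands to once project_number is a parameter (the values below are illustrative):

    # Sketch: expanding the memory-usage mash query with a parameterized
    # project number. Doubled braces {{ }} emit literal braces; only
    # project_number, start, and end are interpolated.
    project_number = 93817472919  # example value passed down from --project-number
    start, end = "2024/01/01-00:00:00", "2024/01/01-01:00:00"  # illustrative timestamps

    query = (
        "Query(Fetch(Raw('cloud.kubernetes.K8sContainer',"
        " 'kubernetes.io/container/memory/used_bytes'),"
        f" {{'project': '{project_number}', 'metric:memory_type': 'non-evictable'}})"
        "| Window(Align('10m'))| GroupBy(['pod_name', 'container_name'], Max()),"
        f" TimeInterval('{start}', '{end}'), '5s')"
    )
    print(query)  # the literal query string handed to `mash`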
