Skip to content

Commit 765ec53

Browse files
orion cli, pylint workflow and README (#2)
1 parent dea10e6 commit 765ec53

File tree

6 files changed

+295
-0
lines changed

6 files changed

+295
-0
lines changed

.github/workflows/pylint.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: Pylint
2+
3+
on: [push,pull_request]
4+
5+
jobs:
6+
build:
7+
runs-on: ubuntu-latest
8+
strategy:
9+
matrix:
10+
python-version: ["3.8", "3.9", "3.10", "3.11"]
11+
steps:
12+
- uses: actions/checkout@v3
13+
- name: Set up Python ${{ matrix.python-version }}
14+
uses: actions/setup-python@v3
15+
with:
16+
python-version: ${{ matrix.python-version }}
17+
- name: Install dependencies
18+
run: |
19+
python -m pip install --upgrade pip
20+
pip install pylint
21+
- name: Analysing the code with pylint
22+
run: |
23+
pylint -d C0103 $(git ls-files '*.py')

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,7 @@ cython_debug/
158158
# and can be added to the global gitignore or merged into this file. For a more nuclear
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
#.idea/
161+
162+
*.yaml
163+
*.csv
164+
.vscode/

README.md

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Orion - CLI tool to find regressions
2+
Orion stands as a powerful command-line tool designed for identifying regressions within perf-scale CPT runs, leveraging metadata provided during the process. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter).
3+
4+
Below is an illustrative example of the config and metadata that Orion can handle:
5+
6+
```
7+
tests :
8+
- name : aws-small-scale-cluster-density-v2
9+
platform: AWS
10+
masterNodesType: m6a.xlarge
11+
masterNodesCount: 3
12+
workerNodesType: m6a.xlarge
13+
workerNodesCount: 24
14+
benchmark: cluster-density-v2
15+
ocpVersion: 4.15
16+
networkType: OVNKubernetes
17+
# encrypted: true
18+
# fips: false
19+
# ipsec: false
20+
21+
metrics :
22+
- metric : podReadyLatency
23+
metricType : latency
24+
25+
- metric : apiserverCPU
26+
metricType : cpu
27+
namespace: openshift-kube-apiserver
28+
29+
- metric: ovnCPU
30+
metricType: cpu
31+
namespace: openshift-ovn-kubernetes
32+
33+
- metric: etcdCPU
34+
metricType: cpu
35+
namespace: openshift-etcd
36+
37+
38+
```
39+
40+
## Build Orion
41+
Building Orion is a straightforward process. Follow these commands:
42+
43+
Clone the current repository using git clone.
44+
45+
```
46+
>> git clone <repository_url>
47+
>> python -m venv venv
48+
>> source venv/bin/activate
49+
>> pip install -r requirements.txt
50+
>> export ES_SERVER=<es_server_url>
51+
>> pip install .
52+
```
53+
## Run Orion
54+
Executing Orion is as simple as building it. After following the build steps, run the following:
55+
```
56+
>> orion
57+
```
58+
At the moment, Orion supports the following command-line options:
59+
60+
Orion provides flexibility in configuring its behavior by allowing users to set the path to their config file using the ```--config``` flag.
61+
62+
For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, enabling the generation of detailed debug logs.
63+
64+
Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored.
65+
66+
Orion's seamless integration with metadata and hunter ensures a robust regression detection tool for perf-scale CPT runs.
67+
68+

orion.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
"""
2+
This is the cli file for orion, tool to detect regressions using hunter
3+
"""
4+
# pylint: disable = import-error
5+
import sys
6+
from functools import reduce
7+
import logging
8+
import os
9+
10+
import click
11+
import yaml
12+
import pandas as pd
13+
from fmatch.matcher import Matcher
14+
15+
16+
@click.group()
def cli():
    """Top-level click group; sub-commands (e.g. ``orion``) attach to it."""
21+
22+
23+
# pylint: disable=too-many-locals
@click.command()
@click.option("--config", default="config.yaml", help="Path to the configuration file")
@click.option("--output", default="output.csv", help="Path to save the output csv file")
@click.option("--debug", is_flag=True, help="log level ")
def orion(config, debug, output):
    """Orion is the cli tool to detect regressions over the runs

    Args:
        config (str): path to the config file
        debug (bool): lets you log debug mode
        output (str): path to the output csv file
    """
    level = logging.DEBUG if debug else logging.INFO
    logger = logging.getLogger("Orion")
    logger = set_logging(level, logger)

    # The Matcher needs an Elasticsearch endpoint; fail fast if it is missing.
    if "ES_SERVER" not in os.environ:
        logger.error("ES_SERVER environment variable not set")
        sys.exit(1)

    try:
        with open(config, "r", encoding="utf-8") as file:
            data = yaml.safe_load(file)
            logger.debug("The %s file has successfully loaded", config)
    except FileNotFoundError as e:
        logger.error("Config file not found: %s", e)
        sys.exit(1)
    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.error("An error occurred: %s", e)
        sys.exit(1)

    for test in data["tests"]:
        metadata = get_metadata(test)
        logger.info("The test %s has started", test["name"])
        match = Matcher(index="perf_scale_ci", level=level)
        uuids = match.get_uuid_by_metadata(metadata)
        if len(uuids) == 0:
            # Route through the logger (not print) like every other message.
            logger.error("No UUID present for given metadata")
            sys.exit()

        runs = match.match_kube_burner(uuids)
        ids = match.filter_runs(runs, runs)
        # One dataframe per collected metric; merged on "uuid" at the end.
        dataframe_list = []

        for metric in test["metrics"]:
            logger.info("Collecting %s", metric["metric"])
            if metric["metricType"] == "latency":
                if metric["metric"] == "podReadyLatency":
                    try:
                        podl = match.burner_results("", ids, "ripsaw-kube-burner*")
                        podl_df = match.convert_to_df(
                            podl, columns=["uuid", "timestamp", "P99"]
                        )
                        dataframe_list.append(podl_df)
                        logger.debug(podl_df)
                    except Exception as e:  # pylint: disable=broad-exception-caught
                        # Latency metrics have no "namespace" key (see the config
                        # example in the README), so log the metric name instead —
                        # the old message raised a KeyError inside the handler.
                        logger.error(
                            "Failed to collect metric %s, exception %s",
                            metric["metric"],
                            e,
                        )

            elif metric["metricType"] == "cpu":
                try:
                    cpu = match.burner_cpu_results(
                        ids, metric["namespace"], "ripsaw-kube-burner*"
                    )
                    cpu_df = match.convert_to_df(cpu, columns=["uuid", "cpu_avg"])
                    # Disambiguate the generic "cpu_avg" column per metric so the
                    # merged frame keeps one column per namespace.
                    cpu_df = cpu_df.rename(
                        columns={"cpu_avg": metric["metric"] + "_cpu_avg"}
                    )
                    dataframe_list.append(cpu_df)
                    logger.debug(cpu_df)
                except Exception as e:  # pylint: disable=broad-exception-caught
                    logger.error(
                        "The namespace %s does not exist, exception %s",
                        metric["namespace"],
                        e,
                    )

        if not dataframe_list:
            # reduce() over an empty list raises TypeError; nothing to merge.
            logger.error("No metrics collected for test %s, skipping", test["name"])
            continue

        # Inner-join every metric frame on the run uuid into one row per run.
        merged_df = reduce(
            lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
            dataframe_list,
        )
        match.save_results(merged_df, csv_file_path=output)
109+
110+
111+
def get_metadata(test):
    """Gets metadata of the run from each test

    Only the keys present in the test entry are copied, so optional fields
    (e.g. encrypted/fips/ipsec) may be absent from the result.

    Args:
        test (dict): test dictionary

    Returns:
        dict: dictionary of the metadata
    """
    metadata_columns = [
        "platform",
        "masterNodesType",
        "masterNodesCount",
        "workerNodesType",
        "workerNodesCount",
        "benchmark",
        "ocpVersion",
        "networkType",
        "encrypted",
        "fips",
        "ipsec",
    ]
    metadata = {key: test[key] for key in metadata_columns if key in test}
    # YAML parses "ocpVersion: 4.15" as a float; normalize it to a string.
    # Guard the access: the comprehension above tolerates a missing key,
    # so an unconditional lookup would raise KeyError here.
    if "ocpVersion" in metadata:
        metadata["ocpVersion"] = str(metadata["ocpVersion"])
    return metadata
136+
137+
138+
def set_logging(level, logger):
    """Attach a stdout handler to *logger* and set its level.

    Args:
        level (int): logging level (e.g. logging.DEBUG)
        logger (logging.Logger): logger to configure

    Returns:
        logging.Logger: the same logger, configured
    """
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(level)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    logger.setLevel(level)
    logger.addHandler(stream_handler)
    return logger
157+
158+
159+
# Register the sub-command at import time so both the console-script entry
# point (setup.py: "orion = orion:orion") and any importer of this module see
# a fully populated group — previously this only happened under __main__.
cli.add_command(orion)

if __name__ == "__main__":
    cli()

requirements.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
certifi==2023.11.17
2+
click==8.1.7
3+
elastic-transport==8.11.0
4+
elasticsearch==8.11.1
5+
elasticsearch7==7.13.0
6+
fmatch==0.0.2
7+
numpy==1.26.3
8+
pandas==2.1.4
9+
python-dateutil==2.8.2
10+
pytz==2023.3.post1
11+
PyYAML==6.0.1
12+
six==1.16.0
13+
tzdata==2023.4
14+
urllib3==1.26.18

setup.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# orion/setup.py
"""
setup.py for orion cli tool
"""
from setuptools import setup

setup(
    name="orion",
    version="1.0",
    # Single-module distribution: orion.py at the repo root.
    py_modules=["orion"],
    install_requires=["click", "fmatch"],
    entry_points={
        # Installs an `orion` executable that invokes orion.py's orion command.
        "console_scripts": ["orion = orion:orion"],
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)

0 commit comments

Comments
 (0)