From 7eb95a4c44aa348340fac1a915179837c437b678 Mon Sep 17 00:00:00 2001 From: Kamesh Akella Date: Wed, 14 Aug 2024 07:11:26 -0400 Subject: [PATCH] update scaling benchmark to get metrics from both clusters Closes #914 Signed-off-by: Kamesh Akella --- .../prometheus-metrics-calc/action.yml | 42 +++++-- .github/workflows/rosa-scaling-benchmark.yml | 119 ++++++++++++++++-- benchmark/src/main/python/perfInsights.py | 2 +- 3 files changed, 145 insertions(+), 18 deletions(-) diff --git a/.github/actions/prometheus-metrics-calc/action.yml b/.github/actions/prometheus-metrics-calc/action.yml index 866b8fff2..3da52359d 100644 --- a/.github/actions/prometheus-metrics-calc/action.yml +++ b/.github/actions/prometheus-metrics-calc/action.yml @@ -44,11 +44,17 @@ runs: run: | readarray -t lines < ${{ inputs.input }} num1=${lines[0]} - num2=${lines[1]} + num2=${lines[2]} + num3=${lines[1]} + num4=${lines[3]} #calculating the difference of cumulative metric (changed during the benchmark execution) - difference=$(awk "BEGIN {print ($num2-$num1); exit}") + difference_cluster_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + difference_cluster_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + # averaging the differences between the two clusters + average_difference=$(awk "BEGIN {print ($difference_cluster_1 + $difference_cluster_2) / 2; exit}") #the script calculates vCPU need to calculate the vcpu number from CPU seconds metrics for the interval during which the test was running - metric_count_in_interval=$(awk "BEGIN {print $difference/$TIME_INTERVAL; exit}") + metric_count_in_interval=$(awk "BEGIN {print $average_difference/$TIME_INTERVAL; exit}") #calculating the average metric per pod metric_per_pod=$(awk "BEGIN {print $metric_count_in_interval/$POD_NUM; exit}") #Calculating the final number, i.e. how many of specified criteria (e.g. 
user logins/sec, client credential grants, etc) @@ -68,11 +74,17 @@ runs: run: | readarray -t lines < ${{ inputs.input }} num1=${lines[0]} - num2=${lines[1]} + num2=${lines[2]} + num3=${lines[1]} + num4=${lines[3]} #calculating the difference of cumulative metric (changed during the benchmark execution) - difference=$(awk "BEGIN {print ($num2-$num1); exit}") + difference_cluster_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + difference_cluster_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + # averaging the differences between the two clusters + average_difference=$(awk "BEGIN {print ($difference_cluster_1 + $difference_cluster_2) / 2; exit}") #calculating the average metric per pod - metric_per_pod=$(awk "BEGIN {print $difference/$POD_NUM; exit}") + metric_per_pod=$(awk "BEGIN {print $average_difference/$POD_NUM; exit}") #Calculating the final number, i.e. based on current environment setup how many of specified criteria (e.g. user active session, etc) #can be handled with 500Mb per pod based on the number calculated above. The result is number rounded down. 
result=$(awk "BEGIN {print int($CRITERIA_VALUE*500/$metric_per_pod); exit}") @@ -101,15 +113,25 @@ runs: #Reading ispn metrics file with xsite reqs data readarray -t lines < "${{ inputs.ispnCacheName}}_ispn_metrics_file_count" num1=${lines[0]} - num2=${lines[1]} + num2=${lines[2]} + num3=${lines[1]} + num4=${lines[3]} #calculating the number of xsite requests during the test execution - xsite_reqs=$(awk "BEGIN {print ($num2-$num1); exit}") + xsite_reqs_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + xsite_reqs_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + xsite_reqs=$(awk "BEGIN {print ($xsite_reqs_1 + $xsite_reqs_2) / 2; exit}") #Reading ispn metrics file with xsite reqs totad duration readarray -t lines1 < "${{ inputs.ispnCacheName}}_ispn_metrics_file_sum" num1=${lines1[0]} - num2=${lines1[1]} + num2=${lines1[2]} + num3=${lines1[1]} + num4=${lines1[3]} #calculating the number of xsite requests during the test execution - xsite_reqs_total_duration=$(awk "BEGIN {print ($num2-$num1); exit}") + xsite_reqs_total_duration_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + xsite_reqs_total_duration_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + xsite_reqs_total_duration=$(awk "BEGIN {print ($xsite_reqs_total_duration_1 + $xsite_reqs_total_duration_2) / 2; exit}") #calculating the average duration per request in ms if [[ $xsite_reqs -gt 0 ]]; then averageXsiteReplicationTimePerReq=$(awk "BEGIN {print int($xsite_reqs_total_duration*1000/$xsite_reqs); exit}") diff --git a/.github/workflows/rosa-scaling-benchmark.yml b/.github/workflows/rosa-scaling-benchmark.yml index 8c50e5787..27dbae5b2 100644 --- a/.github/workflows/rosa-scaling-benchmark.yml +++ b/.github/workflows/rosa-scaling-benchmark.yml @@ -7,6 +7,10 @@ on: description: 'Name of the cluster' type: string default: 'gh-keycloak-a' + clusterPrefix: + description: 'Cluster prefix' + type: string + default: 'gh-keycloak' region: description: 'Name of the region where EC2 instances should be installed' type: 
string @@ -45,6 +49,10 @@ on: clusterName: description: 'Name of the cluster' type: string + default: 'gh-keycloak-a' + clusterPrefix: + description: 'Cluster prefix' + type: string default: 'gh-keycloak' region: description: 'Name of the region where EC2 instances should be installed' @@ -105,7 +113,7 @@ jobs: - name: Login to OpenShift cluster uses: ./.github/actions/oc-keycloak-login with: - clusterName: ${{ inputs.clusterName || format('gh-{0}', github.repository_owner) }} + clusterName: ${{ inputs.clusterPrefix }}-a - name: Setup Go Task uses: ./.github/actions/task-setup @@ -179,7 +187,19 @@ jobs: with: createReportFile: true - - name: Run Memory Usage Total Query Before Benchmark + - name: Run Memory Usage Total Query Before Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runMemoryUsageTotal: true + output: memory_create_sessions + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run Memory Usage Total Query Before Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -203,7 +223,24 @@ jobs: continue-on-error: true working-directory: ansible - - name: Run Memory Usage Total Query After Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run Memory Usage Total Query After Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runMemoryUsageTotal: true + output: memory_create_sessions + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run Memory Usage Total Query After Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -227,7 +264,24 @@ 
jobs: isvCPU: false isMemory: true - - name: Run CPU sec Util Query Before Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query Before Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: user_logins_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query Before Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -251,7 +305,24 @@ jobs: continue-on-error: true working-directory: ansible - - name: Run CPU sec Util Query After Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query After Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: user_logins_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query After Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -269,7 +340,24 @@ jobs: isvCPU: true isMemory: false - - name: Run CPU sec Util Query Before Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query Before Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: client_credential_grants_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + 
with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query Before Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -292,7 +380,24 @@ jobs: continue-on-error: true working-directory: ansible - - name: Run CPU sec Util Query After Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query After Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: client_credential_grants_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query After Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} diff --git a/benchmark/src/main/python/perfInsights.py b/benchmark/src/main/python/perfInsights.py index 4d454c654..5a4638aa3 100644 --- a/benchmark/src/main/python/perfInsights.py +++ b/benchmark/src/main/python/perfInsights.py @@ -61,7 +61,7 @@ def fetch_and_process_json(github_user, github_repo, branch_name, json_directory data_frames[test].append(df) combined_df = pd.concat(basic_df, ignore_index=True) - perf_across_deployments_df = combined_df[['start', 'context.externalInfinispanFeatureEnabled', 'context.persistentSessionsEnabled', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', 'credentialGrantsPerSecPer1vCpu', 'memoryUsageTest.activeSessionsPer500MbPerPod']] + perf_across_deployments_df = combined_df[['start', 'context.externalInfinispanFeatureEnabled', 'context.persistentSessionsEnabled', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', 'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu', 'memoryUsageTest.activeSessionsPer500MbPerPod']] print(perf_across_deployments_df.to_csv(index=False)) 
# Concatenate all DataFrames for each test into a single DataFrame