From 7eb95a4c44aa348340fac1a915179837c437b678 Mon Sep 17 00:00:00 2001 From: Kamesh Akella Date: Wed, 14 Aug 2024 07:11:26 -0400 Subject: [PATCH] update scaling benchmark to get metrics from both clusters Closes #914 Signed-off-by: Kamesh Akella --- .../prometheus-metrics-calc/action.yml | 42 +++++-- .github/workflows/rosa-scaling-benchmark.yml | 119 ++++++++++++++++-- benchmark/src/main/python/perfInsights.py | 2 +- 3 files changed, 145 insertions(+), 18 deletions(-) diff --git a/.github/actions/prometheus-metrics-calc/action.yml b/.github/actions/prometheus-metrics-calc/action.yml index 866b8fff2..3da52359d 100644 --- a/.github/actions/prometheus-metrics-calc/action.yml +++ b/.github/actions/prometheus-metrics-calc/action.yml @@ -44,11 +44,17 @@ runs: run: | readarray -t lines < ${{ inputs.input }} num1=${lines[0]} - num2=${lines[1]} + num2=${lines[2]} + num3=${lines[1]} + num4=${lines[3]} #calculating the difference of cumulative metric (changed during the benchmark execution) - difference=$(awk "BEGIN {print ($num2-$num1); exit}") + difference_cluster_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + difference_cluster_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + # averaging the differences between the two clusters + average_difference=$(awk "BEGIN {print ($difference_cluster_1 + $difference_cluster_2) / 2; exit}") #the script calculates vCPU need to calculate the vcpu number from CPU seconds metrics for the interval during which the test was running - metric_count_in_interval=$(awk "BEGIN {print $difference/$TIME_INTERVAL; exit}") + metric_count_in_interval=$(awk "BEGIN {print $average_difference/$TIME_INTERVAL; exit}") #calculating the average metric per pod metric_per_pod=$(awk "BEGIN {print $metric_count_in_interval/$POD_NUM; exit}") #Calculating the final number, i.e. how many of specified criteria (e.g. 
user logins/sec, client credential grants, etc) @@ -68,11 +74,17 @@ runs: run: | readarray -t lines < ${{ inputs.input }} num1=${lines[0]} - num2=${lines[1]} + num2=${lines[2]} + num3=${lines[1]} + num4=${lines[3]} #calculating the difference of cumulative metric (changed during the benchmark execution) - difference=$(awk "BEGIN {print ($num2-$num1); exit}") + difference_cluster_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + difference_cluster_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + # averaging the differences between the two clusters + average_difference=$(awk "BEGIN {print ($difference_cluster_1 + $difference_cluster_2) / 2; exit}") #calculating the average metric per pod - metric_per_pod=$(awk "BEGIN {print $difference/$POD_NUM; exit}") + metric_per_pod=$(awk "BEGIN {print $average_difference/$POD_NUM; exit}") #Calculating the final number, i.e. based on current environment setup how many of specified criteria (e.g. user active session, etc) #can be handled with 500Mb per pod based on the number calculated above. The result is number rounded down. 
result=$(awk "BEGIN {print int($CRITERIA_VALUE*500/$metric_per_pod); exit}") @@ -101,15 +113,25 @@ runs: #Reading ispn metrics file with xsite reqs data readarray -t lines < "${{ inputs.ispnCacheName}}_ispn_metrics_file_count" num1=${lines[0]} - num2=${lines[1]} + num2=${lines[2]} + num3=${lines[1]} + num4=${lines[3]} #calculating the number of xsite requests during the test execution - xsite_reqs=$(awk "BEGIN {print ($num2-$num1); exit}") + xsite_reqs_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + xsite_reqs_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + xsite_reqs=$(awk "BEGIN {print ($xsite_reqs_1 + $xsite_reqs_2) / 2; exit}") #Reading ispn metrics file with xsite reqs totad duration readarray -t lines1 < "${{ inputs.ispnCacheName}}_ispn_metrics_file_sum" num1=${lines1[0]} - num2=${lines1[1]} + num2=${lines1[2]} + num3=${lines1[1]} + num4=${lines1[3]} #calculating the number of xsite requests during the test execution - xsite_reqs_total_duration=$(awk "BEGIN {print ($num2-$num1); exit}") + xsite_reqs_total_duration_1=$(awk "BEGIN {print ($num2 - $num1); exit}") + xsite_reqs_total_duration_2=$(awk "BEGIN {print ($num4 - $num3); exit}") + + xsite_reqs_total_duration=$(awk "BEGIN {print ($xsite_reqs_total_duration_1 + $xsite_reqs_total_duration_2) / 2; exit}") #calculating the average duration per request in ms if [[ $xsite_reqs -gt 0 ]]; then averageXsiteReplicationTimePerReq=$(awk "BEGIN {print int($xsite_reqs_total_duration*1000/$xsite_reqs); exit}") diff --git a/.github/workflows/rosa-scaling-benchmark.yml b/.github/workflows/rosa-scaling-benchmark.yml index 8c50e5787..27dbae5b2 100644 --- a/.github/workflows/rosa-scaling-benchmark.yml +++ b/.github/workflows/rosa-scaling-benchmark.yml @@ -7,6 +7,10 @@ on: description: 'Name of the cluster' type: string default: 'gh-keycloak-a' + clusterPrefix: + description: 'Cluster prefix' + type: string + default: 'gh-keycloak' region: description: 'Name of the region where EC2 instances should be installed' type: 
string @@ -45,6 +49,10 @@ on: clusterName: description: 'Name of the cluster' type: string + default: 'gh-keycloak-a' + clusterPrefix: + description: 'Cluster prefix' + type: string default: 'gh-keycloak' region: description: 'Name of the region where EC2 instances should be installed' @@ -105,7 +113,7 @@ jobs: - name: Login to OpenShift cluster uses: ./.github/actions/oc-keycloak-login with: - clusterName: ${{ inputs.clusterName || format('gh-{0}', github.repository_owner) }} + clusterName: ${{ inputs.clusterPrefix }}-a - name: Setup Go Task uses: ./.github/actions/task-setup @@ -179,7 +187,19 @@ jobs: with: createReportFile: true - - name: Run Memory Usage Total Query Before Benchmark + - name: Run Memory Usage Total Query Before Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runMemoryUsageTotal: true + output: memory_create_sessions + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run Memory Usage Total Query Before Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -203,7 +223,24 @@ jobs: continue-on-error: true working-directory: ansible - - name: Run Memory Usage Total Query After Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run Memory Usage Total Query After Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runMemoryUsageTotal: true + output: memory_create_sessions + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run Memory Usage Total Query After Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -227,7 +264,24 @@ 
jobs: isvCPU: false isMemory: true - - name: Run CPU sec Util Query Before Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query Before Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: user_logins_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query Before Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -251,7 +305,24 @@ jobs: continue-on-error: true working-directory: ansible - - name: Run CPU sec Util Query After Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query After Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: user_logins_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query After Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -269,7 +340,24 @@ jobs: isvCPU: true isMemory: false - - name: Run CPU sec Util Query Before Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query Before Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: client_credential_grants_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + 
with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query Before Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} @@ -292,7 +380,24 @@ jobs: continue-on-error: true working-directory: ansible - - name: Run CPU sec Util Query After Benchmark + - name: Login to OpenShift cluster 1 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-a + + - name: Run CPU sec Util Query After Benchmark on Cluster 1 + uses: ./.github/actions/prometheus-run-queries + with: + project: ${{ env.PROJECT }} + runCpuSecsUtil: true + output: client_credential_grants_vCpu + + - name: Login to OpenShift cluster 2 + uses: ./.github/actions/oc-keycloak-login + with: + clusterName: ${{ inputs.clusterPrefix }}-b + + - name: Run CPU sec Util Query After Benchmark on Cluster 2 uses: ./.github/actions/prometheus-run-queries with: project: ${{ env.PROJECT }} diff --git a/benchmark/src/main/python/perfInsights.py b/benchmark/src/main/python/perfInsights.py index 4d454c654..5a4638aa3 100644 --- a/benchmark/src/main/python/perfInsights.py +++ b/benchmark/src/main/python/perfInsights.py @@ -61,7 +61,7 @@ def fetch_and_process_json(github_user, github_repo, branch_name, json_directory data_frames[test].append(df) combined_df = pd.concat(basic_df, ignore_index=True) - perf_across_deployments_df = combined_df[['start', 'context.externalInfinispanFeatureEnabled', 'context.persistentSessionsEnabled', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', 'credentialGrantsPerSecPer1vCpu', 'memoryUsageTest.activeSessionsPer500MbPerPod']] + perf_across_deployments_df = combined_df[['start', 'context.externalInfinispanFeatureEnabled', 'context.persistentSessionsEnabled', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', 'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu', 'memoryUsageTest.activeSessionsPer500MbPerPod']] print(perf_across_deployments_df.to_csv(index=False)) 
# Concatenate all DataFrames for each test into a single DataFrame