diff --git a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0101__metrics.robot b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0101__metrics.robot index 00defde73..f110c2571 100644 --- a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0101__metrics.robot +++ b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0101__metrics.robot @@ -28,6 +28,7 @@ Test Existence of Prometheus Alerting Rules [Tags] Smoke ... Tier1 ... ODS-509 + ... Monitoring Skip If RHODS Is Self-Managed Check Prometheus Alerting Rules @@ -36,6 +37,7 @@ Test Existence of Prometheus Recording Rules [Tags] Smoke ... Tier1 ... ODS-510 + ... Monitoring Skip If RHODS Is Self-Managed Check Prometheus Recording Rules @@ -44,6 +46,7 @@ Test Metric "Notebook CPU Usage" On ODS Prometheus [Tags] Sanity ... Tier1 ... ODS-178 + ... Monitoring Skip If RHODS Is Self-Managed ${cpu_usage_before} = Read Current CPU Usage Run Jupyter Notebook For 5 Minutes @@ -56,6 +59,7 @@ Test Metric "Rhods_Total_Users" On ODS Prometheus [Tags] Sanity ... Tier1 ... ODS-628 + ... Monitoring Skip If RHODS Is Self-Managed # Note: the expression ends with "step=1" to obtain the value for current second ${expression} = Set Variable rhods_total_users&step=1 @@ -81,6 +85,7 @@ Test Metric Existence For "Rhods_Aggregate_Availability" On ODS Prometheus [Tags] Sanity ... Tier1 ... ODS-636 + ... Monitoring Skip If RHODS Is Self-Managed ${expression} = Set Variable rhods_aggregate_availability&step=1 ${resp} = Prometheus.Run Query ${RHODS_PROMETHEUS_URL} ${RHODS_PROMETHEUS_TOKEN} ${expression} diff --git a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0102__billing_metrics.robot b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0102__billing_metrics.robot index 452e3dd09..4fd2bff15 100644 --- a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0102__billing_metrics.robot +++ b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0102__billing_metrics.robot @@ -27,6 +27,7 @@ Verify OpenShift Monitoring Results Are Correct When Running Undefined Queries [Tags] Sanity ... Tier1 ... ODS-173 + ... Monitoring Run OpenShift Metrics Query ${METRIC_RHODS_UNDEFINED} username=${OCP_ADMIN_USER.USERNAME} password=${OCP_ADMIN_USER.PASSWORD} ... auth_type=${OCP_ADMIN_USER.AUTH_TYPE} retry_attempts=1 Metrics.Verify Query Results Dont Contain Data @@ -37,6 +38,7 @@ Test Billing Metric (Notebook Cpu Usage) On OpenShift Monitoring [Tags] Sanity ... Tier1 ... ODS-175 + ... Monitoring Run Jupyter Notebook For 5 Minutes Verify Previus CPU Usage Is Greater Than Zero [Teardown] CleanUp JupyterHub And Close All Browsers @@ -46,6 +48,7 @@ Test Metric "Rhods_Total_Users" On Cluster Monitoring Prometheus [Tags] Sanity ... ODS-634 ... Tier1 + ... Monitoring Skip If RHODS Is Self-Managed ${value} = Run OpenShift Metrics Query query=rhods_total_users username=${OCP_ADMIN_USER.USERNAME} password=${OCP_ADMIN_USER.PASSWORD} ... auth_type=${OCP_ADMIN_USER.AUTH_TYPE} @@ -59,7 +62,7 @@ Test Metric "Rhods_Aggregate_Availability" On Cluster Monitoring Prometheus [Tags] Smoke ... ODS-637 ... Tier1 - + ... Monitoring Skip If RHODS Is Self-Managed ${value_openshift_observe} = Run OpenShift Metrics Query @@ -80,7 +83,7 @@ Test Metric "Active_Users" On OpenShift Monitoring On Cluster Monitoring Prometh [Tags] Sanity ... ODS-1053 ... Tier1 - + ... Monitoring ${active_users_before} = Run OpenShift Metrics Query ... username=${OCP_ADMIN_USER.USERNAME} password=${OCP_ADMIN_USER.PASSWORD} ... auth_type=${OCP_ADMIN_USER.AUTH_TYPE} query=cluster:usage:consumption:rhods:active_users @@ -110,6 +113,7 @@ Test Metric "Active Notebook Pod Time" On OpenShift Monitoring - Cluster Monitor [Tags] Sanity ... ODS-1055 ... Tier1 + ... Monitoring @{list_of_usernames} = Create List ${TEST_USER_3.USERNAME} ${TEST_USER_4.USERNAME} Log In N Users To JupyterLab And Launch A Notebook For Each Of Them ... list_of_usernames=${list_of_usernames} diff --git a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0103__check_pager_duty.robot b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0103__check_pager_duty.robot index f0e284cf4..d517b72b8 100644 --- a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0103__check_pager_duty.robot +++ b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0101__metrics/0103__check_pager_duty.robot @@ -33,6 +33,7 @@ PagerDuty Dummy Secret Verification ... Tier1 ... ODS-737 ... Deployment-Cli + ... Monitoring Skip If RHODS Is Self-Managed ${service_key} Get PagerDuty Key From Alertmanager ConfigMap ${secret_key} Get PagerDuty Key From Secrets diff --git a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0102__alerts/0102__alerts.robot b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0102__alerts/0102__alerts.robot index 874038d2d..3629d240f 100644 --- a/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0102__alerts/0102__alerts.robot +++ b/ods_ci/tests/Tests/0100__platform/0102__monitor_and_manage/0102__alerts/0102__alerts.robot @@ -32,7 +32,7 @@ Verify All Alerts Severity [Tags] Smoke ... Tier1 ... ODS-1227 - + ... Monitoring Verify "DeadManSnitch" Alerts Severity And Continue On Failure Verify "Kubeflow Notebook Controller Pod Is Not Running" Alerts Severity And Continue On Failure Verify "ODH Notebook Controller Pod Is Not Running" Alerts Severity And Continue On Failure @@ -46,7 +46,7 @@ Verify No Alerts Are Firing Except For DeadManSnitch # robocop: disable:too-l [Tags] Smoke ... Tier1 ... ODS-540 - + ... Monitoring Verify Alert Is Firing And Continue On Failure ... DeadManSnitch DeadManSnitch @@ -67,7 +67,7 @@ Verify Alert RHODS-PVC-Usage-Above-90 Is Fired When User PVC Is Above 90 Percent ... verifies that alert "User notebook pvc usage above 90%" is fired [Tags] Tier2 ... ODS-516 - + ... Monitoring Fill Up User PVC ${NOTEBOOK_REPO_URL} ${TEST_ALERT_PVC90_NOTEBOOK_PATH} Prometheus.Wait Until Alert Is Firing ${RHODS_PROMETHEUS_URL} @@ -83,7 +83,7 @@ Verify Alert RHODS-PVC-Usage-At-100 Is Fired When User PVC Is At 100 Percent ... verifies that alert "User notebook pvc usage at 100%" is fired [Tags] Tier2 ... ODS-517 - + ... Monitoring Fill Up User PVC ${NOTEBOOK_REPO_URL} ${TEST_ALERT_PVC100_NOTEBOOK_PATH} Prometheus.Wait Until Alert Is Firing ${RHODS_PROMETHEUS_URL} @@ -99,7 +99,7 @@ Verify Alerts Are Fired When RHODS Dashboard Is Down # robocop: disable:too-l ... are fired when rhods-dashboard is not working [Tags] Tier3 ... ODS-739 - + ... Monitoring Skip If Alert Is Already Firing ${RHODS_PROMETHEUS_URL} ... ${RHODS_PROMETHEUS_TOKEN} ... SLOs-haproxy_backend_http_responses_dashboard @@ -145,7 +145,7 @@ Verify Alert "Kubeflow notebook controller pod is not running" Is Fired When Kub ... when notebook-controller-deployment-xxx pod is not running [Tags] Tier3 ... ODS-1700 - + ... Monitoring Skip If Alert Is Already Firing ${RHODS_PROMETHEUS_URL} ... ${RHODS_PROMETHEUS_TOKEN} ... RHODS Notebook controllers @@ -177,7 +177,7 @@ Verify Alert "ODH notebook controller pod is not running" Is Fired When ODH Cont ... when odh-notebook-controller-manager-xxx pod is not running [Tags] Tier3 ... ODS-1701 - + ... Monitoring Skip If Alert Is Already Firing ${RHODS_PROMETHEUS_URL} ... ${RHODS_PROMETHEUS_TOKEN} ... RHODS Notebook controllers @@ -209,7 +209,7 @@ Verify That MT-SRE Are Not Paged For Alerts In Clusters Used For Development Or [Tags] Sanity ... ODS-1058 ... Tier1 - + ... Monitoring ${res} = Check Cluster Name Contain "Aisrhods" Or Not IF ${res} ${receiver} = Set Variable alerts-sink