From 23f7d2982ab625ba80c04aed4a0c4fb082d506a9 Mon Sep 17 00:00:00 2001 From: Alvaro Carvajal Date: Fri, 28 Feb 2025 21:22:27 +0100 Subject: [PATCH] Make AWS/GCP HANA cluster playbook follow best practices In some scenarios, playbooks to configure HANA resources in a cluster are failing to deploy a healthy HanaSR cluster in AWS. This could be either due to a race condition, or to not configuring the clone resource into the cluster in maintenance mode as specified in the best practices. This commit adds into the `cluster-hana.yaml` task calls to `cs_wait_for_idle` after commands which modify the cluster configuration, as well as modifying the cluster configuration so the SAPHana resource clone is added in maintenance mode. Also calls were added to remove the maintenance flag from this resource later in the playbook. --- ansible/playbooks/tasks/cluster-hana.yaml | 46 +++++++++++++---------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/ansible/playbooks/tasks/cluster-hana.yaml b/ansible/playbooks/tasks/cluster-hana.yaml index bf7dd408..f85ed6a9 100644 --- a/ansible/playbooks/tasks/cluster-hana.yaml +++ b/ansible/playbooks/tasks/cluster-hana.yaml @@ -37,11 +37,6 @@ {{ 'Master' if ansible_facts.packages['pacemaker'][0].version is version('2.1.9', '<') else 'Promoted' }} -- name: Ensure maintenance mode is active - ansible.builtin.command: - cmd: crm maintenance on - when: crm_maintenance_mode is false or crm_maintenance_mode == 'unknown' - - name: Create HANA topology resource ansible.builtin.command: cmd: >- @@ -136,6 +131,7 @@ clone-node-max="1" target-role="Started" interleave="true" + maintenance="true" {% if clone_command == 'clone' %}promotable="true"{% endif %} when: - is_primary @@ -179,6 +175,10 @@ - use_hana_sr_angi | bool - hana_filesystem_clone | length == 0 +- name: Wait for cluster to settle + ansible.builtin.command: + cmd: cs_wait_for_idle -s 5 + - name: Configure colocation [aws] ansible.builtin.command: cmd: >- @@ -205,6 +205,10 @@ -
ip_colo | length == 0 - cloud_platform_is_gcp +- name: Wait for cluster to settle + ansible.builtin.command: + cmd: cs_wait_for_idle -s 5 + # Order configured as documentation for both AWS and GCP dictates # AWS: https://documentation.suse.com/sbp/sap-15/html/SLES4SAP-hana-sr-guide-perfopt-15-aws/index.html#id-constraints # GCP: https://cloud.google.com/solutions/sap/docs/sap-hana-ha-config-sles#create_the_saphana_primitive_resource @@ -220,25 +224,25 @@ - is_primary - cluster_order | length == 0 +- name: Wait for cluster to settle + ansible.builtin.command: + cmd: cs_wait_for_idle -s 5 -# Get current maintenance state -- name: Refresh cluster status +- name: Refresh SAPHana resource clone ansible.builtin.command: - cmd: crm configure show - register: refreshed_crm_conf_show - changed_when: false + cmd: "crm resource refresh {{ ms_saphanactl }}" -- name: Refresh crm facts - ansible.builtin.set_fact: - refreshed_crm_maintenance_mode: "{{ (refreshed_crm_conf_show.stdout | regex_search('maintenance-mode=([a-z]*)', '\\1'))[0] | default('unknown') }}" - changed_when: false +- name: Wait for cluster to settle + ansible.builtin.command: + cmd: cs_wait_for_idle -s 5 -- name: Ensure maintenance mode is disabled +- name: Remove maintenance mode from resource clone ansible.builtin.command: - cmd: crm maintenance off - when: - - is_primary - - refreshed_crm_maintenance_mode is true or refreshed_crm_maintenance_mode == 'unknown' + cmd: "crm resource maintenance {{ ms_saphanactl }} off" + +- name: Wait for cluster to settle + ansible.builtin.command: + cmd: cs_wait_for_idle -s 5 - name: Get cluster state ansible.builtin.command: @@ -255,6 +259,10 @@ - reg_crm_status.stdout | regex_search('Failed Resource Actions') | trim | length != 0 - is_primary +- name: Wait for cluster to settle + ansible.builtin.command: + cmd: cs_wait_for_idle -s 5 + # For debug purpose only - name: Get cluster status at the end ansible.builtin.command: