diff --git a/src/backups.py b/src/backups.py index 09e8d9fe38..8cf2c3c7fd 100644 --- a/src/backups.py +++ b/src/backups.py @@ -8,7 +8,9 @@ import os import re import tempfile +import time from datetime import datetime, timezone +from io import BytesIO from typing import Dict, List, Optional, Tuple import boto3 as boto3 @@ -18,10 +20,11 @@ from jinja2 import Template from lightkube import ApiError, Client from lightkube.resources.core_v1 import Endpoints +from ops import HookEvent from ops.charm import ActionEvent from ops.framework import Object from ops.jujuversion import JujuVersion -from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus +from ops.model import ActiveStatus, MaintenanceStatus from ops.pebble import ChangeError, ExecError from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed @@ -65,6 +68,9 @@ def __init__(self, charm, relation_name: str): self.framework.observe( self.s3_client.on.credentials_changed, self._on_s3_credential_changed ) + # When the leader unit is being removed, s3_client.on.credentials_gone is performed on it (and only on it). + # After a new leader is elected, the S3 connection must be reinitialized. + self.framework.observe(self.charm.on.leader_elected, self._on_s3_credential_changed) self.framework.observe(self.s3_client.on.credentials_gone, self._on_s3_credential_gone) self.framework.observe(self.charm.on.create_backup_action, self._on_create_backup_action) self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups_action) @@ -147,34 +153,46 @@ def _can_unit_perform_backup(self) -> Tuple[bool, Optional[str]]: def can_use_s3_repository(self) -> Tuple[bool, Optional[str]]: """Returns whether the charm was configured to use another cluster repository.""" - # Prevent creating backups and storing in another cluster repository. + # Check model uuid + s3_parameters, _ = self._retrieve_s3_parameters() + s3_model_uuid = self._read_content_from_s3( + "model-uuid.txt", + s3_parameters, + ) + if s3_model_uuid and s3_model_uuid.strip() != self.model.uuid: + logger.debug( + f"can_use_s3_repository: incompatible model-uuid s3={s3_model_uuid.strip()}, local={self.model.uuid}" + ) + return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE + output, _ = self._execute_command(["pgbackrest", "info", "--output=json"], timeout=30) if output is None: return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE - if self.charm.unit.is_leader(): - for stanza in json.loads(output): - system_identifier_from_instance, error = self._execute_command([ - f'/usr/lib/postgresql/{self.charm._patroni.rock_postgresql_version.split(".")[0]}/bin/pg_controldata', - "/var/lib/postgresql/data/pgdata", - ]) - if error != "": - raise Exception(error) - system_identifier_from_instance = [ - line - for line in system_identifier_from_instance.splitlines() - if "Database system identifier" in line - ][0].split(" ")[-1] - system_identifier_from_stanza = str(stanza.get("db")[0]["system-id"]) - if system_identifier_from_instance != system_identifier_from_stanza or stanza.get( - "name" - ) != self.charm.app_peer_data.get("stanza", self.stanza_name): - # Prevent archiving of WAL files. - self.charm.app_peer_data.update({"stanza": ""}) - self.charm.update_config() - if self.charm._patroni.member_started: - self.charm._patroni.reload_patroni_configuration() - return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE + for stanza in json.loads(output): + if stanza.get("name") != self.stanza_name: + logger.debug( + f"can_use_s3_repository: incompatible stanza name s3={stanza.get('name', '')}, local={self.stanza_name}" + ) + return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE + + system_identifier_from_instance, error = self._execute_command([ + f'/usr/lib/postgresql/{self.charm._patroni.rock_postgresql_version.split(".")[0]}/bin/pg_controldata', + "/var/lib/postgresql/data/pgdata", + ]) + if error != "": + raise Exception(error) + system_identifier_from_instance = [ + line + for line in system_identifier_from_instance.splitlines() + if "Database system identifier" in line + ][0].split(" ")[-1] + system_identifier_from_stanza = str(stanza.get("db")[0]["system-id"]) + if system_identifier_from_instance != system_identifier_from_stanza: + logger.debug( + f"can_use_s3_repository: incompatible system identifier s3={system_identifier_from_stanza}, local={system_identifier_from_instance}" + ) + return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE return True, None @@ -515,103 +533,117 @@ def _parse_backup_id(self, label) -> Tuple[str, str]: backup_type, ) - def _initialise_stanza(self) -> None: + def _initialise_stanza(self, event: HookEvent) -> bool: """Initialize the stanza. A stanza is the configuration for a PostgreSQL database cluster that defines where it is located, how it will be backed up, archiving options, etc. (more info in https://pgbackrest.org/user-guide.html#quickstart/configure-stanza). """ - if not self.charm.is_primary: - return - # Enable stanza initialisation if the backup settings were fixed after being invalid # or pointing to a repository where there are backups from another cluster. if self.charm.is_blocked and self.charm.unit.status.message not in S3_BLOCK_MESSAGES: - logger.warning("couldn't initialize stanza due to a blocked status") - return + logger.warning("couldn't initialize stanza due to a blocked status, deferring event") + event.defer() + return False self.charm.unit.status = MaintenanceStatus("initialising stanza") + # Create the stanza. try: - # Create the stanza. - self._execute_command(["pgbackrest", f"--stanza={self.stanza_name}", "stanza-create"]) + # If the tls is enabled, it requires all the units in the cluster to run the pgBackRest service to + # successfully complete validation, and upon receiving the same parent event other units should start it. + # Therefore, the first retry may fail due to the delay of these other units to start this service. 60s given + # for that or else the s3 initialization sequence will fail. + for attempt in Retrying(stop=stop_after_attempt(6), wait=wait_fixed(10), reraise=True): + with attempt: + self._execute_command([ + "pgbackrest", + f"--stanza={self.stanza_name}", + "stanza-create", + ]) except ExecError as e: logger.exception(e) - self.charm.unit.status = BlockedStatus(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE) - return + self._s3_initialization_set_failure(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE) + return False self.start_stop_pgbackrest_service() - # Store the stanza name to be used in configurations updates. + # Rest of the successful s3 initialization sequence such as s3-initialization-start and s3-initialization-done + # are left to the check_stanza func. if self.charm.unit.is_leader(): self.charm.app_peer_data.update({ "stanza": self.stanza_name, - "init-pgbackrest": "True", }) else: self.charm.unit_peer_data.update({ "stanza": self.stanza_name, - "init-pgbackrest": "True", }) - def check_stanza(self) -> None: - """Runs the pgbackrest stanza validation.""" - if not self.charm.is_primary or "init-pgbackrest" not in self.charm.app_peer_data: - return + return True + + def check_stanza(self) -> bool: + """Runs the pgbackrest stanza validation. + Returns: + whether stanza validation was successful. + """ # Update the configuration to use pgBackRest as the archiving mechanism. self.charm.update_config() self.charm.unit.status = MaintenanceStatus("checking stanza") try: - # Check that the stanza is correctly configured. - for attempt in Retrying(stop=stop_after_attempt(5), wait=wait_fixed(3)): + # If the tls is enabled, it requires all the units in the cluster to run the pgBackRest service to + # successfully complete validation, and upon receiving the same parent event other units should start it. + # Therefore, the first retry may fail due to the delay of these other units to start this service. 60s given + # for that or else the s3 initialization sequence will fail. + for attempt in Retrying(stop=stop_after_attempt(6), wait=wait_fixed(10), reraise=True): with attempt: if self.charm._patroni.member_started: self.charm._patroni.reload_patroni_configuration() self._execute_command(["pgbackrest", f"--stanza={self.stanza_name}", "check"]) - self.charm.unit.status = ActiveStatus() - except RetryError as e: - # If the check command doesn't succeed, remove the stanza name - # and rollback the configuration. - if self.charm.unit.is_leader(): - self.charm.app_peer_data.update({"stanza": ""}) - self.charm.app_peer_data.pop("init-pgbackrest", None) - self.charm.unit_peer_data.update({"stanza": "", "init-pgbackrest": ""}) - self.charm.update_config() - + self.charm._set_active_status() + except Exception as e: logger.exception(e) - self.charm.unit.status = BlockedStatus(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE) - return + self._s3_initialization_set_failure(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE) + return False if self.charm.unit.is_leader(): - self.charm.app_peer_data.pop("init-pgbackrest", None) - self.charm.unit_peer_data.pop("init-pgbackrest", None) + self.charm.app_peer_data.update({ + "s3-initialization-start": "", + }) + else: + self.charm.unit_peer_data.update({"s3-initialization-done": "True"}) + + return True def coordinate_stanza_fields(self) -> None: """Coordinate the stanza name between the primary and the leader units.""" - for unit, unit_data in self.charm._peers.data.items(): - if "stanza" not in unit_data: + if ( + not self.charm.unit.is_leader() + or "s3-initialization-start" not in self.charm.app_peer_data + ): + return + + for _unit, unit_data in self.charm._peers.data.items(): + if "s3-initialization-done" not in unit_data: continue - # If the stanza name is not set in the application databag, then the primary is not - # the leader unit, and it's needed to set the stanza name in the application databag. - if "stanza" not in self.charm.app_peer_data and self.charm.unit.is_leader(): - self.charm.app_peer_data.update({ - "stanza": self.stanza_name, - "init-pgbackrest": "True", - }) - break - # If the stanza was already checked and its name is still in the unit databag, mark - # the stanza as already checked in the application databag and remove it from the - # unit databag. - if "init-pgbackrest" not in unit_data: - if self.charm.unit.is_leader(): - self.charm.app_peer_data.pop("init-pgbackrest", None) - if "init-pgbackrest" not in self.charm.app_peer_data and unit == self.charm.unit: - self.charm.unit_peer_data.update({"stanza": ""}) - break + + self.charm.app_peer_data.update({ + "stanza": unit_data.get("stanza", ""), + "s3-initialization-block-message": unit_data.get( + "s3-initialization-block-message", "" + ), + "s3-initialization-start": "", + "s3-initialization-done": "True", + }) + + self.charm.update_config() + if self.charm._patroni.member_started: + self.charm._patroni.reload_patroni_configuration() + + break @property def _is_primary_pgbackrest_service_running(self) -> bool: @@ -674,22 +706,56 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent): event.defer() return - # Verify the s3 relation only on the primary. - if not self.charm.is_primary: - return + self.start_stop_pgbackrest_service() + + if self.charm.unit.is_leader(): + self.charm.app_peer_data.update({ + "s3-initialization-block-message": "", + "s3-initialization-start": time.asctime(time.gmtime()), + "stanza": "", + "s3-initialization-done": "", + }) + if not self.charm.is_primary: + self.charm._set_active_status() + + if self.charm.is_primary and "s3-initialization-done" not in self.charm.unit_peer_data: + self._on_s3_credential_changed_primary(event) + + if self.charm.is_standby_leader: + logger.info( + "S3 credentials will not be connected on standby cluster until it becomes primary" + ) + + def _on_s3_credential_changed_primary(self, event: HookEvent) -> bool: + self.charm.update_config() + if self.charm._patroni.member_started: + self.charm._patroni.reload_patroni_configuration() try: self._create_bucket_if_not_exists() except (ClientError, ValueError): - self.charm.unit.status = BlockedStatus(FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE) - return + self._s3_initialization_set_failure(FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE) + return False can_use_s3_repository, validation_message = self.can_use_s3_repository() if not can_use_s3_repository: - self.charm.unit.status = BlockedStatus(validation_message) - return + self._s3_initialization_set_failure(validation_message) + return False + + if not self._initialise_stanza(event): + return False + + if not self.check_stanza(): + return False - self._initialise_stanza() + s3_parameters, _ = self._retrieve_s3_parameters() + self._upload_content_to_s3( + self.model.uuid, + "model-uuid.txt", + s3_parameters, + ) + + return True def _on_create_backup_action(self, event) -> None: # noqa: C901 """Request that pgBackRest creates a backup.""" @@ -732,10 +798,7 @@ def _on_create_backup_action(self, event) -> None: # noqa: C901 """ if not self._upload_content_to_s3( metadata, - os.path.join( - s3_parameters["path"], - f"backup/{self.stanza_name}/latest", - ), + f"backup/{self.stanza_name}/latest", s3_parameters, ): error_message = "Failed to upload metadata to provided S3" @@ -790,10 +853,7 @@ def _on_create_backup_action(self, event) -> None: # noqa: C901 """ self._upload_content_to_s3( logs, - os.path.join( - s3_parameters["path"], - f"backup/{self.stanza_name}/{backup_id}/backup.log", - ), + f"backup/{self.stanza_name}/{backup_id}/backup.log", s3_parameters, ) error_message = f"Failed to backup PostgreSQL with error: {str(e)}" @@ -809,10 +869,7 @@ def _on_create_backup_action(self, event) -> None: # noqa: C901 """ if not self._upload_content_to_s3( logs, - os.path.join( - s3_parameters["path"], - f"backup/{self.stanza_name}/{backup_id}/backup.log", - ), + f"backup/{self.stanza_name}/{backup_id}/backup.log", s3_parameters, ): error_message = "Error uploading logs to S3" @@ -835,11 +892,18 @@ def _on_s3_credential_gone(self, _) -> None: if self.charm.unit.is_leader(): self.charm.app_peer_data.update({ "stanza": "", - "init-pgbackrest": "", + "s3-initialization-start": "", + "s3-initialization-done": "", + "s3-initialization-block-message": "", }) - self.charm.unit_peer_data.update({"stanza": "", "init-pgbackrest": ""}) + self.charm.unit_peer_data.update({ + "stanza": "", + "s3-initialization-start": "", + "s3-initialization-done": "", + "s3-initialization-block-message": "", + }) if self.charm.is_blocked and self.charm.unit.status.message in S3_BLOCK_MESSAGES: - self.charm.unit.status = ActiveStatus() + self.charm._set_active_status() def _on_list_backups_action(self, event) -> None: """List the previously created backups.""" @@ -972,6 +1036,7 @@ def _on_restore_action(self, event): # noqa: C901 "restore-stanza": restore_stanza_timeline[0], "restore-timeline": restore_stanza_timeline[1] if restore_to_time else "", "restore-to-time": restore_to_time or "", + "s3-initialization-block-message": "", }) self.charm.update_config() @@ -1210,7 +1275,11 @@ def start_stop_pgbackrest_service(self) -> bool: return False # Stop the service if TLS is not enabled or there are no replicas. - if not self.charm.is_tls_enabled or len(self.charm.peer_members_endpoints) == 0: + if ( + not self.charm.is_tls_enabled + or len(self.charm.peer_members_endpoints) == 0 + or self.charm._patroni.get_standby_leader() + ): self.container.stop(self.charm.pgbackrest_server_service) return True @@ -1223,12 +1292,12 @@ def start_stop_pgbackrest_service(self) -> bool: return True def _upload_content_to_s3( - self: str, + self, content: str, s3_path: str, - s3_parameters: Dict, + s3_parameters: dict, ) -> bool: - """Uploads the provided contents to the provided S3 bucket. + """Uploads the provided contents to the provided S3 bucket relative to the path from the S3 config. Args: content: The content to upload to S3 @@ -1241,10 +1310,9 @@ def _upload_content_to_s3( a boolean indicating success. """ bucket_name = s3_parameters["bucket"] - s3_path = os.path.join(s3_parameters["path"], s3_path).lstrip("/") - logger.info(f"Uploading content to bucket={s3_parameters['bucket']}, path={s3_path}") + processed_s3_path = os.path.join(s3_parameters["path"], s3_path).lstrip("/") try: - logger.info(f"Uploading content to bucket={bucket_name}, path={s3_path}") + logger.info(f"Uploading content to bucket={bucket_name}, path={processed_s3_path}") session = boto3.session.Session( aws_access_key_id=s3_parameters["access-key"], aws_secret_access_key=s3_parameters["secret-key"], @@ -1261,11 +1329,89 @@ def _upload_content_to_s3( with tempfile.NamedTemporaryFile() as temp_file: temp_file.write(content.encode("utf-8")) temp_file.flush() - bucket.upload_file(temp_file.name, s3_path) + bucket.upload_file(temp_file.name, processed_s3_path) except Exception as e: logger.exception( - f"Failed to upload content to S3 bucket={bucket_name}, path={s3_path}", exc_info=e + f"Failed to upload content to S3 bucket={bucket_name}, path={processed_s3_path}", + exc_info=e, ) return False return True + + def _read_content_from_s3(self, s3_path: str, s3_parameters: dict) -> str | None: + """Reads specified content from the provided S3 bucket relative to the path from the S3 config. + + Args: + s3_path: The S3 path from which download the content + s3_parameters: A dictionary containing the S3 parameters + The following are expected keys in the dictionary: bucket, region, + endpoint, access-key and secret-key + + Returns: + a string with the content if object is successfully downloaded and None if file is not existing or error + occurred during download. + """ + bucket_name = s3_parameters["bucket"] + processed_s3_path = os.path.join(s3_parameters["path"], s3_path).lstrip("/") + try: + logger.info(f"Reading content from bucket={bucket_name}, path={processed_s3_path}") + session = boto3.session.Session( + aws_access_key_id=s3_parameters["access-key"], + aws_secret_access_key=s3_parameters["secret-key"], + region_name=s3_parameters["region"], + ) + s3 = session.resource( + "s3", + endpoint_url=self._construct_endpoint(s3_parameters), + verify=(self._tls_ca_chain_filename or None), + ) + bucket = s3.Bucket(bucket_name) + with BytesIO() as buf: + bucket.download_fileobj(processed_s3_path, buf) + return buf.getvalue().decode("utf-8") + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + logger.info( + f"No such object to read from S3 bucket={bucket_name}, path={processed_s3_path}" + ) + else: + logger.exception( + f"Failed to read content from S3 bucket={bucket_name}, path={processed_s3_path}", + exc_info=e, + ) + except Exception as e: + logger.exception( + f"Failed to read content from S3 bucket={bucket_name}, path={processed_s3_path}", + exc_info=e, + ) + + return None + + def _s3_initialization_set_failure( + self, block_message: str, update_leader_status: bool = True + ): + """Sets failed s3 initialization status with corresponding block_message in the app_peer_data (leader) or unit_peer_data (primary). + + Args: + block_message: s3 initialization block message + update_leader_status: whether to update leader status (with s3-initialization-block-message set already) + immediately; defaults to True + """ + if self.charm.unit.is_leader(): + # If performed on the leader, then leader == primary and there is no need to sync s3 data between two + # different units. Therefore, there is no need for s3-initialization-done key, and it is sufficient to clear + # s3-initialization-start key only. + self.charm.app_peer_data.update({ + "s3-initialization-block-message": block_message, + "s3-initialization-start": "", + "stanza": "", + }) + if update_leader_status: + self.charm._set_active_status() + else: + self.charm.unit_peer_data.update({ + "s3-initialization-block-message": block_message, + "s3-initialization-done": "True", + "stanza": "", + }) diff --git a/src/charm.py b/src/charm.py index d8da02e2ac..3135e8ba79 100755 --- a/src/charm.py +++ b/src/charm.py @@ -83,7 +83,7 @@ from requests import ConnectionError from tenacity import RetryError, Retrying, stop_after_attempt, stop_after_delay, wait_fixed -from backups import CANNOT_RESTORE_PITR, PostgreSQLBackups +from backups import CANNOT_RESTORE_PITR, S3_BLOCK_MESSAGES, PostgreSQLBackups from config import CharmConfig from constants import ( APP_SCOPE, @@ -599,10 +599,6 @@ def _on_peer_relation_changed(self, event: HookEvent) -> None: # noqa: C901 except ModelError as e: logger.warning("Cannot update read_only endpoints: %s", str(e)) - self.backup.coordinate_stanza_fields() - - self.backup.check_stanza() - # Start or stop the pgBackRest TLS server service when TLS certificate change. if not self.backup.start_stop_pgbackrest_service(): # Ping primary to start its TLS server. @@ -615,6 +611,26 @@ def _on_peer_relation_changed(self, event: HookEvent) -> None: # noqa: C901 else: self.unit_peer_data.pop("start-tls-server", None) + self.backup.coordinate_stanza_fields() + + # This is intended to be executed only when leader is reinitializing S3 connection due to the leader change. + if ( + "s3-initialization-start" in self.app_peer_data + and "s3-initialization-done" not in self.unit_peer_data + and self.is_primary + and not self.backup._on_s3_credential_changed_primary(event) + ): + return + + # Clean-up unit initialization data after successful sync to the leader. + if "s3-initialization-done" in self.app_peer_data and not self.unit.is_leader(): + self.unit_peer_data.update({ + "stanza": "", + "s3-initialization-block-message": "", + "s3-initialization-done": "", + "s3-initialization-start": "", + }) + self.async_replication.handle_read_only_mode() def _on_config_changed(self, event) -> None: @@ -969,6 +985,11 @@ def _set_active_status(self): if self.unit.status.message == INSUFFICIENT_SIZE_WARNING: return try: + if self.unit.is_leader() and "s3-initialization-block-message" in self.app_peer_data: + self.unit.status = BlockedStatus( + self.app_peer_data["s3-initialization-block-message"] + ) + return if self._patroni.get_primary(unit_name_pattern=True) == self.unit.name: self.unit.status = ActiveStatus("Primary") elif self.is_standby_leader: @@ -1351,7 +1372,9 @@ def _on_update_status_early_exit_checks(self, container) -> bool: self._check_pgdata_storage_size() - if self._has_blocked_status or self._has_non_restore_waiting_status: + if ( + self._has_blocked_status and self.unit.status not in S3_BLOCK_MESSAGES + ) or self._has_non_restore_waiting_status: # If charm was failing to disable plugin, try again and continue (user may have removed the objects) if self.unit.status.message == EXTENSION_OBJECT_MESSAGE: self.enable_disable_extensions() @@ -1422,6 +1445,8 @@ def _on_update_status(self, _) -> None: # Update the sync-standby endpoint in the async replication data. self.async_replication.update_async_replication_data() + self.backup.coordinate_stanza_fields() + self._set_active_status() def _was_restore_successful(self, container: Container, service: ServiceInfo) -> bool: @@ -1476,8 +1501,12 @@ def _was_restore_successful(self, container: Container, service: ServiceInfo) -> can_use_s3_repository, validation_message = self.backup.can_use_s3_repository() if not can_use_s3_repository: - self.unit.status = BlockedStatus(validation_message) - return False + self.app_peer_data.update({ + "stanza": "", + "s3-initialization-start": "", + "s3-initialization-done": "", + "s3-initialization-block-message": validation_message, + }) return True @@ -1834,7 +1863,7 @@ def update_config(self, is_creating_backup: bool = False) -> bool: pitr_target=self.app_peer_data.get("restore-to-time"), restore_timeline=self.app_peer_data.get("restore-timeline"), restore_to_latest=self.app_peer_data.get("restore-to-time", None) == "latest", - stanza=self.app_peer_data.get("stanza"), + stanza=self.app_peer_data.get("stanza", self.unit_peer_data.get("stanza")), restore_stanza=self.app_peer_data.get("restore-stanza"), parameters=postgresql_parameters, ) @@ -2134,6 +2163,8 @@ def is_pitr_failed(self, container: Container) -> Tuple[bool, bool]: patroni_exceptions = [] count = 0 while len(patroni_exceptions) == 0 and count < 10: + if count > 0: + time.sleep(3) try: log_exec = container.pebble.exec( ["pebble", "logs", "postgresql", "-n", "all"], combine_stderr=True @@ -2172,7 +2203,6 @@ def is_pitr_failed(self, container: Container) -> Tuple[bool, bool]: re.MULTILINE, ) count += 1 - time.sleep(3) if len(patroni_exceptions) > 0: logger.debug("Failures to bootstrap cluster detected on Patroni service logs") diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py index c84af074bf..28f9a70a0f 100644 --- a/tests/unit/test_backups.py +++ b/tests/unit/test_backups.py @@ -210,60 +210,30 @@ def test_can_use_s3_repository(harness): ) as _rock_postgresql_version, patch("charm.PostgreSQLBackups._execute_command") as _execute_command, patch( - "charms.postgresql_k8s.v0.postgresql.PostgreSQL.get_last_archived_wal" - ) as _get_last_archived_wal, + "charm.PostgreSQLBackups._retrieve_s3_parameters", + return_value=({"path": "example"}, None), + ), + patch("charm.PostgreSQLBackups._read_content_from_s3") as _read_content_from_s3, ): - peer_rel_id = harness.model.get_relation(PEER).id - # Define the stanza name inside the unit relation data. - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": harness.charm.backup.stanza_name}, - ) + # Test with bad model-uuid. + _read_content_from_s3.return_value = "bad" + assert harness.charm.backup.can_use_s3_repository() == ( + False, + ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, + ) # Test when nothing is returned from the pgBackRest info command. + _read_content_from_s3.return_value = harness.model.uuid _execute_command.return_value = (None, None) assert harness.charm.backup.can_use_s3_repository() == ( False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE, ) - # Test when the unit is a replica. pgbackrest_info_same_cluster_backup_output = ( f'[{{"db": [{{"system-id": "12345"}}], "name": "{harness.charm.backup.stanza_name}"}}]', None, ) - _execute_command.return_value = pgbackrest_info_same_cluster_backup_output - assert harness.charm.backup.can_use_s3_repository() == (True, None) - - # Assert that the stanza name is still in the unit relation data. - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": harness.charm.backup.stanza_name - } - - # Test when the unit is the leader and the workload is running, - # but an exception happens when retrieving the cluster system id. - _member_started.return_value = True - _execute_command.side_effect = [ - pgbackrest_info_same_cluster_backup_output, - ("", "fake error"), - ] - with harness.hooks_disabled(): - harness.set_leader() - try: - assert harness.charm.backup.can_use_s3_repository() == ( - False, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - assert False - except AssertionError as e: - raise e - except Exception: - pass - _update_config.assert_not_called() - _member_started.assert_not_called() - _reload_patroni_configuration.assert_not_called() # Test when the cluster system id can be retrieved, but it's different from the stanza system id. pgbackrest_info_other_cluster_system_id_backup_output = ( @@ -278,27 +248,12 @@ def test_can_use_s3_repository(harness): pgbackrest_info_other_cluster_system_id_backup_output, other_instance_system_identifier_output, ] - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": harness.charm.backup.stanza_name}, - ) assert harness.charm.backup.can_use_s3_repository() == ( False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, ) - _update_config.assert_called_once() - _member_started.assert_called_once() - _reload_patroni_configuration.assert_called_once() - - # Assert that the stanza name is not present in the unit relation data anymore. - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} # Test when the cluster system id can be retrieved, but it's different from the stanza system id. - _update_config.reset_mock() - _member_started.reset_mock() - _reload_patroni_configuration.reset_mock() pgbackrest_info_other_cluster_name_backup_output = ( f'[{{"db": [{{"system-id": "12345"}}], "name": "another-model.{harness.charm.cluster_name}"}}]', None, @@ -311,34 +266,13 @@ def test_can_use_s3_repository(harness): pgbackrest_info_other_cluster_name_backup_output, same_instance_system_identifier_output, ] - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": harness.charm.backup.stanza_name}, - ) assert harness.charm.backup.can_use_s3_repository() == ( False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, ) - _update_config.assert_called_once() - _member_started.assert_called_once() - _reload_patroni_configuration.assert_called_once() - - # Assert that the stanza name is not present in the unit relation data anymore. - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} # Test when the workload is not running. - _update_config.reset_mock() - _member_started.reset_mock() - _reload_patroni_configuration.reset_mock() _member_started.return_value = False - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": harness.charm.backup.stanza_name}, - ) _execute_command.side_effect = [ pgbackrest_info_same_cluster_backup_output, other_instance_system_identifier_output, @@ -347,31 +281,14 @@ def test_can_use_s3_repository(harness): False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, ) - _update_config.assert_called_once() - _member_started.assert_called_once() - _reload_patroni_configuration.assert_not_called() - - # Assert that the stanza name is not present in the unit relation data anymore. - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} # Test when there is no backup from another cluster in the S3 repository. - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": harness.charm.backup.stanza_name}, - ) _execute_command.side_effect = [ pgbackrest_info_same_cluster_backup_output, same_instance_system_identifier_output, ] assert harness.charm.backup.can_use_s3_repository() == (True, None) - # Assert that the stanza name is still in the unit relation data. - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": harness.charm.backup.stanza_name - } - def test_construct_endpoint(harness): # Test with an AWS endpoint without region. @@ -764,24 +681,22 @@ def test_initialise_stanza(harness): patch("charm.PostgresqlOperatorCharm.update_config") as _update_config, patch("charm.PostgreSQLBackups._execute_command") as _execute_command, patch( - "charm.PostgresqlOperatorCharm.is_primary", new_callable=PropertyMock - ) as _is_primary, + "charm.PostgreSQLBackups._s3_initialization_set_failure" + ) as _s3_initialization_set_failure, ): peer_rel_id = harness.model.get_relation(PEER).id - # Test when the unit is not the primary. - _is_primary.return_value = False - harness.charm.backup._initialise_stanza() - _execute_command.assert_not_called() - # Test when the unit is the primary, but it's in a blocked state - # other than the ones can be solved by new S3 settings. - _is_primary.return_value = True + mock_event = MagicMock() + + # Test when it's in a blocked state other than the ones can be solved by new S3 settings. harness.charm.unit.status = BlockedStatus("fake blocked state") - harness.charm.backup._initialise_stanza() + harness.charm.backup._initialise_stanza(mock_event) _execute_command.assert_not_called() + mock_event.defer.assert_called_once() # Test when the blocked state is any of the blocked stated that can be solved # by new S3 settings, but the stanza creation fails. + mock_event.reset_mock() stanza_creation_command = [ "pgbackrest", f"--stanza={harness.charm.backup.stanza_name}", @@ -795,15 +710,17 @@ def test_initialise_stanza(harness): FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE, ]: + _s3_initialization_set_failure.reset_mock() _execute_command.reset_mock() harness.charm.unit.status = BlockedStatus(blocked_state) - harness.charm.backup._initialise_stanza() - _execute_command.assert_called_once_with(stanza_creation_command) - assert isinstance(harness.charm.unit.status, BlockedStatus) - assert harness.charm.unit.status.message == FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE - - # Assert there is no stanza name in the application relation databag. - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} + harness.charm.backup._initialise_stanza(mock_event) + _execute_command.assert_called_with(stanza_creation_command) + mock_event.defer.assert_not_called() + # Only the leader will display the blocked status. + assert isinstance(harness.charm.unit.status, MaintenanceStatus) + _s3_initialization_set_failure.assert_called_once_with( + FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE + ) # Test when the archiving is working correctly (pgBackRest check command succeeds) # and the unit is not the leader. @@ -812,36 +729,37 @@ def test_initialise_stanza(harness): _member_started.reset_mock() _reload_patroni_configuration.reset_mock() _execute_command.side_effect = None - harness.charm.backup._initialise_stanza() + harness.charm.backup._initialise_stanza(mock_event) assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == { "stanza": f"{harness.charm.model.name}.patroni-postgresql-k8s", - "init-pgbackrest": "True", } assert isinstance(harness.charm.unit.status, MaintenanceStatus) + mock_event.defer.assert_not_called() # Test when the unit is the leader. with harness.hooks_disabled(): harness.set_leader() - harness.update_relation_data( - peer_rel_id, harness.charm.unit.name, {"stanza": "", "init-pgbackrest": ""} - ) - harness.charm.backup._initialise_stanza() + harness.update_relation_data(peer_rel_id, harness.charm.unit.name, {"stanza": ""}) + harness.charm.backup._initialise_stanza(mock_event) assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": f"{harness.charm.model.name}.patroni-postgresql-k8s", - "init-pgbackrest": "True", + "stanza": f"{harness.charm.model.name}.patroni-postgresql-k8s" } - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} + mock_event.defer.assert_not_called() assert isinstance(harness.charm.unit.status, MaintenanceStatus) def test_check_stanza(harness): with ( - patch("charm.Patroni.reload_patroni_configuration") as _reload_patroni_configuration, - patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started, + patch("charm.PostgresqlOperatorCharm.update_config"), patch("backups.wait_fixed", return_value=wait_fixed(0)), - patch("charm.PostgresqlOperatorCharm.update_config") as _update_config, + patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started, + patch("charm.Patroni.reload_patroni_configuration") as _reload_patroni_configuration, patch("charm.PostgreSQLBackups._execute_command") as _execute_command, + patch("charm.PostgresqlOperatorCharm._set_active_status") as _set_active_status, + patch( + "charm.PostgreSQLBackups._s3_initialization_set_failure" + ) as _s3_initialization_set_failure, patch( "charm.PostgresqlOperatorCharm.is_primary", new_callable=PropertyMock ) as _is_primary, @@ -852,196 +770,153 @@ def test_check_stanza(harness): harness.update_relation_data( peer_rel_id, harness.charm.app.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, - ) - harness.update_relation_data( - peer_rel_id, - harness.charm.unit.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, + {"s3-initialization-start": "test-stanza"}, ) - # Test when the unit is not the primary. - _is_primary.return_value = False - harness.charm.backup.check_stanza() - _execute_command.assert_not_called() - - # Set the unit as primary. - _is_primary.return_value = True - stanza_check_command = [ "pgbackrest", f"--stanza={harness.charm.backup.stanza_name}", "check", ] - # Test when the archiving is not working correctly (pgBackRest check command fails). + _member_started.return_value = False _execute_command.side_effect = ExecError( command=stanza_check_command, exit_code=1, stdout="", stderr="fake error" ) + assert not harness.charm.backup.check_stanza() + _member_started.assert_called() + _reload_patroni_configuration.assert_not_called() + _set_active_status.assert_not_called() + _s3_initialization_set_failure.assert_called_once_with( + FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE + ) + + _execute_command.reset_mock() + _s3_initialization_set_failure.reset_mock() _member_started.return_value = True - harness.charm.backup.check_stanza() - assert _update_config.call_count == 2 - assert _member_started.call_count == 5 - assert _reload_patroni_configuration.call_count == 5 - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": "test-stanza", - "init-pgbackrest": "True", + _execute_command.side_effect = None + assert harness.charm.backup.check_stanza() + _reload_patroni_configuration.assert_called_once() + _execute_command.assert_called_once() + _set_active_status.assert_called_once() + _s3_initialization_set_failure.assert_not_called() + assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == { + "s3-initialization-done": "True" + } + + with harness.hooks_disabled(): + harness.set_leader() + assert harness.charm.backup.check_stanza() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} + + +def test_coordinate_stanza_fields(harness): + with ( + patch("charm.PostgresqlOperatorCharm.update_config") as _update_config, + patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started, + patch("charm.Patroni.reload_patroni_configuration") as _reload_patroni_configuration, + ): + peer_rel_id = harness.model.get_relation(PEER).id + stanza_name = f"{harness.charm.model.name}.{harness.charm.app.name}" + + peer_data_primary_error = { + "s3-initialization-done": "True", + "s3-initialization-block-message": ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, + } + peer_data_primary_ok = { + "s3-initialization-done": "True", + "stanza": stanza_name, + } + peer_data_leader_start = { + "s3-initialization-start": "Thu Feb 24 05:00:00 2022", + } + peer_data_leader_error = { + "s3-initialization-done": "True", + "s3-initialization-block-message": ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, } + peer_data_leader_ok = {"s3-initialization-done": "True", "stanza": stanza_name} + peer_data_clean = { + "s3-initialization-start": "", + "s3-initialization-done": "", + "s3-initialization-block-message": "", + "stanza": "", + } + + # Add a new unit to the relation. + new_unit_name = "postgresql-k8s/1" + new_unit = Unit(new_unit_name, None, harness.charm.app._backend, {}) + harness.add_relation_unit(peer_rel_id, new_unit_name) + + # Test with clear values. + harness.charm.backup.coordinate_stanza_fields() + _member_started.assert_not_called() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert isinstance(harness.charm.unit.status, BlockedStatus) - assert harness.charm.unit.status.message == FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE + assert harness.get_relation_data(peer_rel_id, new_unit) == {} - # Test when the archiving is not working correctly (pgBackRest check command fails) on leader. + # Test with primary failed prior leader s3 initialization sequence started. with harness.hooks_disabled(): - harness.set_leader(True) - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, - ) - harness.update_relation_data( - peer_rel_id, - harness.charm.unit.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, - ) - _execute_command.side_effect = ExecError( - command=stanza_check_command, exit_code=1, stdout="", stderr="fake error" - ) - _member_started.return_value = True - harness.charm.backup.check_stanza() - assert _update_config.call_count == 4 + harness.update_relation_data(peer_rel_id, new_unit_name, peer_data_primary_error) + harness.charm.backup.coordinate_stanza_fields() + _update_config.assert_not_called() + _member_started.assert_not_called() assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert isinstance(harness.charm.unit.status, BlockedStatus) - assert harness.charm.unit.status.message == FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE + assert harness.get_relation_data(peer_rel_id, new_unit) == peer_data_primary_error - # Test when the archiving is working correctly (pgBackRest check command succeeds) - # and the unit is not the leader. + # Test with non-leader unit. with harness.hooks_disabled(): - harness.set_leader(False) harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, + peer_rel_id, harness.charm.app.name, peer_data_leader_start ) - harness.update_relation_data( - peer_rel_id, - harness.charm.unit.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, - ) - _execute_command.reset_mock() - _update_config.reset_mock() - _member_started.reset_mock() - _reload_patroni_configuration.reset_mock() - _execute_command.side_effect = None - harness.charm.backup.check_stanza() + harness.charm.backup.coordinate_stanza_fields() + _update_config.assert_not_called() + _member_started.assert_not_called() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == peer_data_leader_start + assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} + assert harness.get_relation_data(peer_rel_id, new_unit) == peer_data_primary_error + + # Leader should sync fail result from the primary. + _member_started.return_value = False + with harness.hooks_disabled(): + harness.set_leader() + harness.charm.backup.coordinate_stanza_fields() _update_config.assert_called_once() _member_started.assert_called_once() - _reload_patroni_configuration.assert_called_once() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": "test-stanza", - "init-pgbackrest": "True", - } - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == { - "stanza": "test-stanza" - } - assert isinstance(harness.charm.unit.status, ActiveStatus) + _reload_patroni_configuration.assert_not_called() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == peer_data_leader_error + assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} + assert harness.get_relation_data(peer_rel_id, new_unit) == peer_data_primary_error - # Test when the unit is the leader. - harness.charm.unit.status = BlockedStatus("fake blocked state") + # Test with successful result from the primary. + _update_config.reset_mock() + _member_started.return_value = True with harness.hooks_disabled(): - harness.set_leader() - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"init-pgbackrest": "True"}, - ) + harness.update_relation_data(peer_rel_id, harness.charm.app.name, peer_data_clean) harness.update_relation_data( - peer_rel_id, - harness.charm.unit.name, - {"init-pgbackrest": "True"}, + peer_rel_id, harness.charm.app.name, peer_data_leader_start ) - _update_config.reset_mock() - _member_started.reset_mock() - _reload_patroni_configuration.reset_mock() - harness.charm.backup.check_stanza() + harness.update_relation_data(peer_rel_id, new_unit_name, peer_data_clean) + harness.update_relation_data(peer_rel_id, new_unit_name, peer_data_primary_ok) + harness.charm.backup.coordinate_stanza_fields() _update_config.assert_called_once() - _member_started.assert_called_once() _reload_patroni_configuration.assert_called_once() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": "test-stanza" - } - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == { - "stanza": "test-stanza" - } - assert isinstance(harness.charm.unit.status, ActiveStatus) - - -def test_coordinate_stanza_fields(harness): - # Add a new unit to the relation. - peer_rel_id = harness.model.get_relation(PEER).id - new_unit_name = "postgresql-k8s/1" - new_unit = Unit(new_unit_name, None, harness.charm.app._backend, {}) - harness.add_relation_unit(peer_rel_id, new_unit_name) - - # Test when the stanza name is neither in the application relation databag nor in the unit relation databag. - harness.charm.backup.coordinate_stanza_fields() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert harness.get_relation_data(peer_rel_id, new_unit) == {} - - # Test when the stanza name is in the unit relation databag but the unit is not the leader. - stanza_name = f"{harness.charm.model.name}.patroni-{harness.charm.app.name}" - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, new_unit_name, {"stanza": stanza_name, "init-pgbackrest": "True"} - ) - harness.charm.backup.coordinate_stanza_fields() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert harness.get_relation_data(peer_rel_id, new_unit) == { - "stanza": stanza_name, - "init-pgbackrest": "True", - } - - # Test when the unit is the leader. - with harness.hooks_disabled(): - harness.set_leader() - harness.charm.backup.coordinate_stanza_fields() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": stanza_name, - "init-pgbackrest": "True", - } - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert harness.get_relation_data(peer_rel_id, new_unit) == { - "stanza": stanza_name, - "init-pgbackrest": "True", - } - - # Test when the stanza was already checked in the primary non-leader unit. - with harness.hooks_disabled(): - harness.update_relation_data(peer_rel_id, new_unit_name, {"init-pgbackrest": ""}) - harness.charm.backup.coordinate_stanza_fields() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {"stanza": stanza_name} - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert harness.get_relation_data(peer_rel_id, new_unit) == {"stanza": stanza_name} - - # Test when the "init-pgbackrest" flag was removed from the application relation databag - # and this is the unit that has the stanza name in the unit relation databag. - with harness.hooks_disabled(): - harness.update_relation_data(peer_rel_id, harness.charm.unit.name, {"stanza": stanza_name}) - harness.charm.backup.coordinate_stanza_fields() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {"stanza": stanza_name} - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert harness.get_relation_data(peer_rel_id, new_unit) == {"stanza": stanza_name} + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == peer_data_leader_ok + assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} + assert harness.get_relation_data(peer_rel_id, new_unit) == peer_data_primary_ok - # Test when the unit is not the leader. - with harness.hooks_disabled(): - harness.set_leader(False) - harness.update_relation_data(peer_rel_id, harness.charm.unit.name, {"stanza": stanza_name}) - harness.charm.backup.coordinate_stanza_fields() - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {"stanza": stanza_name} - assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} - assert harness.get_relation_data(peer_rel_id, new_unit) == {"stanza": stanza_name} + # Test when leader is waiting for the primary result. + _update_config.reset_mock() + with harness.hooks_disabled(): + harness.update_relation_data(peer_rel_id, harness.charm.app.name, peer_data_clean) + harness.update_relation_data( + peer_rel_id, harness.charm.app.name, peer_data_leader_start + ) + harness.update_relation_data(peer_rel_id, new_unit_name, peer_data_clean) + harness.charm.backup.coordinate_stanza_fields() + _update_config.assert_not_called() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == peer_data_leader_start + assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} + assert harness.get_relation_data(peer_rel_id, new_unit) == {} def test_is_primary_pgbackrest_service_running(harness): @@ -1078,21 +953,28 @@ def test_is_primary_pgbackrest_service_running(harness): def test_on_s3_credential_changed(harness): with ( - patch("charm.PostgreSQLBackups._initialise_stanza") as _initialise_stanza, - patch("charm.PostgreSQLBackups.can_use_s3_repository") as _can_use_s3_repository, patch( - "charm.PostgreSQLBackups._create_bucket_if_not_exists" - ) as _create_bucket_if_not_exists, - patch( - "charm.PostgresqlOperatorCharm.is_primary", new_callable=PropertyMock - ) as _is_primary, + "charm.PostgreSQLBackups._render_pgbackrest_conf_file" + ) as _render_pgbackrest_conf_file, patch( "charm.PostgreSQLBackups._can_initialise_stanza", new_callable=PropertyMock ) as _can_initialise_stanza, patch( - "charm.PostgreSQLBackups._render_pgbackrest_conf_file" - ) as _render_pgbackrest_conf_file, + "charm.PostgreSQLBackups.start_stop_pgbackrest_service" + ) as _start_stop_pgbackrest_service, + patch("charm.PostgresqlOperatorCharm._set_active_status") as _set_active_status, + patch( + "charm.PostgresqlOperatorCharm.is_primary", new_callable=PropertyMock + ) as _is_primary, + patch( + "charm.PostgreSQLBackups._on_s3_credential_changed_primary" + ) as _on_s3_credential_changed_primary, patch("ops.framework.EventBase.defer") as _defer, + patch( + "charm.PostgresqlOperatorCharm.is_standby_leader", new_callable=PropertyMock + ) as _is_standby_leader, + patch("time.gmtime"), + patch("time.asctime", return_value="Thu Feb 24 05:00:00 2022"), ): peer_rel_id = harness.model.get_relation(PEER).id # Test when the cluster was not initialised yet. @@ -1102,9 +984,6 @@ def test_on_s3_credential_changed(harness): ) _defer.assert_called_once() _render_pgbackrest_conf_file.assert_not_called() - _create_bucket_if_not_exists.assert_not_called() - _can_use_s3_repository.assert_not_called() - _initialise_stanza.assert_not_called() # Test when the cluster is already initialised, but the charm fails to render # the pgBackRest configuration file due to missing S3 parameters. @@ -1122,9 +1001,6 @@ def test_on_s3_credential_changed(harness): _defer.assert_not_called() _render_pgbackrest_conf_file.assert_called_once() _can_initialise_stanza.assert_not_called() - _create_bucket_if_not_exists.assert_not_called() - _can_use_s3_repository.assert_not_called() - _initialise_stanza.assert_not_called() # Test when it's not possible to initialise the stanza in this unit. _render_pgbackrest_conf_file.return_value = True @@ -1132,109 +1008,149 @@ def test_on_s3_credential_changed(harness): harness.charm.backup.s3_client.on.credentials_changed.emit( relation=harness.model.get_relation(S3_PARAMETERS_RELATION, s3_rel_id) ) - _defer.assert_called_once() _can_initialise_stanza.assert_called_once() - _is_primary.assert_not_called() + _defer.assert_called_once() + _start_stop_pgbackrest_service.assert_not_called() - # Test that followers will not initialise the bucket - harness.charm.unit.status = ActiveStatus() - _render_pgbackrest_conf_file.reset_mock() - with harness.hooks_disabled(): - harness.update_relation_data( - peer_rel_id, - harness.charm.app.name, - {"cluster_initialised": "True"}, - ) - _can_initialise_stanza.return_value = True + # Test when unit is not a leader and can't do any peer data changes _is_primary.return_value = False + _can_initialise_stanza.return_value = True harness.charm.backup.s3_client.on.credentials_changed.emit( relation=harness.model.get_relation(S3_PARAMETERS_RELATION, s3_rel_id) ) - _render_pgbackrest_conf_file.assert_called_once() - _is_primary.assert_called_once() - _create_bucket_if_not_exists.assert_not_called() - assert isinstance(harness.charm.unit.status, ActiveStatus) - _can_use_s3_repository.assert_not_called() - _initialise_stanza.assert_not_called() + _is_standby_leader.assert_called_once() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { + "cluster_initialised": "True" + } - # Test when the charm render the pgBackRest configuration file, but fails to - # access or create the S3 bucket. - _is_primary.return_value = True - for error in [ - ClientError( - error_response={"Error": {"Code": 1, "message": "fake error"}}, - operation_name="fake operation name", - ), - ValueError, - ]: - _render_pgbackrest_conf_file.reset_mock() - _create_bucket_if_not_exists.reset_mock() - _create_bucket_if_not_exists.side_effect = error - harness.charm.backup.s3_client.on.credentials_changed.emit( - relation=harness.model.get_relation(S3_PARAMETERS_RELATION, s3_rel_id) - ) - _render_pgbackrest_conf_file.assert_called_once() - _create_bucket_if_not_exists.assert_called_once() - assert isinstance(harness.charm.unit.status, BlockedStatus) - assert ( - harness.charm.unit.status.message == FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE - ) - _can_use_s3_repository.assert_not_called() - _initialise_stanza.assert_not_called() + # Test when unit is a leader but not primary + _is_standby_leader.reset_mock() + with harness.hooks_disabled(): + harness.set_leader() + harness.charm.backup.s3_client.on.credentials_changed.emit( + relation=harness.model.get_relation(S3_PARAMETERS_RELATION, s3_rel_id) + ) + _set_active_status.assert_called_once() + _on_s3_credential_changed_primary.assert_not_called() + _is_standby_leader.assert_called_once() + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { + "cluster_initialised": "True", + "s3-initialization-start": "Thu Feb 24 05:00:00 2022", + } - # Test when it's not possible to use the S3 repository due to backups from another cluster. - _create_bucket_if_not_exists.reset_mock() - _create_bucket_if_not_exists.side_effect = None - _can_use_s3_repository.return_value = (False, "fake validation message") + # Test when unit is a leader and primary + _is_primary.return_value = True + _is_standby_leader.reset_mock() + _set_active_status.reset_mock() + with harness.hooks_disabled(): + harness.set_leader() harness.charm.backup.s3_client.on.credentials_changed.emit( relation=harness.model.get_relation(S3_PARAMETERS_RELATION, s3_rel_id) ) - assert isinstance(harness.charm.unit.status, BlockedStatus) - assert harness.charm.unit.status.message == "fake validation message" + _on_s3_credential_changed_primary.assert_called_once() + _set_active_status.assert_not_called() + _is_standby_leader.assert_called_once() + + +def test_on_s3_credential_changed_primary(harness): + with ( + patch("charm.PostgresqlOperatorCharm.update_config"), + patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started, + patch("charm.Patroni.reload_patroni_configuration") as _reload_patroni_configuration, + patch( + "charm.PostgreSQLBackups._create_bucket_if_not_exists" + ) as _create_bucket_if_not_exists, + patch( + "charm.PostgreSQLBackups._s3_initialization_set_failure" + ) as _s3_initialization_set_failure, + patch("charm.PostgreSQLBackups.can_use_s3_repository") as _can_use_s3_repository, + patch("charm.PostgreSQLBackups._initialise_stanza") as _initialise_stanza, + patch("charm.PostgreSQLBackups.check_stanza") as _check_stanza, + patch( + "charm.PostgreSQLBackups._retrieve_s3_parameters", + return_value=({"path": "example"}, None), + ), + patch("charm.PostgreSQLBackups._upload_content_to_s3") as _upload_content_to_s3, + ): + mock_event = MagicMock() + + _member_started.return_value = False + _create_bucket_if_not_exists.side_effect = ValueError() + assert not harness.charm.backup._on_s3_credential_changed_primary(mock_event) + _member_started.assert_called_once() + _reload_patroni_configuration.assert_not_called() _create_bucket_if_not_exists.assert_called_once() + _s3_initialization_set_failure.assert_called_once_with( + FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE + ) + _can_use_s3_repository.assert_not_called() + + _s3_initialization_set_failure.reset_mock() + _member_started.return_value = True + _create_bucket_if_not_exists.side_effect = None + _can_use_s3_repository.return_value = (False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE) + assert not harness.charm.backup._on_s3_credential_changed_primary(mock_event) + _reload_patroni_configuration.assert_called_once() _can_use_s3_repository.assert_called_once() + _s3_initialization_set_failure.assert_called_once_with( + ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE + ) _initialise_stanza.assert_not_called() - # Test when the stanza can be initialised and the pgBackRest service can start. - _can_use_s3_repository.reset_mock() _can_use_s3_repository.return_value = (True, None) - harness.charm.backup.s3_client.on.credentials_changed.emit( - relation=harness.model.get_relation(S3_PARAMETERS_RELATION, s3_rel_id) - ) - _can_use_s3_repository.assert_called_once() + _initialise_stanza.return_value = False + assert not harness.charm.backup._on_s3_credential_changed_primary(mock_event) _initialise_stanza.assert_called_once() + _check_stanza.assert_not_called() + + _initialise_stanza.return_value = True + _check_stanza.return_value = False + assert not harness.charm.backup._on_s3_credential_changed_primary(mock_event) + _check_stanza.assert_called_once() + _upload_content_to_s3.assert_not_called() + + _check_stanza.return_value = True + assert harness.charm.backup._on_s3_credential_changed_primary(mock_event) + _upload_content_to_s3.assert_called_once() def test_on_s3_credential_gone(harness): - with patch("ops.model.Container.stop") as _stop: + with ( + patch("ops.model.Container.stop") as _stop, + patch("charm.PostgresqlOperatorCharm._set_active_status") as _set_active_status, + ): + full_peer_s3_parameters = { + "stanza": "test-stanza", + "s3-initialization-start": "Thu Feb 24 05:00:00 2022", + "s3-initialization-done": "True", + "s3-initialization-block-message": ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, + } + peer_rel_id = harness.model.get_relation(PEER).id # Test that unrelated blocks will remain harness.charm.unit.status = BlockedStatus("test block") harness.charm.backup._on_s3_credential_gone(None) - assert isinstance(harness.charm.unit.status, BlockedStatus) + _set_active_status.assert_not_called() # Test that s3 related blocks will be cleared harness.charm.unit.status = BlockedStatus(ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE) harness.charm.backup._on_s3_credential_gone(None) - assert isinstance(harness.charm.unit.status, ActiveStatus) + _set_active_status.assert_called_once() # Test removal of relation data when the unit is not the leader. with harness.hooks_disabled(): harness.update_relation_data( peer_rel_id, harness.charm.app.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, + full_peer_s3_parameters, ) harness.update_relation_data( peer_rel_id, - harness.charm.app.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, + harness.charm.unit.name, + full_peer_s3_parameters, ) harness.charm.backup._on_s3_credential_gone(None) - assert harness.get_relation_data(peer_rel_id, harness.charm.app) == { - "stanza": "test-stanza", - "init-pgbackrest": "True", - } + assert harness.get_relation_data(peer_rel_id, harness.charm.app) == full_peer_s3_parameters assert harness.get_relation_data(peer_rel_id, harness.charm.unit) == {} # Test removal of relation data when the unit is the leader. @@ -1243,7 +1159,7 @@ def test_on_s3_credential_gone(harness): harness.update_relation_data( peer_rel_id, harness.charm.unit.name, - {"stanza": "test-stanza", "init-pgbackrest": "True"}, + full_peer_s3_parameters, ) harness.charm.backup._on_s3_credential_gone(None) assert harness.get_relation_data(peer_rel_id, harness.charm.app) == {} @@ -1310,7 +1226,7 @@ def test_on_create_backup_action(harness): harness.charm.backup._on_create_backup_action(mock_event) _upload_content_to_s3.assert_called_once_with( expected_metadata, - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", mock_s3_parameters, ) mock_event.fail.assert_called_once() @@ -1356,12 +1272,12 @@ def test_on_create_backup_action(harness): _upload_content_to_s3.assert_has_calls([ call( expected_metadata, - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", mock_s3_parameters, ), call( "Stdout:\nfake stdout\n\nStderr:\nfake stderr\n", - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/2023-01-01T09:00:00Z/backup.log", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/2023-01-01T09:00:00Z/backup.log", mock_s3_parameters, ), ]) @@ -1380,12 +1296,12 @@ def test_on_create_backup_action(harness): _upload_content_to_s3.assert_has_calls([ call( expected_metadata, - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", mock_s3_parameters, ), call( "Stdout:\nfake stdout\n\nStderr:\nfake stderr\n", - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/2023-01-01T09:00:00Z/backup.log", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/2023-01-01T09:00:00Z/backup.log", mock_s3_parameters, ), ]) @@ -1403,12 +1319,12 @@ def test_on_create_backup_action(harness): _upload_content_to_s3.assert_has_calls([ call( expected_metadata, - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/latest", mock_s3_parameters, ), call( "Stdout:\nfake stdout\n\nStderr:\nfake stderr\n", - f"test-path/backup/{harness.charm.model.name}.{harness.charm.cluster_name}/2023-01-01T09:00:00Z/backup.log", + f"backup/{harness.charm.model.name}.{harness.charm.cluster_name}/2023-01-01T09:00:00Z/backup.log", mock_s3_parameters, ), ]) @@ -1922,6 +1838,7 @@ def test_start_stop_pgbackrest_service(harness): patch( "charm.PostgresqlOperatorCharm.is_primary", new_callable=PropertyMock ) as _is_primary, + patch("charm.Patroni.get_standby_leader") as _get_standby_leader, patch("ops.model.Container.restart") as _restart, patch("ops.model.Container.stop") as _stop, patch( @@ -1963,9 +1880,17 @@ def test_start_stop_pgbackrest_service(harness): _stop.assert_called_once() _restart.assert_not_called() - # Test when the service hasn't started in the primary yet. + # Test when it's a standby. _stop.reset_mock() _peer_members_endpoints.return_value = ["fake-member-endpoint"] + _get_standby_leader.return_value = "standby" + assert harness.charm.backup.start_stop_pgbackrest_service() + _stop.assert_called_once() + _restart.assert_not_called() + + # Test when the service hasn't started in the primary yet. + _stop.reset_mock() + _get_standby_leader.return_value = None _is_primary.return_value = False _is_primary_pgbackrest_service_running.return_value = False assert harness.charm.backup.start_stop_pgbackrest_service() is False diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 9b10277bc4..5ad1f11914 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -771,7 +771,6 @@ def test_on_update_status_after_restore_operation(harness): # Test when it's not possible to use the configured S3 repository. _update_config.reset_mock() - _handle_processes_failures.reset_mock() _set_active_status.reset_mock() with harness.hooks_disabled(): harness.update_relation_data( @@ -782,13 +781,11 @@ def test_on_update_status_after_restore_operation(harness): _can_use_s3_repository.return_value = (False, "fake validation message") harness.charm.on.update_status.emit() _update_config.assert_called_once() - _handle_processes_failures.assert_not_called() - _set_active_status.assert_not_called() - assert isinstance(harness.charm.unit.status, BlockedStatus) - assert harness.charm.unit.status.message == "fake validation message" - + _set_active_status.assert_called_once() # Assert that the backup id is not in the application relation databag anymore. - assert harness.get_relation_data(rel_id, harness.charm.app) == {} + assert harness.get_relation_data(rel_id, harness.charm.app) == { + "s3-initialization-block-message": "fake validation message", + } def test_on_upgrade_charm(harness): @@ -1358,7 +1355,6 @@ def test_on_peer_relation_changed(harness): patch( "backups.PostgreSQLBackups.start_stop_pgbackrest_service" ) as _start_stop_pgbackrest_service, - patch("backups.PostgreSQLBackups.check_stanza") as _check_stanza, patch("backups.PostgreSQLBackups.coordinate_stanza_fields") as _coordinate_stanza_fields, patch("charm.Patroni.reinitialize_postgresql") as _reinitialize_postgresql, patch( @@ -1379,7 +1375,6 @@ def test_on_peer_relation_changed(harness): _add_members.assert_not_called() _update_config.assert_not_called() _coordinate_stanza_fields.assert_not_called() - _check_stanza.assert_not_called() _start_stop_pgbackrest_service.assert_not_called() # Test when the cluster has already initialised, but the unit is not the leader and is not @@ -1396,7 +1391,6 @@ def test_on_peer_relation_changed(harness): _add_members.assert_not_called() _update_config.assert_not_called() _coordinate_stanza_fields.assert_not_called() - _check_stanza.assert_not_called() _start_stop_pgbackrest_service.assert_not_called() # Test when the unit is the leader. @@ -1407,7 +1401,6 @@ def test_on_peer_relation_changed(harness): _add_members.assert_called_once() _update_config.assert_not_called() _coordinate_stanza_fields.assert_not_called() - _check_stanza.assert_not_called() _start_stop_pgbackrest_service.assert_not_called() # Test when the unit is part of the cluster but the container @@ -1428,7 +1421,6 @@ def test_on_peer_relation_changed(harness): _defer.assert_not_called() _update_config.assert_not_called() _coordinate_stanza_fields.assert_not_called() - _check_stanza.assert_not_called() _start_stop_pgbackrest_service.assert_not_called() # Test when the container is ready but Patroni hasn't started yet. @@ -1438,7 +1430,6 @@ def test_on_peer_relation_changed(harness): _defer.assert_called_once() _update_config.assert_called_once() _coordinate_stanza_fields.assert_not_called() - _check_stanza.assert_not_called() _start_stop_pgbackrest_service.assert_not_called() # Test when Patroni has already started but this is a replica with a @@ -1448,7 +1439,6 @@ def test_on_peer_relation_changed(harness): _set_active_status.reset_mock() _defer.reset_mock() _coordinate_stanza_fields.reset_mock() - _check_stanza.reset_mock() _start_stop_pgbackrest_service.reset_mock() _is_primary.return_value = values[0] _member_replication_lag.return_value = values[1] @@ -1457,13 +1447,11 @@ def test_on_peer_relation_changed(harness): if _is_primary.return_value == values[0] or int(values[1]) <= 1000: _defer.assert_not_called() _coordinate_stanza_fields.assert_called_once() - _check_stanza.assert_called_once() _start_stop_pgbackrest_service.assert_called_once() _set_active_status.assert_called_once() else: _defer.assert_called_once() _coordinate_stanza_fields.assert_not_called() - _check_stanza.assert_not_called() _start_stop_pgbackrest_service.assert_not_called() _set_active_status.assert_not_called()