Skip to content

Commit

Permalink
Update charm libs for alert logging of backup/restore success/failure
Browse files Browse the repository at this point in the history
  • Loading branch information
shayancanonical authored Jun 30, 2023
1 parent 3bdf012 commit c977411
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
41 changes: 27 additions & 14 deletions lib/charms/mysql/v0/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def is_unit_blocked(self) -> bool:
from ops.jujuversion import JujuVersion
from ops.model import ActiveStatus, BlockedStatus

from constants import MYSQL_DATA_DIR

logger = logging.getLogger(__name__)

MYSQL_BACKUPS = "mysql-backups"
Expand All @@ -93,7 +95,7 @@ def is_unit_blocked(self) -> bool:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 5
LIBPATCH = 6


class MySQLBackups(Object):
Expand Down Expand Up @@ -225,10 +227,12 @@ def _on_create_backup(self, event: ActionEvent) -> None:
logger.info("A backup has been requested on unit")

if not self.charm.s3_integrator_relation_exists:
logger.error("Backup failed: missing relation with S3 integrator charm")
event.fail("Missing relation with S3 integrator charm")
return

if not self.charm._mysql.is_mysqld_running():
logger.error(f"Backup failed: process mysqld is not running on {self.charm.unit.name}")
event.fail("Process mysqld not running")
return

Expand All @@ -237,6 +241,7 @@ def _on_create_backup(self, event: ActionEvent) -> None:
# Retrieve and validate missing S3 parameters
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
if missing_parameters:
logger.error(f"Backup failed: missing S3 parameters {missing_parameters}")
event.fail(f"Missing S3 parameters: {missing_parameters}")
return

Expand All @@ -245,7 +250,7 @@ def _on_create_backup(self, event: ActionEvent) -> None:
# Check if this unit can perform backup
can_unit_perform_backup, validation_message = self._can_unit_perform_backup()
if not can_unit_perform_backup:
logger.warning(validation_message)
logger.error(f"Backup failed: {validation_message}")
event.fail(validation_message)
return

Expand All @@ -259,25 +264,26 @@ def _on_create_backup(self, event: ActionEvent) -> None:
"""

if not upload_content_to_s3(metadata, f"{backup_path}.metadata", s3_parameters):
logger.error("Backup failed: Failed to upload metadata to provided S3")
event.fail("Failed to upload metadata to provided S3")
return

# Run operations to prepare for the backup
success, error_message = self._pre_backup()
if not success:
logger.warning(error_message)
logger.error(f"Backup failed: {error_message}")
event.fail(error_message)
return

# Perform the backup
success, error_message = self._backup(backup_path, s3_parameters)
if not success:
logger.warning(error_message)
logger.error(f"Backup failed: {error_message}")
event.fail(error_message)

success, error_message = self._post_backup()
if not success:
logger.error(error_message)
logger.error(f"Backup failed: {error_message}")
self.charm.unit.status = BlockedStatus(
"Failed to create backup; instance in bad state"
)
Expand All @@ -287,13 +293,14 @@ def _on_create_backup(self, event: ActionEvent) -> None:
# Run operations to clean up after the backup
success, error_message = self._post_backup()
if not success:
logger.error(error_message)
logger.error(f"Backup failed: {error_message}")
self.charm.unit.status = BlockedStatus(
"Failed to create backup; instance in bad state"
)
event.fail(error_message)
return

logger.info(f"Backup succeeded: with backup-id {datetime_backup_requested}")
event.set_results(
{
"backup-id": datetime_backup_requested,
Expand Down Expand Up @@ -424,26 +431,26 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:
"""
if not self.charm.s3_integrator_relation_exists:
error_message = "Missing relation with S3 integrator charm"
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return False

if not event.params.get("backup-id"):
error_message = "Missing backup-id to restore"
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return False

if not self.charm._mysql.is_server_connectable():
error_message = "Server running mysqld is not connectable"
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return False

logger.info("Checking if the unit is waiting to start or restart")
if self.charm.is_unit_busy():
error_message = "Unit is waiting to start or restart"
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return False

Expand All @@ -452,7 +459,7 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:
error_message = (
"Unit cannot restore backup as there are more than one units in the cluster"
)
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return False

Expand All @@ -473,27 +480,29 @@ def _on_restore(self, event: ActionEvent) -> None:
# Retrieve and validate missing S3 parameters
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
if missing_parameters:
logger.error(f"Restore failed: missing S3 parameters {missing_parameters}")
event.fail(f"Missing S3 parameters: {missing_parameters}")
return

# Validate the provided backup id
logger.info("Validating provided backup-id in the specified s3 path")
s3_backup_md5 = str(pathlib.Path(s3_parameters["path"]) / f"{backup_id}.md5")
if not fetch_and_check_existence_of_s3_path(s3_backup_md5, s3_parameters):
logger.error(f"Restore failed: invalid backup-id {backup_id}")
event.fail(f"Invalid backup-id: {backup_id}")
return

# Run operations to prepare for the restore
success, error_message = self._pre_restore()
if not success:
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return

# Perform the restore
success, recoverable, error_message = self._restore(backup_id, s3_parameters)
if not success:
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)

if recoverable:
Expand All @@ -506,11 +515,12 @@ def _on_restore(self, event: ActionEvent) -> None:
# Run post-restore operations
success, error_message = self._post_restore()
if not success:
logger.warning(error_message)
logger.error(f"Restore failed: {error_message}")
self.charm.unit.status = BlockedStatus(error_message)
event.fail(error_message)
return

logger.info("Restore succeeded")
event.set_results(
{
"completed": "ok",
Expand Down Expand Up @@ -583,6 +593,9 @@ def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, Optional[str]]:
try:
self.charm._mysql.delete_temp_restore_directory()
self.charm._mysql.delete_temp_backup_directory()
# Older backups may still contain the temp backup directory: it was previously
# created inside the MySQL data directory (to reduce IOPS latency), so remove it from there too
self.charm._mysql.delete_temp_backup_directory(from_directory=MYSQL_DATA_DIR)
except MySQLDeleteTempRestoreDirectoryError:
return False, "Failed to delete the temp restore directory"
except MySQLDeleteTempBackupDirectoryError:
Expand Down
4 changes: 2 additions & 2 deletions src/mysql_k8s_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,10 @@ def execute_backup_commands(
group=MYSQL_SYSTEM_GROUP,
)

def delete_temp_backup_directory(self) -> None:
def delete_temp_backup_directory(self, from_directory: str = MYSQL_DATA_DIR) -> None:
"""Delete the temp backup directory in the data directory."""
super().delete_temp_backup_directory(
MYSQL_DATA_DIR,
from_directory,
user=MYSQL_SYSTEM_USER,
group=MYSQL_SYSTEM_GROUP,
)
Expand Down

0 comments on commit c977411

Please sign in to comment.