diff --git a/medusa/download.py b/medusa/download.py
index 3da5ece4..768e18a6 100644
--- a/medusa/download.py
+++ b/medusa/download.py
@@ -13,8 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import logging
 import json
+import logging
 import pathlib
 import shutil
 import sys
@@ -24,13 +24,12 @@
 from medusa.filtering import filter_fqtns
 
 
-def download_data(storageconfig, backup, fqtns_to_restore, destination):
-
+def download_data(storage_config, backup, fqtns_to_restore, destination):
     manifest = json.loads(backup.manifest)
     _check_available_space(manifest, destination)
 
-    with Storage(config=storageconfig) as storage:
+    with Storage(config=storage_config) as storage:
 
         for section in manifest:
 
@@ -70,9 +69,9 @@ def download_data(storageconfig, backup, fqtns_to_restore, destination):
         )
 
 
-def download_cmd(config, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces):
-
-    with Storage(config=config.storage) as storage:
+def download_cmd(config, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces,
+                 bucket_name=None, prefix=None):
+    with Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix) as storage:
 
         if not download_destination.is_dir():
             logging.error('{} is not a directory'.format(download_destination))
diff --git a/medusa/fetch_tokenmap.py b/medusa/fetch_tokenmap.py
index 26a92dc8..c23047a3 100644
--- a/medusa/fetch_tokenmap.py
+++ b/medusa/fetch_tokenmap.py
@@ -18,8 +18,8 @@
 from medusa.storage import Storage
 
 
-def main(config, backup_name):
-    with Storage(config=config.storage) as storage:
+def main(config, backup_name, bucket_name, prefix):
+    with Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix) as storage:
         backup = storage.get_cluster_backup(backup_name)
         if not backup:
             logging.error('No such backup')
diff --git a/medusa/listing.py b/medusa/listing.py
index 2799e268..a93e2b15 100644
--- a/medusa/listing.py
+++ b/medusa/listing.py
@@ -22,7 +22,6 @@
 
 
 def get_backups(storage, config, show_all):
-
     cluster_backups = sorted(
         storage.list_cluster_backups(),
         key=lambda b: b.started
@@ -36,8 +35,8 @@ def get_backups(storage, config, show_all):
     return cluster_backups
 
 
-def list_backups(config, show_all):
-    with Storage(config=config.storage) as storage:
+def list_backups(config, show_all, bucket_name=None, prefix=None):
+    with Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix) as storage:
         list_backups_w_storage(config, show_all, storage)
diff --git a/medusa/medusacli.py b/medusa/medusacli.py
index ff460fe6..e38d34cf 100644
--- a/medusa/medusacli.py
+++ b/medusa/medusacli.py
@@ -98,7 +98,7 @@ def configure_console_logging(verbosity, without_log_timestamp):
 @click.option('-v', '--verbosity', help='Verbosity', default=0, count=True)
 @click.option('--without-log-timestamp', help='Do not show timestamp in logs', default=False, is_flag=True)
 @click.option('--config-file', help='Specify config file')
-@click.option('--bucket-name', help='Bucket name')
+@click.option('--bucket-name', help='Explicit bucket name to override the one from config')
 @click.option('--key-file', help='GCP credentials key file')
 @click.option('--prefix', help='Prefix for shared storage')
 @click.option('--fqdn', help='Act as another host')
@@ -167,22 +167,27 @@ def backup_cluster(medusaconfig, backup_name, seed_target, stagger, enable_md5_c
 
 @cli.command(name='fetch-tokenmap')
 @click.option('--backup-name', help='backup name', required=True)
+@click.option('--bucket-name', help='Bucket with backup', required=False, default=None)
+@click.option('--prefix', help='Path prefix in multi-tenant buckets', required=False, default=None)
 @pass_MedusaConfig
-def fetch_tokenmap(medusaconfig, backup_name):
+def fetch_tokenmap(medusaconfig, backup_name, bucket_name, prefix):
     """
     Get the token/node mapping for a specific backup
     """
-    medusa.fetch_tokenmap.main(medusaconfig, backup_name)
+    medusa.fetch_tokenmap.main(medusaconfig, backup_name, bucket_name, prefix)
 
 
 @cli.command(name='list-backups')
 @click.option('--show-all/--no-show-all', default=False, help="List all backups in the bucket")
+@click.option('--bucket-name',
+              help='Explicit bucket name to override the one from config', required=False, default=None)
+@click.option('--prefix', help='Path prefix in multi-tenant buckets', required=False, default=None)
 @pass_MedusaConfig
-def list_backups(medusaconfig, show_all):
+def list_backups(medusaconfig, show_all, bucket_name, prefix):
     """
     List backups
     """
-    medusa.listing.list_backups(medusaconfig, show_all)
+    medusa.listing.list_backups(medusaconfig, show_all, bucket_name, prefix)
 
 
 @cli.command(name='download')
@@ -194,13 +199,17 @@ def list_backups(medusaconfig, show_all):
               multiple=True, default={})
 @click.option('--ignore-system-keyspaces', help='Do not download cassandra system keyspaces', required=True,
               is_flag=True, default=False)
+@click.option('--bucket-name',
+              help='Explicit bucket name to override the one from config', required=False, default=None)
+@click.option('--prefix', help='Path prefix in multi-tenant buckets', required=False, default=None)
 @pass_MedusaConfig
-def download(medusaconfig, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces):
+def download(medusaconfig, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces, bucket_name,
+             prefix):
     """
     Download backup
     """
     medusa.download.download_cmd(medusaconfig, backup_name, Path(download_destination), keyspaces, tables,
-                                 ignore_system_keyspaces)
+                                 ignore_system_keyspaces, bucket_name, prefix)
 
 
 @cli.command(name='restore-cluster')
@@ -224,9 +233,13 @@ def download(medusaconfig, backup_name, download_destination, keyspaces, tables,
 @click.option('--version-target', help='Target Cassandra version', required=False, default="3.11.9")
 @click.option('--ignore-racks', help='Disable matching nodes based on rack topology', required=False,
               default=False, is_flag=True)
+@click.option('--bucket-name',
+              help='Explicit bucket name to override the one from config', required=False, default=None)
+@click.option('--prefix', help='Path prefix in multi-tenant buckets', required=False, default=None)
 @pass_MedusaConfig
 def restore_cluster(medusaconfig, backup_name, seed_target, temp_dir, host_list, keep_auth, bypass_checks,
-                    verify, keyspaces, tables, parallel_restores, use_sstableloader, version_target, ignore_racks):
+                    verify, keyspaces, tables, parallel_restores, use_sstableloader, version_target, ignore_racks,
+                    bucket_name, prefix):
     """
     Restore Cassandra cluster
     """
@@ -243,7 +256,9 @@ def restore_cluster(medusaconfig, backup_name, seed_target, temp_dir, host_list,
                                        int(parallel_restores),
                                        use_sstableloader,
                                        version_target,
-                                       ignore_racks)
+                                       ignore_racks,
+                                       bucket_name,
+                                       prefix)
 
 
 @cli.command(name='restore-node')
@@ -264,14 +279,18 @@ def restore_cluster(medusaconfig, backup_name, seed_target, temp_dir, host_list,
 @click.option('--use-sstableloader', help='Use the sstableloader to load the backup into the cluster', default=False,
               is_flag=True)
 @click.option('--version-target', help='Target Cassandra version', required=False, default="3.11.9")
+@click.option('--bucket-name',
+              help='Explicit bucket name to override the one from config', required=False, default=None)
+@click.option('--prefix', help='Path prefix in multi-tenant buckets', required=False, default=None)
 @pass_MedusaConfig
 def restore_node(medusaconfig, temp_dir, backup_name, in_place, keep_auth, seeds, verify, keyspaces, tables,
-                 use_sstableloader, version_target):
+                 use_sstableloader, version_target, bucket_name, prefix):
     """
     Restore single Cassandra node
     """
     medusa.restore_node.restore_node(medusaconfig, Path(temp_dir), backup_name, in_place, keep_auth, seeds,
-                                     verify, set(keyspaces), set(tables), use_sstableloader, version_target)
+                                     verify, set(keyspaces), set(tables), use_sstableloader, version_target,
+                                     bucket_name, prefix)
 
 
 @cli.command(name='status')
diff --git a/medusa/restore_cluster.py b/medusa/restore_cluster.py
index 9a85b358..31fa6c8f 100644
--- a/medusa/restore_cluster.py
+++ b/medusa/restore_cluster.py
@@ -35,7 +35,8 @@
 
 
 def orchestrate(config, backup_name, seed_target, temp_dir, host_list, keep_auth, bypass_checks, verify, keyspaces,
-                tables, parallel_restores, use_sstableloader=False, version_target=None, ignore_racks=False):
+                tables, parallel_restores, use_sstableloader=False, version_target=None, ignore_racks=False,
+                bucket_name=None, prefix=None):
     monitoring = Monitoring(config=config.monitoring)
     try:
         restore_start_time = datetime.datetime.now()
@@ -57,18 +58,16 @@ def orchestrate(config, backup_name, seed_target, temp_dir, host_list, keep_auth
             logging.error(err_msg)
             raise RuntimeError(err_msg)
 
-        with Storage(config=config.storage) as storage:
-
+        with Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix) as storage:
             try:
                 cluster_backup = storage.get_cluster_backup(backup_name)
             except KeyError:
                 err_msg = 'No such backup --> {}'.format(backup_name)
                 logging.error(err_msg)
                 raise RuntimeError(err_msg)
-
             restore = RestoreJob(cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
                                  parallel_restores, keyspaces, tables, bypass_checks, use_sstableloader, version_target,
-                                 ignore_racks)
+                                 ignore_racks, bucket_name, prefix)
             restore.execute()
 
             restore_end_time = datetime.datetime.now()
@@ -103,7 +102,7 @@
 class RestoreJob(object):
     def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
                  parallel_restores, keyspaces=None, tables=None, bypass_checks=False, use_sstableloader=False,
-                 version_target=None, ignore_racks=False):
+                 version_target=None, ignore_racks=False, bucket_name=None, prefix=None):
         self.id = uuid.uuid4()
         self.ringmap = None
         self.cluster_backup = cluster_backup
@@ -129,6 +128,8 @@ def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, kee
         self.fqdn_resolver = HostnameResolver(fqdn_resolver, k8s_mode)
         self._version_target = version_target
         self.ignore_racks = ignore_racks
+        self.bucket_name = bucket_name
+        self.prefix = prefix
 
     def prepare_restore(self):
         logging.info('Ensuring the backup is found and is complete')
@@ -427,7 +428,7 @@ def _build_restore_cmd(self):
         command = 'mkdir -p {work}; cd {work} && medusa-wrapper {sudo} medusa {config} ' \
                   '--fqdn=%s -vvv restore-node ' \
                   '{in_place} {keep_auth} %s {verify} --backup-name {backup} --temp-dir {temp_dir} ' \
-                  '{use_sstableloader} {keyspaces} {tables}' \
+                  '{use_sstableloader} {keyspaces} {tables} {bucket_name} {prefix}' \
             .format(work=self.work_dir,
                     sudo='sudo' if medusa.utils.evaluate_boolean(self.config.cassandra.use_sudo) else '',
                    config=f'--config-file {self.config.file_path}' if self.config.file_path else '',
@@ -438,7 +439,9 @@ def _build_restore_cmd(self):
                     temp_dir=self.temp_dir,
                     use_sstableloader='--use-sstableloader' if self.use_sstableloader else '',
                     keyspaces=keyspace_options,
-                    tables=table_options)
+                    tables=table_options,
+                    bucket_name=f'--bucket-name {self.bucket_name}' if self.bucket_name is not None else '',
+                    prefix=f'--prefix {self.prefix}' if self.prefix is not None else '')
 
         logging.debug('Preparing to restore on all nodes with the following command: {}'.format(command))
 
diff --git a/medusa/restore_node.py b/medusa/restore_node.py
index e0a3a0b5..5f8dd2d1 100644
--- a/medusa/restore_node.py
+++ b/medusa/restore_node.py
@@ -38,12 +38,12 @@
 
 
 def restore_node(config, temp_dir, backup_name, in_place, keep_auth, seeds, verify, keyspaces, tables,
-                 use_sstableloader=False, version_target=None):
+                 use_sstableloader=False, version_target=None, bucket_name=None, prefix=None):
     if in_place and keep_auth:
         logging.error('Cannot keep system_auth when restoring in-place. It would be overwritten')
         sys.exit(1)
 
-    with Storage(config=config.storage) as storage:
+    with Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix) as storage:
         capture_release_version(storage, version_target)
 
         if not use_sstableloader:
diff --git a/medusa/status.py b/medusa/status.py
index eb1fe5f1..6283ba43 100644
--- a/medusa/status.py
+++ b/medusa/status.py
@@ -23,8 +23,8 @@
 TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
 
 
-def status(config, backup_name):
-    with Storage(config=config.storage) as storage:
+def status(config, backup_name, bucket_name=None, prefix=None):
+    with Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix) as storage:
 
         try:
             cluster_backup = storage.get_cluster_backup(backup_name)
diff --git a/medusa/storage/__init__.py b/medusa/storage/__init__.py
index 73ffc1ce..1a96c82a 100644
--- a/medusa/storage/__init__.py
+++ b/medusa/storage/__init__.py
@@ -24,18 +24,16 @@
 import medusa.index
 
-from medusa.storage.cluster_backup import ClusterBackup
-from medusa.storage.node_backup import NodeBackup
 from medusa.storage.abstract_storage import ManifestObject, AbstractBlob
+from medusa.storage.azure_storage import AzureStorage
+from medusa.storage.cluster_backup import ClusterBackup
 from medusa.storage.google_storage import GoogleStorage
 from medusa.storage.local_storage import LocalStorage
-from medusa.storage.s3_storage import S3Storage
+from medusa.storage.node_backup import NodeBackup
 from medusa.storage.s3_rgw import S3RGWStorage
-from medusa.storage.azure_storage import AzureStorage
-from medusa.storage.s3_base_storage import S3BaseStorage
+from medusa.storage.s3_storage import S3BaseStorage, S3Storage
 from medusa.utils import evaluate_boolean
 
-
 # pattern meant to match just the blob name, not the entire path
 # the path is covered by the initial .*
 # also retains extension if the name has any
@@ -62,13 +60,15 @@ def format_bytes_str(value):
 
 
 class Storage(object):
-    def __init__(self, *, config):
+    def __init__(self, *, config, bucket_name=None, prefix=None):
         self._config = config
         # Used to bypass dependency checks when running in Kubernetes
         self._k8s_mode = evaluate_boolean(config.k8s_mode) if config.k8s_mode else False
-        self._prefix = pathlib.Path(config.prefix or '.')
+        self._prefix = pathlib.Path(prefix or config.prefix or '.')
+        logging.debug(f'Using prefix {self._prefix}')
         self.prefix_path = str(self._prefix) + '/' if len(str(self._prefix)) > 1 else ''
+        self._bucket_name = bucket_name
         self.storage_driver = self._load_storage()
         self.storage_provider = self._config.storage_provider
 
     def __enter__(self):
diff --git a/medusa/storage/abstract_storage.py b/medusa/storage/abstract_storage.py
index d21aa22c..734d33c4 100644
--- a/medusa/storage/abstract_storage.py
+++ b/medusa/storage/abstract_storage.py
@@ -52,9 +52,10 @@ class AbstractStorage(abc.ABC):
     # sometimes we store Cassandra version in this it seems
     api_version = None
 
-    def __init__(self, config):
+    def __init__(self, config, bucket_name=None):
         self.config = config
-        self.bucket_name = config.bucket_name
+        self.bucket_name = bucket_name if bucket_name is not None else config.bucket_name
+        logging.debug(f'Using bucket {self.bucket_name}')
 
     @abc.abstractmethod
     def connect(self):
diff --git a/medusa/storage/google_storage.py b/medusa/storage/google_storage.py
index 01fc2863..7622ef93 100644
--- a/medusa/storage/google_storage.py
+++ b/medusa/storage/google_storage.py
@@ -43,8 +43,6 @@ def __init__(self, config):
         self.service_file = str(Path(config.key_file).expanduser())
         logging.info("Using service file: {}".format(self.service_file))
 
-        self.bucket_name = config.bucket_name
-
         logging.debug('Connecting to Google Storage')
         logging.getLogger('gcloud.aio.storage.storage').setLevel(logging.WARNING)
 
diff --git a/medusa/storage/local_storage.py b/medusa/storage/local_storage.py
index 41e0f7c7..ea757084 100644
--- a/medusa/storage/local_storage.py
+++ b/medusa/storage/local_storage.py
@@ -31,14 +31,14 @@
 class LocalStorage(AbstractStorage):
 
     def __init__(self, config):
+        # in Python we usually put this call last, but we need it first here to set the bucket_name
+        super().__init__(config)
+
         self.config = config
-        self.bucket_name = self.config.bucket_name
 
         self.root_dir = Path(config.base_path) / self.bucket_name
         self.root_dir.mkdir(parents=True, exist_ok=True)
 
-        super().__init__(config)
-
     def connect(self):
         pass
 
@@ -177,7 +177,7 @@ def get_object_datetime(self, blob):
 
     def get_cache_path(self, path):
         # Full path for files that will be taken from previous backups
-        return "{}/{}/{}".format(self.config.base_path, self.config.bucket_name, path)
+        return "{}/{}/{}".format(self.config.base_path, self.bucket_name, path)
 
     @staticmethod
     def blob_matches_manifest(blob, object_in_manifest, enable_md5_checks=False):
diff --git a/medusa/storage/s3_base_storage.py b/medusa/storage/s3_base_storage.py
index 2c250f85..04cc348f 100644
--- a/medusa/storage/s3_base_storage.py
+++ b/medusa/storage/s3_base_storage.py
@@ -110,8 +110,6 @@ def __init__(self, config):
         self.credentials = self._consolidate_credentials(config)
         logging.info('Using credentials {}'.format(self.credentials))
 
-        self.bucket_name: str = config.bucket_name
-
         self.storage_provider = config.storage_provider
 
         self.connection_extra_args = self._make_connection_arguments(config)
diff --git a/tests/integration/features/steps/integration_steps.py b/tests/integration/features/steps/integration_steps.py
index f7f45ff9..53af1444 100644
--- a/tests/integration/features/steps/integration_steps.py
+++ b/tests/integration/features/steps/integration_steps.py
@@ -1214,7 +1214,7 @@ def _there_is_no_latest_complete_backup(context):
 
 @then(r"I can list and print backups without errors")
 def _can_list_print_backups_without_error(context):
-    medusa.listing.list_backups(config=context.medusa_config, show_all=True)
+    medusa.listing.list_backups(config=context.medusa_config, show_all=True, bucket_name=None, prefix=None)
 
 
 @then(r'the latest complete cluster backup is "{expected_backup_name}"')
@@ -1491,7 +1491,8 @@ def _i_delete_the_backup_named(context, backup_name, all_nodes=False):
 
 @then(r'I can fetch the tokenmap of the backup named "{backup_name}"')
 def _i_can_fecth_tokenmap_of_backup_named(context, backup_name):
-    tokenmap = medusa.fetch_tokenmap.main(backup_name=backup_name, config=context.medusa_config)
+    tokenmap = medusa.fetch_tokenmap.main(backup_name=backup_name, config=context.medusa_config, bucket_name=None,
+                                          prefix=None)
     assert "127.0.0.1" in tokenmap
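For context, a minimal usage sketch of the per-invocation overrides introduced by this patch. It relies only on the --bucket-name/--prefix options and the Storage(config=..., bucket_name=..., prefix=...) signature shown above; the bucket and prefix values and the helper function name are hypothetical.

    # CLI: read a different bucket/prefix than the ones configured in medusa.ini (values are made up)
    #   medusa --config-file /etc/medusa/medusa.ini list-backups --show-all \
    #          --bucket-name other-cluster-backups --prefix tenant-a

    # Programmatic equivalent through the Storage context manager
    from medusa.storage import Storage

    def list_backup_names_from_other_bucket(medusa_config):
        # bucket_name/prefix fall back to the configured values when left as None
        with Storage(config=medusa_config.storage,
                     bucket_name='other-cluster-backups',  # hypothetical bucket
                     prefix='tenant-a') as storage:        # hypothetical prefix
            return [backup.name for backup in storage.list_cluster_backups()]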