diff --git a/src/databricks/labs/ucx/config.py b/src/databricks/labs/ucx/config.py index 0a852282ea..9ca34f6f98 100644 --- a/src/databricks/labs/ucx/config.py +++ b/src/databricks/labs/ucx/config.py @@ -47,6 +47,8 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes # Whether the assessment should capture a specific list of databases, if not specified, it will list all databases. include_databases: list[str] | None = None + # Whether the assessment should capture databases matching specific prefixes, if not specified, it will list all databases. + include_database_prefixes: list[str] | None = None # Whether the tables in mounts crawler should crawl a specific list of mounts. # If not specified, it will list all mounts. diff --git a/src/databricks/labs/ucx/install.py b/src/databricks/labs/ucx/install.py index cfdeedd1c5..40ac7d744c 100644 --- a/src/databricks/labs/ucx/install.py +++ b/src/databricks/labs/ucx/install.py @@ -243,6 +243,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig: configure_groups = ConfigureGroups(self.prompts) configure_groups.run() include_databases = self._select_databases() + include_database_prefixes = self._select_database_prefixes() skip_tacl_migration = self.prompts.confirm("Do you want to skip TACL migration when migrating tables?") @@ -272,6 +273,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig: log_level=log_level, num_threads=num_threads, include_databases=include_databases, + include_database_prefixes=include_database_prefixes, trigger_job=trigger_job, recon_tolerance_percent=recon_tolerance_percent, upload_dependencies=upload_dependencies, @@ -428,6 +430,16 @@ def _select_databases(self): return [x.strip() for x in selected_databases.split(",")] return None + def _select_database_prefixes(self): + selected_prefixes = self.prompts.question( + "Comma-separated list of database prefixes to include (e.g., 'dev_', 'test_'). " + "If not specified, prefix filtering will not be applied", + default="", + ) + if selected_prefixes != "": + return [x.strip() for x in selected_prefixes.split(",")] + return None + def configure_warehouse(self) -> str: def warehouse_type(_): return _.warehouse_type.value if not _.enable_serverless_compute else "SERVERLESS"