From 971a7e520f7cc525c3e4b6c2e2aa98942103413c Mon Sep 17 00:00:00 2001 From: Krzysztof Swietlicki Date: Tue, 7 Nov 2023 12:13:13 +0100 Subject: [PATCH] tenant_parser.py: add support for extra-config-paths When creating/updating repo_map, the 'extra-config-paths' list is extracted from the tenant configuration and saved in the repo's 'tenants' dictionary (next to 'jobs' and 'roles'). The extra-config-paths values are used for Scraper class initialization. The 'scrape_job_files' method extends the whitelist with them. The 'test_integration' tests were extended to verify this new functionality. --- tests/scraper/test_integration.py | 82 +++++++++++++++++++++++++++++-- tests/testdata/test.foo.yaml | 9 ++++ zubbi/scraper/main.py | 7 +-- zubbi/scraper/scraper.py | 5 +- zubbi/scraper/tenant_parser.py | 13 +++-- 5 files changed, 104 insertions(+), 12 deletions(-) diff --git a/tests/scraper/test_integration.py b/tests/scraper/test_integration.py index f44d0cd..9e6f474 100644 --- a/tests/scraper/test_integration.py +++ b/tests/scraper/test_integration.py @@ -29,6 +29,33 @@ run: playbooks/non-existing-playbook.yaml """ +MOCKED_JOB_CONTENT_2 = """ +- job: + name: even-cooler-new-job + parent: super-base-job + description: | + This is another job for testing purposes. + run: playbooks/non-existing-super-playbook.yaml +""" + +MOCKED_PROJECT_CONTENT = """ +- job: + name: super-duper-new-job + parent: lame-base-job + description: | + This is yet another job for testing purposes. + run: playbooks/non-existing-hyper-playbook.yaml + +- project: + name: my-simple-project + check: + jobs: + - noop + gate: + jobs: + - super-duper-new-job +""" + MOCKED_ROLE_DESCRIPTION = """ Role description containing some reStructuredText expressions.
@@ -110,6 +137,17 @@ class MockGitHubRepository(GitHubRepository): "roles/foobar/README": "Simple text in a file without extension", "roles/empty-dir/REAMDE.whatever": "This file won't be checked out", }, + "orga1/repo3": { + REPO_ROOT: { + "project-extra.yaml": MockContents("project-extra.yaml", MockContents.FILE), + "zuul-extra.d": MockContents("zuul-extra.d", MockContents.DIR), + }, + "project-extra.yaml": MOCKED_PROJECT_CONTENT, + "zuul-extra.d": { + "jobs.yaml": MockContents("zuul-extra.d/jobs.yaml", MockContents.FILE) + }, + "zuul-extra.d/jobs.yaml": MOCKED_JOB_CONTENT_2, + }, # Empty repositories "orga2/repo1": {}, "orga2/repo3": {}, @@ -206,6 +244,39 @@ def test_scrape(): }, }, ), + "orga1/repo3": ( + { + "project-extra.yaml": { + "last_changed": "2018-09-17 15:15:15", + "blame": [], + "content": "\n- job:\n" + " name: super-duper-new-job\n" + " parent: lame-base-job\n" + " description: |\n" + " This is yet another job for testing purposes.\n" + " run: playbooks/non-existing-hyper-playbook.yaml\n" + "\n- project:\n" + " name: my-simple-project\n" + " check:\n" + " jobs:\n" + " - noop\n" + " gate:\n" + " jobs:\n" + " - super-duper-new-job\n", + }, + "zuul-extra.d/jobs.yaml": { + "last_changed": "2018-09-17 15:15:15", + "blame": [], + "content": "\n- job:\n" + " name: even-cooler-new-job\n" + " parent: super-base-job\n" + " description: |\n" + " This is another job for testing purposes.\n" + " run: playbooks/non-existing-super-playbook.yaml\n", + }, + }, + {}, + ), "orga2/repo1": ({}, {}), "orga2/repo3": ({}, {}), } @@ -221,7 +292,10 @@ def test_scrape(): for repo, tenants in repo_map.items(): gh_repo = MockGitHubRepository(repo) - job_files, role_files = Scraper(gh_repo).scrape() + extra_config_paths = tenants["tenants"]["extra_config_paths"] + if repo == "orga1/repo3": + assert len(extra_config_paths) == 2 + job_files, role_files = Scraper(gh_repo, extra_config_paths).scrape() assert (job_files, role_files) == expected[repo] @@ -232,15 +306,15 @@ def 
test_scrape_not_github(): expected_repo_map = { "repo1": { "connection_name": "gerrit", - "tenants": {"jobs": ["bar"], "roles": ["bar"]}, + "tenants": {"extra_config_paths": [], "jobs": ["bar"], "roles": ["bar"]}, }, "repo2": { "connection_name": "gerrit", - "tenants": {"jobs": ["bar"], "roles": ["bar"]}, + "tenants": {"extra_config_paths": [], "jobs": ["bar"], "roles": ["bar"]}, }, "repo3": { "connection_name": "gerrit", - "tenants": {"jobs": ["bar"], "roles": ["bar"]}, + "tenants": {"extra_config_paths": [], "jobs": ["bar"], "roles": ["bar"]}, }, } diff --git a/tests/testdata/test.foo.yaml b/tests/testdata/test.foo.yaml index 9645c94..11bb56c 100644 --- a/tests/testdata/test.foo.yaml +++ b/tests/testdata/test.foo.yaml @@ -7,9 +7,18 @@ - orga1/repo1: exclude: [pipeline, project] - orga1/repo2 + - orga1/repo3: + exclude: + - project + - pipeline + extra-config-paths: + - project-extra.yaml + - zuul-extra.d/ - orga2/repo1 untrusted-projects: - orga2/repo1: {shadow: orga1/repo2} - orga1/repo2: exclude: [project] + extra-config-paths: + - zuul-extra.d/ - orga2/repo3 diff --git a/zubbi/scraper/main.py b/zubbi/scraper/main.py index 039b9f4..4ca0546 100644 --- a/zubbi/scraper/main.py +++ b/zubbi/scraper/main.py @@ -487,6 +487,7 @@ def _scrape_repo_map( for repo_name, repo_data in repo_map.items(): # Extract the data from the repo_data tenants = repo_data["tenants"] + extra_config_paths = tenants["extra_config_paths"] connection_name = repo_data["connection_name"] cached_repo = repo_cache.setdefault(repo_name, repo_data) @@ -528,7 +529,7 @@ def _scrape_repo_map( es_repo.provider = provider # scrape the repo if is part of the tenant config - scrape_repo(repo, tenants, reusable_repos, scrape_time) + scrape_repo(repo, extra_config_paths, tenants, reusable_repos, scrape_time) # Store the information for the repository itself, if it was scraped successfully LOGGER.info("Updating repo definition for '%s' in Elasticsearch", repo_name) @@ -555,8 +556,8 @@ def _scrape_repo_map( ) 
-def scrape_repo(repo, tenants, reusable_repos, scrape_time): - job_files, role_files = Scraper(repo).scrape() +def scrape_repo(repo, extra_config_paths, tenants, reusable_repos, scrape_time): + job_files, role_files = Scraper(repo, extra_config_paths).scrape() is_rusable_repo = repo.repo_name in reusable_repos jobs = [] diff --git a/zubbi/scraper/scraper.py b/zubbi/scraper/scraper.py index 4055fd2..c49cd23 100644 --- a/zubbi/scraper/scraper.py +++ b/zubbi/scraper/scraper.py @@ -33,8 +33,9 @@ class Scraper: - def __init__(self, repo): + def __init__(self, repo, extra_config_paths=[]): self.repo = repo + self.extra_config_paths = extra_config_paths def scrape(self): LOGGER.info("Scraping '%s'", self.repo.name) @@ -55,7 +56,7 @@ def scrape_job_files(self): job_files = self.iterate_directory( REPO_ROOT, - whitelist=ZUUL_DIRECTORIES + ZUUL_FILES, + whitelist=ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths, # NOTE (felix): As we provide this directly to the # str.endswith() method, the argument must be a str or a # tuple of strings, otherwise the following exception is diff --git a/zubbi/scraper/tenant_parser.py b/zubbi/scraper/tenant_parser.py index ca20b03..103a9e9 100644 --- a/zubbi/scraper/tenant_parser.py +++ b/zubbi/scraper/tenant_parser.py @@ -62,7 +62,7 @@ def parse(self): self.tenants.append(tenant_name) def _update_repo_map(self, project, connection_name, tenant): - project_name, exclude = self._extract_project(project) + project_name, exclude, extra_config_paths = self._extract_project(project) # Map the current tenant to the current repository repo_tenant_entry = self.repo_map.setdefault( @@ -74,15 +74,22 @@ def _update_repo_map(self, project, connection_name, tenant): if "jobs" not in exclude: repo_tenant_entry["tenants"]["jobs"].append(tenant) repo_tenant_entry["tenants"]["roles"].append(tenant) + repo_tenant_entry["tenants"]["extra_config_paths"] = extra_config_paths def _extract_project(self, project): project_name = project exclude = [] + 
extra_config_paths = [] if type(project) is dict: # Get the first key of the dict containing the project name. project_name = list(project.keys())[0] - exclude = project.get("exclude", []) - return project_name, exclude + exclude = project[project_name].get("exclude", []) + # NOTE (swietlicki): directories in extra-config-path section contain + # trailing slash, while inside the Scraper.iterate_directory() the comparison + # is done against dir names without trailing slash + for item in project[project_name].get("extra-config-paths", []): + extra_config_paths.append(item[:-1] if item.endswith("/") else item) + return project_name, exclude, extra_config_paths def _load_tenant_sources_from_file(self, sources_file): LOGGER.info("Parsing tenant sources file '%s'", sources_file)