From 971a7e520f7cc525c3e4b6c2e2aa98942103413c Mon Sep 17 00:00:00 2001
From: Krzysztof Swietlicki <Krzysztof.Swietlicki@partner.bmw.de>
Date: Tue, 7 Nov 2023 12:13:13 +0100
Subject: [PATCH] tenant_parser.py: add support for extra-config-paths

When creating/updating repo_map, the 'extra-config-paths' list is extracted
from the tenant configuration and saved as 'extra_config_paths' in the repo's
'tenants' dictionary (next to 'jobs' and 'roles').

The extra-config-paths values are passed to the Scraper class on
initialization. The 'scrape_job_files' method extends its whitelist with them.

The 'test_integration' tests were extended to verify this new functionality.
---
 tests/scraper/test_integration.py | 82 +++++++++++++++++++++++++++++--
 tests/testdata/test.foo.yaml      |  9 ++++
 zubbi/scraper/main.py             |  7 +--
 zubbi/scraper/scraper.py          |  5 +-
 zubbi/scraper/tenant_parser.py    | 13 +++--
 5 files changed, 104 insertions(+), 12 deletions(-)

diff --git a/tests/scraper/test_integration.py b/tests/scraper/test_integration.py
index f44d0cd..9e6f474 100644
--- a/tests/scraper/test_integration.py
+++ b/tests/scraper/test_integration.py
@@ -29,6 +29,33 @@
     run: playbooks/non-existing-playbook.yaml
 """
 
+MOCKED_JOB_CONTENT_2 = """
+- job:
+    name: even-cooler-new-job
+    parent: super-base-job
+    description: |
+      This is another job for testing purposes.
+    run: playbooks/non-existing-super-playbook.yaml
+"""
+
+MOCKED_PROJECT_CONTENT = """
+- job:
+    name: super-duper-new-job
+    parent: lame-base-job
+    description: |
+      This is yet another job for testing purposes.
+    run: playbooks/non-existing-hyper-playbook.yaml
+
+- project:
+    name: my-simple-project
+    check:
+      jobs:
+        - noop
+    gate:
+      jobs:
+        - super-duper-new-job
+"""
+
 MOCKED_ROLE_DESCRIPTION = """
 Role description containing some reStructuredText expressions.
 
@@ -110,6 +137,17 @@ class MockGitHubRepository(GitHubRepository):
             "roles/foobar/README": "Simple text in a file without extension",
             "roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
         },
+        "orga1/repo3": {
+            REPO_ROOT: {
+                "project-extra.yaml": MockContents("project-extra.yaml", MockContents.FILE),
+                "zuul-extra.d": MockContents("zuul-extra.d", MockContents.DIR),
+            },
+            "project-extra.yaml": MOCKED_PROJECT_CONTENT,
+            "zuul-extra.d": {
+                "jobs.yaml": MockContents("zuul-extra.d/jobs.yaml", MockContents.FILE)
+            },
+            "zuul-extra.d/jobs.yaml": MOCKED_JOB_CONTENT_2,
+        },
         # Empty repositories
         "orga2/repo1": {},
         "orga2/repo3": {},
@@ -206,6 +244,39 @@ def test_scrape():
                 },
             },
         ),
+        "orga1/repo3": (
+            {
+                "project-extra.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: super-duper-new-job\n"
+                    "    parent: lame-base-job\n"
+                    "    description: |\n"
+                    "      This is yet another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-hyper-playbook.yaml\n"
+                    "\n- project:\n"
+                    "    name: my-simple-project\n"
+                    "    check:\n"
+                    "      jobs:\n"
+                    "        - noop\n"
+                    "    gate:\n"
+                    "      jobs:\n"
+                    "        - super-duper-new-job\n",
+                },
+                "zuul-extra.d/jobs.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: even-cooler-new-job\n"
+                    "    parent: super-base-job\n"
+                    "    description: |\n"
+                    "      This is another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-super-playbook.yaml\n",
+                },
+            },
+            {},
+        ),
         "orga2/repo1": ({}, {}),
         "orga2/repo3": ({}, {}),
     }
@@ -221,7 +292,10 @@ def test_scrape():
 
     for repo, tenants in repo_map.items():
         gh_repo = MockGitHubRepository(repo)
-        job_files, role_files = Scraper(gh_repo).scrape()
+        extra_config_paths = tenants["tenants"]["extra_config_paths"]
+        if repo == "orga1/repo3":
+            assert len(extra_config_paths) == 2
+        job_files, role_files = Scraper(gh_repo, extra_config_paths).scrape()
         assert (job_files, role_files) == expected[repo]
 
 
@@ -232,15 +306,15 @@ def test_scrape_not_github():
     expected_repo_map = {
         "repo1": {
             "connection_name": "gerrit",
-            "tenants": {"jobs": ["bar"], "roles": ["bar"]},
+            "tenants": {"extra_config_paths": [], "jobs": ["bar"], "roles": ["bar"]},
         },
         "repo2": {
             "connection_name": "gerrit",
-            "tenants": {"jobs": ["bar"], "roles": ["bar"]},
+            "tenants": {"extra_config_paths": [], "jobs": ["bar"], "roles": ["bar"]},
         },
         "repo3": {
             "connection_name": "gerrit",
-            "tenants": {"jobs": ["bar"], "roles": ["bar"]},
+            "tenants": {"extra_config_paths": [], "jobs": ["bar"], "roles": ["bar"]},
         },
     }
 
diff --git a/tests/testdata/test.foo.yaml b/tests/testdata/test.foo.yaml
index 9645c94..11bb56c 100644
--- a/tests/testdata/test.foo.yaml
+++ b/tests/testdata/test.foo.yaml
@@ -7,9 +7,18 @@
           - orga1/repo1:
               exclude: [pipeline, project]
           - orga1/repo2
+          - orga1/repo3:
+              exclude:
+                - project
+                - pipeline
+              extra-config-paths:
+                - project-extra.yaml
+                - zuul-extra.d/
           - orga2/repo1
         untrusted-projects:
           - orga2/repo1: {shadow: orga1/repo2}
           - orga1/repo2:
               exclude: [project]
+              extra-config-paths:
+                - zuul-extra.d/
           - orga2/repo3
diff --git a/zubbi/scraper/main.py b/zubbi/scraper/main.py
index 039b9f4..4ca0546 100644
--- a/zubbi/scraper/main.py
+++ b/zubbi/scraper/main.py
@@ -487,6 +487,7 @@ def _scrape_repo_map(
         for repo_name, repo_data in repo_map.items():
             # Extract the data from the repo_data
             tenants = repo_data["tenants"]
+            extra_config_paths = tenants["extra_config_paths"]
             connection_name = repo_data["connection_name"]
 
             cached_repo = repo_cache.setdefault(repo_name, repo_data)
@@ -528,7 +529,7 @@ def _scrape_repo_map(
             es_repo.provider = provider
 
             # scrape the repo if is part of the tenant config
-            scrape_repo(repo, tenants, reusable_repos, scrape_time)
+            scrape_repo(repo, extra_config_paths, tenants, reusable_repos, scrape_time)
 
             # Store the information for the repository itself, if it was scraped successfully
             LOGGER.info("Updating repo definition for '%s' in Elasticsearch", repo_name)
@@ -555,8 +556,8 @@ def _scrape_repo_map(
     )
 
 
-def scrape_repo(repo, tenants, reusable_repos, scrape_time):
-    job_files, role_files = Scraper(repo).scrape()
+def scrape_repo(repo, extra_config_paths, tenants, reusable_repos, scrape_time):
+    job_files, role_files = Scraper(repo, extra_config_paths).scrape()
 
     is_rusable_repo = repo.repo_name in reusable_repos
     jobs = []
diff --git a/zubbi/scraper/scraper.py b/zubbi/scraper/scraper.py
index 4055fd2..c49cd23 100644
--- a/zubbi/scraper/scraper.py
+++ b/zubbi/scraper/scraper.py
@@ -33,8 +33,9 @@
 
 
 class Scraper:
-    def __init__(self, repo):
+    def __init__(self, repo, extra_config_paths=[]):
         self.repo = repo
+        self.extra_config_paths = extra_config_paths
 
     def scrape(self):
         LOGGER.info("Scraping '%s'", self.repo.name)
@@ -55,7 +56,7 @@ def scrape_job_files(self):
 
         job_files = self.iterate_directory(
             REPO_ROOT,
-            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES,
+            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths,
             # NOTE (felix): As we provide this directly to the
             # str.endswith() method, the argument must be a str or a
             # tuple of strings, otherwise the following exception is
diff --git a/zubbi/scraper/tenant_parser.py b/zubbi/scraper/tenant_parser.py
index ca20b03..103a9e9 100644
--- a/zubbi/scraper/tenant_parser.py
+++ b/zubbi/scraper/tenant_parser.py
@@ -62,7 +62,7 @@ def parse(self):
             self.tenants.append(tenant_name)
 
     def _update_repo_map(self, project, connection_name, tenant):
-        project_name, exclude = self._extract_project(project)
+        project_name, exclude, extra_config_paths = self._extract_project(project)
 
         # Map the current tenant to the current repository
         repo_tenant_entry = self.repo_map.setdefault(
@@ -74,15 +74,22 @@ def _update_repo_map(self, project, connection_name, tenant):
         if "jobs" not in exclude:
             repo_tenant_entry["tenants"]["jobs"].append(tenant)
         repo_tenant_entry["tenants"]["roles"].append(tenant)
+        repo_tenant_entry["tenants"]["extra_config_paths"] = extra_config_paths
 
     def _extract_project(self, project):
         project_name = project
         exclude = []
+        extra_config_paths = []
         if type(project) is dict:
             # Get the first key of the dict containing the project name.
             project_name = list(project.keys())[0]
-            exclude = project.get("exclude", [])
-        return project_name, exclude
+            exclude = project[project_name].get("exclude", [])
+            # NOTE (swietlicki): directories in the extra-config-paths section
+            # carry a trailing slash, while Scraper.iterate_directory() compares
+            # against directory names without one, so strip it here.
+            for item in project[project_name].get("extra-config-paths", []):
+                extra_config_paths.append(item[:-1] if item.endswith("/") else item)
+        return project_name, exclude, extra_config_paths
 
     def _load_tenant_sources_from_file(self, sources_file):
         LOGGER.info("Parsing tenant sources file '%s'", sources_file)