From 5baa517543eb67c6fdbd1e68cf35f158aa3e9f15 Mon Sep 17 00:00:00 2001
From: Michael Sarahan <msarahan@nvidia.com>
Date: Mon, 8 Jan 2024 13:32:28 -0600
Subject: [PATCH] prototype implementation of PEP725 PURL mapping

---
 grayskull/cli/stdout.py                   | 18 +++++++-
 grayskull/strategy/py_base.py             | 24 +++++++----
 grayskull/strategy/py_toml.py             | 51 +++++++++++++++++++++++
 grayskull/strategy/pypi.py                |  7 +++-
 grayskull/utils.py                        | 12 +++---
 tests/test_flit.py                        |  8 ----
 tests/{test_poetry.py => test_py_toml.py} | 35 +++++++++++++++-
 7 files changed, 129 insertions(+), 26 deletions(-)
 delete mode 100644 tests/test_flit.py
 rename tests/{test_poetry.py => test_py_toml.py} (84%)

diff --git a/grayskull/cli/stdout.py b/grayskull/cli/stdout.py
index 1f9440904..650f2335c 100644
--- a/grayskull/cli/stdout.py
+++ b/grayskull/cli/stdout.py
@@ -65,6 +65,7 @@ def print_requirements(
 ) -> set:
     all_missing_deps = set()
     re_search = re.compile(r"^\s*([a-z0-9\.\-\_]+)(.*)", re.IGNORECASE | re.DOTALL)
+    re_purl = re.compile(r"[a-z]+\:[\.a-z0-9_-]+\/[\.a-z0-9_-]+", re.IGNORECASE)
 
     def print_req(list_pkg):
         if isinstance(list_pkg, str):
@@ -78,13 +79,18 @@ def print_req(list_pkg):
                 pkg_name = pkg.replace("<{", "{{")
                 options = ""
                 colour = Fore.GREEN
+            elif re_purl.match(pkg):
+                pkg_name = pkg
+                options = ""
+                colour = Fore.YELLOW
+                all_missing_deps.add(pkg)
             elif search_result:
                 pkg_name, options = search_result.groups()
                 if is_pkg_available(pkg_name):
                     colour = Fore.GREEN
                 else:
-                    all_missing_deps.add(pkg_name)
                     colour = Fore.RED
+                    all_missing_deps.add(pkg_name)
             else:
                 continue
             print_msg(f"  - {colour}{Style.BRIGHT}{pkg_name}{Style.RESET_ALL}{options}")
@@ -102,7 +108,15 @@ def print_req(list_pkg):
         print_msg(f"{key.capitalize()} requirements (optional):")
         print_req(req_list)
 
-    print_msg(f"\n{Fore.RED}RED{Style.RESET_ALL}: Missing packages")
+    print_msg(
+        f"\n{Fore.RED}RED{Style.RESET_ALL}: Package names not available on conda-forge"
+    )
+    print_msg(
+        (
+            f"{Fore.YELLOW}YELLOW{Style.RESET_ALL}: "
+            "PEP-725 PURLs that did not map to known package"
+        )
+    )
     print_msg(f"{Fore.GREEN}GREEN{Style.RESET_ALL}: Packages available on conda-forge")
 
     if CLIConfig().list_missing_deps:
diff --git a/grayskull/strategy/py_base.py b/grayskull/strategy/py_base.py
index df4289742..00d658e5f 100644
--- a/grayskull/strategy/py_base.py
+++ b/grayskull/strategy/py_base.py
@@ -546,10 +546,12 @@ def clean_list_pkg(pkg, list_pkgs):
         return [p for p in list_pkgs if pkg != p.strip().split(" ", 1)[0]]
 
     for pkg in requirements["host"]:
-        pkg_name = RE_DEPS_NAME.match(pkg).group(0)
-        if pkg_name in PIN_PKG_COMPILER.keys():
-            requirements["run"] = clean_list_pkg(pkg_name, requirements["run"])
-            requirements["run"].append(PIN_PKG_COMPILER[pkg_name])
+        pkg_name_match = RE_DEPS_NAME.match(pkg)
+        if pkg_name_match:
+            pkg_name = pkg_name_match.group(0)
+            if pkg_name in PIN_PKG_COMPILER.keys():
+                requirements["run"] = clean_list_pkg(pkg_name, requirements["run"])
+                requirements["run"].append(PIN_PKG_COMPILER[pkg_name])
 
 
 def discover_license(metadata: dict) -> List[ShortLicense]:
@@ -733,6 +735,14 @@ def merge_setup_toml_metadata(setup_metadata: dict, pyproject_metadata: dict) ->
             setup_metadata.get("install_requires", []),
             pyproject_metadata["requirements"]["run"],
         )
+    # this is not a valid setup_metadata field, but we abuse it to pass it
+    # through to the conda recipe generator downstream. It's because setup.py
+    # does not have a notion of build vs. host requirements. It only has
+    # equivalents to host and run.
+    if pyproject_metadata["requirements"]["build"]:
+        setup_metadata["__build_requirements_placeholder"] = pyproject_metadata[
+            "requirements"
+        ]["build"]
     if pyproject_metadata["requirements"]["run_constrained"]:
         setup_metadata["requirements_run_constrained"] = pyproject_metadata[
             "requirements"
@@ -802,9 +812,8 @@ def ensure_pep440_in_req_list(list_req: List[str]) -> List[str]:
 
 
 def split_deps(deps: str) -> List[str]:
-    deps = deps.split(",")
     result = []
-    for d in deps:
+    for d in deps.split(","):
         constrain = ""
         for val in re.split(r"([><!=~^]+)", d):
             if not val:
@@ -819,7 +828,8 @@ def split_deps(deps: str) -> List[str]:
 def ensure_pep440(pkg: str) -> str:
     if not pkg:
         return pkg
-    if pkg.strip().startswith("<{") or pkg.strip().startswith("{{"):
+    pkg = pkg.strip()
+    if any([pkg.startswith(pattern) for pattern in ("<{", "{{")]):
         return pkg
     split_pkg = pkg.strip().split(" ")
     if len(split_pkg) <= 1:
diff --git a/grayskull/strategy/py_toml.py b/grayskull/strategy/py_toml.py
index 4a27b15f3..2dd1c9f63 100644
--- a/grayskull/strategy/py_toml.py
+++ b/grayskull/strategy/py_toml.py
@@ -246,6 +246,56 @@ def add_flit_metadata(metadata: dict, toml_metadata: dict) -> dict:
     return metadata
 
 
+def is_pep725_present(toml_metadata: dict):
+    return "external" in toml_metadata  # PEP 725 data sits under [external]
+
+
+def get_pep725_mapping(purl: str):
+    """Translate a PEP 725 PURL into its conda-ecosystem name; a PURL with no
+    entry is returned unchanged. Each ecosystem is expected to supply the table."""
+
+    conda_names = {
+        "virtual:compiler/c": "{{ compiler('c') }}",
+        "virtual:compiler/cpp": "{{ compiler('cxx') }}",
+        "virtual:compiler/fortran": "{{ compiler('fortran') }}",
+        "virtual:compiler/rust": "{{ compiler('rust') }}",
+        "virtual:interface/blas": "{{ blas }}",
+    }
+    return conda_names[purl] if purl in conda_names else purl
+
+
+def add_pep725_metadata(metadata: dict, toml_metadata: dict):
+    """Merge the PEP 725 ``[external]`` PURLs, translated with
+    get_pep725_mapping, into the matching conda requirement sections of
+    ``metadata``; returns ``metadata``, or leaves it untouched without PEP 725."""
+    if not is_pep725_present(toml_metadata):
+        return metadata
+
+    externals = toml_metadata["external"]
+    requirements = metadata.get("requirements", {})
+    section_map = (
+        ("build", "build-requires"),
+        ("host", "host-requires"),
+        ("run", "dependencies"),
+    )
+    for conda_section, pep725_section in section_map:
+        # extend what is already collected for this section, never discard it
+        deps = list(requirements.get(conda_section) or [])
+        deps.extend(map(get_pep725_mapping, externals.get(pep725_section, [])))
+        # TODO: handle optional dependencies properly; optional-* lives in [external]
+        optional_features = externals.get(f"optional-{pep725_section}", {})
+        for feature_name, feature_deps in optional_features.items():
+            deps.append(f'# OPTIONAL dependencies from feature "{feature_name}"')
+            deps.extend(map(get_pep725_mapping, feature_deps))
+        if deps:
+            requirements[conda_section] = deps
+        else:
+            requirements.pop(conda_section, None)
+    if requirements:
+        metadata["requirements"] = requirements
+    return metadata
+
+
 def get_all_toml_info(path_toml: Union[Path, str]) -> dict:
     with open(path_toml, "rb") as f:
         toml_metadata = tomli.load(f)
@@ -288,5 +338,6 @@ def get_all_toml_info(path_toml: Union[Path, str]) -> dict:
 
     add_poetry_metadata(metadata, toml_metadata)
     add_flit_metadata(metadata, toml_metadata)
+    add_pep725_metadata(metadata, toml_metadata)
 
     return metadata
diff --git a/grayskull/strategy/pypi.py b/grayskull/strategy/pypi.py
index 2e9a40ff1..1bed893b7 100644
--- a/grayskull/strategy/pypi.py
+++ b/grayskull/strategy/pypi.py
@@ -111,6 +111,7 @@ def get_val(key):
         "requires_dist": requires_dist,
         "sdist_path": get_val("sdist_path"),
         "requirements_run_constrained": get_val("requirements_run_constrained"),
+        "__build_requirements_placeholder": get_val("__build_requirements_placeholder"),
     }
 
 
@@ -556,6 +557,8 @@ def extract_requirements(metadata: dict, config, recipe) -> Dict[str, List[str]]
     requires_dist = format_dependencies(metadata.get("requires_dist", []), name)
     setup_requires = metadata.get("setup_requires", [])
     host_req = format_dependencies(setup_requires or [], config.name)
+    build_requires = metadata.get("__build_requirements_placeholder", [])
+    build_req = format_dependencies(build_requires or [], config.name)
     if not requires_dist and not host_req and not metadata.get("requires_python"):
         if config.is_strict_cf:
             py_constrain = (
@@ -571,7 +574,9 @@ def extract_requirements(metadata: dict, config, recipe) -> Dict[str, List[str]]
 
     run_req = get_run_req_from_requires_dist(requires_dist, config)
     host_req = get_run_req_from_requires_dist(host_req, config)
-    build_req = [f"<{{ compiler('{c}') }}}}" for c in metadata.get("compilers", [])]
+    build_req = build_req or [
+        f"<{{ compiler('{c}') }}}}" for c in metadata.get("compilers", [])
+    ]
     if build_req:
         config.is_arch = True
 
diff --git a/grayskull/utils.py b/grayskull/utils.py
index 7100f98d3..f19bd3e6c 100644
--- a/grayskull/utils.py
+++ b/grayskull/utils.py
@@ -150,7 +150,7 @@ def rm_duplicated_deps(all_requirements: Union[list, set, None]) -> Optional[lis
                 new_reqs[canonicalized] = " ".join(constrains)
         else:
             new_reqs[canonicalized] = " ".join(constrains)
-    return [re.sub(r"\s+(#)", "  \\1", v.strip()) for v in new_reqs.values()]
+    return [re.sub(r"(\S+)\s+(#)", "\\1  \\2", v) for v in new_reqs.values()]
 
 
 def format_dependencies(all_dependencies: List, name: str) -> List:
@@ -162,11 +162,16 @@ def format_dependencies(all_dependencies: List, name: str) -> List:
     :return: list of dependencies formatted
     """
     formatted_dependencies = []
+    #  PURL fields          scheme         type           name
+    re_purl = re.compile(r"[a-zA-Z]+\:[\.a-zA-Z0-9_-]+\/[\.a-zA-Z0-9_-]+")
     re_deps = re.compile(r"^\s*([\.a-zA-Z0-9_-]+)\s*(.*)\s*$", re.MULTILINE | re.DOTALL)
     re_remove_space = re.compile(r"([<>!=]+)\s+")
     re_remove_tags = re.compile(r"\s*(\[.*\])", re.DOTALL)
     re_remove_comments = re.compile(r"\s+#.*", re.DOTALL)
     for req in all_dependencies:
+        if re_purl.match(req):
+            formatted_dependencies.append(req)
+            continue
         match_req = re_deps.match(req)
         deps_name = req
         if name is not None and deps_name.replace("-", "_") == name.replace("-", "_"):
@@ -220,11 +225,6 @@ def generate_recipe(
             copyfile(file_to_recipe, os.path.join(recipe_folder, name))
 
 
-def get_clean_yaml(recipe_yaml: CommentedMap) -> CommentedMap:
-    clean_yaml(recipe_yaml)
-    return add_new_lines_after_section(recipe_yaml)
-
-
 def add_new_lines_after_section(recipe_yaml: CommentedMap) -> CommentedMap:
     for section in recipe_yaml.keys():
         if section == "package":
diff --git a/tests/test_flit.py b/tests/test_flit.py
deleted file mode 100644
index bed329b30..000000000
--- a/tests/test_flit.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from grayskull.strategy.py_toml import add_flit_metadata
-
-
-def test_add_flit_metadata():
-    metadata = {"build": {"entry_points": []}}
-    toml_metadata = {"tool": {"flit": {"scripts": {"key": "value"}}}}
-    result = add_flit_metadata(metadata, toml_metadata)
-    assert result == {"build": {"entry_points": ["key = value"]}}
diff --git a/tests/test_poetry.py b/tests/test_py_toml.py
similarity index 84%
rename from tests/test_poetry.py
rename to tests/test_py_toml.py
index 24227ff4e..4fcc44d5a 100644
--- a/tests/test_poetry.py
+++ b/tests/test_py_toml.py
@@ -8,6 +8,8 @@
 from grayskull.main import generate_recipes_from_list, init_parser
 from grayskull.strategy.py_toml import (
     InvalidVersion,
+    add_flit_metadata,
+    add_pep725_metadata,
     add_poetry_metadata,
     encode_poetry_version,
     get_all_toml_info,
@@ -18,6 +20,13 @@
 )
 
 
+def test_add_flit_metadata():
+    metadata = {"build": {"entry_points": []}}
+    toml_metadata = {"tool": {"flit": {"scripts": {"key": "value"}}}}
+    result = add_flit_metadata(metadata, toml_metadata)
+    assert result == {"build": {"entry_points": ["key = value"]}}
+
+
 @pytest.mark.parametrize(
     "version, major, minor, patch",
     [
@@ -160,7 +169,7 @@ def test_poetry_langchain_snapshot(tmpdir):
     assert filecmp.cmp(snapshot_path, output_path, shallow=False)
 
 
-def test_get_constrained_dep_version_not_present():
+def test_poetry_get_constrained_dep_version_not_present():
     assert (
         get_constrained_dep(
             {"git": "https://codeberg.org/hjacobs/pytest-kind.git"}, "pytest-kind"
@@ -169,7 +178,7 @@ def test_get_constrained_dep_version_not_present():
     )
 
 
-def test_entrypoints():
+def test_poetry_entrypoints():
     poetry = {
         "requirements": {"host": ["setuptools"], "run": ["python"]},
         "build": {},
@@ -198,3 +207,25 @@ def test_entrypoints():
         },
         "test": {},
     }
+
+
+@pytest.mark.parametrize(
+    "conda_section, pep725_section",
+    [("build", "build-requires"), ("host", "host-requires"), ("run", "dependencies")],
+)
+@pytest.mark.parametrize(
+    "purl, purl_translated",
+    [
+        # expected strings mirror the table in get_pep725_mapping
+        ("virtual:compiler/c", "{{ compiler('c') }}"),
+        ("pkg:alice/bob", "pkg:alice/bob"),
+    ],
+)
+def test_pep725_section_lookup(conda_section, pep725_section, purl, purl_translated):
+    """Each PEP 725 section lands in its conda section; unknown PURLs pass through."""
+    toml_metadata = {
+        "external": {pep725_section: [purl]},
+    }
+    assert add_pep725_metadata({}, toml_metadata) == {
+        "requirements": {conda_section: [purl_translated]}
+    }