From d4d3bd8ebedb163a60c73a7aaf795bdba2beb8e8 Mon Sep 17 00:00:00 2001 From: Max Mehl Date: Fri, 7 Feb 2025 16:51:24 +0100 Subject: [PATCH] use purl-tools for purl->clearlydefined logic --- complassist/_clearlydefined.py | 63 ++-------------------------------- complassist/_helpers.py | 24 ------------- complassist/_sbom_enrich.py | 5 +-- complassist/main.py | 7 ++-- poetry.lock | 25 +++++++++++--- pyproject.toml | 2 +- 6 files changed, 31 insertions(+), 95 deletions(-) diff --git a/complassist/_clearlydefined.py b/complassist/_clearlydefined.py index 9963069..6c2ee68 100644 --- a/complassist/_clearlydefined.py +++ b/complassist/_clearlydefined.py @@ -5,70 +5,13 @@ """Functions concerning working with ClearlyDefined""" import logging -import sys from os.path import join as pathjoin from urllib.parse import urljoin -from packageurl import PackageURL +from purltools import purl2clearlydefined from requests.exceptions import JSONDecodeError -from ._helpers import make_request_with_retry, replacer - - -def purl_to_cd_coordinates(purl: str) -> str: - """ - Converts a Package URL (purl) to ClearlyDefined coordinates. - - Parses the purl and translates it into a coordinate format compatible with - ClearlyDefined, handling necessary type conversions and provider mappings. - - Args: - purl (str): The Package URL to be converted. - - Returns: - str: The ClearlyDefined coordinates derived from the purl. - - Raises: - SystemExit: If the provided purl is not valid, the function logs a - critical error and exits. - """ - try: - purl_obj = PackageURL.from_string(purl) - except ValueError as exc: - logging.critical("Package URL '%s' does not seem to be a valid purl: %s", purl, exc) - sys.exit(1) - - logging.debug("purl string '%s' converted to purl object '%s'", purl, repr(purl_obj)) - - # Convert to dict, replacing empty values with "-" - p = purl_obj.to_dict(empty="-") - - # Fix types that are different in purl and CD - type_fix = {"cargo": "crate", "github": "git"} - - coordinates: dict = { - "type": replacer(p.get("type", ""), type_fix), - "provider": "", - "namespace": p.get("namespace"), - "name": p.get("name"), - "version": p.get("version"), - } - - # Update coordinates with provider, based on type - type_to_provider = { - "crate": "cratesio", - "git": "github", - "maven": "mavencentral", - "npm": "npmjs", - "pypi": "pypi", - } - coordinates["provider"] = replacer(coordinates["type"], type_to_provider) - - coordinates_string = "/".join([v for _, v in coordinates.items()]) - - logging.debug("Converted '%s' to '%s'", purl, coordinates_string) - - return coordinates_string +from ._helpers import make_request_with_retry def _cdapi_call( @@ -255,7 +198,7 @@ def get_clearlydefined_license_and_copyright_in_batches( ClearlyDefined API did not return valid data. """ # Create connections between coordinates <-> purl - coordinates_purls = {purl_to_cd_coordinates(purl): purl for purl in purls} + coordinates_purls = {purl2clearlydefined(purl): purl for purl in purls} # Request the CD API for the coordinates api_return = _cdapi_call( path="", method="POST", json_dict=list(coordinates_purls.keys()), expand="-files" diff --git a/complassist/_helpers.py b/complassist/_helpers.py index cd51e4c..1ed764e 100644 --- a/complassist/_helpers.py +++ b/complassist/_helpers.py @@ -17,30 +17,6 @@ def dict_to_json(data: dict) -> str: return json.dumps(data, indent=2, sort_keys=False) -def replacer(string: str, replacement_dict: dict) -> str: - """ - Replaces a string based on a replacement dictionary. - - If the string matches a key in the replacement dictionary, it is replaced by - the corresponding value. If no match is found, the original string is - returned. - - Args: - string (str): The string to be checked and possibly replaced. - replacement_dict (dict): A dictionary where keys are strings to be - replaced and values are their replacements. - - Returns: - str: The replaced string if a match is found, otherwise the original - string. - """ - if string in replacement_dict: - replacement = replacement_dict.get(string, "") - return replacement - - return string - - def read_json_file(path: str) -> dict: """Open a JSON file and return it as dict""" with open(path, "r", encoding="UTF-8") as jsonfile: diff --git a/complassist/_sbom_enrich.py b/complassist/_sbom_enrich.py index 36d5ee6..eda7b77 100644 --- a/complassist/_sbom_enrich.py +++ b/complassist/_sbom_enrich.py @@ -7,11 +7,12 @@ import logging from datetime import datetime +from purltools import purl2clearlydefined + from . import __version__ from ._clearlydefined import ( get_clearlydefined_license_and_copyright, get_clearlydefined_license_and_copyright_in_batches, - purl_to_cd_coordinates, ) from ._helpers import extract_excerpt, read_json_file, write_json_file from ._sbom_parse import ( @@ -291,7 +292,7 @@ def enrich_sbom_with_clearlydefined( for purl in all_purls: logging.info("Getting ClearlyDefined data for %s", purl) cd_license, cd_copyright = get_clearlydefined_license_and_copyright( - coordinates=purl_to_cd_coordinates(purl) + coordinates=purl2clearlydefined(purl) ) clearlydefined_data[purl] = {"license": cd_license, "copyright": cd_copyright} diff --git a/complassist/main.py b/complassist/main.py index d3809da..7aedb77 100644 --- a/complassist/main.py +++ b/complassist/main.py @@ -12,11 +12,12 @@ import logging import sys +from purltools import purl2clearlydefined + from . import __version__ from ._clearlydefined import ( get_clearlydefined_license_and_copyright, print_clearlydefined_result, - purl_to_cd_coordinates, ) from ._helpers import dict_to_json from ._licensing import get_outbound_candidate, list_all_licenses @@ -306,11 +307,11 @@ def main(): # pylint: disable=too-many-branches, too-many-statements elif args.command == "clearlydefined": # ClearlyDefined conversion if args.clearlydefined_command == "convert": - print(purl_to_cd_coordinates(purl=args.purl)) + print(purl2clearlydefined(purl=args.purl)) elif args.clearlydefined_command == "fetch": if args.purl: - coordinates = purl_to_cd_coordinates(purl=args.purl) + coordinates = purl2clearlydefined(purl=args.purl) else: coordinates = args.coordinates diff --git a/poetry.lock b/poetry.lock index 3835bec..8e707da 100644 --- a/poetry.lock +++ b/poetry.lock @@ -551,13 +551,13 @@ dev = ["deepdiff (==7.0.1)", "dlint (==0.14.1)", "flake8 (==7.0.0)", "flake8-202 [[package]] name = "packageurl-python" -version = "0.15.6" +version = "0.16.0" description = "A purl aka. Package URL parser and builder" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packageurl_python-0.15.6-py3-none-any.whl", hash = "sha256:a40210652c89022772a6c8340d6066f7d5dc67132141e5284a4db7a27d0a8ab0"}, - {file = "packageurl_python-0.15.6.tar.gz", hash = "sha256:cbc89afd15d5f4d05db4f1b61297e5b97a43f61f28799f6d282aff467ed2ee96"}, + {file = "packageurl_python-0.16.0-py3-none-any.whl", hash = "sha256:5c3872638b177b0f1cf01c3673017b7b27ebee485693ae12a8bed70fa7fa7c35"}, + {file = "packageurl_python-0.16.0.tar.gz", hash = "sha256:69e3bf8a3932fe9c2400f56aaeb9f86911ecee2f9398dbe1b58ec34340be365d"}, ] [package.extras] @@ -618,6 +618,21 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "purl-tools" +version = "0.1.0" +description = "A small library that supports with various tasks around Package URLs" +optional = false +python-versions = "<4.0,>=3.10" +files = [ + {file = "purl_tools-0.1.0-py3-none-any.whl", hash = "sha256:a06079d97b19a5de936fe4a4109ecb62e12b97304ee45e55efc69e2f0cc37015"}, + {file = "purl_tools-0.1.0.tar.gz", hash = "sha256:c14960a2aec96d21387edc0934e3288c632f2bc86d11338f7a82ec968a172ec8"}, +] + +[package.dependencies] +packageurl-python = ">=0.16.0,<0.17.0" +requests = ">=2.31.0,<3.0.0" + [[package]] name = "pydantic" version = "2.9.2" @@ -1193,4 +1208,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "caee76519ccd0c029043f491879debb6c0bde46d1fccf44a4e56c6b88315244d" +content-hash = "2252d8320e0eaa4cde01a8d163ae1d2960c642230b239d9bc8a20f295fb19558" diff --git a/pyproject.toml b/pyproject.toml index b3057aa..c9de4a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,11 +34,11 @@ compliance-assistant = 'complassist.main:main' [tool.poetry.dependencies] python = "^3.10" -packageurl-python = "^0.15.1" requests = "^2.32.3" flict = "^1.2.14" docker = "^7.1.0" license-expression = "^30.3.0" +purl-tools = "^0.1.0" [tool.poetry.group.dev.dependencies]