Skip to content

Commit

Permalink
Merge pull request #11 from OpenRailAssociation/license-compliance
Browse files Browse the repository at this point in the history
Add commands to help with license compliance
  • Loading branch information
mxmehl authored Aug 13, 2024
2 parents 11f9e2e + 6b9a947 commit 04cb7af
Show file tree
Hide file tree
Showing 9 changed files with 274 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4
- uses: ./.github/actions/poetrybuild
- name: Lint with pylint
run: poetry run pylint complassist/
run: poetry run pylint --disable=fixme complassist/

formatting:
runs-on: ubuntu-22.04
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ SPDX-License-Identifier: Apache-2.0
- **SBOM Enrichment**: Enhance an existing SBOM with detailed licensing and copyright information using ClearlyDefined data.
- **SBOM Parsing**: Extract specific information from a CycloneDX SBOM.
- **License and Copyright Information Retrieval**: Fetch licensing and copyright details for a single package from ClearlyDefined.
- **License compliance support**: Extract and unify the licenses found in an SBOM, and suggest possible outbound license candidates.

Some of these features are made possible by excellent programs such as [flict](https://github.com/vinland-technology/flict) and [cdxgen](https://github.com/CycloneDX/cdxgen).

## Requirements

Expand Down Expand Up @@ -110,6 +112,7 @@ For each command, you can get detailed options, e.g. `compliance-assistant sbom-
* Enrich an SBOM with ClearlyDefined data: `compliance-assistant sbom-enrich -f /tmp/my-sbom.json -o /tmp/my-enriched-sbom.json`
* Extract certain data from an SBOM: `compliance-assistant sbom-parse -f /tmp/my-enriched-sbom.json -e purl,copyright,name`
* Gather ClearlyDefined licensing/copyright information for one package: `compliance-assistant clearlydefined -p pkg:pypi/inwx-dns-recordmaster@0.3.1`
* Get outbound license candidates based on the licenses found in an SBOM: `compliance-assistant licensing outbound -f /tmp/my-enriched-sbom.json`

### Run as GitHub workflow

Expand Down
54 changes: 46 additions & 8 deletions complassist/_flict.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,62 @@

# We need to run flict as a subprocess because using it as a library is too complicated
def _run_flict(
    command: str,
    *arguments,
    options: list | None = None,
    warn_on_error: bool = True,
) -> tuple[int, str, str]:
    """
    Run flict with a command (e.g. 'verify') and a list of arguments
    (e.g. '-il', 'GPL-2.0-only', '-ol', 'MIT'), and a list of general options (e.g. ["-ip"])

    A non-zero exit code of flict does not raise. If `warn_on_error` is set, the
    exit code and stderr are logged as a warning; the output is returned to the
    caller in any case.

    Return: exit code, stdout, stderr
    """
    if options is None:
        options = []
    # General options are placed before the command, its arguments after it
    cmd = ["flict", *options, command, *arguments]
    logging.debug("Running flict: %s", cmd)
    # check=False: the caller receives the exit code instead of an exception
    ret = subprocess.run(cmd, capture_output=True, check=False)
    code = ret.returncode
    stderr = ret.stderr.decode("UTF-8").strip()
    stdout = ret.stdout.decode("UTF-8").strip()
    # If only warning requested, only log error, return normal output
    if code != 0 and warn_on_error:
        logging.warning(
            "flict exited with an error (%s): %s",
            code,
            stderr,
        )

    return code, stdout, stderr


def flict_simplify(expression: str, output_format: str, no_relicensing: bool = True) -> str:
    """Simplify a license expression using flict

    `output_format` is handed to flict's `-of` option. When `no_relicensing` is
    set, flict's `-nr` option is added as well.

    Return: the stdout of the flict run, i.e. the simplified expression
    """
    options = ["-of", output_format]
    if no_relicensing:
        options.append("-nr")
    # Exit code and stderr are not needed here, _run_flict already logs failures
    _, simplified, _ = _run_flict("simplify", expression, options=options)

    logging.debug("Simplified '%s' to '%s' using flict", expression, simplified)

    return simplified


def flict_simplify_list(expressions: list[str]) -> list[str]:
    """Simplify a list of license expressions

    Each expression is simplified on its own using flict (text output).
    Duplicates among the results are removed, and the result is sorted so that
    its order is stable between runs.

    Return: sorted list of unique simplified expressions
    """
    simplified: set[str] = set()
    for lic in expressions:
        # An empty result cannot be a license expression (presumably flict
        # failed on it), so keep the original expression instead of adding an
        # empty string to the result
        simplified.add(flict_simplify(lic, output_format="text") or lic)

    return sorted(simplified)


def flict_outbound_candidate(expression: str, output_format: str) -> str:
    """Ask flict for possible outbound license candidates of an expression"""
    # TODO: `-el` would make this command more helpful but it has an error:
    # https://github.com/vinland-technology/flict/issues/391
    flict_options = ["-nr", "-of", output_format]
    # Only the second element (stdout) of the flict result is of interest here
    _code, candidates, _errors = _run_flict(
        "outbound-candidate", expression, options=flict_options
    )
    return candidates
110 changes: 110 additions & 0 deletions complassist/_licensing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# SPDX-FileCopyrightText: 2024 DB Systel GmbH
#
# SPDX-License-Identifier: Apache-2.0

"""Open Source License Compliance helpers"""

import logging

from license_expression import ExpressionError, Licensing, get_spdx_licensing

from ._flict import flict_outbound_candidate, flict_simplify, flict_simplify_list
from ._sbom_parse import extract_items_from_cdx_sbom


def _extract_license_expression_and_names_from_sbom(
    sbom_path: str, use_flict: bool = False
) -> tuple[list[str], list[str]]:
    """Extract all SPDX expressions and license names from an SBOM

    For every entry in an item's "licenses-short" data, the SPDX expression is
    collected if there is one, otherwise the license name. `use_flict` is
    passed on to the SBOM extraction and additionally triggers a flict
    simplification of the collected expressions.

    Return: unique and sorted expressions, unique and sorted names
    """
    lic_expressions: set[str] = set()
    lic_names: set[str] = set()

    for item in extract_items_from_cdx_sbom(
        sbom_path, information=["name", "purl", "licenses-short"], use_flict=use_flict
    ):
        licenses_short: list[dict] = item.get("licenses-short", [])

        for entry in licenses_short:
            if lic_expression := entry.get("expression", ""):
                lic_expressions.add(lic_expression)
            # Use license name instead
            elif lic_name := entry.get("license", {}).get("name", ""):
                lic_names.add(lic_name)

    # Make expressions and names unique, and sort them
    expressions = sorted(lic_expressions)
    # If using flict, simplify these found licenses. Will reduce possible
    # duplicates and fix problematic SPDX expressions (e.g. MPL-2.0+)
    # That's far more performant than doing that for each license in the SBOM
    if use_flict:
        # Sort again, the simplification does not guarantee to keep the order
        expressions = sorted(flict_simplify_list(expressions))
    names = sorted(lic_names)

    return expressions, names


def list_all_licenses(sbom_path: str, use_flict: bool = False) -> list[str]:
    """Return every license detected in an SBOM as one deduplicated, sorted list"""
    found = _extract_license_expression_and_names_from_sbom(sbom_path, use_flict)
    spdx_expressions, license_names = found

    # Merge SPDX expressions and plain names; the set drops entries present in both
    merged = set(spdx_expressions)
    merged.update(license_names)

    return sorted(merged)


def _validate_spdx_licenses(licenses: list[str]) -> list[str]:
    """Filter a list of licenses down to those that parse as valid SPDX.
    Entries that fail to parse are dropped and reported as an error in the log"""
    licensing: Licensing = get_spdx_licensing()
    accepted: list[str] = []

    for candidate in licenses:
        try:
            licensing.parse(candidate, validate=True)
        except ExpressionError as err:
            logging.error(
                "The license expression/name '%s' found in the given SBOM is no valid SPDX "
                "expression. Therefore, it cannot be taken into consideration for the evaluation. "
                "Error message: %s",
                candidate,
                err,
            )
        else:
            # Parsing succeeded, keep the entry unchanged
            accepted.append(candidate)

    return accepted


def _craft_single_spdx_expression(licenses: list[str]):
"""Convert multiple SPDX licenses and expressions into one large expression"""
# Put all licenses into brackets
licenses = [f"({lic})" for lic in licenses]

return " AND ".join(licenses)


def get_outbound_candidate(sbom_path: str, simplify: bool = True) -> dict[str, str | list[str]]:
    """Get license outbound candidates from an SBOM

    The licenses found in the SBOM are validated as SPDX, combined into one
    expression (simplified with flict if `simplify` is set), and handed to
    flict to calculate outbound candidates. If no valid license remains, flict
    is not run and the expression and candidate are returned as empty strings.

    Return: dict with the keys "licenses_in_sbom", "considered_licenses",
    "checked_expression" and "outbound_candidate"
    """
    logging.info("Extracting, simplifying and validating found licenses. This can take a while")
    licenses_in_sbom = list_all_licenses(sbom_path, use_flict=simplify)

    # Check whether all licenses are valid SPDX expressions
    licenses = _validate_spdx_licenses(licenses_in_sbom)

    # Without any valid license there is nothing flict could evaluate, and
    # running it with an empty expression would only produce an error
    if not licenses:
        logging.warning(
            "No valid SPDX license expression found in the given SBOM. "
            "Cannot calculate outbound candidates"
        )
        return {
            "licenses_in_sbom": licenses_in_sbom,
            "considered_licenses": licenses,
            "checked_expression": "",
            "outbound_candidate": "",
        }

    # Combine single licenses into one large SPDX license expression
    expression = _craft_single_spdx_expression(licenses)
    if simplify:
        logging.debug("Simplify crafted license expression %s", expression)
        expression = flict_simplify(expression, output_format="text")
        logging.debug("Simplified licenses expression: %s", expression)

    # Get outbound candidate
    logging.info("Calculating possible outbound candidates")
    outbound_candidate: str = flict_outbound_candidate(expression, output_format="text")

    return {
        "licenses_in_sbom": licenses_in_sbom,
        "considered_licenses": licenses,
        "checked_expression": expression,
        "outbound_candidate": outbound_candidate,
    }
2 changes: 1 addition & 1 deletion complassist/_sbom_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _enrich_component_with_cd_data(component: dict) -> None:
"""
# Get purl, original licenses, and short/simplified licenses data from component
raw_data = extract_items_from_component(
component, ["purl", "licenses", "licenses-short", "copyright"], True
component, ["purl", "licenses", "licenses-short", "copyright"], use_flict=True
)
# Put raw data into separate variables, slightly adapted
purl = raw_data["purl"]
Expand Down
13 changes: 7 additions & 6 deletions complassist/_sbom_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from ._helpers import read_json_file


def _simplify_licenses_data(licenses_data: list[dict], use_flict: bool = True) -> list[dict]:
"""Simplify a list of license ids/expressions/names to a single string,
def _unify_licenses_data(licenses_data: list[dict], use_flict: bool = True) -> list[dict]:
"""Convert a list of license ids/expressions/names to a single string,
either an expression or a name"""

# Case 1: no data
Expand Down Expand Up @@ -92,15 +92,16 @@ def _shorten_cdx_licenses_item(licenses: list, use_flict: bool = True) -> list:
licdata,
)

simplified_license_data = _simplify_licenses_data(collection, use_flict=use_flict)
simplified_license_data = _unify_licenses_data(collection, use_flict=use_flict)
return _license_short_to_valid_cdx_item(simplified_license_data)


def extract_items_from_component(component: dict, items: list, use_flict: bool) -> dict:
"""Extract certain items from a single component of a CycloneDX SBOM"""
logging.debug(
"Handling component: purl = %s, name = %s", component.get("purl"), component.get("name")
)
# Very noisy logging, disabled
# logging.debug(
# "Handling component: purl = %s, name = %s", component.get("purl"), component.get("name")
# )
extraction = {}
# Loop requested data points for extraction
for item in items:
Expand Down
Loading

0 comments on commit 04cb7af

Please sign in to comment.