Skip to content

Commit

Permalink
make clearlydefined batch size configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmehl committed Aug 14, 2024
1 parent 9d55bb6 commit 5c5ac71
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
9 changes: 4 additions & 5 deletions complassist/_sbom_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def _update_sbom_metadata(sbom: dict) -> dict:


def enrich_sbom_with_clearlydefined(
sbom_file: str, output_file: str, in_batches: bool = True
sbom_file: str, output_file: str, in_batches: bool = True, batch_size: int = 15
) -> None:
"""
Parse a SBOM and enrich license/copyright data of each component with
Expand All @@ -253,6 +253,7 @@ def enrich_sbom_with_clearlydefined(
sbom_file (str): Path to the input SBOM file.
output_file (str): Path to save the enriched SBOM.
in_batches (bool): Ask ClearlyDefined API for multiple packages at once.
batch_size (int): Number of packages per batch request to ClearlyDefined.
"""

sbom: dict[str, list[dict]] = read_json_file(sbom_file)
Expand All @@ -263,11 +264,9 @@ def enrich_sbom_with_clearlydefined(
c["purl"] for c in extract_items_from_cdx_sbom(sbom_file, information=["purl"])
]
if in_batches:

# Split all purls in batches of `max_components` size
max_components = 10
# Split all purls into batches of at most `batch_size` items
purls_batches: list[list[str]] = [
all_purls[x : x + max_components] for x in range(0, len(all_purls), max_components)
all_purls[x : x + batch_size] for x in range(0, len(all_purls), batch_size)
]
for batch in purls_batches:
logging.info("Getting ClearlyDefined data for %s", ", ".join(batch))
Expand Down
14 changes: 13 additions & 1 deletion complassist/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
parser_sbom_enrich = subparser_sbom.add_parser(
"enrich",
help="Enrich a CycloneDX SBOM and its licensing/copyright data via ClearlyDefined",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
parents=[common_flags],
)
parser_sbom_enrich.add_argument(
Expand All @@ -85,6 +86,12 @@
help="Path where the enriched SBOM shall be saved. Use '-' to print it to stdout.",
required=True,
)
parser_sbom_enrich.add_argument(
"--batch-size",
help="The number of packages to request information for from ClearlyDefined at once.",
default=25,
type=int,
)
parser_sbom_enrich.add_argument(
"--no-batches",
help=(
Expand Down Expand Up @@ -258,7 +265,12 @@ def main(): # pylint: disable=too-many-branches, too-many-statements

# Enrich SBOM by ClearlyDefined data
elif args.sbom_command == "enrich":
enrich_sbom_with_clearlydefined(args.file, args.output, not args.no_batches)
enrich_sbom_with_clearlydefined(
sbom_file=args.file,
output_file=args.output,
in_batches=not args.no_batches,
batch_size=args.batch_size,
)

# Parse info from SBOM
elif args.sbom_command == "parse":
Expand Down

0 comments on commit 5c5ac71

Please sign in to comment.