From 9a2517c2e1e848039a00b31099a68319c2ea36da Mon Sep 17 00:00:00 2001 From: Andrian Sevastyanov Date: Thu, 31 Oct 2024 12:18:42 -0600 Subject: [PATCH 1/5] Fix for PIP Inspector not working when pkg_resources package is not available in newer Python versions --- src/main/resources/pip-inspector.py | 81 ++++++++++++++++++----------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/src/main/resources/pip-inspector.py b/src/main/resources/pip-inspector.py index de66c4b7d3..681f31ecf4 100644 --- a/src/main/resources/pip-inspector.py +++ b/src/main/resources/pip-inspector.py @@ -1,6 +1,6 @@ # pylint: disable=fixme, line-too-long, import-error, no-name-in-module # -# Copyright (c) 2020 Synopsys, Inc. +# Copyright (c) 2024 Black Duck Software Inc. # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -34,8 +34,7 @@ from getopt import getopt, GetoptError from os import path import sys -from re import split -from pkg_resources import working_set, Requirement +from re import split, match, IGNORECASE import pip pip_major_version = int(pip.__version__.split(".")[0]) @@ -128,43 +127,65 @@ def populate_dependency_tree(project_root_node, requirements_path): def recursively_resolve_dependencies(package_name, history): """Forms a DependencyNode by recursively resolving its dependencies. Tracks history for cyclic dependencies.""" - package = get_package_by_name(package_name) + dependency_node, child_names = get_package_by_name(package_name) - if package is None: + if dependency_node is None: return None - dependency_node = DependencyNode(package.project_name, package.version) - - if package_name.lower() not in history: - history.append(package_name.lower()) - for package_dependency in package.requires(): - child_node = recursively_resolve_dependencies(package_dependency.key, history) + if dependency_node.name not in history: + for child_name in child_names: + child_node = recursively_resolve_dependencies(child_name, history + [dependency_node.name]) if child_node is not None: dependency_node.children = dependency_node.children + [child_node] return dependency_node +try: # attempt to import and rely on importlib.metadata which has been available since Python 3.8 + import importlib.metadata -def get_package_by_name(package_name): - """Looks up a package from the pip cache""" - if package_name is None: - return None - - package_dict = working_set.by_key - try: - # TODO: By using pkg_resources.Requirement.parse to get the correct key, we may not need to attempt the other - # methods. Robust tests are needed to confirm. - return package_dict[Requirement.parse(package_name).key] - except: - pass - - name_variants = (package_name, package_name.lower(), package_name.replace('-', '_'), package_name.replace('_', '-')) - for name_variant in name_variants: - if name_variant in package_dict: - return package_dict[name_variant] - - return None + def get_package_by_name(package_name): + if package_name is None: + return None, None + try: + metadata = importlib.metadata.metadata(package_name) + except importlib.metadata.PackageNotFoundError: + return None, None + + dependency_node = DependencyNode(metadata["Name"], metadata["Version"]) + + requirement_names = [] + requirements = importlib.metadata.requires(dependency_node.name) + if requirements is not None: + for requirement in requirements: + requirement_name_match_result = match("([A-Z0-9][A-Z0-9._-]*[A-Z0-9]|[A-Z0-9])", requirement, IGNORECASE) + if requirement_name_match_result is not None: + requirement_names.append(requirement_name_match_result[0]) + return dependency_node, requirement_names +except ImportError: # fall back to using deprecated pkg_resources when the newer library is not available + from pkg_resources import working_set, Requirement + + def get_package_by_name(package_name): + """Looks up a package from the pip cache""" + if package_name is None: + return None, None + + package = None + + package_dict = working_set.by_key + try: + # TODO: By using pkg_resources.Requirement.parse to get the correct key, we may not need to attempt the other + # methods. Robust tests are needed to confirm. + package = package_dict[Requirement.parse(package_name).key] + except: + name_variants = (package_name, package_name.lower(), package_name.replace('-', '_'), package_name.replace('_', '-')) + for name_variant in name_variants: + if name_variant in package_dict: + return package_dict[name_variant] + + if package is None: + return None, None + return DependencyNode(package.project_name, package.version), [requirement.key for requirement in package.requires()] class DependencyNode(object): """Represents a python dependency in a tree graph with a name, version, and array of children DependencyNodes""" From 1b7f8692939599777236329786b6ad3e21bf8746 Mon Sep 17 00:00:00 2001 From: Andrian Sevastyanov Date: Thu, 31 Oct 2024 13:31:01 -0600 Subject: [PATCH 2/5] Strip whitespace from package name before attempting lookup --- src/main/resources/pip-inspector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/pip-inspector.py b/src/main/resources/pip-inspector.py index 681f31ecf4..b4a7972948 100644 --- a/src/main/resources/pip-inspector.py +++ b/src/main/resources/pip-inspector.py @@ -148,7 +148,7 @@ def get_package_by_name(package_name): return None, None try: - metadata = importlib.metadata.metadata(package_name) + metadata = importlib.metadata.metadata(package_name.strip()) except importlib.metadata.PackageNotFoundError: return None, None From bdd4c379c36ca91e7aba6787ac9d01d5525c93e0 Mon Sep 17 00:00:00 2001 From: Andrian Sevastyanov Date: Thu, 31 Oct 2024 13:58:49 -0600 Subject: [PATCH 3/5] Uniquify requirement list per package --- src/main/resources/pip-inspector.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/resources/pip-inspector.py b/src/main/resources/pip-inspector.py index b4a7972948..ad2ec7f70e 100644 --- a/src/main/resources/pip-inspector.py +++ b/src/main/resources/pip-inspector.py @@ -133,8 +133,9 @@ def recursively_resolve_dependencies(package_name, history): return None if dependency_node.name not in history: + history.append(dependency_node.name) for child_name in child_names: - child_node = recursively_resolve_dependencies(child_name, history + [dependency_node.name]) + child_node = recursively_resolve_dependencies(child_name, history) if child_node is not None: dependency_node.children = dependency_node.children + [child_node] @@ -161,7 +162,7 @@ def get_package_by_name(package_name): requirement_name_match_result = match("([A-Z0-9][A-Z0-9._-]*[A-Z0-9]|[A-Z0-9])", requirement, IGNORECASE) if requirement_name_match_result is not None: requirement_names.append(requirement_name_match_result[0]) - return dependency_node, requirement_names + return dependency_node, set(requirement_names) except ImportError: # fall back to using deprecated pkg_resources when the newer library is not available from pkg_resources import working_set, Requirement From 20aa46974f3686e52a5a93d8be7d084ccd79e541 Mon Sep 17 00:00:00 2001 From: Andrian Sevastyanov Date: Tue, 5 Nov 2024 15:11:36 -0700 Subject: [PATCH 4/5] Use pip internal search_packages_info to look up packages when possible --- src/main/resources/pip-inspector.py | 38 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/main/resources/pip-inspector.py b/src/main/resources/pip-inspector.py index ad2ec7f70e..ae4b0f11c1 100644 --- a/src/main/resources/pip-inspector.py +++ b/src/main/resources/pip-inspector.py @@ -34,7 +34,7 @@ from getopt import getopt, GetoptError from os import path import sys -from re import split, match, IGNORECASE +from re import split import pip pip_major_version = int(pip.__version__.split(".")[0]) @@ -141,29 +141,33 @@ def recursively_resolve_dependencies(package_name, history): return dependency_node -try: # attempt to import and rely on importlib.metadata which has been available since Python 3.8 - import importlib.metadata +use_pip_internal_to_search_packages = True +try: + from pip._internal.commands.show import search_packages_info +except ImportError: + try: + from pip.commands.show import search_packages_info + except ImportError: + use_pip_internal_to_search_packages = False + +if use_pip_internal_to_search_packages: def get_package_by_name(package_name): if package_name is None: return None, None - try: - metadata = importlib.metadata.metadata(package_name.strip()) - except importlib.metadata.PackageNotFoundError: + package_info = None + + for p in search_packages_info([package_name.strip()]): + package_info = p + + if package_info is None: return None, None - dependency_node = DependencyNode(metadata["Name"], metadata["Version"]) - - requirement_names = [] - requirements = importlib.metadata.requires(dependency_node.name) - if requirements is not None: - for requirement in requirements: - requirement_name_match_result = match("([A-Z0-9][A-Z0-9._-]*[A-Z0-9]|[A-Z0-9])", requirement, IGNORECASE) - if requirement_name_match_result is not None: - requirement_names.append(requirement_name_match_result[0]) - return dependency_node, set(requirement_names) -except ImportError: # fall back to using deprecated pkg_resources when the newer library is not available + if type(package_info) == dict: # prior to pip 21.2 search_packages_info results were dicts + return DependencyNode(package_info["name"], package_info["version"]), package_info["requires"] + return DependencyNode(package_info.name, package_info.version), package_info.requires +else: from pkg_resources import working_set, Requirement def get_package_by_name(package_name): From a196942b68553e522bf2cfd3562cdd76d031dafb Mon Sep 17 00:00:00 2001 From: Andrian Sevastyanov Date: Thu, 7 Nov 2024 14:51:28 -0700 Subject: [PATCH 5/5] Minor refactor --- src/main/resources/pip-inspector.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/main/resources/pip-inspector.py b/src/main/resources/pip-inspector.py index ae4b0f11c1..8a9cb5f99a 100644 --- a/src/main/resources/pip-inspector.py +++ b/src/main/resources/pip-inspector.py @@ -96,9 +96,6 @@ def resolve_project_node(project_name): def populate_dependency_tree(project_root_node, requirements_path): """Resolves the dependencies of the user-provided requirements.txt and appends them to the dependency tree""" try: - # This line is pretty much the only reason why we call the internal pip APIs anymore. We should consider if we - # can do this with a more generalized approach. - # --rotte DEC 2020 parsed_requirements = parse_requirements(requirements_path, session=PipSession()) for parsed_requirement in parsed_requirements: package_name = None @@ -156,10 +153,7 @@ def get_package_by_name(package_name): if package_name is None: return None, None - package_info = None - - for p in search_packages_info([package_name.strip()]): - package_info = p + package_info = next(search_packages_info([package_name.strip()]), None) if package_info is None: return None, None