def format_full_version(info):
    """Render a version-info object as a dotted version string.

    ``info`` must expose ``major``, ``minor``, ``micro``, ``releaselevel``
    and ``serial`` attributes (as ``sys.version_info`` and
    ``sys.implementation.version`` do).

    A ``"final"`` release yields ``"X.Y.Z"``. Any other release level appends
    the first letter of the level followed by the serial, e.g. ``"3.13.0a2"``.
    """
    dotted = ".".join(str(part) for part in (info.major, info.minor, info.micro))
    level = info.releaselevel
    if level == "final":
        return dotted
    # Pre-releases are tagged with the level's initial ("a", "b", "c", ...).
    return dotted + level[0] + str(info.serial)
def _running_under_venv() -> bool:
    """Report whether ``sys.prefix`` differs from ``sys.base_prefix``.

    This detects PEP 405 compliant virtual environments. When the interpreter
    has no ``sys.base_prefix`` attribute the answer is ``False``.
    """
    base_prefix = getattr(sys, "base_prefix", sys.prefix)
    return base_prefix != sys.prefix


def _running_under_legacy_virtualenv() -> bool:
    """Report whether ``sys.real_prefix`` exists.

    That attribute is set in environments created with pypa's virtualenv.
    """
    try:
        sys.real_prefix
    except AttributeError:
        return False
    return True


def running_under_virtualenv() -> bool:
    """Return True inside a virtual environment of either kind, else False."""
    if _running_under_venv():
        return True
    return _running_under_legacy_virtualenv()
def get_virtualenv():
    """Return the expected Scheme for virtualenvs created by this interpreter.

    The returned mapping has the keys ``purelib``, ``platlib``, ``include``,
    ``scripts`` and ``data``; every path is relative to a root directory.

    This is based on virtualenv's path discovery logic:
    https://github.com/pypa/virtualenv/blob/5cd543fdf8047600ff2737babec4a635ad74d169/src/virtualenv/discovery/py_info.py#L80C9-L80C17
    """
    # The header directory does not depend on which backend is used.
    include_dir = os.path.join("include", "site", f"python{get_major_minor_version()}")

    def from_sysconfig(scheme_name):
        """Build the relative scheme from unexpanded `sysconfig` templates."""
        import re

        templates = {
            name: sysconfig.get_path(name, expand=False, scheme=scheme_name)
            for name in sysconfig.get_path_names()
        }

        # Collect every configuration variable the templates refer to.
        placeholder = re.compile(r"\{\w+}")
        wanted = {
            token[1:-1]
            for template in templates.values()
            for token in placeholder.findall(template)
        }
        wanted.add("PYTHONFRAMEWORK")

        # Prefix (Python home) and exec prefix (dynamic stdlib modules).
        roots = (
            os.path.abspath(sys.prefix),
            os.path.abspath(sys.exec_prefix),
            os.path.abspath(sys.base_prefix),
            os.path.abspath(sys.base_exec_prefix),
        )

        # Look the variables up, blanking any value that equals one of the
        # prefixes so that the expanded paths come out relative.
        substitutions = {}
        for name in wanted:
            value = sysconfig.get_config_var(name or "")
            substitutions[name] = "" if value in roots else value

        def relative(template):
            expanded = template.format(**substitutions)
            return expanded.replace("/", os.sep).lstrip(os.sep)

        return {
            "purelib": relative(templates["purelib"]),
            "platlib": relative(templates["platlib"]),
            "include": include_dir,
            "scripts": relative(templates["scripts"]),
            "data": relative(templates["data"]),
        }

    def from_distutils():
        """Build the relative scheme from a distutils ``install`` command."""
        # Disable the use of the setuptools shim, if it's injected. Per pip:
        #
        # > If pip's going to use distutils, it should not be using the copy that setuptools
        # > might have injected into the environment. This is done by removing the injected
        # > shim, if it's injected.
        #
        # > See https://github.com/pypa/pip/issues/8761 for the original discussion and
        # > rationale for why this is done within pip.
        try:
            __import__("_distutils_hack").remove_shim()
        except (ImportError, AttributeError):
            pass

        # distutils is used primarily because that's what pip does.
        # https://github.com/pypa/pip/blob/ae5fff36b0aad6e5e0037884927eaa29163c0611/src/pip/_internal/locations/__init__.py#L249
        import warnings

        with warnings.catch_warnings():  # silence the PEP-632 deprecation warning
            warnings.simplefilter("ignore")
            from distutils import dist
            from distutils.command.install import SCHEME_KEYS

        distribution = dist.Distribution({"script_args": "--no-user-cfg"})
        if hasattr(sys, "_framework"):
            sys._framework = None

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            install_cmd = distribution.get_command_obj("install", create=True)

        # Install under the filesystem root, then strip that root off again.
        install_cmd.prefix = os.sep
        install_cmd.finalize_options()
        stripped = {}
        for key in SCHEME_KEYS:
            installed = getattr(install_cmd, f"install_{key}")
            stripped[key] = installed[1:].lstrip(os.sep)

        return {
            "purelib": stripped["purelib"],
            "platlib": stripped["platlib"],
            "include": include_dir,
            "scripts": stripped["scripts"],
            "data": stripped["data"],
        }

    available = sysconfig.get_scheme_names()
    if "venv" in available:
        return from_sysconfig("venv")
    if sys.version_info[:2] == (3, 10) and "deb_system" in available:
        # debian / ubuntu python 3.10 without `python3-distutils` reports
        # mangled `local/bin` / etc. names for the default prefix, so
        # deliberately pick `posix_prefix`, the unaltered posix-like paths.
        return from_sysconfig("posix_prefix")
    # No usable sysconfig scheme: fall back to distutils.
    return from_distutils()
These + # functions try to answer that with some heuristics while accounting for ad-hoc + # platforms not covered by CPython's default sysconfig implementation. If the + # ad-hoc implementation does not fully implement sysconfig, we'll fall back to + # a POSIX scheme. + + _AVAILABLE_SCHEMES = set(sysconfig.get_scheme_names()) + + _PREFERRED_SCHEME_API = getattr(sysconfig, "get_preferred_scheme", None) + + def _should_use_osx_framework_prefix() -> bool: + """Check for Apple's ``osx_framework_library`` scheme. + + Python distributed by Apple's Command Line Tools has this special scheme + that's used when: + + * This is a framework build. + * We are installing into the system prefix. + + This does not account for ``pip install --prefix`` (also means we're not + installing to the system prefix), which should use ``posix_prefix``, but + logic here means ``_infer_prefix()`` outputs ``osx_framework_library``. But + since ``prefix`` is not available for ``sysconfig.get_default_scheme()``, + which is the stdlib replacement for ``_infer_prefix()``, presumably Apple + wouldn't be able to magically switch between ``osx_framework_library`` and + ``posix_prefix``. ``_infer_prefix()`` returning ``osx_framework_library`` + means its behavior is consistent whether we use the stdlib implementation + or our own, and we deal with this special case in ``get_scheme()`` instead. + """ + return ( + "osx_framework_library" in _AVAILABLE_SCHEMES + and not running_under_virtualenv() + and is_osx_framework() + ) + + def _infer_prefix() -> str: + """Try to find a prefix scheme for the current platform. + + This tries: + + * A special ``osx_framework_library`` for Python distributed by Apple's + Command Line Tools, when not running in a virtual environment. + * Implementation + OS, used by PyPy on Windows (``pypy_nt``). + * Implementation without OS, used by PyPy on POSIX (``pypy``). + * OS + "prefix", used by CPython on POSIX (``posix_prefix``). 
+ * Just the OS name, used by CPython on Windows (``nt``). + + If none of the above works, fall back to ``posix_prefix``. + """ + if _PREFERRED_SCHEME_API: + return _PREFERRED_SCHEME_API("prefix") + if _should_use_osx_framework_prefix(): + return "osx_framework_library" + implementation_suffixed = f"{sys.implementation.name}_{os.name}" + if implementation_suffixed in _AVAILABLE_SCHEMES: + return implementation_suffixed + if sys.implementation.name in _AVAILABLE_SCHEMES: + return sys.implementation.name + suffixed = f"{os.name}_prefix" + if suffixed in _AVAILABLE_SCHEMES: + return suffixed + if os.name in _AVAILABLE_SCHEMES: # On Windows, prefx is just called "nt". + return os.name + return "posix_prefix" + + scheme_name = _infer_prefix() + paths = sysconfig.get_paths(scheme=scheme_name) + + # Logic here is very arbitrary, we're doing it for compatibility, don't ask. + # 1. Pip historically uses a special header path in virtual environments. + if running_under_virtualenv(): + python_xy = f"python{get_major_minor_version()}" + paths["include"] = os.path.join(sys.prefix, "include", "site", python_xy) + + return { + "platlib": paths["platlib"], + "purelib": paths["purelib"], + "include": paths["include"], + "scripts": paths["scripts"], + "data": paths["data"], + } + + def get_distutils_scheme(): + """Get the "scheme" corresponding to the input parameters. + + Uses the deprecated `distutils` module to get the scheme. + + Based on (with default arguments): + https://github.com/pypa/pip/blob/ae5fff36b0aad6e5e0037884927eaa29163c0611/src/pip/_internal/locations/_distutils.py#L115 + """ + # Disable the use of the setuptools shim, if it's injected. Per pip: + # + # > If pip's going to use distutils, it should not be using the copy that setuptools + # > might have injected into the environment. This is done by removing the injected + # > shim, if it's injected. 
+ # + # > See https://github.com/pypa/pip/issues/8761 for the original discussion and + # > rationale for why this is done within pip. + try: + __import__("_distutils_hack").remove_shim() + except (ImportError, AttributeError): + pass + + import warnings + + with warnings.catch_warnings(): # disable warning for PEP-632 + warnings.simplefilter("ignore") + from distutils.dist import Distribution + + dist_args = {} + + d = Distribution(dist_args) + try: + d.parse_config_files() + except UnicodeDecodeError: + pass + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + i = d.get_command_obj("install", create=True) + + i.finalize_options() + + scheme = {} + for key in ("purelib", "platlib", "headers", "scripts", "data"): + scheme[key] = getattr(i, "install_" + key) + + # install_lib specified in setup.cfg should install *everything* + # into there (i.e. it takes precedence over both purelib and + # platlib). Note, i.install_lib is *always* set after + # finalize_options(); we only want to override here if the user + # has explicitly requested it hence going back to the config + if "install_lib" in d.get_option_dict("install"): + # noinspection PyUnresolvedReferences + scheme.update({"purelib": i.install_lib, "platlib": i.install_lib}) + + if running_under_virtualenv(): + # noinspection PyUnresolvedReferences + scheme["headers"] = os.path.join( + i.prefix, + "include", + "site", + f"python{get_major_minor_version()}", + "UNKNOWN", + ) + + return { + "platlib": scheme["platlib"], + "purelib": scheme["purelib"], + "include": os.path.dirname(scheme["headers"]), + "scripts": scheme["scripts"], + "data": scheme["data"], + } + + # By default, pip uses sysconfig on Python 3.10+. 
def get_operating_system_and_architecture():
    """Determine the Python interpreter architecture and operating system.

    This can differ from uv's architecture and operating system. For example, Apple
    Silicon Macs can run both x86_64 and aarch64 binaries transparently.

    Returns a dict ``{"os": <dict describing the OS>, "arch": <str>}``. On a
    known failure (unsupported Python version, no libc found, unknown
    operating system) an error object is printed as JSON and the process
    exits with status 0.
    """

    def exit_with_error(kind, **details):
        """Print a JSON error object and terminate with status 0."""
        payload = {"result": "error", "kind": kind}
        payload.update(details)
        print(json.dumps(payload))
        sys.exit(0)

    # https://github.com/pypa/packaging/blob/cc938f984bbbe43c5734b9656c9837ab3a28191f/src/packaging/_musllinux.py#L84
    # Note that this is not `os.name`.
    # https://docs.python.org/3/library/sysconfig.html#sysconfig.get_platform
    os_name, separator, version_arch = sysconfig.get_platform().partition("-")
    if not separator and os_name == "win32":
        # windows x86 reports a bare "win32"
        os_name, version_arch = "win", "i386"
    # Any other bare name is kept as-is with an empty architecture; if it is
    # not recognised below it ends in the unknown_operating_system error.

    # "11.2-arm64" (from macosx-11.2-arm64) carries a version; "x86_64"
    # (from linux-x86_64) does not.
    version, dash, architecture = version_arch.rpartition("-")
    if not dash:
        version = None

    if sys.version_info < (3, 7):
        exit_with_error(
            "unsupported_python_version",
            python_version=format_full_version(sys.version_info),
        )

    if os_name == "linux":
        # noinspection PyProtectedMember
        from .packaging._manylinux import _get_glibc_version

        # noinspection PyProtectedMember
        from .packaging._musllinux import _get_musl_version

        musl = _get_musl_version(sys.executable)
        glibc = _get_glibc_version()
        if musl:
            os_info = {"name": "musllinux", "major": musl[0], "minor": musl[1]}
        elif glibc != (-1, -1):
            os_info = {"name": "manylinux", "major": glibc[0], "minor": glibc[1]}
        else:
            exit_with_error("libc_not_found")
    elif os_name == "win":
        os_info = {"name": "windows"}
    elif os_name == "macosx":
        # Apparently, Mac OS is reporting i386 sometimes in sysconfig.get_platform even
        # though that's not a thing anymore.
        # https://github.com/astral-sh/uv/issues/2450
        version, _, architecture = platform.mac_ver()

        # https://github.com/pypa/packaging/blob/cc938f984bbbe43c5734b9656c9837ab3a28191f/src/packaging/tags.py#L356-L363
        if struct.calcsize("P") == 4:  # 32-bit interpreter
            architecture = "ppc" if architecture.startswith("ppc") else "i386"

        parts = version.split(".")
        os_info = {"name": "macos", "major": int(parts[0]), "minor": int(parts[1])}
    elif os_name in ("freebsd", "netbsd", "openbsd", "dragonfly", "illumos", "haiku"):
        os_info = {"name": os_name, "release": version}
    else:
        exit_with_error("unknown_operating_system", operating_system=os_name)

    return {"os": os_info, "arch": architecture}
sys.platform, + } + os_and_arch = get_operating_system_and_architecture() + + manylinux_compatible = True + if os_and_arch["os"]["name"] == "manylinux": + # noinspection PyProtectedMember + from .packaging._manylinux import _get_glibc_version, _is_compatible + + manylinux_compatible = _is_compatible( + arch=os_and_arch["arch"], version=_get_glibc_version() + ) + + interpreter_info = { + "result": "success", + "markers": markers, + "sys_base_prefix": sys.base_prefix, + "sys_base_exec_prefix": sys.base_exec_prefix, + "sys_prefix": sys.prefix, + "sys_base_executable": getattr(sys, "_base_executable", None), + "sys_executable": sys.executable, + "sys_path": sys.path, + "stdlib": sysconfig.get_path("stdlib"), + "sysconfig_prefix": sysconfig.get_config_var("prefix"), + "scheme": get_scheme(), + "virtualenv": get_virtualenv(), + "platform": os_and_arch, + "manylinux_compatible": manylinux_compatible, + # The `t` abiflag for freethreading Python. + # https://peps.python.org/pep-0703/#build-configuration-changes + "gil_disabled": bool(sysconfig.get_config_var("Py_GIL_DISABLED")), + # Determine if the interpreter is 32-bit or 64-bit. + # https://github.com/python/cpython/blob/b228655c227b2ca298a8ffac44d14ce3d22f6faa/Lib/venv/__init__.py#L136 + "pointer_size": "64" if sys.maxsize > 2**32 else "32", + } + print(json.dumps(interpreter_info)) + + +if __name__ == "__main__": + main() diff --git a/tests/python/packaging/LICENSE.APACHE b/tests/python/packaging/LICENSE.APACHE new file mode 100644 index 00000000..f433b1a5 --- /dev/null +++ b/tests/python/packaging/LICENSE.APACHE @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/tests/python/packaging/LICENSE.BSD b/tests/python/packaging/LICENSE.BSD new file mode 100644 index 00000000..42ce7b75 --- /dev/null +++ b/tests/python/packaging/LICENSE.BSD @@ -0,0 +1,23 @@ +Copyright (c) Donald Stufft and individual contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/tests/python/packaging/README.md b/tests/python/packaging/README.md new file mode 100644 index 00000000..e136276c --- /dev/null +++ b/tests/python/packaging/README.md @@ -0,0 +1,6 @@ +# `pypa/packaging` + +This directory contains vendored [pypa/packaging](https://github.com/pypa/packaging) modules as of +[cc938f984bbbe43c5734b9656c9837ab3a28191f](https://github.com/pypa/packaging/tree/cc938f984bbbe43c5734b9656c9837ab3a28191f/src/packaging). + +The files are licensed under BSD-2-Clause OR Apache-2.0. diff --git a/tests/python/packaging/__init__.py b/tests/python/packaging/__init__.py new file mode 100644 index 00000000..658836a9 --- /dev/null +++ b/tests/python/packaging/__init__.py @@ -0,0 +1,15 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "24.1.dev0" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/tests/python/packaging/_elffile.py b/tests/python/packaging/_elffile.py new file mode 100644 index 00000000..f7a02180 --- /dev/null +++ b/tests/python/packaging/_elffile.py @@ -0,0 +1,110 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. 
class ELFInvalid(ValueError):
    """Raised when a file cannot be parsed as an ELF executable."""


class EIClass(enum.IntEnum):
    """Values of the ``EI_CLASS`` identification byte (32- vs 64-bit)."""

    C32 = 1
    C64 = 2


class EIData(enum.IntEnum):
    """Values of the ``EI_DATA`` identification byte (byte order)."""

    Lsb = 1
    Msb = 2


class EMachine(enum.IntEnum):
    """Values of the ``e_machine`` header field that this package inspects."""

    I386 = 3
    S390 = 22
    Arm = 40
    X8664 = 62
    AArc64 = 183


class ELFFile:
    """
    Representation of an ELF executable.

    Only the identification bytes and the header fields needed to find the
    program headers are read eagerly; ``interpreter`` seeks in the file on
    demand, so *f* must stay open (and seekable) while that property is used.

    Attributes set by the constructor:

    * ``capacity`` -- raw ``EI_CLASS`` byte (compare with ``EIClass``).
    * ``encoding`` -- raw ``EI_DATA`` byte (compare with ``EIData``).
    * ``machine``  -- raw ``e_machine`` value (compare with ``EMachine``).
    * ``flags``    -- processor-specific ``e_flags`` value.
    """

    def __init__(self, f: IO[bytes]) -> None:
        """
        Parse the ELF header from the binary file object *f*.

        :raises ELFInvalid: if the identification bytes are truncated, the
            magic number is wrong, the class/encoding pair is unknown, or the
            remainder of the header is truncated.
        """
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # Format for program header (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format of the ELF header fields that follow e_ident.
            # p_fmt: Format of one program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz in an
            #        unpacked program header entry (the field order differs
            #        between the 32-bit and 64-bit layouts).
            # The explicit "<" / ">" prefixes select the byte order declared
            # by the file and disable native alignment padding.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of program header.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> tuple[int, ...]:
        """
        Read exactly ``struct.calcsize(fmt)`` bytes from the current file
        position and unpack them according to *fmt*.

        :raises struct.error: if fewer bytes than required are available.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> str | None:
        """
        The path recorded in the ``PT_INTERP`` program header, or ``None``
        when no such entry can be read.
        """
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                # Truncated entry: skip it and try the next one.
                continue
            if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
                continue
            self._f.seek(data[self._p_idx[1]])
            return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
        return None
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
    """Yield an ``ELFFile`` for *path*, or ``None`` if it cannot be parsed.

    The underlying file is kept open for the duration of the ``with`` body
    and closed afterwards.
    """
    # Only opening and parsing are guarded. The ``yield`` sits outside the
    # ``try`` so that an exception raised by the caller's ``with`` body
    # propagates unchanged instead of being caught here and answered with a
    # second ``yield`` (which contextlib turns into a RuntimeError).
    with contextlib.ExitStack() as stack:
        try:
            f = stack.enter_context(open(path, "rb"))
            elf = ELFFile(f)
        except (OSError, TypeError, ValueError):
            elf = None
        yield elf


def _is_linux_armhf(executable: str) -> bool:
    """Return True if *executable* is a 32-bit LSB ARM EABI5 hard-float ELF."""
    # hard-float ABI can be detected from the ELF header of the running
    # process
    # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
    with _parse_elf(executable) as f:
        return (
            f is not None
            and f.capacity == EIClass.C32
            and f.encoding == EIData.Lsb
            and f.machine == EMachine.Arm
            and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
            and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
        )


def _is_linux_i686(executable: str) -> bool:
    """Return True if *executable* is a 32-bit LSB i386 ELF."""
    with _parse_elf(executable) as f:
        return (
            f is not None
            and f.capacity == EIClass.C32
            and f.encoding == EIData.Lsb
            and f.machine == EMachine.I386
        )


def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
    """Return True if *executable*'s ABI is usable for manylinux on *archs*.

    For ``armv7l`` and ``i686`` the ELF header of *executable* is inspected;
    for every other architecture the answer depends only on whether one of
    *archs* is in the allow-list below.
    """
    if "armv7l" in archs:
        return _is_linux_armhf(executable)
    if "i686" in archs:
        return _is_linux_i686(executable)
    allowed_archs = {
        "x86_64",
        "aarch64",
        "ppc64",
        "ppc64le",
        "s390x",
        "loongarch64",
        "riscv64",
    }
    return any(arch in allowed_archs for arch in archs)


# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)


class _GLibCVersion(NamedTuple):
    """A glibc ``(major, minor)`` version pair."""

    major: int
    minor: int


def _glibc_version_string_confstr() -> str | None:
    """
    Primary implementation of glibc_version_string using os.confstr.

    Returns the version part (e.g. ``"2.17"``), or ``None`` when
    ``os.confstr`` or ``CS_GNU_LIBC_VERSION`` is unavailable or the value
    does not consist of exactly two whitespace-separated fields.
    """
    # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
    # to be broken or missing. This strategy is used in the standard library
    # platform module.
    # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
    try:
        # Should be a string like "glibc 2.17".
        version_string = os.confstr("CS_GNU_LIBC_VERSION")
        if version_string is None:
            return None
        _, version = version_string.rsplit()
    except (AttributeError, OSError, ValueError):
        # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
        return None
    return version


def _glibc_version_string_ctypes() -> str | None:
    """
    Fallback implementation of glibc_version_string using ctypes.

    Returns a string like ``"2.5"``, or ``None`` when ctypes is unavailable,
    the process namespace cannot be opened, or it does not export
    ``gnu_get_libc_version``.
    """
    try:
        import ctypes
    except ImportError:
        return None

    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
    # manpage says, "If filename is NULL, then the returned handle is for the
    # main program". This way we can let the linker do the work to figure out
    # which libc our process is actually using.
    #
    # We must also handle the special case where the executable is not a
    # dynamically linked executable. This can occur when using musl libc,
    # for example. In this situation, dlopen() will error, leading to an
    # OSError. Interestingly, at least in the case of musl, there is no
    # errno set on the OSError. The single string argument used to construct
    # OSError comes from libc itself and is therefore not portable to
    # hard code here. In any case, failure to call dlopen() means we
    # cannot proceed, so we bail on our attempt.
    try:
        process_namespace = ctypes.CDLL(None)
    except OSError:
        return None

    try:
        gnu_get_libc_version = process_namespace.gnu_get_libc_version
    except AttributeError:
        # Symbol doesn't exist -> therefore, we are not linked to
        # glibc.
        return None

    # Call gnu_get_libc_version, which returns a string like "2.5"
    gnu_get_libc_version.restype = ctypes.c_char_p
    version_str: str = gnu_get_libc_version()
    # py2 / py3 compatibility:
    if not isinstance(version_str, str):
        version_str = version_str.decode("ascii")

    return version_str


def _glibc_version_string() -> str | None:
    """Returns glibc version string, or None if not using glibc."""
    return _glibc_version_string_confstr() or _glibc_version_string_ctypes()


def _parse_glibc_version(version_str: str) -> tuple[int, int]:
    """Parse glibc version.

    We use a regexp instead of str.split because we want to discard any
    random junk that might come after the minor version -- this might happen
    in patched/forked versions of glibc (e.g. Linaro's version of glibc
    uses version strings like "2.20-2014.11"). See gh-3588.

    Returns ``(major, minor)``; emits a ``RuntimeWarning`` and returns
    ``(-1, -1)`` when *version_str* does not start with ``major.minor``.
    """
    m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
    if not m:
        warnings.warn(
            f"Expected glibc version with 2 components major.minor,"
            f" got: {version_str}",
            RuntimeWarning,
        )
        return -1, -1
    return int(m.group("major")), int(m.group("minor"))


@functools.lru_cache()
def _get_glibc_version() -> tuple[int, int]:
    """Return the running glibc's ``(major, minor)``, or ``(-1, -1)``.

    The result is cached for the lifetime of the process.
    """
    version_str = _glibc_version_string()
    if version_str is None:
        return (-1, -1)
    return _parse_glibc_version(version_str)


# From PEP 513, PEP 600
def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
    """Return True if a manylinux tag for glibc *version* applies on *arch*.

    The system glibc must be at least *version*. If an importable
    ``_manylinux`` module exists, its ``manylinux_compatible`` hook (or the
    legacy ``manylinux1/2010/2014_compatible`` attributes) can override the
    answer.
    """
    sys_glibc = _get_glibc_version()
    if sys_glibc < version:
        return False
    # Check for presence of _manylinux module.
    try:
        import _manylinux
    except ImportError:
        return True
    if hasattr(_manylinux, "manylinux_compatible"):
        result = _manylinux.manylinux_compatible(version[0], version[1], arch)
        # A hook result of None means "no opinion": fall back to True.
        if result is not None:
            return bool(result)
        return True
    if version == _GLibCVersion(2, 5):
        if hasattr(_manylinux, "manylinux1_compatible"):
            return bool(_manylinux.manylinux1_compatible)
    if version == _GLibCVersion(2, 12):
        if hasattr(_manylinux, "manylinux2010_compatible"):
            return bool(_manylinux.manylinux2010_compatible)
    if version == _GLibCVersion(2, 17):
        if hasattr(_manylinux, "manylinux2014_compatible"):
            return bool(_manylinux.manylinux2014_compatible)
    return True


_LEGACY_MANYLINUX_MAP = {
    # CentOS 7 w/ glibc 2.17 (PEP 599)
    (2, 17): "manylinux2014",
    # CentOS 6 w/ glibc 2.12 (PEP 571)
    (2, 12): "manylinux2010",
    # CentOS 5 w/ glibc 2.5 (PEP 513)
    (2, 5): "manylinux1",
}


def platform_tags(archs: Sequence[str]) -> Iterator[str]:
    """Generate manylinux tags compatible to the current platform.

    :param archs: Sequence of compatible architectures.
        The first one shall be the closest to the actual architecture and be the part of
        platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
        The ``linux_`` prefix is assumed as a prerequisite for the current platform to
        be manylinux-compatible.

    :returns: An iterator of compatible manylinux tags.
    """
    if not _have_compatible_abi(sys.executable, archs):
        return
    # Oldest glibc to be supported regardless of architecture is (2, 17).
    too_old_glibc2 = _GLibCVersion(2, 16)
    if set(archs) & {"x86_64", "i686"}:
        # On x86/i686 also oldest glibc to be supported is (2, 5).
        too_old_glibc2 = _GLibCVersion(2, 4)
    current_glibc = _GLibCVersion(*_get_glibc_version())
    glibc_max_list = [current_glibc]
    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # Build a list of maximum glibc versions so that we can
    # output the canonical list of all glibc from current_glibc
    # down to too_old_glibc2, including all intermediary versions.
    for glibc_major in range(current_glibc.major - 1, 1, -1):
        glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
        glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
    for arch in archs:
        for glibc_max in glibc_max_list:
            if glibc_max.major == too_old_glibc2.major:
                min_minor = too_old_glibc2.minor
            else:
                # For other glibc major versions oldest supported is (x, 0).
                min_minor = -1
            for glibc_minor in range(glibc_max.minor, min_minor, -1):
                glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
                tag = "manylinux_{}_{}".format(*glibc_version)
                if _is_compatible(arch, glibc_version):
                    yield f"{tag}_{arch}"
                # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
                if glibc_version in _LEGACY_MANYLINUX_MAP:
                    legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
                    if _is_compatible(arch, glibc_version):
                        yield f"{legacy_tag}_{arch}"
+""" + +from __future__ import annotations + +import functools +import re +import subprocess +import sys +from typing import Iterator, NamedTuple, Sequence + +from ._elffile import ELFFile + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> _MuslVersion | None: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> _MuslVersion | None: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. 
+ return + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 646ad669..f0606b72 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -6,6 +6,12 @@ set -exuo pipefail # Get script directory MY_DIR=$(dirname "${BASH_SOURCE[0]}") +PYTHONPATH=${MY_DIR} python3.13 -m python.get_interpreter_info + +unset _PYTHON_HOST_PLATFORM +RUST_LOG=trace uv --verbose python list --no-python-downloads +exit 1 + if [ "${AUDITWHEEL_POLICY:0:10}" == "musllinux_" ]; then EXPECTED_PYTHON_COUNT=9 EXPECTED_PYTHON_COUNT_ALL=9 @@ -49,9 +55,6 @@ if [ ${EXPECTED_PYTHON_COUNT_ALL} -ne ${PYTHON_COUNT} ]; then exit 1 fi -unset _PYTHON_HOST_PLATFORM -RUST_LOG=trace uv --verbose python list --no-python-downloads - PYTHON_COUNT=0 for PYTHON in /opt/python/*/bin/python; do # Smoke test to make sure that our Pythons work, and do indeed detect as