diff --git a/src/auditwheel/architecture.py b/src/auditwheel/architecture.py new file mode 100644 index 00000000..dfa64aeb --- /dev/null +++ b/src/auditwheel/architecture.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import functools +import platform +import struct +import sys +from enum import Enum + + +class Architecture(Enum): + value: str + + aarch64 = "aarch64" + armv7l = "armv7l" + i686 = "i686" + loongarch64 = "loongarch64" + ppc64 = "ppc64" + ppc64le = "ppc64le" + riscv64 = "riscv64" + s390x = "s390x" + x86_64 = "x86_64" + x86_64_v2 = "x86_64_v2" + x86_64_v3 = "x86_64_v3" + x86_64_v4 = "x86_64_v4" + + def __str__(self): + return self.value + + @property + def baseline(self): + if self.value.startswith("x86_64"): + return Architecture.x86_64 + return self + + @classmethod + @functools.lru_cache(None) + def _member_list(cls) -> list[Architecture]: + return list(cls) + + def is_subset(self, other: Architecture) -> bool: + if self.baseline != other.baseline: + return False + member_list = Architecture._member_list() + return member_list.index(self) <= member_list.index(other) + + def is_superset(self, other: Architecture) -> bool: + if self.baseline != other.baseline: + return False + return other.is_subset(self) + + @staticmethod + def get_native_architecture(*, bits: int | None = None) -> Architecture: + machine = platform.machine() + if sys.platform.startswith("win"): + machine = {"AMD64": "x86_64", "ARM64": "aarch64", "x86": "i686"}.get( + machine, machine + ) + elif sys.platform.startswith("darwin"): + machine = {"arm64": "aarch64"}.get(machine, machine) + + if bits is None: + # c.f. https://github.com/pypa/packaging/pull/711 + bits = 8 * struct.calcsize("P") + + if machine in {"x86_64", "i686"}: + machine = {64: "x86_64", 32: "i686"}[bits] + elif machine in {"aarch64", "armv8l"}: + # use armv7l policy for 64-bit arm kernel in 32-bit mode (armv8l) + machine = {64: "aarch64", 32: "armv7l"}[bits] + + return Architecture(machine) diff --git a/src/auditwheel/lddtree.py b/src/auditwheel/lddtree.py index 0f15bd33..f0a69b7b 100644 --- a/src/auditwheel/lddtree.py +++ b/src/auditwheel/lddtree.py @@ -22,19 +22,58 @@ from fnmatch import fnmatch from pathlib import Path +from elftools.elf.constants import E_FLAGS from elftools.elf.elffile import ELFFile +from elftools.elf.sections import NoteSection +from .architecture import Architecture from .libc import Libc, get_libc log = logging.getLogger(__name__) __all__ = ["DynamicExecutable", "DynamicLibrary", "ldd"] +@dataclass(frozen=True) +class Platform: + _elf_osabi: str + _elf_class: int + _elf_little_endian: bool + _elf_machine: str + _base_arch: Architecture | None + _ext_arch: Architecture | None + _error_msg: str | None + + def is_compatible(self, other: Platform) -> bool: + os_abis = frozenset((self._elf_osabi, other._elf_osabi)) + compat_sets = ( + frozenset(f"ELFOSABI_{x}" for x in ("NONE", "SYSV", "GNU", "LINUX")), + ) + return ( + (len(os_abis) == 1 or any(os_abis.issubset(x) for x in compat_sets)) + and self._elf_class == other._elf_class + and self._elf_little_endian == other._elf_little_endian + and self._elf_machine == other._elf_machine + ) + + @property + def baseline_architecture(self) -> Architecture: + if self._base_arch is not None: + return self._base_arch + raise ValueError(self._error_msg) + + @property + def extended_architecture(self) -> Architecture | None: + if self._error_msg is not None: + raise ValueError(self._error_msg) + return self._ext_arch + + @dataclass(frozen=True) class DynamicLibrary: soname: str path: str | None realpath: str | None + platform: Platform | None = None needed: frozenset[str] = frozenset() @@ -43,12 +82,80 @@ class DynamicExecutable: interpreter: str | None path: str realpath: str + platform: Platform needed: frozenset[str] rpath: tuple[str, ...] runpath: tuple[str, ...] libraries: dict[str, DynamicLibrary] +def _get_platform(elf: ELFFile) -> Platform: + elf_osabi = elf.header["e_ident"]["EI_OSABI"] + elf_class = elf.elfclass + elf_little_endian = elf.little_endian + elf_machine = elf["e_machine"] + base_arch = { + ("EM_386", 32, True): Architecture.i686, + ("EM_X86_64", 64, True): Architecture.x86_64, + ("EM_PPC64", 64, True): Architecture.ppc64le, + ("EM_PPC64", 64, False): Architecture.ppc64, + ("EM_RISCV", 64, True): Architecture.riscv64, + ("EM_AARCH64", 64, True): Architecture.aarch64, + ("EM_S390", 64, False): Architecture.s390x, + ("EM_ARM", 32, True): Architecture.armv7l, + ("EM_LOONGARCH", 64, True): Architecture.loongarch64, + }.get((elf_machine, elf_class, elf_little_endian), None) + ext_arch: Architecture | None = None + error_msg: str | None = None + flags = elf["e_flags"] + assert base_arch is None or base_arch.baseline == base_arch + if base_arch is None: + error_msg = "Unknown architecture" + elif base_arch == Architecture.x86_64: + for section in elf.iter_sections(): + if not isinstance(section, NoteSection): + continue + for note in section.iter_notes(): + if note["n_type"] != "NT_GNU_PROPERTY_TYPE_0": + continue + if note["n_name"] != "GNU": + continue + for prop in note["n_desc"]: + if prop.pr_type != "GNU_PROPERTY_X86_ISA_1_NEEDED": + continue + if prop.pr_datasz != 4: + continue + data = prop.pr_data + data -= data & 1 # clear baseline + if data & 8 == 8: + ext_arch = Architecture.x86_64_v4 + break + if data & 4 == 4: + ext_arch = Architecture.x86_64_v3 + break + if data & 2 == 2: + ext_arch = Architecture.x86_64_v2 + break + if data != 0: + error_msg = "unknown x86_64 ISA" + break + elif base_arch == Architecture.armv7l: + if (flags & E_FLAGS.EF_ARM_EABIMASK) != E_FLAGS.EF_ARM_EABI_VER5: + error_msg = "Invalid ARM EABI version for armv7l" + elif (flags & E_FLAGS.EF_ARM_ABI_FLOAT_HARD) != E_FLAGS.EF_ARM_ABI_FLOAT_HARD: + error_msg = "armv7l shall use hard-float" + + return Platform( + elf_osabi, + elf_class, + elf_little_endian, + elf_machine, + base_arch, + ext_arch, + error_msg, + ) + + def normpath(path: str) -> str: """Normalize a path @@ -243,43 +350,16 @@ def load_ld_paths(root: str = "/", prefix: str = "") -> dict[str, list[str]]: return ldpaths -def compatible_elfs(elf1: ELFFile, elf2: ELFFile) -> bool: - """See if two ELFs are compatible - - This compares the aspects of the ELF to see if they're compatible: - bit size, endianness, machine type, and operating system. - - Parameters - ---------- - elf1 : ELFFile - elf2 : ELFFile - - Returns - ------- - True if compatible, False otherwise - """ - osabis = frozenset(e.header["e_ident"]["EI_OSABI"] for e in (elf1, elf2)) - compat_sets = ( - frozenset(f"ELFOSABI_{x}" for x in ("NONE", "SYSV", "GNU", "LINUX")), - ) - return ( - (len(osabis) == 1 or any(osabis.issubset(x) for x in compat_sets)) - and elf1.elfclass == elf2.elfclass - and elf1.little_endian == elf2.little_endian - and elf1.header["e_machine"] == elf2.header["e_machine"] - ) - - def find_lib( - elf: ELFFile, lib: str, ldpaths: list[str], root: str = "/" + platform: Platform, lib: str, ldpaths: list[str], root: str = "/" ) -> tuple[str | None, str | None]: """Try to locate a ``lib`` that is compatible to ``elf`` in the given ``ldpaths`` Parameters ---------- - elf : ELFFile - The elf which the library should be compatible with (ELF wise) + platform : Platform + The platform which the library should be compatible with (ELF wise) lib : str The library (basename) to search for ldpaths : list[str] @@ -299,7 +379,7 @@ def find_lib( if os.path.exists(target): with open(target, "rb") as f: libelf = ELFFile(f) - if compatible_elfs(elf, libelf): + if platform.is_compatible(_get_platform(libelf)): return target, path return None, None @@ -371,7 +451,6 @@ def ldd( with open(path, "rb") as f: elf = ELFFile(f) - # If this is the first ELF, extract the interpreter. if _first: for segment in elf.iter_segments(): @@ -391,6 +470,9 @@ def ldd( log.debug(" ldpaths[interp] = %s", ldpaths["interp"]) break + # get the platform + platform = _get_platform(elf) + # Parse the ELF's dynamic tags. for segment in elf.iter_segments(): if segment.header.p_type != "PT_DYNAMIC": @@ -411,67 +493,63 @@ def ldd( # probably fine since the runtime ldso does the same. break - if _first: - # Propagate the rpaths used by the main ELF since those will be - # used at runtime to locate things. - ldpaths["rpath"] = rpaths - ldpaths["runpath"] = runpaths - log.debug(" ldpaths[rpath] = %s", rpaths) - log.debug(" ldpaths[runpath] = %s", runpaths) - - # Search for the libs this ELF uses. - all_ldpaths = ( - ldpaths["rpath"] - + rpaths - + runpaths - + ldpaths["env"] - + ldpaths["runpath"] - + ldpaths["conf"] - + ldpaths["interp"] - ) - for soname in needed: - if soname in _all_libs: - continue - if soname in _excluded_libs: - continue - if any(fnmatch(soname, e) for e in exclude): - log.info("Excluding %s", soname) - _excluded_libs.add(soname) - continue - # TODO we should avoid keeping elf here, related to compat - realpath, fullpath = find_lib(elf, soname, all_ldpaths, root) - if realpath is not None and any(fnmatch(realpath, e) for e in exclude): - log.info("Excluding %s", realpath) - _excluded_libs.add(soname) - continue - _all_libs[soname] = DynamicLibrary(soname, fullpath, realpath) - if realpath is None or fullpath is None: - continue - lret = ldd( - realpath, - root, - prefix, - ldpaths, - display=fullpath, - exclude=exclude, - _all_libs=_all_libs, - ) - _all_libs[soname] = DynamicLibrary( - soname, fullpath, realpath, lret.needed - ) - del elf + if _first: + # Propagate the rpaths used by the main ELF since those will be + # used at runtime to locate things. + ldpaths["rpath"] = rpaths + ldpaths["runpath"] = runpaths + log.debug(" ldpaths[rpath] = %s", rpaths) + log.debug(" ldpaths[runpath] = %s", runpaths) + + # Search for the libs this ELF uses. + all_ldpaths = ( + ldpaths["rpath"] + + rpaths + + runpaths + + ldpaths["env"] + + ldpaths["runpath"] + + ldpaths["conf"] + + ldpaths["interp"] + ) + for soname in needed: + if soname in _all_libs: + continue + if soname in _excluded_libs: + continue + if any(fnmatch(soname, e) for e in exclude): + log.info("Excluding %s", soname) + _excluded_libs.add(soname) + continue + realpath, fullpath = find_lib(platform, soname, all_ldpaths, root) + if realpath is not None and any(fnmatch(realpath, e) for e in exclude): + log.info("Excluding %s", realpath) + _excluded_libs.add(soname) + continue + _all_libs[soname] = DynamicLibrary(soname, fullpath, realpath) + if realpath is None or fullpath is None: + continue + dependency = ldd(realpath, root, prefix, ldpaths, fullpath, exclude, _all_libs) + _all_libs[soname] = DynamicLibrary( + soname, + fullpath, + realpath, + dependency.platform, + dependency.needed, + ) + if interpreter is not None: soname = os.path.basename(interpreter) _all_libs[soname] = DynamicLibrary( - soname, interpreter, readlink(interpreter, root, prefixed=True) + soname, interpreter, readlink(interpreter, root, prefixed=True), platform ) return DynamicExecutable( interpreter, path if display is None else display, path, + platform, frozenset(needed - _excluded_libs), tuple(rpaths), tuple(runpaths), diff --git a/src/auditwheel/main_repair.py b/src/auditwheel/main_repair.py index f2bbb64b..a1051bc0 100644 --- a/src/auditwheel/main_repair.py +++ b/src/auditwheel/main_repair.py @@ -101,6 +101,13 @@ def configure_parser(sub_parsers): help="Do not check for higher policy compatibility", default=False, ) + p.add_argument( + "--disable-isa-ext-check", + dest="DISABLE_ISA_EXT_CHECK", + action="store_true", + help="Do not check for extended ISA compatibility (e.g. x86_64_v2)", + default=False, + ) p.set_defaults(func=execute) @@ -123,7 +130,9 @@ def execute(args, parser: argparse.ArgumentParser): os.makedirs(args.WHEEL_DIR) try: - wheel_abi = analyze_wheel_abi(wheel_policy, wheel_file, exclude) + wheel_abi = analyze_wheel_abi( + wheel_policy, wheel_file, exclude, args.DISABLE_ISA_EXT_CHECK + ) except NonPlatformWheel: logger.info(NonPlatformWheel.LOG_MESSAGE) return 1 @@ -155,6 +164,13 @@ def execute(args, parser: argparse.ArgumentParser): ) parser.error(msg) + if reqd_tag > wheel_policy.get_priority_by_name(wheel_abi.machine_tag): + msg = ( + f'cannot repair "{wheel_file}" to "{args.PLAT}" ABI because it ' + "depends on unsupported ISA extensions." + ) + parser.error(msg) + abis = [policy["name"]] + policy["aliases"] if (not args.ONLY_PLAT) and reqd_tag < wheel_policy.get_priority_by_name( wheel_abi.overall_tag diff --git a/src/auditwheel/main_show.py b/src/auditwheel/main_show.py index 4b90efbe..c88537bf 100644 --- a/src/auditwheel/main_show.py +++ b/src/auditwheel/main_show.py @@ -12,6 +12,13 @@ def configure_parser(sub_parsers): help = "Audit a wheel for external shared library dependencies." p = sub_parsers.add_parser("show", help=help, description=help) p.add_argument("WHEEL_FILE", help="Path to wheel file.") + p.add_argument( + "--disable-isa-ext-check", + dest="DISABLE_ISA_EXT_CHECK", + action="store_true", + help="Do not check for extended ISA compatibility (e.g. x86_64_v2)", + default=False, + ) p.set_defaults(func=execute) @@ -36,7 +43,9 @@ def execute(args, parser: argparse.ArgumentParser): parser.error(f"cannot access {args.WHEEL_FILE}. No such file") try: - winfo = analyze_wheel_abi(wheel_policy, args.WHEEL_FILE, frozenset()) + winfo = analyze_wheel_abi( + wheel_policy, args.WHEEL_FILE, frozenset(), args.DISABLE_ISA_EXT_CHECK + ) except NonPlatformWheel: logger.info(NonPlatformWheel.LOG_MESSAGE) return 1 @@ -70,6 +79,14 @@ def execute(args, parser: argparse.ArgumentParser): if args.verbose < 1: return None + if ( + wheel_policy.get_priority_by_name(winfo.machine_tag) + < wheel_policy.priority_highest + ): + printp("This wheel depends on unsupported ISA extensions.") + if args.verbose < 1: + return None + if len(libs_with_versions) == 0: printp( "The wheel references no external versioned symbols from " diff --git a/src/auditwheel/policy/__init__.py b/src/auditwheel/policy/__init__.py index 023d50d0..805c3935 100644 --- a/src/auditwheel/policy/__init__.py +++ b/src/auditwheel/policy/__init__.py @@ -2,10 +2,7 @@ import json import logging -import platform as _platform_module import re -import struct -import sys from collections import defaultdict from collections.abc import Generator from os.path import abspath, dirname, join @@ -14,6 +11,7 @@ from auditwheel.elfutils import filter_undefined_symbols, is_subdir +from ..architecture import Architecture from ..lddtree import DynamicExecutable from ..libc import Libc, get_libc from ..musllinux import find_musl_libc, get_musl_version @@ -36,7 +34,7 @@ def __init__( *, libc: Libc | None = None, musl_policy: str | None = None, - arch: str | None = None, + arch: Architecture | None = None, ) -> None: if libc is None: libc = get_libc() if musl_policy is None else Libc.MUSL @@ -51,13 +49,14 @@ def __init__( msg = f"Invalid 'musl_policy': '{musl_policy}'" raise ValueError(msg) if arch is None: - arch = get_arch_name() + arch = Architecture.get_native_architecture() policies = json.loads(_POLICY_JSON_MAP[libc].read_text()) self._policies = [] - self._arch_name = arch + self._architecture = arch self._libc_variant = libc self._musl_policy = musl_policy + base_arch = arch.baseline.value _validate_pep600_compliance(policies) for policy in policies: if self._musl_policy is not None and policy["name"] not in { @@ -65,17 +64,12 @@ def __init__( self._musl_policy, }: continue - if ( - self._arch_name in policy["symbol_versions"] - or policy["name"] == "linux" - ): + if arch.value in policy["symbol_versions"] or policy["name"] == "linux": if policy["name"] != "linux": - policy["symbol_versions"] = policy["symbol_versions"][ - self._arch_name - ] - policy["name"] = policy["name"] + "_" + self._arch_name + policy["symbol_versions"] = policy["symbol_versions"][base_arch] + policy["name"] = policy["name"] + "_" + base_arch policy["aliases"] = [ - alias + "_" + self._arch_name for alias in policy["aliases"] + alias + "_" + base_arch for alias in policy["aliases"] ] policy["lib_whitelist"] = _fixup_musl_libc_soname( libc, arch, policy["lib_whitelist"] @@ -85,6 +79,10 @@ def __init__( if self._libc_variant == Libc.MUSL: assert len(self._policies) == 2, self._policies + @property + def architecture(self) -> Architecture: + return self._architecture + @property def policies(self): return self._policies @@ -97,29 +95,30 @@ def priority_highest(self): def priority_lowest(self): return min(p["priority"] for p in self._policies) - def get_policy_by_name(self, name: str) -> dict | None: + def get_policy_by_name(self, name: str) -> dict: matches = [ p for p in self._policies if p["name"] == name or name in p["aliases"] ] if len(matches) == 0: - return None + msg = f"no policy named {name!r} found" + raise LookupError(msg) if len(matches) > 1: msg = "Internal error. Policies should be unique" raise RuntimeError(msg) return matches[0] - def get_policy_name(self, priority: int) -> str | None: + def get_policy_name(self, priority: int) -> str: matches = [p["name"] for p in self._policies if p["priority"] == priority] if len(matches) == 0: - return None + msg = f"no policy with priority {priority} found" + raise LookupError(msg) if len(matches) > 1: msg = "Internal error. priorities should be unique" raise RuntimeError(msg) return matches[0] - def get_priority_by_name(self, name: str) -> int | None: - policy = self.get_policy_by_name(name) - return None if policy is None else policy["priority"] + def get_priority_by_name(self, name: str) -> int: + return self.get_policy_by_name(name)["priority"] def versioned_symbols_policy(self, versioned_symbols: dict[str, set[str]]) -> int: def policy_is_satisfied( @@ -227,23 +226,6 @@ def get_req_external(libs: set[str], whitelist: set[str]) -> set[str]: return ret -def get_arch_name(*, bits: int | None = None) -> str: - machine = _platform_module.machine() - if sys.platform == "darwin" and machine == "arm64": - return "aarch64" - - if bits is None: - # c.f. https://github.com/pypa/packaging/pull/711 - bits = 8 * struct.calcsize("P") - - if machine in {"x86_64", "i686"}: - return {64: "x86_64", 32: "i686"}[bits] - if machine in {"aarch64", "armv8l"}: - # use armv7l policy for 64-bit arm kernel in 32-bit mode (armv8l) - return {64: "aarch64", 32: "armv7l"}[bits] - return machine - - def _validate_pep600_compliance(policies) -> None: symbol_versions: dict[str, dict[str, set[str]]] = {} lib_whitelist: set[str] = set() @@ -276,25 +258,25 @@ def _validate_pep600_compliance(policies) -> None: symbol_versions[arch] = symbol_versions_arch -def _fixup_musl_libc_soname(libc: Libc, arch: str, whitelist): +def _fixup_musl_libc_soname(libc: Libc, arch: Architecture, whitelist): if libc != Libc.MUSL: return whitelist soname_map = { "libc.so": { - "x86_64": "libc.musl-x86_64.so.1", - "i686": "libc.musl-x86.so.1", - "aarch64": "libc.musl-aarch64.so.1", - "s390x": "libc.musl-s390x.so.1", - "ppc64le": "libc.musl-ppc64le.so.1", - "armv7l": "libc.musl-armv7.so.1", - "riscv64": "libc.musl-riscv64.so.1", - "loongarch64": "libc.musl-loongarch64.so.1", + Architecture.x86_64: "libc.musl-x86_64.so.1", + Architecture.i686: "libc.musl-x86.so.1", + Architecture.aarch64: "libc.musl-aarch64.so.1", + Architecture.s390x: "libc.musl-s390x.so.1", + Architecture.ppc64le: "libc.musl-ppc64le.so.1", + Architecture.armv7l: "libc.musl-armv7.so.1", + Architecture.riscv64: "libc.musl-riscv64.so.1", + Architecture.loongarch64: "libc.musl-loongarch64.so.1", } } new_whitelist = [] for soname in whitelist: if soname in soname_map: - new_soname = soname_map[soname][arch] + new_soname = soname_map[soname][arch.baseline] logger.debug("Replacing whitelisted '%s' by '%s'", soname, new_soname) new_whitelist.append(new_soname) else: diff --git a/src/auditwheel/wheel_abi.py b/src/auditwheel/wheel_abi.py index 99fe3cdc..51da38fc 100644 --- a/src/auditwheel/wheel_abi.py +++ b/src/auditwheel/wheel_abi.py @@ -4,10 +4,12 @@ import itertools import logging import os -from collections import defaultdict, namedtuple +from collections import defaultdict from collections.abc import Mapping from copy import deepcopy +from dataclasses import dataclass from os.path import basename +from typing import Any from . import json from .elfutils import ( @@ -18,23 +20,23 @@ elf_references_PyFPE_jbuf, ) from .genericpkgctx import InGenericPkgCtx -from .lddtree import ldd +from .lddtree import DynamicExecutable, ldd from .policy import WheelPolicies log = logging.getLogger(__name__) -WheelAbIInfo = namedtuple( # noqa: PYI024 - "WheelAbIInfo", - [ - "overall_tag", - "external_refs", - "ref_tag", - "versioned_symbols", - "sym_tag", - "ucs_tag", - "pyfpe_tag", - "blacklist_tag", - ], -) + + +@dataclass(frozen=True) +class WheelAbIInfo: + overall_tag: str + external_refs: dict[str, Any] + ref_tag: str + versioned_symbols: dict[str, set[str]] + sym_tag: str + ucs_tag: str + pyfpe_tag: str + blacklist_tag: str + machine_tag: str class WheelAbiError(Exception): @@ -64,19 +66,19 @@ def get_wheel_elfdata( with InGenericPkgCtx(wheel_fn) as ctx: shared_libraries_in_purelib = [] + shared_libraries_with_invalid_machine = [] platform_wheel = False for fn, elf in elf_file_filter(ctx.iter_files()): - platform_wheel = True - # Check for invalid binary wheel format: no shared library should # be found in purelib so_path_split = fn.split(os.sep) + so_name = so_path_split[-1] # If this is in purelib, add it to the list of shared libraries in # purelib if "purelib" in so_path_split: - shared_libraries_in_purelib.append(so_path_split[-1]) + shared_libraries_in_purelib.append(so_name) # If at least one shared library exists in purelib, this is going # to fail and there's no need to do further checks @@ -84,6 +86,19 @@ def get_wheel_elfdata( log.debug("processing: %s", fn) elftree = ldd(fn, exclude=exclude) + try: + arch = elftree.platform.baseline_architecture + if arch != wheel_policy.architecture.baseline: + shared_libraries_with_invalid_machine.append(so_name) + log.warning("ignoring: %s with %s architecture", so_name, arch) + continue + except ValueError: + shared_libraries_with_invalid_machine.append(so_name) + log.warning("ignoring: %s with unknown architecture", so_name) + continue + + platform_wheel = True + for key, value in elf_find_versioned_symbols(elf): log.debug("key %s, value %s", key, value) versioned_symbols[key].add(value) @@ -109,9 +124,6 @@ def get_wheel_elfdata( # its internal references later. nonpy_elftree[fn] = elftree - if not platform_wheel: - raise NonPlatformWheel - # If at least one shared library exists in purelib, raise an error if shared_libraries_in_purelib: libraries = "\n\t".join(shared_libraries_in_purelib) @@ -123,6 +135,16 @@ def get_wheel_elfdata( ) raise RuntimeError(msg) + if not platform_wheel: + if not shared_libraries_with_invalid_machine: + raise NonPlatformWheel + libraries = "\n\t".join(shared_libraries_with_invalid_machine) + msg = ( + "Invalid binary wheel, found the following shared library/libraries " + f"with a different target architecture:\n\t{libraries}\n" + ) + raise NonPlatformWheel(msg) + # Get a list of all external libraries needed by ELFs in the wheel. needed_libs = { lib @@ -226,8 +248,50 @@ def get_symbol_policies( return result +def _get_machine_policy( + wheel_policy: WheelPolicies, + elftree_by_fn: dict[str, DynamicExecutable], + external_so_names: frozenset[str], +) -> int: + result = wheel_policy.priority_highest + machine_to_check = {} + for fn, dynamic_executable in elftree_by_fn.items(): + if fn in machine_to_check: + continue + machine_to_check[fn] = dynamic_executable.platform.extended_architecture + for dependency in dynamic_executable.libraries.values(): + if dependency.soname not in external_so_names: + continue + if dependency.realpath is None: + continue + assert dependency.platform is not None + if dependency.realpath in machine_to_check: + continue + machine_to_check[dependency.realpath] = ( + dependency.platform.extended_architecture + ) + + for fn, extended_architecture in machine_to_check.items(): + if extended_architecture is None: + continue + if wheel_policy.architecture.is_superset(extended_architecture): + continue + log.warning( + "ELF file %r requires %r instruction set, not in %r", + fn, + extended_architecture.value, + wheel_policy.architecture.value, + ) + result = wheel_policy.priority_lowest + + return result + + def analyze_wheel_abi( - wheel_policy: WheelPolicies, wheel_fn: str, exclude: frozenset[str] + wheel_policy: WheelPolicies, + wheel_fn: str, + exclude: frozenset[str], + disable_isa_ext_check: bool, ) -> WheelAbIInfo: external_refs = { p["name"]: {"libs": {}, "blacklist": {}, "priority": p["priority"]} @@ -271,6 +335,13 @@ def analyze_wheel_abi( default=wheel_policy.priority_lowest, ) + if disable_isa_ext_check: + machine_policy = wheel_policy.priority_highest + else: + machine_policy = _get_machine_policy( + wheel_policy, elftree_by_fn, frozenset(external_libs.values()) + ) + if has_ucs2: ucs_policy = wheel_policy.priority_lowest else: @@ -286,8 +357,16 @@ def analyze_wheel_abi( ucs_tag = wheel_policy.get_policy_name(ucs_policy) pyfpe_tag = wheel_policy.get_policy_name(pyfpe_policy) blacklist_tag = wheel_policy.get_policy_name(blacklist_policy) + machine_tag = wheel_policy.get_policy_name(machine_policy) overall_tag = wheel_policy.get_policy_name( - min(symbol_policy, ref_policy, ucs_policy, pyfpe_policy, blacklist_policy) + min( + symbol_policy, + ref_policy, + ucs_policy, + pyfpe_policy, + blacklist_policy, + machine_policy, + ) ) return WheelAbIInfo( @@ -299,6 +378,7 @@ def analyze_wheel_abi( ucs_tag, pyfpe_tag, blacklist_tag, + machine_tag, ) diff --git a/tests/integration/test_bundled_wheels.py b/tests/integration/test_bundled_wheels.py index b4d14a0f..2310bb0e 100644 --- a/tests/integration/test_bundled_wheels.py +++ b/tests/integration/test_bundled_wheels.py @@ -16,6 +16,7 @@ import pytest from auditwheel import lddtree, main_repair +from auditwheel.architecture import Architecture from auditwheel.libc import Libc from auditwheel.policy import WheelPolicies from auditwheel.wheel_abi import analyze_wheel_abi @@ -67,8 +68,8 @@ def test_analyze_wheel_abi(file, external_libs, exclude): cp.setenv("LD_LIBRARY_PATH", f"{HERE}") importlib.reload(lddtree) - wheel_policies = WheelPolicies(libc=Libc.GLIBC, arch="x86_64") - winfo = analyze_wheel_abi(wheel_policies, str(HERE / file), exclude) + wheel_policies = WheelPolicies(libc=Libc.GLIBC, arch=Architecture.x86_64) + winfo = analyze_wheel_abi(wheel_policies, str(HERE / file), exclude, False) assert ( set(winfo.external_refs["manylinux_2_5_x86_64"]["libs"]) == external_libs ), f"{HERE}, {exclude}, {os.environ}" @@ -78,11 +79,12 @@ def test_analyze_wheel_abi(file, external_libs, exclude): def test_analyze_wheel_abi_pyfpe(): - wheel_policies = WheelPolicies(libc=Libc.GLIBC, arch="x86_64") + wheel_policies = WheelPolicies(libc=Libc.GLIBC, arch=Architecture.x86_64) winfo = analyze_wheel_abi( wheel_policies, str(HERE / "fpewheel-0.0.0-cp35-cp35m-linux_x86_64.whl"), frozenset(), + False, ) assert ( winfo.sym_tag == "manylinux_2_5_x86_64" @@ -120,6 +122,7 @@ def test_wheel_source_date_epoch(tmp_path, monkeypatch): WHEEL_DIR=str(wheel_output_path), WHEEL_FILE=[str(wheel_path)], EXCLUDE=[], + DISABLE_ISA_EXT_CHECK=False, cmd="repair", func=Mock(), prog="auditwheel", diff --git a/tests/integration/test_manylinux.py b/tests/integration/test_manylinux.py index f5b10503..9414602b 100644 --- a/tests/integration/test_manylinux.py +++ b/tests/integration/test_manylinux.py @@ -16,12 +16,13 @@ import pytest from elftools.elf.elffile import ELFFile -from auditwheel.policy import WheelPolicies, get_arch_name +from auditwheel.architecture import Architecture +from auditwheel.policy import WheelPolicies logger = logging.getLogger(__name__) ENCODING = "utf-8" -PLATFORM = get_arch_name() +PLATFORM = Architecture.get_native_architecture().value MANYLINUX1_IMAGE_ID = f"quay.io/pypa/manylinux1_{PLATFORM}:latest" MANYLINUX2010_IMAGE_ID = f"quay.io/pypa/manylinux2010_{PLATFORM}:latest" MANYLINUX2014_IMAGE_ID = f"quay.io/pypa/manylinux2014_{PLATFORM}:latest" @@ -189,11 +190,14 @@ def tmp_docker_image(base, commands, setup_env=None): client.images.remove(image.id) -def assert_show_output(manylinux_ctr, wheel, expected_tag, strict): - output = docker_exec(manylinux_ctr, f"auditwheel show /io/{wheel}") +def assert_show_output(manylinux_ctr, wheel, expected_tag, strict, isa_ext_check=True): + isa_ext_check_arg = "" if isa_ext_check else "--disable-isa-ext-check" + output = docker_exec( + manylinux_ctr, f"auditwheel show {isa_ext_check_arg} /io/{wheel}" + ) output = output.replace("\n", " ") match = SHOW_RE.match(output) - assert match + assert match, f"{SHOW_RE.pattern!r} not found in:\n{output}" assert match["wheel"] == wheel if strict or "musllinux" in expected_tag: assert match["tag"] == expected_tag @@ -218,7 +222,7 @@ def build_numpy(container, policy, output_dir): # https://github.com/numpy/numpy/issues/27932 fix_hwcap = "echo '#define HWCAP_S390_VX 2048' >> /usr/include/bits/hwcap.h" docker_exec(container, f'sh -c "{fix_hwcap}"') - elif policy.startswith("manylinux_2_28_"): + elif policy.startswith(("manylinux_2_28_", "manylinux_2_34_")): docker_exec(container, "dnf install -y openblas-devel") else: if tuple(int(part) for part in NUMPY_VERSION.split(".")[:2]) >= (1, 26): @@ -391,16 +395,18 @@ def test_build_wheel_with_binary_executable( orig_wheel = filenames[0] assert "manylinux" not in orig_wheel + # manylinux_2_34_x86_64 uses x86_64_v2 for this test + isa_ext_check = policy != "manylinux_2_34_x86_64" + isa_ext_check_arg = "" if isa_ext_check else "--disable-isa-ext-check" + # Repair the wheel using the appropriate manylinux container - repair_command = ( - f"auditwheel repair --plat {policy} --only-plat -w /io /io/{orig_wheel}" - ) + repair_command = f"auditwheel repair --plat {policy} {isa_ext_check_arg} --only-plat -w /io /io/{orig_wheel}" docker_exec(manylinux_ctr, repair_command) filenames = os.listdir(io_folder) assert len(filenames) == 2 repaired_wheel = f"testpackage-0.0.1-py3-none-{tag}.whl" assert repaired_wheel in filenames - assert_show_output(manylinux_ctr, repaired_wheel, policy, False) + assert_show_output(manylinux_ctr, repaired_wheel, policy, False, isa_ext_check) docker_exec(docker_python, "pip install /io/" + repaired_wheel) output = docker_exec( diff --git a/tests/unit/test_architecture.py b/tests/unit/test_architecture.py new file mode 100644 index 00000000..56d248bc --- /dev/null +++ b/tests/unit/test_architecture.py @@ -0,0 +1,93 @@ +import platform +import struct +import sys + +import pytest + +from auditwheel.architecture import Architecture + + +@pytest.mark.parametrize( + ("reported_arch", "expected_arch"), + [ + ("armv7l", Architecture.armv7l), + ("armv8l", Architecture.armv7l), + ("aarch64", Architecture.armv7l), + ("i686", Architecture.i686), + ("x86_64", Architecture.i686), + ], +) +def test_32bits_arch_name(reported_arch, expected_arch, monkeypatch): + monkeypatch.setattr(platform, "machine", lambda: reported_arch) + machine = Architecture.get_native_architecture(bits=32) + assert machine == expected_arch + + +@pytest.mark.parametrize( + ("reported_arch", "expected_arch"), + [ + ("armv8l", Architecture.aarch64), + ("aarch64", Architecture.aarch64), + ("ppc64le", Architecture.ppc64le), + ("i686", Architecture.x86_64), + ("x86_64", Architecture.x86_64), + ], +) +def test_64bits_arch_name(reported_arch, expected_arch, monkeypatch): + monkeypatch.setattr(platform, "machine", lambda: reported_arch) + machine = Architecture.get_native_architecture(bits=64) + assert machine == expected_arch + + +@pytest.mark.parametrize( + ("maxsize", "sizeof_voidp", "expected"), + [ + # 64-bit + (9223372036854775807, 8, Architecture.x86_64), + # 32-bit + (2147483647, 4, Architecture.i686), + # 64-bit w/ 32-bit sys.maxsize: GraalPy, IronPython, Jython + (2147483647, 8, Architecture.x86_64), + ], +) +def test_arch_name_bits(maxsize, sizeof_voidp, expected, monkeypatch): + def _calcsize(fmt): + assert fmt == "P" + return sizeof_voidp + + monkeypatch.setattr(platform, "machine", lambda: "x86_64") + monkeypatch.setattr(sys, "maxsize", maxsize) + monkeypatch.setattr(struct, "calcsize", _calcsize) + machine = Architecture.get_native_architecture() + assert machine == expected + + +@pytest.mark.parametrize( + ("smaller", "larger"), + [ + (Architecture.x86_64, Architecture.x86_64_v4), + (Architecture.x86_64, Architecture.x86_64), + (Architecture.x86_64, Architecture.x86_64_v2), + (Architecture.x86_64_v2, Architecture.x86_64_v3), + (Architecture.x86_64_v3, Architecture.x86_64_v4), + ], +) +def test_order_valid(smaller, larger): + assert smaller.is_subset(larger) + assert larger.is_superset(smaller) + + +@pytest.mark.parametrize( + ("smaller", "larger"), + [ + (Architecture.x86_64, Architecture.x86_64_v4), + (Architecture.x86_64, Architecture.x86_64_v2), + (Architecture.x86_64_v2, Architecture.x86_64_v3), + (Architecture.x86_64_v3, Architecture.x86_64_v4), + (Architecture.aarch64, Architecture.x86_64), + (Architecture.x86_64, Architecture.aarch64), + ], +) +def test_order_invalid(smaller, larger): + assert not smaller.is_superset(larger) + assert not larger.is_subset(smaller) diff --git a/tests/unit/test_policy.py b/tests/unit/test_policy.py index 6684f586..a65f477d 100644 --- a/tests/unit/test_policy.py +++ b/tests/unit/test_policy.py @@ -1,20 +1,17 @@ from __future__ import annotations -import platform import re -import struct -import sys from contextlib import nullcontext as does_not_raise import pytest +from auditwheel.architecture import Architecture from auditwheel.error import InvalidLibc -from auditwheel.lddtree import DynamicExecutable, DynamicLibrary +from auditwheel.lddtree import DynamicExecutable, DynamicLibrary, Platform from auditwheel.libc import Libc from auditwheel.policy import ( WheelPolicies, _validate_pep600_compliance, - get_arch_name, get_libc, get_replace_platforms, ) @@ -36,62 +33,6 @@ def raises(exception, match=None, escape=True): return pytest.raises(exception, match=match) -@pytest.mark.parametrize( - ("reported_arch", "expected_arch"), - [ - ("armv6l", "armv6l"), - ("armv7l", "armv7l"), - ("armv8l", "armv7l"), - ("aarch64", "armv7l"), - ("i686", "i686"), - ("x86_64", "i686"), - ], -) -def test_32bits_arch_name(reported_arch, expected_arch, monkeypatch): - monkeypatch.setattr(platform, "machine", lambda: reported_arch) - machine = get_arch_name(bits=32) - assert machine == expected_arch - - -@pytest.mark.parametrize( - ("reported_arch", "expected_arch"), - [ - ("armv8l", "aarch64"), - ("aarch64", "aarch64"), - ("ppc64le", "ppc64le"), - ("i686", "x86_64"), - ("x86_64", "x86_64"), - ], -) -def test_64bits_arch_name(reported_arch, expected_arch, monkeypatch): - monkeypatch.setattr(platform, "machine", lambda: reported_arch) - machine = get_arch_name(bits=64) - assert machine == expected_arch - - -@pytest.mark.parametrize( - ("maxsize", "sizeof_voidp", "expected"), - [ - # 64-bit - (9223372036854775807, 8, "x86_64"), - # 32-bit - (2147483647, 4, "i686"), - # 64-bit w/ 32-bit sys.maxsize: GraalPy, IronPython, Jython - (2147483647, 8, "x86_64"), - ], -) -def test_arch_name_bits(maxsize, sizeof_voidp, expected, monkeypatch): - def _calcsize(fmt): - assert fmt == "P" - return sizeof_voidp - - monkeypatch.setattr(platform, "machine", lambda: "x86_64") - monkeypatch.setattr(sys, "maxsize", maxsize) - monkeypatch.setattr(struct, "calcsize", _calcsize) - machine = get_arch_name() - assert machine == expected - - @pytest.mark.parametrize( ("name", "expected"), [ @@ -196,19 +137,20 @@ def test_pep600_compliance(): class TestPolicyAccess: def test_get_by_priority(self): - _arch = get_arch_name() + arch = Architecture.get_native_architecture() wheel_policy = WheelPolicies() - assert wheel_policy.get_policy_name(65) == f"manylinux_2_27_{_arch}" - assert wheel_policy.get_policy_name(70) == f"manylinux_2_24_{_arch}" - assert wheel_policy.get_policy_name(80) == f"manylinux_2_17_{_arch}" - if _arch in {"x86_64", "i686"}: - assert wheel_policy.get_policy_name(90) == f"manylinux_2_12_{_arch}" - assert wheel_policy.get_policy_name(100) == f"manylinux_2_5_{_arch}" - assert wheel_policy.get_policy_name(0) == f"linux_{_arch}" + assert wheel_policy.get_policy_name(65) == f"manylinux_2_27_{arch}" + assert wheel_policy.get_policy_name(70) == f"manylinux_2_24_{arch}" + assert wheel_policy.get_policy_name(80) == f"manylinux_2_17_{arch}" + if arch in {Architecture.x86_64, Architecture.i686}: + assert wheel_policy.get_policy_name(90) == f"manylinux_2_12_{arch}" + assert wheel_policy.get_policy_name(100) == f"manylinux_2_5_{arch}" + assert wheel_policy.get_policy_name(0) == f"linux_{arch}" def test_get_by_priority_missing(self): wheel_policy = WheelPolicies() - assert wheel_policy.get_policy_name(101) is None + with pytest.raises(LookupError): + wheel_policy.get_policy_name(101) def test_get_by_priority_duplicate(self): wheel_policy = WheelPolicies() @@ -220,21 +162,22 @@ def test_get_by_priority_duplicate(self): wheel_policy.get_policy_name(0) def test_get_by_name(self): - _arch = get_arch_name() + arch = Architecture.get_native_architecture() wheel_policy = WheelPolicies() - assert wheel_policy.get_priority_by_name(f"manylinux_2_27_{_arch}") == 65 - assert wheel_policy.get_priority_by_name(f"manylinux_2_24_{_arch}") == 70 - assert wheel_policy.get_priority_by_name(f"manylinux2014_{_arch}") == 80 - assert wheel_policy.get_priority_by_name(f"manylinux_2_17_{_arch}") == 80 - if _arch in {"x86_64", "i686"}: - assert wheel_policy.get_priority_by_name(f"manylinux2010_{_arch}") == 90 - assert wheel_policy.get_priority_by_name(f"manylinux_2_12_{_arch}") == 90 - assert wheel_policy.get_priority_by_name(f"manylinux1_{_arch}") == 100 - assert wheel_policy.get_priority_by_name(f"manylinux_2_5_{_arch}") == 100 + assert wheel_policy.get_priority_by_name(f"manylinux_2_27_{arch}") == 65 + assert wheel_policy.get_priority_by_name(f"manylinux_2_24_{arch}") == 70 + assert wheel_policy.get_priority_by_name(f"manylinux2014_{arch}") == 80 + assert wheel_policy.get_priority_by_name(f"manylinux_2_17_{arch}") == 80 + if arch in {Architecture.x86_64, Architecture.i686}: + assert wheel_policy.get_priority_by_name(f"manylinux2010_{arch}") == 90 + assert wheel_policy.get_priority_by_name(f"manylinux_2_12_{arch}") == 90 + assert wheel_policy.get_priority_by_name(f"manylinux1_{arch}") == 100 + assert wheel_policy.get_priority_by_name(f"manylinux_2_5_{arch}") == 100 def test_get_by_name_missing(self): wheel_policy = WheelPolicies() - assert wheel_policy.get_priority_by_name("nosuchpolicy") is None + with pytest.raises(LookupError): + wheel_policy.get_priority_by_name("nosuchpolicy") def test_get_by_name_duplicate(self): wheel_policy = WheelPolicies() @@ -262,11 +205,11 @@ def test_filter_libs(self): ] unfiltered_libs = ["libfoo.so.1.0", "libbar.so.999.999.999"] libs = filtered_libs + unfiltered_libs - lddtree = DynamicExecutable( interpreter=None, path="/path/to/lib", realpath="/path/to/lib", + platform=Platform("", 64, True, "EM_X86_64", "x86_64", None, None), needed=frozenset(libs), libraries={ lib: DynamicLibrary(lib, f"/path/to/{lib}", f"/path/to/{lib}") @@ -293,7 +236,7 @@ def test_filter_libs(self): (Libc.GLIBC, None, None, does_not_raise()), (Libc.MUSL, "musllinux_1_1", None, does_not_raise()), (None, "musllinux_1_1", None, does_not_raise()), - (None, None, "aarch64", does_not_raise()), + (None, None, Architecture.aarch64, does_not_raise()), # invalid ( Libc.GLIBC, @@ -303,7 +246,6 @@ def test_filter_libs(self): ), (Libc.MUSL, "manylinux_1_1", None, raises(ValueError, "Invalid 'musl_policy'")), (Libc.MUSL, "musllinux_5_1", None, raises(AssertionError)), - (Libc.MUSL, "musllinux_1_1", "foo", raises(AssertionError)), # platform dependant ( Libc.MUSL, @@ -322,4 +264,4 @@ def test_wheel_policies_args(libc, musl_policy, arch, exception): if musl_policy is not None: assert wheel_policies._musl_policy == musl_policy if arch is not None: - assert wheel_policies._arch_name == arch + assert wheel_policies.architecture == arch