Skip to content

Commit

Permalink
chore: use a dataclass for ldd result
Browse files Browse the repository at this point in the history
  • Loading branch information
mayeut committed Feb 1, 2025
1 parent 5276a7b commit d4570da
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 104 deletions.
19 changes: 19 additions & 0 deletions src/auditwheel/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import dataclasses
import json
from enum import Enum
from typing import Any


def _encode_value(value: Any) -> Any:
    """Translate *value* into a form the standard JSON encoder accepts.

    Intended as the ``default`` hook of ``json.dumps``. The checks are
    applied in this order:

    * a dataclass *instance* (not a dataclass type) is converted with
      ``dataclasses.asdict``;
    * a ``frozenset`` becomes a sorted list;
    * an ``Enum`` member becomes its ``repr`` string.

    Raises:
        TypeError: if *value* matches none of the cases above.
    """
    if dataclasses.is_dataclass(value) and not isinstance(value, type):
        encoded = dataclasses.asdict(value)
    elif isinstance(value, frozenset):
        # Sorting gives a list whose order does not depend on set iteration.
        encoded = sorted(value)
    elif isinstance(value, Enum):
        encoded = repr(value)
    else:
        type_name = value.__class__.__name__
        raise TypeError(f"object of type {type_name!r} can't be encoded to JSON")
    return encoded


def dumps(obj: Any):
    """Serialize *obj* to a JSON string indented by four spaces.

    Objects the standard encoder cannot handle are passed to
    ``_encode_value`` through the ``default`` hook.
    """
    encoder_options = {"indent": 4, "default": _encode_value}
    return json.dumps(obj, **encoder_options)
162 changes: 87 additions & 75 deletions src/auditwheel/lddtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
"""Read the ELF dependency tree
This does not work like `ldd` in that we do not execute/load code (only read
files on disk), and we parse the dependency structure as a tree rather than
a flat list.
files on disk).
"""

from __future__ import annotations
Expand All @@ -19,16 +18,35 @@
import glob
import logging
import os
from dataclasses import dataclass
from fnmatch import fnmatch
from pathlib import Path
from typing import Any

from elftools.elf.elffile import ELFFile

from .libc import Libc, get_libc

log = logging.getLogger(__name__)
__all__ = ["lddtree"]
__all__ = ["DynamicExecutable", "DynamicLibrary", "ldd"]


@dataclass(frozen=True)
class DynamicLibrary:
    """One shared library recorded while resolving an ELF's dependencies.

    ``ldd`` stores these in its ``_all_libs`` mapping, keyed by soname.
    """

    # Name the library is referenced by: a DT_NEEDED entry, or the basename
    # of the ELF interpreter.
    soname: str
    # Second value returned by ``find_lib``; None when the library was not
    # found on any search path.
    path: str | None
    # First value returned by ``find_lib``; None when the library was not
    # found. For the interpreter entry this is the ``readlink`` result.
    realpath: str | None
    # sonames needed by this library itself. Left empty when the library
    # could not be located (and for the interpreter entry).
    needed: frozenset[str] = frozenset()


@dataclass(frozen=True)
class DynamicExecutable:
    """Result of ``ldd`` for a single ELF file.

    Note: ``frozen=True`` only prevents rebinding the attributes; the
    ``libraries`` dict itself is still a mutable object.
    """

    # Normalized path (under ``root``) of the ELF interpreter, or None when
    # no interpreter name was read from the file.
    interpreter: str | None
    # The ``display`` argument given to ``ldd`` when provided, otherwise the
    # analysed path.
    path: str
    # The path that was actually opened and parsed.
    realpath: str
    # DT_NEEDED sonames of this ELF, minus those that were excluded.
    needed: frozenset[str]
    # Search paths parsed from the dynamic section. ``rpath`` is emptied when
    # a non-empty RUNPATH is present, since only the latter is used then.
    rpath: tuple[str, ...]
    runpath: tuple[str, ...]
    # Every library recorded during the walk (including the interpreter),
    # keyed by soname.
    libraries: dict[str, DynamicLibrary]


def normpath(path: str) -> str:
Expand Down Expand Up @@ -282,20 +300,20 @@ def find_lib(
with open(target, "rb") as f:
libelf = ELFFile(f)
if compatible_elfs(elf, libelf):
return (target, path)
return target, path

return (None, None)
return None, None


def lddtree(
def ldd(
path: str,
root: str = "/",
prefix: str = "",
ldpaths: dict[str, list[str]] | None = None,
display: str | None = None,
exclude: frozenset[str] = frozenset(),
_all_libs: dict | None = None,
) -> dict:
_all_libs: dict[str, DynamicLibrary] | None = None,
) -> DynamicExecutable:
"""Parse the ELF dependency tree of the specified file
Parameters
Expand Down Expand Up @@ -343,17 +361,13 @@ def lddtree(
if _all_libs is None:
_all_libs = {}

ret: dict[str, Any] = {
"interp": None,
"path": path if display is None else display,
"realpath": path,
"needed": [],
"rpath": [],
"runpath": [],
"libs": _all_libs,
}
log.debug("ldd(%s)", path)

log.debug("lddtree(%s)", path)
interpreter: str | None = None
needed: set[str] = set()
rpaths: list[str] = []
runpaths: list[str] = []
_excluded_libs: set[str] = set()

with open(path, "rb") as f:
elf = ELFFile(f)
Expand All @@ -366,12 +380,7 @@ def lddtree(

interp = segment.get_interp_name()
log.debug(" interp = %s", interp)
ret["interp"] = normpath(root + interp)
ret["libs"][os.path.basename(interp)] = {
"path": ret["interp"],
"realpath": readlink(ret["interp"], root, prefixed=True),
"needed": [],
}
interpreter = normpath(root + interp)
# XXX: Should read it and scan for /lib paths.
ldpaths["interp"] = [
normpath(root + os.path.dirname(interp)),
Expand All @@ -383,10 +392,6 @@ def lddtree(
break

# Parse the ELF's dynamic tags.
libs: list[str] = []
rpaths: list[str] = []
runpaths: list[str] = []
_excluded_libs: set[str] = set()
for segment in elf.iter_segments():
if segment.header.p_type != "PT_DYNAMIC":
continue
Expand All @@ -397,71 +402,78 @@ def lddtree(
elif t.entry.d_tag == "DT_RUNPATH":
runpaths = parse_ld_paths(t.runpath, path=path, root=root)
elif t.entry.d_tag == "DT_NEEDED":
if t.needed in _excluded_libs or any(
fnmatch(t.needed, e) for e in exclude
):
log.info("Excluding %s", t.needed)
_excluded_libs.add(t.needed)
else:
libs.append(t.needed)
needed.add(t.needed)
if runpaths:
# If both RPATH and RUNPATH are set, only the latter is used.
rpaths = []

# XXX: We assume there is only one PT_DYNAMIC. This is
# probably fine since the runtime ldso does the same.
break

if _first:
# Propagate the rpaths used by the main ELF since those will be
# used at runtime to locate things.
ldpaths["rpath"] = rpaths
ldpaths["runpath"] = runpaths
log.debug(" ldpaths[rpath] = %s", rpaths)
log.debug(" ldpaths[runpath] = %s", runpaths)
ret["rpath"] = rpaths
ret["runpath"] = runpaths

# Search for the libs this ELF uses.
all_ldpaths: list[str] | None = None
for lib in libs:
if lib in _all_libs:
all_ldpaths = (
ldpaths["rpath"]
+ rpaths
+ runpaths
+ ldpaths["env"]
+ ldpaths["runpath"]
+ ldpaths["conf"]
+ ldpaths["interp"]
)
for soname in needed:
if soname in _all_libs:
continue
if soname in _excluded_libs:
continue
if any(fnmatch(soname, e) for e in exclude):
log.info("Excluding %s", soname)
_excluded_libs.add(soname)
continue
if all_ldpaths is None:
all_ldpaths = (
ldpaths["rpath"]
+ rpaths
+ runpaths
+ ldpaths["env"]
+ ldpaths["runpath"]
+ ldpaths["conf"]
+ ldpaths["interp"]
)
realpath, fullpath = find_lib(elf, lib, all_ldpaths, root)
if lib in _excluded_libs or (
realpath is not None and any(fnmatch(realpath, e) for e in exclude)
):
# TODO we should avoid keeping elf here, related to compat
realpath, fullpath = find_lib(elf, soname, all_ldpaths, root)
if realpath is not None and any(fnmatch(realpath, e) for e in exclude):
log.info("Excluding %s", realpath)
_excluded_libs.add(lib)
_excluded_libs.add(soname)
continue
_all_libs[lib] = {
"realpath": realpath,
"path": fullpath,
"needed": [],
}
if realpath and fullpath:
lret = lddtree(
realpath,
root,
prefix,
ldpaths,
display=fullpath,
exclude=exclude,
_all_libs=_all_libs,
)
_all_libs[lib]["needed"] = lret["needed"]
_all_libs[soname] = DynamicLibrary(soname, fullpath, realpath)
if realpath is None or fullpath is None:
continue
lret = ldd(
realpath,
root,
prefix,
ldpaths,
display=fullpath,
exclude=exclude,
_all_libs=_all_libs,
)
_all_libs[soname] = DynamicLibrary(
soname, fullpath, realpath, lret.needed
)

del elf

ret["needed"] = [lib for lib in libs if lib not in _excluded_libs]

return ret
if interpreter is not None:
soname = os.path.basename(interpreter)
_all_libs[soname] = DynamicLibrary(
soname, interpreter, readlink(interpreter, root, prefixed=True)
)

return DynamicExecutable(
interpreter,
path if display is None else display,
path,
frozenset(needed - _excluded_libs),
tuple(rpaths),
tuple(runpaths),
_all_libs,
)
7 changes: 3 additions & 4 deletions src/auditwheel/main_lddtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ def configure_subparser(sub_parsers):


def execute(args, p: argparse.ArgumentParser): # noqa: ARG001
import json
from . import json
from .lddtree import ldd

from .lddtree import lddtree

logger.info(json.dumps(lddtree(args.file), indent=4))
logger.info(json.dumps(ldd(args.file)))
4 changes: 2 additions & 2 deletions src/auditwheel/main_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def printp(text: str) -> None:


def execute(args, parser: argparse.ArgumentParser):
import json
from os.path import basename, isfile

from . import json
from .wheel_abi import NonPlatformWheel, analyze_wheel_abi

wheel_policy = WheelPolicies()
Expand Down Expand Up @@ -99,7 +99,7 @@ def execute(args, parser: argparse.ArgumentParser):
printp("The wheel requires no external shared libraries! :)")
else:
printp("The following external shared libraries are required by the wheel:")
print(json.dumps(dict(sorted(libs.items())), indent=4))
print(json.dumps(dict(sorted(libs.items()))))

for p in sorted(wheel_policy.policies, key=lambda p: p["priority"]):
if p["priority"] > wheel_policy.get_priority_by_name(winfo.overall_tag):
Expand Down
21 changes: 11 additions & 10 deletions src/auditwheel/policy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from auditwheel.elfutils import filter_undefined_symbols, is_subdir

from ..lddtree import DynamicExecutable
from ..libc import Libc, get_libc
from ..musllinux import find_musl_libc, get_musl_version

Expand Down Expand Up @@ -159,10 +160,10 @@ def policy_is_satisfied(

return max(matching_policies)

def lddtree_external_references(self, lddtree: dict, wheel_path: str) -> dict:
# XXX: Document the lddtree structure, or put it in something
# more stable than a big nested dict
def filter_libs(libs: set[str], whitelist: set[str]) -> Generator[str]:
def lddtree_external_references(
self, lddtree: DynamicExecutable, wheel_path: str
) -> dict:
def filter_libs(libs: frozenset[str], whitelist: set[str]) -> Generator[str]:
for lib in libs:
if "ld-linux" in lib or lib in ["ld64.so.2", "ld64.so.1"]:
# always exclude ELF dynamic linker/loader
Expand All @@ -185,7 +186,7 @@ def get_req_external(libs: set[str], whitelist: set[str]) -> set[str]:
while libs:
lib = libs.pop()
reqs.add(lib)
for dep in filter_libs(lddtree["libs"][lib]["needed"], whitelist):
for dep in filter_libs(lddtree.libraries[lib].needed, whitelist):
if dep not in reqs:
libs.add(dep)
return reqs
Expand All @@ -201,23 +202,23 @@ def get_req_external(libs: set[str], whitelist: set[str]) -> set[str]:
# whitelist is the complete set of all libraries. so nothing
# is considered "external" that needs to be copied in.
whitelist = set(p["lib_whitelist"])
blacklist_libs = set(p["blacklist"].keys()) & set(lddtree["needed"])
blacklist_libs = set(p["blacklist"].keys()) & lddtree.needed
blacklist = {k: p["blacklist"][k] for k in blacklist_libs}
blacklist = filter_undefined_symbols(lddtree["realpath"], blacklist)
blacklist = filter_undefined_symbols(lddtree.realpath, blacklist)
needed_external_libs = get_req_external(
set(filter_libs(lddtree["needed"], whitelist)), whitelist
set(filter_libs(lddtree.needed, whitelist)), whitelist
)

pol_ext_deps = {}
for lib in needed_external_libs:
if is_subdir(lddtree["libs"][lib]["realpath"], wheel_path):
if is_subdir(lddtree.libraries[lib].realpath, wheel_path):
# we didn't filter libs that resolved via RPATH out
# earlier because we wanted to make sure to pick up
# our elf's indirect dependencies. But now we want to
# filter these ones out, since they're not "external".
logger.debug("RPATH FTW: %s", lib)
continue
pol_ext_deps[lib] = lddtree["libs"][lib]["realpath"]
pol_ext_deps[lib] = lddtree.libraries[lib].realpath
ret[p["name"]] = {
"libs": pol_ext_deps,
"priority": p["priority"],
Expand Down
Loading

0 comments on commit d4570da

Please sign in to comment.