Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix .files and inferred packages_distributions for .egg-info packages #437

Merged
merged 23 commits into from
Apr 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
fed3a41
tests/fixtures: Fix FilesDef type to include bytes values
jherland Mar 10, 2023
578322a
Add tests for egg-info package with no installed modules
jherland Mar 10, 2023
61b0f29
Distribution.files: Prefer *.egg-info/installed-files.txt to SOURCES.txt
jherland Mar 10, 2023
8026db2
Add tests for egg-info package with .files from inaccurate SOURCES.txt
jherland Mar 10, 2023
22d9ea5
Distribution.files: Only return files that actually exist
jherland Mar 10, 2023
a107ce5
Merge branch 'main' into egg-metadata-work
jaraco Mar 18, 2023
b391f77
squash! Add tests for egg-info package with no installed modules
jherland Mar 19, 2023
110f00d
Add test case demonstrating inferring module names from installed-fil…
jherland Mar 19, 2023
eeb2ed1
Fix issues with inferring module names from installed-files.txt
jherland Mar 19, 2023
a2dc88a
squash! Add tests for egg-info package with .files from inaccurate SO…
jherland Mar 19, 2023
f62bf95
squash! Add tests for egg-info package with no installed modules
jherland Mar 19, 2023
61eca31
squash! Distribution.files: Only return files that actually exist
jherland Mar 19, 2023
9b165a9
Refactor logic for skipping missing files out of magic_files()
jherland Mar 19, 2023
33eb7b4
Rewrite docstrings to clarify the expected output format, and why we …
jherland Mar 19, 2023
fa9cca4
test_packages_distributions_all_module_types() must create existing f…
jherland Mar 19, 2023
70ff991
test_packages_distributions_all_module_types: Create valid import names
jherland Mar 19, 2023
5dbe83c
Revert "test_packages_distributions_all_module_types: Create valid im…
jaraco Apr 9, 2023
4e7f79f
Revert "test_packages_distributions_all_module_types() must create ex…
jaraco Apr 9, 2023
812db6f
Merge branch 'main' into egg-metadata-work
jaraco Apr 9, 2023
387f3bd
Merge branch 'main' into egg-metadata-work
jaraco Apr 10, 2023
8818432
⚫ Fade to black.
jaraco Apr 10, 2023
3d7ee19
Refactor to avoid missed coverage
jaraco Apr 10, 2023
b8a8b5d
Update changelog.
jaraco Apr 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
v6.3.0
======

* #115: Support ``installed-files.txt`` for ``Distribution.files``
when present.

v6.2.1
======

Expand Down
58 changes: 51 additions & 7 deletions importlib_metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import functools
import itertools
import posixpath
import contextlib
import collections
import inspect

Expand Down Expand Up @@ -461,8 +462,8 @@ def files(self):
:return: List of PackagePath for this distribution or None

Result is `None` if the metadata file that enumerates files
(i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
missing.
(i.e. RECORD for dist-info, or installed-files.txt or
SOURCES.txt for egg-info) is missing.
Result may be empty if the metadata exists but is empty.
"""

Expand All @@ -475,9 +476,19 @@ def make_file(name, hash=None, size_str=None):

@pass_none
def make_files(lines):
return list(starmap(make_file, csv.reader(lines)))
return starmap(make_file, csv.reader(lines))

return make_files(self._read_files_distinfo() or self._read_files_egginfo())
@pass_none
def skip_missing_files(package_paths):
return list(filter(lambda path: path.locate().exists(), package_paths))

return skip_missing_files(
make_files(
self._read_files_distinfo()
or self._read_files_egginfo_installed()
or self._read_files_egginfo_sources()
)
)

def _read_files_distinfo(self):
"""
Expand All @@ -486,10 +497,43 @@ def _read_files_distinfo(self):
text = self.read_text('RECORD')
return text and text.splitlines()

def _read_files_egginfo(self):
def _read_files_egginfo_installed(self):
"""
SOURCES.txt might contain literal commas, so wrap each line
in quotes.
Read installed-files.txt and return lines in a similar
CSV-parsable format as RECORD: each file must be placed
relative to the site-packages directory, and must also be
quoted (since file names can contain literal commas).

This file is written when the package is installed by pip,
but it might not be written for other installation methods.
Hence, even if we can assume that this file is accurate
when it exists, we cannot assume that it always exists.
"""
text = self.read_text('installed-files.txt')
# We need to prepend the .egg-info/ subdir to the lines in this file.
# But this subdir is only available in the PathDistribution's self._path
# which is not easily accessible from this base class...
subdir = getattr(self, '_path', None)
jaraco marked this conversation as resolved.
Show resolved Hide resolved
if not text or not subdir:
return
with contextlib.suppress(Exception):
ret = [
str((subdir / line).resolve().relative_to(self.locate_file('')))
for line in text.splitlines()
]
return map('"{}"'.format, ret)
jaraco marked this conversation as resolved.
Show resolved Hide resolved

def _read_files_egginfo_sources(self):
"""
Read SOURCES.txt and return lines in a similar CSV-parsable
format as RECORD: each file name must be quoted (since it
might contain literal commas).

Note that SOURCES.txt is not a reliable source for what
files are installed by a package. This file is generated
for a source archive, and the files that are present
there (e.g. setup.py) may not correctly reflect the files
that are present after the package has been installed.
"""
text = self.read_text('SOURCES.txt')
return text and map('"{}"'.format, text.splitlines())
Expand Down
97 changes: 95 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ def setUp(self):


# Except for python/mypy#731, prefer to define
# FilesDef = Dict[str, Union['FilesDef', str]]
FilesDef = Dict[str, Union[Dict[str, Union[Dict[str, str], str]], str]]
# FilesDef = Dict[str, Union['FilesDef', str, bytes]]
FilesDef = Dict[
str, Union[Dict[str, Union[Dict[str, Union[str, bytes]], str, bytes]], str, bytes]
]
jaraco marked this conversation as resolved.
Show resolved Hide resolved


class DistInfoPkg(OnSysPath, SiteDir):
Expand Down Expand Up @@ -214,6 +216,97 @@ def setUp(self):
build_files(EggInfoPkg.files, prefix=self.site_dir)


# Fixture: an egg-info distribution whose metadata directory holds PKG-INFO,
# SOURCES.txt and installed-files.txt but no top_level.txt, alongside a single
# module file (egg_with_module.py).
class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteDir):
# Nested mapping of file/directory name -> contents (or sub-mapping).
files: FilesDef = {
"egg_with_module_pkg.egg-info": {
"PKG-INFO": "Name: egg_with_module-pkg",
# SOURCES.txt is made from the source archive, and contains files
# (setup.py) that are not present after installation.
"SOURCES.txt": """
egg_with_module.py
setup.py
egg_with_module_pkg.egg-info/PKG-INFO
egg_with_module_pkg.egg-info/SOURCES.txt
egg_with_module_pkg.egg-info/top_level.txt
""",
# installed-files.txt is written by pip, and is a strictly more
# accurate source than SOURCES.txt as to the installed contents of
# the package.
"installed-files.txt": """
../egg_with_module.py
PKG-INFO
SOURCES.txt
top_level.txt
""",
# missing top_level.txt (to trigger fallback to installed-files.txt)
},
"egg_with_module.py": """
def main():
print("hello world")
""",
}

# Runs the parent setUp, then hands this class's `files` tree to
# build_files with self.site_dir as the prefix.
def setUp(self):
super().setUp()
build_files(EggInfoPkgPipInstalledNoToplevel.files, prefix=self.site_dir)


# Fixture: an egg-info distribution that ships metadata files only; its
# installed-files.txt lists no module and its top_level.txt is a lone newline.
class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteDir):
jaraco marked this conversation as resolved.
Show resolved Hide resolved
# Nested mapping of file/directory name -> contents (str or bytes).
files: FilesDef = {
"egg_with_no_modules_pkg.egg-info": {
"PKG-INFO": "Name: egg_with_no_modules-pkg",
# SOURCES.txt is made from the source archive, and contains files
# (setup.py) that are not present after installation.
"SOURCES.txt": """
setup.py
egg_with_no_modules_pkg.egg-info/PKG-INFO
egg_with_no_modules_pkg.egg-info/SOURCES.txt
egg_with_no_modules_pkg.egg-info/top_level.txt
""",
# installed-files.txt is written by pip, and is a strictly more
# accurate source than SOURCES.txt as to the installed contents of
# the package.
"installed-files.txt": """
PKG-INFO
SOURCES.txt
top_level.txt
""",
# top_level.txt correctly reflects that no modules are installed
"top_level.txt": b"\n",
},
}

# Runs the parent setUp, then hands this class's `files` tree to
# build_files with self.site_dir as the prefix.
def setUp(self):
super().setUp()
build_files(EggInfoPkgPipInstalledNoModules.files, prefix=self.site_dir)


# Fixture: an egg-info distribution with only PKG-INFO and SOURCES.txt in its
# metadata directory (no installed-files.txt, no top_level.txt), alongside a
# single module file (sources_fallback.py).
class EggInfoPkgSourcesFallback(OnSysPath, SiteDir):
# Nested mapping of file/directory name -> contents (or sub-mapping).
files: FilesDef = {
"sources_fallback_pkg.egg-info": {
"PKG-INFO": "Name: sources_fallback-pkg",
# SOURCES.txt is made from the source archive, and contains files
# (setup.py) that are not present after installation.
"SOURCES.txt": """
sources_fallback.py
setup.py
sources_fallback_pkg.egg-info/PKG-INFO
sources_fallback_pkg.egg-info/SOURCES.txt
""",
# missing installed-files.txt (i.e. not installed by pip) and
# missing top_level.txt (to trigger fallback to SOURCES.txt)
},
"sources_fallback.py": """
def main():
print("hello world")
""",
}

# Runs the parent setUp, then hands this class's `files` tree to
# build_files with self.site_dir as the prefix.
def setUp(self):
super().setUp()
build_files(EggInfoPkgSourcesFallback.files, prefix=self.site_dir)


class EggInfoFile(OnSysPath, SiteDir):
files: FilesDef = {
"egginfo_file.egg-info": """
Expand Down
33 changes: 26 additions & 7 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def suppress_known_deprecation():

class APITests(
fixtures.EggInfoPkg,
fixtures.EggInfoPkgPipInstalledNoToplevel,
fixtures.EggInfoPkgPipInstalledNoModules,
fixtures.EggInfoPkgSourcesFallback,
fixtures.DistInfoPkg,
fixtures.DistInfoPkgWithDot,
fixtures.EggInfoFile,
Expand Down Expand Up @@ -62,15 +65,28 @@ def test_prefix_not_matched(self):
distribution(prefix)

# Checks that each distribution's top_level.txt, read through
# Distribution.read_text() and stripped, equals the expected content.
def test_for_top_level(self):
# Standalone check for 'egginfo-pkg'; the same (name, content) pair is
# also the first entry of the table below.
self.assertEqual(
distribution('egginfo-pkg').read_text('top_level.txt').strip(), 'mod'
)
# (distribution name, expected stripped content of top_level.txt)
tests = [
('egginfo-pkg', 'mod'),
('egg_with_no_modules-pkg', ''),
]
for pkg_name, expect_content in tests:
# subTest labels each table entry by its distribution name.
with self.subTest(pkg_name):
self.assertEqual(
distribution(pkg_name).read_text('top_level.txt').strip(),
expect_content,
)

# Checks that the top_level.txt entry found via files() can be read back with
# its own read_text() and yields the expected, unstripped content.
def test_read_text(self):
# Standalone check for 'egginfo-pkg'; the same (name, content) pair is
# also the first entry of the table below.
top_level = [
path for path in files('egginfo-pkg') if path.name == 'top_level.txt'
][0]
self.assertEqual(top_level.read_text(), 'mod\n')
# (distribution name, expected raw content of top_level.txt)
tests = [
('egginfo-pkg', 'mod\n'),
('egg_with_no_modules-pkg', '\n'),
]
for pkg_name, expect_content in tests:
with self.subTest(pkg_name):
# [0] takes the first matching entry; raises IndexError if files()
# lists no top_level.txt for this distribution.
top_level = [
path for path in files(pkg_name) if path.name == 'top_level.txt'
][0]
self.assertEqual(top_level.read_text(), expect_content)

def test_entry_points(self):
eps = entry_points()
Expand Down Expand Up @@ -184,6 +200,9 @@ def test_files_dist_info(self):

def test_files_egg_info(self):
    """Run the _test_files checks on files() of each egg-info fixture."""
    egg_dist_names = (
        'egginfo-pkg',
        'egg_with_module-pkg',
        'egg_with_no_modules-pkg',
        'sources_fallback-pkg',
    )
    # Same order as before, so the first failing distribution is unchanged.
    for dist_name in egg_dist_names:
        self._test_files(files(dist_name))

def test_version_egg_info_file(self):
self.assertEqual(version('egginfo-file'), '0.1')
Expand Down
49 changes: 48 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,21 @@ def test_metadata_loads_egg_info(self):
assert meta['Description'] == 'pôrˈtend'


class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase):
class DiscoveryTests(
fixtures.EggInfoPkg,
fixtures.EggInfoPkgPipInstalledNoToplevel,
fixtures.EggInfoPkgPipInstalledNoModules,
fixtures.EggInfoPkgSourcesFallback,
jaraco marked this conversation as resolved.
Show resolved Hide resolved
fixtures.DistInfoPkg,
unittest.TestCase,
):
def test_package_discovery(self):
    """Each fixture distribution is reported by distributions()."""
    found = list(distributions())
    for candidate in found:
        assert isinstance(candidate, Distribution)

    expected_names = (
        'egginfo-pkg',
        'egg_with_module-pkg',
        'egg_with_no_modules-pkg',
        'sources_fallback-pkg',
        'distinfo-pkg',
    )
    for expected in expected_names:
        # Lazy any(): stops reading metadata at the first match.
        assert any(candidate.metadata['Name'] == expected for candidate in found)

def test_invalid_usage(self):
Expand Down Expand Up @@ -362,3 +372,40 @@ def test_packages_distributions_all_module_types(self):
assert distributions[f'in_package_{i}'] == ['all_distributions']

assert not any(name.endswith('.dist-info') for name in distributions)


class PackagesDistributionsEggTest(
    fixtures.EggInfoPkg,
    fixtures.EggInfoPkgPipInstalledNoToplevel,
    fixtures.EggInfoPkgPipInstalledNoModules,
    fixtures.EggInfoPkgSourcesFallback,
    unittest.TestCase,
):
    def test_packages_distributions_on_eggs(self):
        """
        Check packages_distributions() against old-style egg packages
        that provide different combinations of 'top_level.txt',
        'SOURCES.txt' and 'installed-files.txt'.
        """
        dists_by_import = packages_distributions()

        def imports_provided_by(dist_name):
            # Invert the mapping: collect every import name whose list
            # of distributions mentions dist_name.
            provided = set()
            for import_name, dist_names in dists_by_import.items():
                if dist_name in dist_names:
                    provided.add(import_name)
            return provided

        # top_level.txt declares exactly one import name: 'mod'.
        assert imports_provided_by('egginfo-pkg') == {'mod'}

        # No top_level.txt: 'egg_with_module' is inferred from
        # installed-files.txt.
        assert imports_provided_by('egg_with_module-pkg') == {'egg_with_module'}

        # Empty top_level.txt and no .py entries in installed-files.txt:
        # no import name may point at this distribution.
        assert imports_provided_by('egg_with_no_modules-pkg') == set()

        # Neither top_level.txt nor installed-files.txt is present:
        # 'sources_fallback' is inferred from SOURCES.txt.
        assert imports_provided_by('sources_fallback-pkg') == {'sources_fallback'}