Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] setuptools: add support for reproducible source distributions #1512

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 242 additions & 0 deletions setuptools/archive_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,35 @@
import tarfile
import os
import shutil
import struct
import time
import posixpath
import contextlib
from distutils.errors import DistutilsError
from distutils.dir_util import mkpath
from distutils import log

try:
from pwd import getpwnam
except ImportError:
getpwnam = None

try:
from grp import getgrnam
except ImportError:
getgrnam = None

# lzma is optional: it is missing on some interpreters/builds. Only an
# ImportError means "not available"; anything else should propagate.
try:
    import lzma
except ImportError:
    lzma = None

from pkg_resources import ensure_directory

# Names exported by ``from setuptools.archive_util import *``.
__all__ = [
"unpack_archive", "unpack_zipfile", "unpack_tarfile", "default_filter",
"UnrecognizedFormat", "extraction_drivers", "unpack_directory",
"make_archive",
]


Expand Down Expand Up @@ -171,3 +191,225 @@ def unpack_tarfile(filename, extract_dir, progress_filter=default_filter):


# Tuple of the unpack functions, in this order: directory, zip, tar.
# NOTE(review): presumably tried in order by unpack_archive -- confirm.
extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile


def _get_gid(name):
"""Returns a gid, given a group name."""
if getgrnam is None or name is None:
return None
try:
result = getgrnam(name)
except KeyError:
result = None
if result is not None:
return result[2]
return None

def _get_uid(name):
"""Returns an uid, given a user name."""
if getpwnam is None or name is None:
return None
try:
result = getpwnam(name)
except KeyError:
result = None
if result is not None:
return result[2]
return None

def _find_sorted(base_dir):
yield base_dir
for path in sorted(
os.path.join(root, name)
for root, dirs, files in os.walk(base_dir)
for name in dirs + files
):
yield path


def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                 owner=None, group=None, timestamp=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None;
    any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, or if the name cannot be
    resolved to an id, the values recorded by tarfile are left untouched.

    'timestamp', when given, replaces the mtime of every archive member
    and, when 'compress' is "gzip", is also written into the gzip header
    in place of the archive creation time.

    'verbose' is accepted for interface compatibility and is not used.

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2" or ".xz").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', 'xz': 'xz', None: ''}
    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'xz': '.xz'}

    if compress not in tar_compression:
        raise ValueError(
            "bad value for 'compress': must be None, 'gzip', 'bzip2' "
            "or 'xz'")

    archive_name = base_name + '.tar'
    archive_name += compress_ext.get(compress, '')

    mkpath(os.path.dirname(archive_name), dry_run=dry_run)

    log.info('Creating tar archive')

    if dry_run:
        return archive_name

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _normalize_member(tarinfo):
        # Applied to every member: override ownership only when the
        # corresponding id could be resolved, and pin mtime if requested.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        if timestamp is not None:
            tarinfo.mtime = timestamp
        return tarinfo

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
    try:
        # Members are added one by one (recursive=False) so that the order
        # is the sorted order from _find_sorted, not the filesystem order.
        # NOTE(review): a reviewer asked whether this could be simplified
        # now that python/cpython#2263 is merged (same for the zip part).
        for path in _find_sorted(base_dir):
            tar.add(path, recursive=False, filter=_normalize_member)
    finally:
        tar.close()

    # Patch gzip header to use the timestamp
    # provided instead of the current time.
    if compress == 'gzip' and timestamp is not None:
        with open(archive_name, 'r+b') as fp:
            # The gzip MTIME field is a little-endian unsigned 32-bit
            # integer stored at byte offset 4 of the header.
            fp.seek(4)
            fp.write(struct.pack('<L', int(timestamp)))

    return archive_name

def make_zipfile(base_name, base_dir, verbose=0, dry_run=0, timestamp=None):
    """Create a zip file from all the files under 'base_dir'.

    Only regular files are stored, in sorted path order.

    'timestamp', when given, is a Unix time used as the modification date
    of every entry. It is converted with UTC (not local time) so the
    result does not depend on the build machine's time zone, and it is
    clamped to 1980-01-01 because the ZIP format cannot represent earlier
    dates. When 'timestamp' is None each file's own mtime is used, in
    local time.

    'verbose' is accepted for interface compatibility and is not used.

    The output zip file will be named 'base_name' + ".zip".
    Returns the name of the output zip file.
    """
    zip_filename = base_name + ".zip"
    mkpath(os.path.dirname(zip_filename), dry_run=dry_run)

    log.info("creating '%s' and adding '%s' to it",
             zip_filename, base_dir)

    if dry_run:
        return zip_filename

    # 315532800 == 1980-01-01T00:00:00Z, the earliest date ZIP can store.
    min_zip_epoch = 315532800
    if timestamp is None:
        fixed_date_time = None
    else:
        fixed_date_time = time.gmtime(
            max(int(timestamp), min_zip_epoch))[0:6]

    compression = zipfile.ZIP_DEFLATED
    try:
        archive = zipfile.ZipFile(zip_filename, "w",
                                  compression=compression)
    except RuntimeError:
        # Deflate is unavailable; fall back to storing uncompressed.
        compression = zipfile.ZIP_STORED
        archive = zipfile.ZipFile(zip_filename, "w",
                                  compression=compression)
    try:
        for path in filter(os.path.isfile, _find_sorted(base_dir)):
            st = os.stat(path)
            if fixed_date_time is None:
                date_time = time.localtime(st.st_mtime)[0:6]
            else:
                date_time = fixed_date_time
            info = zipfile.ZipInfo(path, date_time)
            info.compress_type = compression
            # Unix mode bits live in the high 16 bits of external_attr.
            info.external_attr = (st.st_mode & 0xFFFF) << 16
            with open(path, 'rb') as fp:
                archive.writestr(info, fp.read())
            log.info("adding '%s'", path)
    finally:
        archive.close()

    return zip_filename

# Registry of archive formats: name -> (builder function,
# list of (keyword, value) pairs passed to the builder, description).
ARCHIVE_FORMATS = {
    'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar': (make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (make_zipfile, [], "ZIP file"),
}
# xz archives are only offered when the lzma module could be imported.
if lzma is not None:
    ARCHIVE_FORMATS['xztar'] = (
        make_tarball, [('compress', 'xz')], "xz'ed tar-file")

def check_archive_formats(formats):
    """Return the first entry of 'formats' that is not a known format.

    A format is known when it is a key of ARCHIVE_FORMATS. Returns None
    when every entry is known (including when 'formats' is empty).
    """
    unknown = (name for name in formats if name not in ARCHIVE_FORMATS)
    return next(unknown, None)

def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, timestamp=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of the keys of
    ARCHIVE_FORMATS ("zip", "tar", "gztar", "bztar", and "xztar" when lzma
    is available). An unknown format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'timestamp', when not None, is forwarded to the format's builder
    function, which uses it in place of file modification times (and of
    the gzip header time for "gztar").

    'verbose' is accepted for interface compatibility and is not used.
    """
    # Validate the format before touching the working directory, so an
    # unknown format cannot leave the process chdir'ed into 'root_dir'.
    try:
        format_info = ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        log.debug("changing into '%s'", root_dir)
        # Resolve before chdir so a relative base_name still refers to
        # the caller's original directory.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    # The zip builder does not accept ownership arguments.
    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if timestamp is not None:
        kwargs['timestamp'] = timestamp

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            log.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
27 changes: 27 additions & 0 deletions setuptools/command/sdist.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from distutils import log
from distutils.errors import DistutilsPlatformError
import distutils.command.sdist as orig
import os
import sys
import time
import io
import contextlib

from setuptools import archive_util
from setuptools.extern import six

from .py36compat import sdist_add_defaults
Expand Down Expand Up @@ -63,6 +66,24 @@ def initialize_options(self):
orig.sdist.initialize_options(self)

self._default_to_gztar()
self._timestamp = int(os.environ.get('SOURCE_DATE_EPOCH',
int(time.time())))

def finalize_options(self):
    """Fill in option defaults and validate the requested formats.

    Defaults 'manifest' to "MANIFEST", 'template' to "MANIFEST.in" and
    'dist_dir' to "dist" when they are None. Raises DistutilsOptionError
    if any entry of 'formats' is not known to archive_util.
    """
    # Imported here because the module-level import from distutils.errors
    # only brings in DistutilsPlatformError; without this the raise below
    # would fail with NameError instead of reporting the bad format.
    from distutils.errors import DistutilsOptionError

    if self.manifest is None:
        self.manifest = "MANIFEST"
    if self.template is None:
        self.template = "MANIFEST.in"

    self.ensure_string_list('formats')

    bad_format = archive_util.check_archive_formats(self.formats)
    if bad_format:
        raise DistutilsOptionError(
            "unknown archive format '%s'" % bad_format)

    if self.dist_dir is None:
        self.dist_dir = "dist"

def _default_to_gztar(self):
# only needed on Python prior to 3.6.
Expand Down Expand Up @@ -154,6 +175,12 @@ def check_readme(self):
', '.join(self.READMES)
)

def make_archive(self, base_name, format, root_dir=None, base_dir=None,
                 owner=None, group=None):
    """Build an archive through setuptools' archive_util.make_archive.

    Forwards the arguments unchanged and adds this command's 'dry_run'
    flag and its stored '_timestamp'. Returns whatever
    archive_util.make_archive returns.
    """
    options = {
        'dry_run': self.dry_run,
        'owner': owner,
        'group': group,
        'timestamp': self._timestamp,
    }
    return archive_util.make_archive(
        base_name, format, root_dir, base_dir, **options)

def make_release_tree(self, base_dir, files):
orig.sdist.make_release_tree(self, base_dir, files)

Expand Down
Loading