From 48da661f42fc1f0d4aaf5620d8fb04fc66ca130e Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Sat, 6 Apr 2024 22:15:04 +0000 Subject: [PATCH] Add unstable2sqlelf script Add a new script that downloads all the tags for the debian distribution as sqlite databases. --- tools/debian-unstable-tags.txt | 97 ++++++++++++++++++++++++++++++++++ tools/docker2sqlelf.py | 26 ++++++--- tools/unstable2sqlelf.py | 61 +++++++++++++++++++++ 3 files changed, 176 insertions(+), 8 deletions(-) create mode 100644 tools/debian-unstable-tags.txt create mode 100755 tools/unstable2sqlelf.py diff --git a/tools/debian-unstable-tags.txt b/tools/debian-unstable-tags.txt new file mode 100644 index 0000000..2717576 --- /dev/null +++ b/tools/debian-unstable-tags.txt @@ -0,0 +1,97 @@ +unstable-20240330 +unstable-20240311 +unstable-20240211 +unstable-20240130 +unstable-20240110 +unstable-20231218 +unstable-20231120 +unstable-20231030 +unstable-20231009 +unstable-20230919 +unstable-20230904 +unstable-20230814 +unstable-20230725 +unstable-20230703 +unstable-20230612 +unstable-20230522 +unstable-20230502 +unstable-20230411 +unstable-20230320 +unstable-20230227 +unstable-20230208 +unstable-20230202 +unstable-20230109 +unstable-20221219 +unstable-20221205 +unstable-20221114 +unstable-20221024 +unstable-20221004 +unstable-20220912 +unstable-20220822 +unstable-20220801 +unstable-20220711 +unstable-20220622 +unstable-20220527 +unstable-20220509 +unstable-20220418 +unstable-20220328 +unstable-20220316 +unstable-20220228 +unstable-20220125 +unstable-20211220 +unstable-20211201 +unstable-20211115 +unstable-20211011 +unstable-20210927 +unstable-20210902 +unstable-20210816 +unstable-20210721 +unstable-20210621 +unstable-20210511 +unstable-20210408 +unstable-20210329 +unstable-20210326 +unstable-20210311 +unstable-20210208 +unstable-20210111 +unstable-20201209 +unstable-20201117 +unstable-20201012 +unstable-20200908 +unstable-20200803 +unstable-20200720 +unstable-20200607 +unstable-20200514 +unstable-20200511 
+unstable-20200422 +unstable-20200414 +unstable-20200327 +unstable-20200224 +unstable-20200130 +unstable-20191224 +unstable-20191118 +unstable-20191014 +unstable-20190910 +unstable-20190812 +unstable-20190708 +unstable-20190610 +unstable-20190506 +unstable-20190326 +unstable-20190228 +unstable-20190204 +unstable-20190122 +unstable-20181226 +unstable-20181112 +unstable-20181011 +unstable-20180831 +unstable-20180716 +unstable-20180625 +unstable-20180426 +unstable-20180312 +unstable-20180213 +unstable-20171210 +unstable-20171009 +unstable-20170907 +unstable-20170723 +unstable-20170620 +unstable-20170606 diff --git a/tools/docker2sqlelf.py b/tools/docker2sqlelf.py index 735b87c..38dc530 100755 --- a/tools/docker2sqlelf.py +++ b/tools/docker2sqlelf.py @@ -15,7 +15,14 @@ LOG = logging.getLogger(__name__) -def docker2sqelf(image_name: str, keep_temp_dir: bool = False) -> str: +def docker2sqlelf(image_name: str, keep_temp_dir: bool, database_path: str) -> None: + """Given a docker image, convert it to a sqlelf database. 
+ + Args: + image_name: The docker image name + keep_temp_dir: Whether to keep the temporary directory + database_path: The path to export the database to + """ client = docker.from_env() temp_dir = tempfile.mkdtemp() @@ -31,6 +38,7 @@ def cleanup() -> None: atexit.register(cleanup) + client.images.pull(image_name) container = client.containers.create(image_name) LOG.info(f"Created container with ID {container.id}") @@ -47,8 +55,6 @@ def cleanup() -> None: container.remove() # pyright: ignore LOG.info(f"Removed container {container.id}") - modified_image_name = image_name.replace(":", "-") - filenames: list[str] = reduce( lambda a, b: a + b, map( @@ -71,11 +77,9 @@ def cleanup() -> None: engine = sql.make_sql_engine(filenames, cache_flags=elf.CacheFlag.ALL()) LOG.info("Dumping the sqlite database") - database_filename = f"{modified_image_name}.sqlite" - engine.dump(database_filename) + engine.dump(database_path) - LOG.info(f"Created database {database_filename}") - return database_filename + LOG.info(f"Created database {database_path}") if __name__ == "__main__": @@ -92,6 +96,12 @@ def cleanup() -> None: parser.add_argument( "-k", "--keep", help="Keep temporary directory", action="store_true" ) + parser.add_argument( + "-d", + "--database", + help="Database path to export to", + default="database.sqlite", + ) args = parser.parse_args() - docker2sqelf(args.image_name, args.keep) + docker2sqlelf(args.image_name, args.keep, args.database) diff --git a/tools/unstable2sqlelf.py b/tools/unstable2sqlelf.py new file mode 100755 index 0000000..b46dfcb --- /dev/null +++ b/tools/unstable2sqlelf.py @@ -0,0 +1,61 @@ +#! 
#! /usr/bin/env python3
"""
Export every Debian unstable docker tag as a sqlelf sqlite database.

Run this script like so:
python -m tools.unstable2sqlelf unstable-sqlite
"""
import argparse
import contextlib
import logging
import os
import re
import sqlite3

LOG = logging.getLogger(__name__)


def add_distribution_timestamp(database_path: str, docker_tag: str) -> None:
    """
    Add a distribution timestamp column to the ELF_HEADERS table in the given database.

    The timestamp is the run of digits following ``unstable-`` in the docker
    tag (e.g. ``unstable-20240330`` -> ``20240330``) and is written to every
    row of ELF_HEADERS.

    Args:
        database_path: Path of the sqlite database to modify in place.
        docker_tag: Docker tag the database was built from.

    Raises:
        ValueError: If ``docker_tag`` does not contain ``unstable-<digits>``.
        sqlite3.OperationalError: If the database has no ELF_HEADERS table.
    """
    LOG.info(f"Adding distribution timestamp to database: {database_path}")
    match = re.search(r"unstable-(\d+)", docker_tag)
    if match is None:
        raise ValueError(f"Invalid docker tag: {docker_tag}")
    timestamp = match.group(1)

    # closing() guarantees the connection is released even when a statement
    # fails; the inner `with conn` commits on success and rolls back on error.
    with contextlib.closing(sqlite3.connect(database_path)) as conn:
        with conn:
            # Column 1 of each table_info row is the column name.
            existing_columns = {
                row[1].lower()
                for row in conn.execute("PRAGMA table_info(ELF_HEADERS)")
            }
            # Only add the column once so the function can be re-run on a
            # database that was already processed.
            if "distributiontimestamp" not in existing_columns:
                conn.execute(
                    "ALTER TABLE ELF_HEADERS ADD COLUMN DistributionTimestamp"
                )
            conn.execute(
                "UPDATE ELF_HEADERS SET DistributionTimestamp = ?", (timestamp,)
            )


def _read_tags(tags_path: str) -> list[str]:
    """Return the non-blank, whitespace-stripped lines of the tags file."""
    with open(tags_path, encoding="utf-8") as file:
        return [line.strip() for line in file if line.strip()]


def main() -> None:
    """Convert every tag listed in debian-unstable-tags.txt to a database."""
    # Setup the logging config
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
    )
    parser = argparse.ArgumentParser(
        description="Download all unstable Debian distributions as sqlite databases."
    )
    parser.add_argument(
        "output_directory",
        # Without nargs="?" a positional is always required and the default
        # below would never be used.
        nargs="?",
        help="Output directory to store the files.",
        default="unstable-sqlite",
    )
    args = parser.parse_args()

    # Imported here rather than at module level so that --help and
    # add_distribution_timestamp work without the docker dependencies.
    from tools.docker2sqlelf import docker2sqlelf

    LOG.info(f"Creating output directory: {args.output_directory}")
    os.makedirs(args.output_directory, exist_ok=True)

    tags = _read_tags(
        os.path.join(os.path.dirname(__file__), "debian-unstable-tags.txt")
    )
    for tag in tags:
        LOG.info(f"Processing tag: {tag}")
        db_path = os.path.join(args.output_directory, f"debian-{tag}.sqlite")
        docker2sqlelf(f"debian:{tag}", keep_temp_dir=False, database_path=db_path)
        add_distribution_timestamp(db_path, tag)


if __name__ == "__main__":
    main()