diff --git a/conda_forge_tick/auto_tick.py b/conda_forge_tick/auto_tick.py index c4d94b9ca..229c49e70 100644 --- a/conda_forge_tick/auto_tick.py +++ b/conda_forge_tick/auto_tick.py @@ -9,7 +9,7 @@ import traceback import typing from dataclasses import dataclass -from typing import Literal, cast +from typing import AnyStr, Literal, cast from urllib.error import URLError from uuid import uuid4 @@ -41,6 +41,7 @@ ) from conda_forge_tick.lazy_json_backends import ( LazyJson, + does_key_exist_in_hashmap, get_all_keys_for_hashmap, lazy_json_transaction, remove_key_for_hashmap, @@ -50,7 +51,6 @@ PR_LIMIT, load_migrators, ) -from conda_forge_tick.migration_runner import run_migration from conda_forge_tick.migrators import MigrationYaml, Migrator, Version from conda_forge_tick.migrators.version import VersionMigrationError from conda_forge_tick.os_utils import eval_cmd @@ -69,6 +69,7 @@ sanitize_string, ) +from .migration_runner import run_migration from .migrators_types import MigrationUidTypedDict from .models.pr_json import PullRequestData, PullRequestInfoSpecial, PullRequestState @@ -847,10 +848,11 @@ def _run_migrator_on_feedstock_branch( return good_prs, break_loop -def _is_migrator_done(_mg_start, good_prs, time_per, pr_limit): +def _is_migrator_done( + _mg_start, good_prs, time_per, pr_limit, git_backend: GitPlatformBackend +): curr_time = time.time() - backend = github_backend() - api_req = backend.get_api_requests_left() + api_req = git_backend.get_api_requests_left() if curr_time - START_TIME > TIMEOUT: logger.info( @@ -885,7 +887,27 @@ def _is_migrator_done(_mg_start, good_prs, time_per, pr_limit): return False -def _run_migrator(migrator, mctx, temp, time_per, git_backend: GitPlatformBackend): +def _run_migrator( + migrator: Migrator, + mctx: MigratorSessionContext, + temp: list[AnyStr], + time_per: float, + git_backend: GitPlatformBackend, + feedstock: str | None = None, +) -> int: + """ + Run a migrator. + + :param migrator: The migrator to run. + :param mctx: The migrator session context. + :param temp: The list of temporary files. + :param time_per: The time limit of this migrator. + :param git_backend: The GitPlatformBackend instance to use. + :param feedstock: The feedstock to update, if None, all feedstocks are updated. Does not contain the `-feedstock` + suffix. + + :return: The number of "good" PRs created by the migrator. + """ _mg_start = time.time() migrator_name = get_migrator_name(migrator) @@ -907,6 +929,15 @@ def _run_migrator(migrator, mctx, temp, time_per, git_backend: GitPlatformBacken possible_nodes = list(migrator.order(effective_graph, mctx.graph)) + if feedstock: + if feedstock not in possible_nodes: + logger.info( + f"Feedstock {feedstock}-feedstock is not a candidate for migration of {migrator_name}. " + f"If you want to investigate this, run the make-migrators command." + ) + return 0 + possible_nodes = [feedstock] + # version debugging info if isinstance(migrator, Version): print("possible version migrations:", flush=True) @@ -939,7 +970,9 @@ def _run_migrator(migrator, mctx, temp, time_per, git_backend: GitPlatformBacken flush=True, ) - if _is_migrator_done(_mg_start, good_prs, time_per, migrator.pr_limit): + if _is_migrator_done( + _mg_start, good_prs, time_per, migrator.pr_limit, git_backend + ): return 0 for node_name in possible_nodes: @@ -956,7 +989,9 @@ def _run_migrator(migrator, mctx, temp, time_per, git_backend: GitPlatformBacken ): # Don't let CI timeout, break ahead of the timeout so we make certain # to write to the repo - if _is_migrator_done(_mg_start, good_prs, time_per, migrator.pr_limit): + if _is_migrator_done( + _mg_start, good_prs, time_per, migrator.pr_limit, git_backend + ): break base_branches = migrator.get_possible_feedstock_branches(attrs) @@ -1051,18 +1086,27 @@ def _setup_limits(): resource.setrlimit(resource.RLIMIT_AS, (limit_int, limit_int)) -def _update_nodes_with_bot_rerun(gx: nx.DiGraph): - """Go through all the open PRs and check if they are rerun""" +def _update_nodes_with_bot_rerun(gx: nx.DiGraph, feedstock: str | None = None): + """ + Go through all the open PRs and check if they are rerun + + :param gx: the dependency graph + :param feedstock: The feedstock to update. If None, all feedstocks are updated. Does not contain the `-feedstock` + suffix. + """ print("processing bot-rerun labels", flush=True) - for i, (name, node) in enumerate(gx.nodes.items()): + nodes = gx.nodes.items() if not feedstock else [(feedstock, gx.nodes[feedstock])] + + for i, (name, node) in enumerate(nodes): # logger.info( # f"node: {i} memory usage: " # f"{psutil.Process().memory_info().rss // 1024 ** 2}MB", # ) with node["payload"] as payload: if payload.get("archived", False): + logger.debug(f"skipping archived package {name}") continue with payload["pr_info"] as pri, payload["version_pr_info"] as vpri: # reset bad @@ -1112,12 +1156,24 @@ def _filter_ignored_versions(attrs, version): return version -def _update_nodes_with_new_versions(gx): - """Updates every node with it's new version (when available)""" +def _update_nodes_with_new_versions(gx: nx.DiGraph, feedstock: str | None = None): + """ + Updates every node with its new version (when available) + + :param gx: the dependency graph + :param feedstock: the feedstock to update, if None, all feedstocks are updated. Does not contain the `-feedstock` + suffix. + """ print("updating nodes with new versions", flush=True) - version_nodes = get_all_keys_for_hashmap("versions") + if feedstock and not does_key_exist_in_hashmap("versions", feedstock): + logger.warning(f"Feedstock {feedstock}-feedstock not found in versions hashmap") + return + + version_nodes = ( + get_all_keys_for_hashmap("versions") if not feedstock else [feedstock] + ) for node in version_nodes: version_data = LazyJson(f"versions/{node}.json").data @@ -1143,13 +1199,42 @@ def _update_nodes_with_new_versions(gx): vpri["new_version"] = version_from_data -def _remove_closed_pr_json(): +def _remove_closed_pr_json(feedstock: str | None = None): + """ + Remove the pull request information for closed PRs. + + :param feedstock: The feedstock to remove the PR information for. If None, all PR information is removed. If you pass + a feedstock, closed pr_json files are not removed because this would require iterating all pr_json files. Does not + contain the `-feedstock` suffix. + """ print("collapsing closed PR json", flush=True) + if feedstock: + pr_info_nodes = ( + [feedstock] if does_key_exist_in_hashmap("pr_info", feedstock) else [] + ) + version_pr_info_nodes = ( + [feedstock] + if does_key_exist_in_hashmap("version_pr_info", feedstock) + else [] + ) + + if not pr_info_nodes: + logger.warning( + f"Feedstock {feedstock}-feedstock not found in pr_info hashmap" + ) + if not version_pr_info_nodes: + logger.warning( + f"Feedstock {feedstock}-feedstock not found in version_pr_info hashmap" + ) + else: + pr_info_nodes = get_all_keys_for_hashmap("pr_info") + version_pr_info_nodes = get_all_keys_for_hashmap("version_pr_info") + # first we go from nodes to pr json and update the pr info and remove the data name_nodes = [ - ("pr_info", get_all_keys_for_hashmap("pr_info")), - ("version_pr_info", get_all_keys_for_hashmap("version_pr_info")), + ("pr_info", pr_info_nodes), + ("version_pr_info", version_pr_info_nodes), ] for name, nodes in name_nodes: for node in nodes: @@ -1182,6 +1267,11 @@ def _remove_closed_pr_json(): # at this point, any json blob referenced in the pr info is state != closed # so we can remove anything that is empty or closed + if feedstock: + logger.info( + "Since you requested a run for a specific feedstock, we are not removing closed pr_json files." + ) + return nodes = get_all_keys_for_hashmap("pr_json") for node in nodes: pr = LazyJson(f"pr_json/{node}.json") @@ -1192,22 +1282,32 @@ def _remove_closed_pr_json(): ) -def _update_graph_with_pr_info(): - _remove_closed_pr_json() +def _update_graph_with_pr_info(feedstock: str | None = None): + """ + :param feedstock: The feedstock to update the graph for. If None, all feedstocks are updated. Does not contain the + `-feedstock` suffix. + """ + _remove_closed_pr_json(feedstock) gx = load_existing_graph() - _update_nodes_with_bot_rerun(gx) - _update_nodes_with_new_versions(gx) + _update_nodes_with_bot_rerun(gx, feedstock) + _update_nodes_with_new_versions(gx, feedstock) dump_graph(gx) -def main(ctx: CliContext) -> None: +def main(ctx: CliContext, feedstock: str | None = None) -> None: + """ + Run the main bot logic. + + :param ctx: The CLI context. + :param feedstock: If not None, only the given feedstock is updated. Does not contain the `-feedstock` suffix. + """ global START_TIME START_TIME = time.time() _setup_limits() with fold_log_lines("updating graph with PR info"): - _update_graph_with_pr_info() + _update_graph_with_pr_info(feedstock) deploy(ctx, dirs_to_deploy=["version_pr_info", "pr_json", "pr_info"]) # record tmp dir so we can be sure to clean it later @@ -1227,6 +1327,7 @@ def main(ctx: CliContext) -> None: smithy_version=smithy_version, pinning_version=pinning_version, ) + # TODO: this does not support --online migrators = load_migrators() # compute the time per migrator @@ -1260,7 +1361,7 @@ def main(ctx: CliContext) -> None: for mg_ind, migrator in enumerate(migrators): good_prs = _run_migrator( - migrator, mctx, temp, time_per_migrator[mg_ind], git_backend + migrator, mctx, temp, time_per_migrator[mg_ind], git_backend, feedstock ) if good_prs > 0: pass @@ -1275,5 +1376,5 @@ def main(ctx: CliContext) -> None: # ], # ) - logger.info("API Calls Remaining: %d", github_backend().get_api_requests_left()) + logger.info("API Calls Remaining: %s", git_backend.get_api_requests_left()) logger.info("Done") diff --git a/conda_forge_tick/cli.py b/conda_forge_tick/cli.py index a8e56e51f..38977fd1f 100644 --- a/conda_forge_tick/cli.py +++ b/conda_forge_tick/cli.py @@ -1,7 +1,6 @@ import logging import os import time -from typing import Optional import click from click import Context, IntRange @@ -131,31 +130,46 @@ def make_graph( @job_option @n_jobs_option @click.argument( - "package", + "feedstock", required=False, + default=None, + type=str, ) @pass_context def update_upstream_versions( - ctx: CliContext, job: int, n_jobs: int, package: Optional[str] + ctx: CliContext, job: int, n_jobs: int, feedstock: str | None ) -> None: """ Update the upstream versions of feedstocks in the graph. - If PACKAGE is given, only update that package, otherwise update all packages. + If FEEDSTOCK is given, only update that feedstock, otherwise update all feedstocks. + The FEEDSTOCK argument should omit the `-feedstock` suffix. """ from . import update_upstream_versions check_job_param_relative(job, n_jobs) - update_upstream_versions.main(ctx, job=job, n_jobs=n_jobs, package=package) + update_upstream_versions.main(ctx, job=job, n_jobs=n_jobs, feedstock=feedstock) @main.command(name="auto-tick") +@click.argument( + "feedstock", + required=False, + default=None, + type=str, +) @pass_context -def auto_tick(ctx: CliContext) -> None: +def auto_tick(ctx: CliContext, feedstock: str | None) -> None: + """ + Run the main bot logic that runs all migrations, updates the graph accordingly, and opens the corresponding PRs. + + If FEEDSTOCK is given, only run the bot for that feedstock, otherwise run the bot for all feedstocks. + The FEEDSTOCK argument should omit the `-feedstock` suffix. + """ from . import auto_tick - auto_tick.main(ctx) + auto_tick.main(ctx, feedstock=feedstock) @main.command(name="make-status-report") diff --git a/conda_forge_tick/git_utils.py b/conda_forge_tick/git_utils.py index cc001d05e..99a5bef95 100644 --- a/conda_forge_tick/git_utils.py +++ b/conda_forge_tick/git_utils.py @@ -1073,7 +1073,7 @@ def format_field(key: str, value: str) -> str: return f"{key}:\n{value}" return f"{key}: {value}" - output += "".join(format_field(key, value) for key, value in data.items()) + output += "\n".join(format_field(key, value) for key, value in data.items()) output += f"\n{border}" logger.debug(output) diff --git a/conda_forge_tick/lazy_json_backends.py b/conda_forge_tick/lazy_json_backends.py index 9bae2c57e..db13fc333 100644 --- a/conda_forge_tick/lazy_json_backends.py +++ b/conda_forge_tick/lazy_json_backends.py @@ -630,6 +630,17 @@ def get_all_keys_for_hashmap(name): return backend.hkeys(name) +def does_key_exist_in_hashmap(name: str, key: str) -> bool: + """ + Check if a key exists in a hashmap, using the primary backend. + :param name: The hashmap name. + :param key: The key to check. + :return: True if the key exists, False otherwise. + """ + backend = LAZY_JSON_BACKENDS[CF_TICK_GRAPH_DATA_PRIMARY_BACKEND]() + return backend.hexists(name, key) + + @contextlib.contextmanager def lazy_json_transaction(): try: diff --git a/conda_forge_tick/make_migrators.py b/conda_forge_tick/make_migrators.py index ca3b823a5..f6cc11700 100644 --- a/conda_forge_tick/make_migrators.py +++ b/conda_forge_tick/make_migrators.py @@ -753,7 +753,6 @@ def add_noarch_python_min_migrator( def initialize_migrators( gx: nx.DiGraph, - dry_run: bool = False, ) -> MutableSequence[Migrator]: migrators: List[Migrator] = [] @@ -883,10 +882,7 @@ def load_migrators(skip_paused: bool = True) -> MutableSequence[Migrator]: def main(ctx: CliContext) -> None: gx = load_existing_graph() - migrators = initialize_migrators( - gx, - dry_run=ctx.dry_run, - ) + migrators = initialize_migrators(gx) with ( fold_log_lines("dumping migrators to JSON"), lazy_json_override_backends( diff --git a/conda_forge_tick/update_upstream_versions.py b/conda_forge_tick/update_upstream_versions.py index 3110462e1..452f7f663 100644 --- a/conda_forge_tick/update_upstream_versions.py +++ b/conda_forge_tick/update_upstream_versions.py @@ -436,7 +436,7 @@ def update_upstream_versions( debug: bool = False, job=1, n_jobs=1, - package: Optional[str] = None, + feedstock: Optional[str] = None, ) -> None: """ Update the upstream versions of packages. @@ -445,15 +445,15 @@ def update_upstream_versions( :param debug: Whether to run in debug mode :param job: The job number :param n_jobs: The total number of jobs - :param package: The package to update. If None, update all packages. + :param feedstock: The feedstock to update. If None, update all feedstocks. Does not contain the `-feedstock` suffix. """ - if package and package not in gx.nodes: - logger.error(f"Package {package} not found in graph. Exiting.") + if feedstock and feedstock not in gx.nodes: + logger.error(f"Feedstock {feedstock}-feedstock not found in graph. Exiting.") return # In the future, we should have some sort of typed graph structure all_nodes: Iterable[Tuple[str, Mapping[str, Mapping]]] = ( - [(package, gx.nodes.get(package))] if package else gx.nodes.items() + [(feedstock, gx.nodes.get(feedstock))] if feedstock else gx.nodes.items() ) job_nodes = filter_nodes_for_job(all_nodes, job, n_jobs) @@ -481,7 +481,7 @@ def extract_payload(node: Tuple[str, Mapping[str, Mapping]]) -> Tuple[str, Mappi updater = ( _update_upstream_versions_sequential - if debug or package + if debug or feedstock else _update_upstream_versions_process_pool ) @@ -493,14 +493,14 @@ def main( ctx: CliContext, job: int = 1, n_jobs: int = 1, - package: Optional[str] = None, + feedstock: Optional[str] = None, ) -> None: """ Main function for updating the upstream versions of packages. :param ctx: The CLI context. :param job: The job number. :param n_jobs: The total number of jobs. - :param package: The package to update. If None, update all packages. + :param feedstock: The feedstock to update. If None, update all feedstocks. Does not contain the `-feedstock` suffix. """ logger.info("Reading graph") # Graph enabled for inspection @@ -514,5 +514,5 @@ def main( debug=ctx.debug, job=job, n_jobs=n_jobs, - package=package, + feedstock=feedstock, ) diff --git a/tests/test_cli.py b/tests/test_cli.py index ff2bd62ca..a671e90cd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -141,19 +141,34 @@ def test_cli_mock_commands_pass_context( @pytest.mark.parametrize("job, n_jobs", [(1, 5), (3, 7), (4, 4)]) -@pytest.mark.parametrize("package", ["foo", "bar", "baz"]) +@pytest.mark.parametrize("feedstock", ["foo", "bar"]) @mock.patch("conda_forge_tick.update_upstream_versions.main") def test_cli_mock_update_upstream_versions( - cmd_mock: MagicMock, job: int, n_jobs: int, package: str + cmd_mock: MagicMock, job: int, n_jobs: int, feedstock: str ): runner = CliRunner() result = runner.invoke( main, - ["update-upstream-versions", f"--job={job}", f"--n-jobs={n_jobs}", package], + ["update-upstream-versions", f"--job={job}", f"--n-jobs={n_jobs}", feedstock], ) assert result.exit_code == 0 - cmd_mock.assert_called_once_with(mock.ANY, job=job, n_jobs=n_jobs, package=package) + cmd_mock.assert_called_once_with( + mock.ANY, job=job, n_jobs=n_jobs, feedstock=feedstock + ) + + +@pytest.mark.parametrize("feedstock", ["foo", "bar"]) +@mock.patch("conda_forge_tick.auto_tick.main") +def test_cli_mock_auto_tick(cmd_mock: MagicMock, feedstock: str): + runner = CliRunner() + result = runner.invoke( + main, + ["auto-tick", feedstock], + ) + + assert result.exit_code == 0 + cmd_mock.assert_called_once_with(mock.ANY, feedstock=feedstock) @pytest.mark.parametrize("job, n_jobs", [(1, 5), (3, 7), (4, 4)]) diff --git a/tests/test_lazy_json_backends.py b/tests/test_lazy_json_backends.py index 828d65fcf..430b933ec 100644 --- a/tests/test_lazy_json_backends.py +++ b/tests/test_lazy_json_backends.py @@ -16,6 +16,7 @@ GithubLazyJsonBackend, LazyJson, MongoDBLazyJsonBackend, + does_key_exist_in_hashmap, dump, dumps, get_all_keys_for_hashmap, @@ -604,6 +605,17 @@ def test_lazy_json_backends_hashmap(tmpdir): assert get_all_keys_for_hashmap("lazy_json") == [] +def test_does_key_exist_in_hashmap(tmpdir): + with pushd(tmpdir): + LazyJson("vanilla.json") + LazyJson("node_attrs/chocolate.json") + LazyJson("versions/strawberry.json") + + assert does_key_exist_in_hashmap("node_attrs", "chocolate") + assert not does_key_exist_in_hashmap("node_attrs", "vanilla") + assert not does_key_exist_in_hashmap("node_attrs", "strawberry") + + def test_github_base_url() -> None: github_backend = GithubLazyJsonBackend() assert github_backend.base_url == CF_TICK_GRAPH_GITHUB_BACKEND_BASE_URL + "/" diff --git a/tests/test_upstream_versions.py b/tests/test_upstream_versions.py index 652273bb3..e3908f00d 100644 --- a/tests/test_upstream_versions.py +++ b/tests/test_upstream_versions.py @@ -1223,15 +1223,15 @@ def test_include_node_bad_pull_request_upstream(caplog): def test_update_upstream_versions_nonexistent_package(caplog): - package_name = "nonexistent-package" + feedstock_name = "nonexistent" caplog.set_level(logging.DEBUG) update_upstream_versions( nx.DiGraph(), - package=package_name, + feedstock=feedstock_name, ) - assert "Package nonexistent-package not found in graph" in caplog.text + assert f"Feedstock {feedstock_name}-feedstock not found in graph" in caplog.text @mock.patch("conda_forge_tick.update_upstream_versions.filter_nodes_for_job") @@ -1640,12 +1640,12 @@ def test_main( ctx = CliContext() ctx.debug = debug - main(ctx, job=3, n_jobs=10, package="testpackage") + main(ctx, job=3, n_jobs=10, feedstock="testpackage") makedirs_mock.assert_called_once_with("versions", exist_ok=True) load_graph_mock.assert_called_once() update_upstream_versions_mock.assert_called_once_with( - gx, debug=debug, job=3, n_jobs=10, package="testpackage" + gx, debug=debug, job=3, n_jobs=10, feedstock="testpackage" )