From d88ca8eec12f003a13af15f524e9e5c4219d053e Mon Sep 17 00:00:00 2001 From: Manuel Naranjo Date: Mon, 18 Mar 2024 12:20:45 +0100 Subject: [PATCH] fix: stop copying targets, consume directly Since we have downloaded_file_path that points to the actual jar name in our http_file invocations we can directly consume from those targets instead of copying, this has an impact not just in IO as we reduce copying, but also reduces the amount of targets bazel is aware as we don't need an extra node, reducing the amount of RAM needed for rules_jvm_external slightly. In large maven repositories we use at Booking.com we saw a decrease in the disk cache from 2.4GB to 300MB, which aligns with a repository_cache of roughly 2.1GB. The time it takes to do a `bazel build @maven//...` went from 120 seconds to 50 seconds on a i9-13900H using 75% of the cores. --- WORKSPACE | 4 + coursier.bzl | 3 + private/dependency_tree_parser.bzl | 86 +++++++------------ private/extensions/download_pinned_deps.bzl | 4 + private/rules/jvm_import.bzl | 4 +- .../regression_testing_install.json | 14 +-- 6 files changed, 53 insertions(+), 62 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index eedb8a68c..e73c43019 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -142,6 +142,10 @@ maven_install( ], ) +load("@global_exclusion_testing//:defs.bzl", _global_exclusion_testing_pinned_maven_install = "pinned_maven_install") + +_global_exclusion_testing_pinned_maven_install() + maven_install( name = "manifest_stamp_testing", artifacts = [ diff --git a/coursier.bzl b/coursier.bzl index ae9bbdb4d..2fcd64d0d 100644 --- a/coursier.bzl +++ b/coursier.bzl @@ -18,6 +18,7 @@ load( "SUPPORTED_PACKAGING_TYPES", "contains_git_conflict_markers", "escape", + "get_packaging", "is_maven_local_path", ) load("//private:dependency_tree_parser.bzl", "parser") @@ -567,6 +568,8 @@ def _pinned_coursier_fetch_impl(repository_ctx): # File-path is relative defined from http_file traveling to repository_ctx. " netrc = \"../%s/netrc\"," % (repository_ctx.name), ]) + if get_packaging(artifact["coordinates"]) == "exe": + http_files.append(" executable = True,") if len(artifact["urls"]) == 0 and importer.has_m2local(maven_install_json_content) and artifact.get("file") != None: if _is_windows(repository_ctx): user_home = repository_ctx.os.environ.get("USERPROFILE").replace("\\", "/") diff --git a/private/dependency_tree_parser.bzl b/private/dependency_tree_parser.bzl index 150431221..75aaed396 100644 --- a/private/dependency_tree_parser.bzl +++ b/private/dependency_tree_parser.bzl @@ -29,26 +29,6 @@ load( "strip_packaging_and_classifier_and_version", ) -def _genrule_copy_artifact_from_http_file(artifact, visibilities): - http_file_repository = escape(artifact["coordinates"]) - - file = artifact.get("out", artifact["file"]) - - genrule = [ - "genrule(", - " name = \"%s_extension\"," % http_file_repository, - " srcs = [\"@%s//file\"]," % http_file_repository, - " outs = [\"%s\"]," % file, - " cmd = \"cp $< $@\",", - ] - if get_packaging(artifact["coordinates"]) == "exe": - genrule.append(" executable = True,") - genrule.extend([ - " visibility = [%s]" % (",".join(["\"%s\"" % v for v in visibilities])), - ")", - ]) - return "\n".join(genrule) - def _deduplicate_list(items): seen_items = {} unique_items = [] @@ -66,6 +46,13 @@ def _find_repository_url(artifact_url, repositories): longest_match = repository return longest_match +_ARTIFACT_JAR = """ +alias( + name = "{artifact_path}", + actual = "{source}", + visibility = ["//visibility:public"], +)""" + def _generate_target( repository_ctx, jar_versionless_target_labels, @@ -87,7 +74,7 @@ def _generate_target( # (jvm|aar)_import( # packaging = artifact_path.split(".").pop() - if packaging == "jar": + if packaging == "jar" or artifact_path.endswith("//file"): # Regular `java_import` invokes ijar on all JARs, causing some Scala and # Kotlin compile interface JARs to be incorrect. We replace java_import # with a simple jvm_import Starlark rule that skips ijar. @@ -120,7 +107,7 @@ def _generate_target( # srcjar = "https/repo1.maven.org/maven2/org/hamcrest/hamcrest-library/1.3/hamcrest-library-1.3-sources.jar", # is_dylib = False - if packaging == "jar": + if packaging == "jar" or artifact_path.endswith("//file"): target_import_string.append("\tjars = [\"%s\"]," % artifact_path) if srcjar_paths != None and target_label in srcjar_paths: target_import_string.append("\tsrcjar = \"%s\"," % srcjar_paths[target_label]) @@ -133,7 +120,7 @@ def _generate_target( jar_versionless_target_labels.append(target_label) dylib = simple_coord.split(":")[-1] + "." + packaging to_return.append( -""" + """ genrule( name = "{dylib}_extension", srcs = ["@{repository}//file"], @@ -141,10 +128,10 @@ genrule( cmd = "cp $< $@", visibility = ["//visibility:public"], )""".format( - dylib = dylib, - repository = escape(artifact["coordinates"]))) - - + dylib = dylib, + repository = escape(artifact["coordinates"]), + ), + ) # 4. Generate the deps attribute with references to other target labels. # @@ -161,7 +148,6 @@ genrule( else: target_import_string.append("\tdeps = [") - # Dedupe dependencies here. Sometimes coursier will return "x.y:z:aar:version" and "x.y:z:version" in the # same list of dependencies. target_import_labels = [] @@ -320,18 +306,6 @@ genrule( to_return.append("alias(\n\tname = \"%s\",\n\tactual = \"%s\",\n%s)" % (versioned_target_alias_label, target_label, alias_visibility)) - # 11. If using maven_install.json, use a genrule to copy the file from the http_file - # repository into this repository. - # - # genrule( - # name = "org_hamcrest_hamcrest_library_1_3_extension", - # srcs = ["@org_hamcrest_hamcrest_library_1_3//file"], - # outs = ["@maven//:v1/https/repo1.maven.org/maven2/org/hamcrest/hamcrest-library/1.3/hamcrest-library-1.3.jar"], - # cmd = "cp $< $@", - # ) - if repository_ctx.attr.maven_install_json: - to_return.append(_genrule_copy_artifact_from_http_file(artifact, default_visibilities)) - return to_return # Generate BUILD file with jvm_import and aar_import for each artifact in @@ -351,6 +325,8 @@ def _generate_imports(repository_ctx, dependencies, explicit_artifacts, neverlin # seen_imports :: string -> bool seen_imports = {} + added_aliases = {} + # A list of versionless target labels for jar artifacts. This is used for # generating a compatibility layer for repositories. For example, if we generate # @maven//:junit_junit, we also generate @junit_junit//jar as an alias to it. @@ -378,8 +354,6 @@ def _generate_imports(repository_ctx, dependencies, explicit_artifacts, neverlin seen_imports[artifact_path] = True target_label = escape(strip_packaging_and_classifier_and_version(artifact["coordinates"])) srcjar_paths[target_label] = artifact_path - if repository_ctx.attr.maven_install_json: - all_imports.append(_genrule_copy_artifact_from_http_file(artifact, default_visibilities)) # Iterate through the list of artifacts, and generate the target declaration strings. for artifact in dependencies: @@ -401,31 +375,26 @@ def _generate_imports(repository_ctx, dependencies, explicit_artifacts, neverlin elif repository_ctx.attr.fetch_javadoc and get_classifier(artifact["coordinates"]) == "javadoc": seen_imports[target_label] = True all_imports.append( - "filegroup(\n\tname = \"%s\",\n\tsrcs = [\"%s\"],\n\ttags = [\"javadoc\"],\n\tvisibility = [\"//visibility:public\"],\n)" % (target_label, artifact_path), + "filegroup(\n\tname = \"%s\",\n\tsrcs = [\"%s\"],\n\ttags = [\"javadoc\"],\n\tvisibility = [\"//visibility:public\"],\n)" % (target_label, escape(artifact["coordinates"])), ) elif get_packaging(artifact["coordinates"]) in ("exe", "json"): seen_imports[target_label] = True - versioned_target_alias_label = "%s_extension" % escape(artifact["coordinates"]) - all_imports.append( - "alias(\n\tname = \"%s\",\n\tactual = \"%s\",\n\tvisibility = [\"//visibility:public\"],\n)" % (target_label, versioned_target_alias_label), - ) if repository_ctx.attr.maven_install_json: - all_imports.append(_genrule_copy_artifact_from_http_file(artifact, default_visibilities)) + all_imports.append( + "alias(\n\tname = \"%s\",\n\tactual = \"@%s//file\",\n\tvisibility = [\"//visibility:public\"],\n)" % (target_label, escape(artifact["coordinates"])), + ) elif target_label in labels_to_override: # Override target labels with the user provided mapping, instead of generating # a jvm_import/aar_import based on information in dep_tree. seen_imports[target_label] = True all_imports.append( - "alias(\n\tname = \"%s\",\n\tactual = \"%s\",\n\tvisibility = [\"//visibility:public\"],)" % (target_label, labels_to_override.get(target_label)), + "alias(\n\tname = \"%s\",\n\tactual = \"%s\",\n\tvisibility = [\"//visibility:public\"],\n)" % (target_label, labels_to_override.get(target_label)), ) - if repository_ctx.attr.maven_install_json: - # Provide the downloaded artifact as a file target. - all_imports.append(_genrule_copy_artifact_from_http_file(artifact, default_visibilities)) raw_artifact = dict(artifact) raw_artifact["coordinates"] = "original_" + artifact["coordinates"] raw_artifact["maven_coordinates"] = artifact["coordinates"] raw_artifact["out"] = "original_" + artifact["file"] - + raw_artifact["file"] = "@%s//file" % escape(artifact["coordinates"]) all_imports.extend(_generate_target( repository_ctx, jar_versionless_target_labels, @@ -440,6 +409,17 @@ def _generate_imports(repository_ctx, dependencies, explicit_artifacts, neverlin )) elif artifact_path != None: + if artifact["file"] not in added_aliases: + added_aliases[artifact["file"]] = True + repo = escape(artifact["coordinates"]) + if repository_ctx.attr.maven_install_json: + all_imports.append( + _ARTIFACT_JAR.format( + artifact_path = artifact["file"], + source = "@%s//file" % repo, + ), + ) + all_imports.extend(_generate_target( repository_ctx, jar_versionless_target_labels, diff --git a/private/extensions/download_pinned_deps.bzl b/private/extensions/download_pinned_deps.bzl index ff94ef99d..3b8ad6677 100644 --- a/private/extensions/download_pinned_deps.bzl +++ b/private/extensions/download_pinned_deps.bzl @@ -1,4 +1,5 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") +load("//private:coursier_utilities.bzl", "get_packaging") load("//private/rules:urls.bzl", "get_m2local_url") def escape(string): @@ -44,6 +45,9 @@ def download_pinned_deps(mctx, artifacts, http_files, has_m2local): # https://github.com/bazelbuild/rules_jvm_external/issues/1028 attrs["downloaded_file_path"] = "v1/%s" % artifact["file"] + if get_packaging(artifact["coordinates"]) == "exe": + attrs["executable"] = True + http_file(**attrs) return seen_repo_names diff --git a/private/rules/jvm_import.bzl b/private/rules/jvm_import.bzl index 26292aed5..99748695e 100644 --- a/private/rules/jvm_import.bzl +++ b/private/rules/jvm_import.bzl @@ -25,7 +25,7 @@ def _jvm_import_impl(ctx): injar = ctx.files.jars[0] if ctx.attr._stamp_manifest[StampManifestProvider].stamp_enabled: - outjar = ctx.actions.declare_file("processed_" + injar.basename, sibling = injar) + outjar = ctx.actions.declare_file("processed_" + injar.basename) args = ctx.actions.args() args.add_all(["--source", injar, "--output", outjar]) args.add_all(["--manifest-entry", "Target-Label:{target_label}".format(target_label = label)]) @@ -40,7 +40,7 @@ def _jvm_import_impl(ctx): else: outjar = injar - compilejar = ctx.actions.declare_file("header_" + injar.basename, sibling = injar) + compilejar = ctx.actions.declare_file("header_" + injar.basename) args = ctx.actions.args() args.add_all(["--source", outjar, "--output", compilejar]) diff --git a/tests/custom_maven_install/regression_testing_install.json b/tests/custom_maven_install/regression_testing_install.json index e465bd5d3..cc135d99f 100644 --- a/tests/custom_maven_install/regression_testing_install.json +++ b/tests/custom_maven_install/regression_testing_install.json @@ -1,7 +1,7 @@ { "__AUTOGENERATED_FILE_DO_NOT_MODIFY_THIS_FILE_MANUALLY": "THERE_IS_NO_DATA_ONLY_ZUUL", "__INPUT_ARTIFACTS_HASH": 827556415, - "__RESOLVED_ARTIFACTS_HASH": -929249637, + "__RESOLVED_ARTIFACTS_HASH": 198634068, "artifacts": { "android.arch.core:common": { "shasums": { @@ -1477,7 +1477,7 @@ "org.openjfx:javafx-base": { "shasums": { "jar": "c5084a74417a89c69a0c122fae96a4b70bf619fc3d6218ea102a4047ec85ad04", - "mac": "2d8052a08fd2e5d98e1d5a16d724ea5dd02102879de20a193225f57199803983" + "linux": "2ebf6fa2cbbe1c8b4f7780e06e97beb038f644d5ecf9f15a41c5e88ee0ef9cf1" }, "version": "11.0.1" }, @@ -2429,7 +2429,7 @@ "org.objenesis:objenesis" ], "org.openjfx:javafx-base": [ - "org.openjfx:javafx-base:jar:mac" + "org.openjfx:javafx-base:jar:linux" ], "org.ow2.asm:asm-analysis": [ "org.ow2.asm:asm-tree" @@ -5623,7 +5623,7 @@ "org.objenesis.instantiator.util", "org.objenesis.strategy" ], - "org.openjfx:javafx-base:jar:mac": [ + "org.openjfx:javafx-base:jar:linux": [ "com.sun.javafx", "com.sun.javafx.beans", "com.sun.javafx.binding", @@ -6065,7 +6065,7 @@ "org.mockito:mockito-core", "org.objenesis:objenesis", "org.openjfx:javafx-base", - "org.openjfx:javafx-base:jar:mac", + "org.openjfx:javafx-base:jar:linux", "org.ow2.asm:asm", "org.ow2.asm:asm-analysis", "org.ow2.asm:asm-commons", @@ -6334,7 +6334,7 @@ "org.mockito:mockito-core", "org.objenesis:objenesis", "org.openjfx:javafx-base", - "org.openjfx:javafx-base:jar:mac", + "org.openjfx:javafx-base:jar:linux", "org.ow2.asm:asm", "org.ow2.asm:asm-analysis", "org.ow2.asm:asm-commons", @@ -6603,7 +6603,7 @@ "org.mockito:mockito-core", "org.objenesis:objenesis", "org.openjfx:javafx-base", - "org.openjfx:javafx-base:jar:mac", + "org.openjfx:javafx-base:jar:linux", "org.ow2.asm:asm", "org.ow2.asm:asm-analysis", "org.ow2.asm:asm-commons",