Skip to content

Commit

Permalink
Back out "Revert D51666197: [buck2][erlang] precalculate dependencies…
Browse files Browse the repository at this point in the history
… for erlang builds"

Summary: This was reverted yesterday, since we broke a test (which didn't run on the original diff)

Reviewed By: ir-regular

Differential Revision: D51747938

fbshipit-source-id: 01d1ba2408a9ae126348be43dd0df13210589884
  • Loading branch information
TheGeorge authored and facebook-github-bot committed Dec 1, 2023
1 parent 0fb3b84 commit 147a930
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 138 deletions.
147 changes: 82 additions & 65 deletions prelude/erlang/erlang_build.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,11 @@ def _generate_beam_artifacts(
input_mapping = build_environment.input_mapping,
)

dep_info_content = to_term_args({paths.basename(artifact): {"dep_file": dep_file, "path": artifact} for artifact, dep_file in updated_build_environment.deps_files.items()})
dep_info_file = ctx.actions.write(_dep_info_name(toolchain), dep_info_content)

for erl in src_artifacts:
_build_erl(ctx, toolchain, updated_build_environment, erl, beam_mapping[module_name(erl)])
_build_erl(ctx, toolchain, updated_build_environment, dep_info_file, erl, beam_mapping[module_name(erl)])

return updated_build_environment

Expand Down Expand Up @@ -426,13 +429,29 @@ def _build_erl(
ctx: AnalysisContext,
toolchain: Toolchain,
build_environment: BuildEnvironment,
dep_info_file: Artifact,
src: Artifact,
output: Artifact) -> None:
"""Compile erl files into beams."""

trampoline = toolchain.erlc_trampoline
erlc = toolchain.otp_binaries.erlc

final_dep_file = ctx.actions.declare_output(_dep_final_name(toolchain, src))
finalize_deps_cmd = cmd_args(
toolchain.otp_binaries.escript,
toolchain.dependency_finalizer,
src,
dep_info_file,
final_dep_file.as_output(),
)
finalize_deps_cmd.hidden(build_environment.deps_files.values())
ctx.actions.run(
finalize_deps_cmd,
category = "dependency_finalizer",
identifier = action_identifier(toolchain, src.basename),
)

def dynamic_lambda(ctx: AnalysisContext, artifacts, outputs):
erl_opts = _get_erl_opts(ctx, toolchain, src)
erlc_cmd = cmd_args(
Expand All @@ -449,7 +468,7 @@ def _build_erl(
src,
],
)
erlc_cmd, mapping = _add_dependencies_to_args(ctx, artifacts, [outputs[output].short_path], {}, {}, erlc_cmd, build_environment)
erlc_cmd, mapping = _add_dependencies_to_args(artifacts, final_dep_file, erlc_cmd, build_environment)
erlc_cmd = _add_full_dependencies(erlc_cmd, build_environment)
_run_with_env(
ctx,
Expand All @@ -461,7 +480,7 @@ def _build_erl(
always_print_stderr = True,
)

ctx.actions.dynamic_output(dynamic = build_environment.deps_files.values(), inputs = [src], outputs = [output], f = dynamic_lambda)
ctx.actions.dynamic_output(dynamic = [final_dep_file], inputs = [src], outputs = [output], f = dynamic_lambda)
return None

def _build_edoc(
Expand Down Expand Up @@ -512,82 +531,66 @@ def _build_edoc(
return None

def _add_dependencies_to_args(
ctx: AnalysisContext,
artifacts,
queue: list[str],
done: dict[str, bool],
input_mapping: dict[str, (bool, [str, Artifact])],
final_dep_file: Artifact,
args: cmd_args,
build_environment: BuildEnvironment) -> (cmd_args, dict[str, (bool, [str, Artifact])]):
"""Add the transitive closure of all per-file Erlang dependencies as specified in the deps files to the `args` with .hidden.
This function traverses the deps specified in the deps files and adds all discovered dependencies.
"""
if not queue:
return args, input_mapping
input_mapping = {}
deps = artifacts[final_dep_file].read_json()

# silently ignore not found dependencies and let erlc report the not found stuff
for dep in deps:
artifact = None
file = dep["file"]
if dep["type"] == "include_lib":
app = dep["app"]
if (app, file) in build_environment.includes:
artifact = build_environment.includes[(app, file)]
input_mapping[file] = (True, build_environment.input_mapping[artifact.basename])
else:
# the file might come from OTP
input_mapping[file] = (False, paths.join(app, "include", file))
continue

next_round = []
elif dep["type"] == "include":
# these includes can either reside in the private includes
# or the public ones
if file in build_environment.private_includes:
artifact = build_environment.private_includes[file]

for key in queue:
if key not in build_environment.deps_files:
continue
deps = artifacts[build_environment.deps_files[key]].read_json()

# silently ignore not found dependencies and let erlc report the not found stuff
for dep in deps:
file = dep["file"]
if dep["type"] == "include_lib":
app = dep["app"]
if (app, file) in build_environment.includes:
artifact = build_environment.includes[(app, file)]
if artifact.basename in build_environment.input_mapping:
input_mapping[file] = (True, build_environment.input_mapping[artifact.basename])
else:
# at this point we don't know the application the include is coming
# from, and have to check all public include directories
candidates = [key for key in build_environment.includes.keys() if key[1] == file]
if len(candidates) > 1:
offending_apps = [app for (app, _) in candidates]
fail("-include(\"%s\") is ambiguous as the following applications declare public includes with the same name: %s" % (file, offending_apps))
elif candidates:
artifact = build_environment.includes[candidates[0]]
input_mapping[file] = (True, build_environment.input_mapping[artifact.basename])
else:
# the file might come from OTP
input_mapping[file] = (False, paths.join(app, "include", file))
continue

elif dep["type"] == "include":
# these includes can either reside in the private includes
# or the public ones
if file in build_environment.private_includes:
artifact = build_environment.private_includes[file]

if artifact.basename in build_environment.input_mapping:
input_mapping[file] = (True, build_environment.input_mapping[artifact.basename])
else:
# at this point we don't know the application the include is coming
# from, and have to check all public include directories
candidates = [key for key in build_environment.includes.keys() if key[1] == file]
if len(candidates) > 1:
offending_apps = [app for (app, _) in candidates]
fail("-include(\"%s\") is ambiguous as the following applications declare public includes with the same name: %s" % (file, offending_apps))
elif candidates:
artifact = build_environment.includes[candidates[0]]
input_mapping[file] = (True, build_environment.input_mapping[artifact.basename])
else:
# we didn't find the include, build will fail during compile
continue

elif (dep["type"] == "behaviour" or
dep["type"] == "parse_transform" or
dep["type"] == "manual_dependency"):
module, _ = paths.split_extension(file)
if module in build_environment.beams:
artifact = build_environment.beams[module]
else:
# we didn't find the include, build will fail during compile
continue

elif (dep["type"] == "behaviour" or
dep["type"] == "parse_transform" or
dep["type"] == "manual_dependency"):
module, _ = paths.split_extension(file)
if module in build_environment.beams:
artifact = build_environment.beams[module]
else:
fail("unrecognized dependency type %s", (dep["type"]))
continue

next_key = artifact.short_path
if next_key not in done:
done[next_key] = True
next_round.append(next_key)
args.hidden(artifact)
else:
fail("unrecognized dependency type %s", (dep["type"]))

args.hidden(artifact)

# STARLARK does not have unbound loops (while loops) and we use recursion instead.
return _add_dependencies_to_args(ctx, artifacts, next_round, done, input_mapping, args, build_environment)
return args, input_mapping

def _add_full_dependencies(erlc_cmd: cmd_args, build_environment: BuildEnvironment) -> cmd_args:
for artifact in build_environment.full_dependencies:
Expand Down Expand Up @@ -746,6 +749,20 @@ def _dep_file_name(toolchain: Toolchain, src: Artifact) -> str:
src.short_path + ".dep",
)

def _dep_final_name(toolchain: Toolchain, src: Artifact) -> str:
    """Path (relative to the build dir) of the finalized dependency file for `src`."""
    dep_files_dir = paths.join(_build_dir(toolchain), "__dep_files")
    return paths.join(dep_files_dir, src.short_path + ".final.dep")

def _dep_info_name(toolchain: Toolchain) -> str:
    """Path (relative to the build dir) of the application-wide dependency info file."""
    dep_files_dir = paths.join(_build_dir(toolchain), "__dep_files")
    return paths.join(dep_files_dir, "app.info.dep")

def _merge(a: dict, b: dict) -> dict:
""" sefely merge two dict """
r = dict(a)
Expand Down
1 change: 1 addition & 0 deletions prelude/erlang/erlang_info.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ ErlangToolchainInfo = provider(
"escript_builder": provider_field(typing.Any, default = None),
# analyzing .(h|e)rl dependencies
"dependency_analyzer": provider_field(typing.Any, default = None),
"dependency_finalizer": provider_field(typing.Any, default = None),
# trampoline rerouting stdout to stderr
"erlc_trampoline": provider_field(typing.Any, default = None),
# name to parse_transform artifacts mapping for core parse_transforms (that are always used) and
Expand Down
6 changes: 6 additions & 0 deletions prelude/erlang/erlang_toolchain.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Toolchain = record(
app_file_script = field(Artifact),
boot_script_builder = field(Artifact),
dependency_analyzer = field(Artifact),
dependency_finalizer = field(Artifact),
erlc_trampoline = field(Artifact),
escript_builder = field(Artifact),
otp_binaries = field(Tools),
Expand All @@ -60,6 +61,7 @@ ToolchainUtillInfo = provider(
"boot_script_builder": provider_field(typing.Any, default = None),
"core_parse_transforms": provider_field(typing.Any, default = None),
"dependency_analyzer": provider_field(typing.Any, default = None),
"dependency_finalizer": provider_field(typing.Any, default = None),
"edoc": provider_field(typing.Any, default = None),
"erlc_trampoline": provider_field(typing.Any, default = None),
"escript_builder": provider_field(typing.Any, default = None),
Expand Down Expand Up @@ -91,6 +93,7 @@ def _multi_version_toolchain_impl(ctx: AnalysisContext) -> list[Provider]:
app_file_script = toolchain_info.app_file_script,
boot_script_builder = toolchain_info.boot_script_builder,
dependency_analyzer = toolchain_info.dependency_analyzer,
dependency_finalizer = toolchain_info.dependency_finalizer,
erl_opts = toolchain_info.erl_opts,
erlc_trampoline = toolchain_info.erlc_trampoline,
escript_builder = toolchain_info.escript_builder,
Expand Down Expand Up @@ -180,6 +183,7 @@ def _config_erlang_toolchain_impl(ctx: AnalysisContext) -> list[Provider]:
app_file_script = utils.app_src_script,
boot_script_builder = utils.boot_script_builder,
dependency_analyzer = utils.dependency_analyzer,
dependency_finalizer = utils.dependency_finalizer,
erl_opts = erl_opts,
env = ctx.attrs.env,
emu_flags = emu_flags,
Expand Down Expand Up @@ -350,6 +354,7 @@ def _toolchain_utils(ctx: AnalysisContext) -> list[Provider]:
boot_script_builder = ctx.attrs.boot_script_builder,
core_parse_transforms = ctx.attrs.core_parse_transforms,
dependency_analyzer = ctx.attrs.dependency_analyzer,
dependency_finalizer = ctx.attrs.dependency_finalizer,
edoc = ctx.attrs.edoc,
erlc_trampoline = ctx.attrs.erlc_trampoline,
escript_builder = ctx.attrs.escript_builder,
Expand All @@ -366,6 +371,7 @@ toolchain_utilities = rule(
"boot_script_builder": attrs.source(),
"core_parse_transforms": attrs.list(attrs.dep()),
"dependency_analyzer": attrs.source(),
"dependency_finalizer": attrs.source(),
"edoc": attrs.source(),
"erlc_trampoline": attrs.source(),
"escript_builder": attrs.source(),
Expand Down
1 change: 1 addition & 0 deletions prelude/erlang/toolchain/BUCK.v2
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ toolchain_utilities(
boot_script_builder = "boot_script_builder.escript",
core_parse_transforms = [":transform_project_root"],
dependency_analyzer = "dependency_analyzer.escript",
dependency_finalizer = "dependency_finalizer.escript",
edoc = "edoc_cli.escript",
erlc_trampoline = "erlc_trampoline.sh",
escript_builder = "escript_builder.escript",
Expand Down
89 changes: 16 additions & 73 deletions prelude/erlang/toolchain/dependency_analyzer.escript
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,22 @@
%%% Extract direct dependencies from a given erl or hrl file
%%%
%%% usage:
%%% dependency_analyzer.escript some_file.(h|e)rl [out.json]
%%% dependency_analyzer.escript some_file.(h|e)rl [out.term]
%%%
%%% The output of the tool is written either to stdout,
%%% or a given output file. The JSON format is as follows:
%%% or a given output file. The format is as follows and intended to
%%% be consumed by other file:consult/1:
%%% ```
%%% [{"type": "include"
%%% | "include_lib"
%%% | "behaviour"
%%% | "parse_transform"
%%% | "manual_dependency",
%%% "file": "header_or_source_file.(h|e)rl",
%%% ["app": "application"][only for "include_lib"]
%%% },
%%% [#{"type" := "include"
%%% | "include_lib"
%%% | "behaviour"
%%% | "parse_transform"
%%% | "manual_dependency",
%%% "file" := "header_or_source_file.(h|e)rl",
%%% ["app" => "application"][only for "include_lib"]
%%% },
%%% ...
%%% ]
%%% ].
%%% '''
%%% @end

Expand Down Expand Up @@ -74,8 +75,7 @@

%% -build_dependencies(Modules)
-define(MATCH_MANUAL_DEPENDENCIES(Modules),
{tree, attribute, _,
{attribute, {tree, atom, _, build_dependencies}, [{tree, list, _, {list, Modules, none}}]}}
{tree, attribute, _, {attribute, {tree, atom, _, build_dependencies}, [{tree, list, _, {list, Modules, none}}]}}
).

%% entry point
Expand All @@ -90,13 +90,13 @@ main(_) ->

-spec usage() -> ok.
usage() ->
io:format("dependency_analyzer.escript some_file.(h|e)rl [out.json]").
io:format("dependency_analyzer.escript some_file.(h|e)rl [out.term]").

-spec do(file:filename(), {file, file:filename()} | stdout) -> ok.
do(InFile, Outspec) ->
{ok, Forms} = epp_dodger:parse_file(InFile),
Dependencies = process_forms(Forms, []),
OutData = unicode:characters_to_binary(to_json_list(Dependencies)),
Dependencies = lists:sort(process_forms(Forms, [])),
OutData = unicode:characters_to_binary(io_lib:format("~p.", [Dependencies])),
case Outspec of
{file, File} ->
file:write_file(File, OutData);
Expand Down Expand Up @@ -140,60 +140,3 @@ process_forms([_ | Rest], Acc) ->
%% Map a module name to its conventional source-file name, e.g. `foo' -> "foo.erl".
-spec module_to_erl(module()) -> file:filename().
module_to_erl(Module) ->
    SourceName = [atom_to_list(Module), ".erl"],
    unicode:characters_to_list(SourceName).

%%%
%%% JSON encoding: base-line escripts we use in our toolchain need to be dependency less
%%%

%% Render a list of dependency maps as a JSON array (returned as an iolist).
-spec to_json_list([#{string() => string()}]) -> string().
to_json_list(Dependencies) ->
    Encoded = [json_encode_dependency(Dep) || Dep <- Dependencies],
    ["[", string:join(Encoded, ","), "]"].

%% Render a single dependency map as a JSON object (returned as an iolist).
%% NOTE: member order follows maps:fold/3 iteration order, which is unspecified.
-spec json_encode_dependency(#{string() => string()}) -> string().
json_encode_dependency(Dep) ->
    CollectMember = fun(Key, Value, Acc) ->
        [[json_string_escape(Key), ":", json_string_escape(Value)] | Acc]
    end,
    Members = maps:fold(CollectMember, [], Dep),
    ["{", string:join(Members, ","), "}"].

%% Wrap a string in double quotes, escaping each character for JSON.
-spec json_string_escape(string()) -> string().
json_string_escape(Str) ->
    Escaped = lists:map(fun json_escape_char/1, Str),
    ["\"", Escaped, "\""].

%% Escape a single character for inclusion in a JSON string literal.
%% Returns the character unchanged when no escaping is needed, otherwise
%% a two-character escape (e.g. `\n') or a `\uXXXX' sequence.
%%
%% Fixes over the previous version:
%%  - control characters must be escaped with the escape LETTER
%%    (backslash + $n), not backslash + the raw control character,
%%    which is invalid JSON (RFC 8259, section 7);
%%  - \uXXXX sequences must be zero-padded, not space-padded
%%    ("\u0001", never "\u   1").
-spec json_escape_char(non_neg_integer()) -> non_neg_integer() | string().
json_escape_char($\") ->
    [$\\, $\"];
json_escape_char($\\) ->
    [$\\, $\\];
json_escape_char($\/) ->
    [$\\, $\/];
json_escape_char($\b) ->
    [$\\, $b];
json_escape_char($\f) ->
    [$\\, $f];
json_escape_char($\n) ->
    [$\\, $n];
json_escape_char($\r) ->
    [$\\, $r];
json_escape_char($\t) ->
    [$\\, $t];
json_escape_char(C) when C >= 16#20 andalso C =< 16#10FFFF ->
    %% unescaped, 16#5C (\) and 16#22 (") are handled above
    C;
json_escape_char(C) when C < 16#10000 ->
    %% remaining control characters (< 16#20) get a zero-padded \uXXXX escape
    io_lib:format("\\u~s", [string:pad(integer_to_list(C, 16), 4, leading, $0)]);
json_escape_char(_) ->
    %% TODO: support extended unicode characters
    error(utf8_extended_character_not_supported).
Loading

0 comments on commit 147a930

Please sign in to comment.