Skip to content

Commit

Permalink
Enable instrumentation of read_configs in starlark
Browse files Browse the repository at this point in the history
Summary:
This diff enables instrumentation of buckconfigs by adding shims around known `read_config` calls in starlark and exporting those functions from prelude to users.

If you want to instrument a known buckconfig key like `fbcode.sanitizer`, you can add `-c buckconfig.log=fbcode.sanitizer` to the buck command.

If you want to instrument all keys, add `-c buckconfig.log_all_in_json=true` to your buck command. Note that this is much slower and more memory hungry (about 20-40x) for larger binaries, so only enable it when absolutely necessary.

If you want to see stacktraces for those instrumented buckconfigs, you can add `buckconfig.stacktraces=true`.

We shim around all known `read_config`-like functions in prelude. It's possible that we may have missed some other read_config functions that were used or exported from prelude. We need instrumentation from core to close this gap and also make instrumentation significantly more performant

Reviewed By: IanChilds

Differential Revision: D64739027

fbshipit-source-id: 16b63642438d415ce17c34cab9429fc97b8d9966
  • Loading branch information
Scott Cao authored and facebook-github-bot committed Nov 8, 2024
1 parent c78fc93 commit 508bccd
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 1 deletion.
7 changes: 7 additions & 0 deletions native.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ load("@prelude//rust:rust_common.bzl", "rust_common_macro_wrapper")
load("@prelude//rust:rust_library.bzl", "rust_library_macro_wrapper")
load("@prelude//rust:with_workspace.bzl", "with_rust_workspace")
load("@prelude//user:all.bzl", _user_rules = "rules")
load("@prelude//utils:buckconfig.bzl", _read_config = "read_config_with_logging", _read_root_config = "read_root_config_with_logging", log_buckconfigs = "LOG_BUCKCONFIGS")
load("@prelude//utils:expect.bzl", "expect")
load("@prelude//utils:selects.bzl", "selects")
load(":is_full_meta_repo.bzl", "is_full_meta_repo")
Expand Down Expand Up @@ -483,7 +484,13 @@ __extra_rules__ = {
"versioned_alias": _versioned_alias_macro_stub,
}

# Replacements for the `read_config` / `read_root_config` builtins that log
# each call. The mapping is empty unless `log_buckconfigs` is truthy, so the
# stock builtins are left untouched when logging is not enabled.
__overridden_builtins__ = {
"read_config": _read_config,
"read_root_config": _read_root_config,
} if log_buckconfigs else {}

# Build the shimmed `native` namespace by layering mappings on top of the
# Buck2 builtins. Assuming `__struct_to_dict` returns a regular dict, each
# `update` replaces earlier entries that share a key, so the order below is
# the precedence order: builtins < overridden builtins < `__rules__` <
# `_user_rules`.
__shimmed_native__ = __struct_to_dict(__buck2_builtins__)
__shimmed_native__.update(__overridden_builtins__)
__shimmed_native__.update(__rules__)
__shimmed_native__.update(_user_rules)

Expand Down
84 changes: 83 additions & 1 deletion utils/buckconfig.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,92 @@ def _next_word(val, start, delimiter):

return -1

# Below are some utilities to log and track `read_config` calls.
# You can enable this with `-c buckconfig.log=<a single key>`, e.g. `-c buckconfig.log=build.use_limited_hybrid`.
# To print in JSON, use `-c buckconfig.log_json=build.use_limited_hybrid`.
# This prints a stacktrace on stderr for every `read_config` call of `build.use_limited_hybrid`.
# We recommend piping stderr to a file, because printing to stderr directly can be slow and hard to read.
# You can also print a record for all keys with `-c buckconfig.log_all_in_json=true`. This prints all `read_config` calls
# without stacktraces, where each entry is a JSON object that includes the cell, section, and key.
# NOTE: even without stacktraces, log-all mode is extremely slow and memory hungry, often 20-40x slower and more
# memory hungry than the equivalent run without it, so only use it on small graphs or when absolutely necessary.

def _buck_config_log_keys(json: bool) -> set[(str, str)]:
    """Return every (section, key) pair the configured log target could denote.

    Reads `buckconfig.log_json` from the root cell when `json` is true, and
    `buckconfig.log` otherwise. The value is a dotted `section.key` string.

    Returns an empty set when the option is unset or empty. A value that
    contains no `.` also yields an empty set, because there is no position at
    which to split it into a section and a key.
    """
    option_name = "log_json" if json else "log"
    dotted_name = read_root_config("buckconfig", option_name)
    if not dotted_name:
        return set()

    # Both sections and keys may themselves contain `.`, so a dotted name does
    # not identify a unique (section, key) pair. Record every possible split
    # point and let callers match against all of them.
    pieces = dotted_name.split(".")
    candidates = set()
    for cut in range(1, len(pieces)):
        candidates.add((".".join(pieces[:cut]), ".".join(pieces[cut:])))
    return candidates

# Candidate (section, key) pairs taken from `buckconfig.log`; matching reads
# are logged as plain text.
_BUCKCONFIG_LOG_KEYS = _buck_config_log_keys(json = False)
# Candidate (section, key) pairs taken from `buckconfig.log_json`; matching
# reads are logged as JSON.
_BUCKCONFIG_LOG_JSON_KEYS = _buck_config_log_keys(json = True)
# True only when the root cell sets `buckconfig.log_all_in_json` to exactly
# "True" or "true"; any other value (or no value) leaves log-all mode off.
_BUCKCONFIG_LOG_ALL = read_root_config("buckconfig", "log_all_in_json") in ("True", "true")
# True when at least one of the logging modes above is enabled.
LOG_BUCKCONFIGS = bool(_BUCKCONFIG_LOG_KEYS or _BUCKCONFIG_LOG_JSON_KEYS or _BUCKCONFIG_LOG_ALL)

def _log_read_config(section: str, key: str):
    """Print log records for a single config read of `section`.`key`.

    The three logging modes are checked independently, so one read can emit
    up to three records: a JSON record for log-all mode, a JSON record with a
    call stack for a matching `log_json` target, and a plain-text call stack
    for a matching `log` target. Nothing is printed when no mode matches.
    """
    target = (section, key)

    if _BUCKCONFIG_LOG_ALL:
        record = {
            "starlark_log_all_buckconfigs": {
                "cell": get_cell_name(),
                "key": key,
                "section": section,
            },
        }

        # Only reached when the log-all buckconfig is set.
        # buildifier: disable=print
        print(json.encode(record))

    # Membership in an empty set is always false, so a separate "is any target
    # configured" check is unnecessary.
    # `call_stack()` is invoked directly in this function (not via a helper)
    # so that the captured frames stay the same.
    if target in _BUCKCONFIG_LOG_JSON_KEYS:
        # NOTE(review): unlike the log-all record, this record carries no
        # `section`/`key` fields - confirm whether consumers need them to
        # tell apart ambiguous section/key splits.
        record = {
            "starlark_log_buckconfig": {
                "call_stack": call_stack(),
                "cell": get_cell_name(),
            },
        }

        # Only reached when the `log_json` buckconfig is set.
        # buildifier: disable=print
        print(json.encode(record))

    if target in _BUCKCONFIG_LOG_KEYS:
        # Only reached when the `log` buckconfig is set.
        # Everything goes through a single print call; otherwise lines from
        # parallel print invocations at load time would get interlaced.
        # buildifier: disable=print
        print("========starlark_log_buckconfig========\n{}\n".format(call_stack()))

def read_config_with_logging(section: str, key: str, default = None):
    """Log the read of `section`.`key`, then delegate to `read_config`.

    Logging happens before the lookup. The arguments are forwarded unchanged
    and the result of `read_config` is returned as-is.
    """
    _log_read_config(section, key)
    value = read_config(section, key, default)
    return value

def read_root_config_with_logging(section: str, key: str, default = None):
    """Log the read of `section`.`key`, then delegate to `read_root_config`.

    Logging happens before the lookup. The arguments are forwarded unchanged
    and the result of `read_root_config` is returned as-is.
    """
    _log_read_config(section, key)
    value = read_root_config(section, key, default)
    return value

# Pick the reader implementations once at load time: the logging wrappers when
# any logging mode is enabled, otherwise the plain builtins, so the
# non-logging path adds no wrapper call.
_read_config = read_config_with_logging if LOG_BUCKCONFIGS else read_config
_read_root_config = read_root_config_with_logging if LOG_BUCKCONFIGS else read_root_config

def read(section, field, default = None, root_cell = False):
    """Read a `string` from `.buckconfig`.

    Args:
        section: The buckconfig section to read from.
        field: The key within `section`.
        default: Value passed through to the underlying reader as its default.
        root_cell: When true, read via `_read_root_config`; otherwise read via
            `_read_config`.

    Returns:
        Whatever the selected reader returns for `section`.`field`.
    """

    # Always go through the module-level `_read_config` / `_read_root_config`
    # aliases so that buckconfig logging, when enabled, sees this read. The
    # previous direct assignment from the raw builtins was overwritten on the
    # very next line and has been dropped as dead code.
    read_config_func = _read_root_config if root_cell else _read_config
    return read_config_func(section, field, default)

# Alias for `read` that's explicit about the type being returned.
Expand Down

0 comments on commit 508bccd

Please sign in to comment.