Track crate dependency count over time (#5228)

### What

* Part of #4788
* Closes #5101

### Checklist

* [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
  * Using newly built examples: [app.rerun.io](https://app.rerun.io/pr/5228/index.html)
  * Using examples from latest `main` build: [app.rerun.io](https://app.rerun.io/pr/5228/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
  * Using full set of examples from `nightly` build: [app.rerun.io](https://app.rerun.io/pr/5228/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/5228)
- [Docs preview](https://rerun.io/preview/0223b21104bad205cfc5004aa33386d9b52fd603/docs)
- [Examples preview](https://rerun.io/preview/0223b21104bad205cfc5004aa33386d9b52fd603/examples)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

---------

Co-authored-by: jprochazk <[email protected]>
rerun-io · Feb 21, 2024 · 2625247 · 2625247
1 parent 9b37940
commit 2625247
Show file tree

Hide file tree

Showing 5 changed files with 247 additions and 90 deletions.
diff --git a/.github/workflows/reusable_track_size.yml b/.github/workflows/reusable_track_size.yml
@@ -97,12 +97,18 @@ jobs:
             entries+=("$name:$file:MiB")
           done
 
-          data=$(python3 scripts/ci/sizes.py measure "${entries[@]}")
-          echo "$data" > "/tmp/data.json"
+          python3 scripts/ci/count_bytes.py "${entries[@]}"  > /tmp/sizes.json
+
+          python3 scripts/ci/count_dependencies.py -p re_sdk    --no-default-features > /tmp/deps1.json
+          python3 scripts/ci/count_dependencies.py -p re_viewer --all-features        > /tmp/deps2.json
+          python3 scripts/ci/count_dependencies.py -p rerun     --all-features        > /tmp/deps3.json
+
+          # Merge the results, putting dependencies first (on top):
+          jq -s '.[0] + .[1] + .[2] + .[3]' /tmp/deps1.json /tmp/deps2.json /tmp/deps3.json /tmp/sizes.json > /tmp/data.json
 
           comparison=$(
-            python3 scripts/ci/sizes.py compare \
-              --threshold=5% \
+            python3 scripts/ci/compare.py \
+              --threshold=2% \
               --before-header=${{ (inputs.PR_NUMBER && github.event.pull_request.base.ref) || 'Before' }} \
               --after-header=${{ github.ref_name }} \
               "/tmp/prev.json" "/tmp/data.json"

diff --git a/rerun_cpp/src/rerun/c/rerun.h b/rerun_cpp/src/rerun/c/rerun.h
diff --git a/scripts/ci/sizes.py → scripts/ci/compare.py b/scripts/ci/sizes.py → scripts/ci/compare.py
@@ -1,28 +1,20 @@
 #!/usr/bin/env python3
 
 """
-Measure or compare sizes of a list of files.
+Compare sizes of a list of files.
 
 This produces the format for use in https://github.com/benchmark-action/github-action-benchmark.
 
 Use the script:
-    python3 scripts/ci/sizes.py --help
+    python3 scripts/ci/compare.py --help
 
-    python3 scripts/ci/sizes.py measure \
-        "Wasm":web_viewer/re_viewer_bg.wasm
-
-    python3 scripts/ci/sizes.py measure --format=github \
-        "Wasm":web_viewer/re_viewer_bg.wasm
-
-    python3 scripts/ci/sizes.py compare --threshold=20 previous.json current.json
+    python3 scripts/ci/compare.py --threshold=20 previous.json current.json
 """
 from __future__ import annotations
 
 import argparse
 import json
-import os.path
 import sys
-from enum import Enum
 from pathlib import Path
 from typing import Any
 
@@ -78,17 +70,6 @@ def render_table_rows(rows: list[Any], headers: list[str]) -> str:
     return table
 
 
-class Format(Enum):
-    JSON = "json"
-    GITHUB = "github"
-
-    def render(self, data: list[dict[str, str]]) -> str:
-        if self is Format.JSON:
-            return json.dumps(data)
-        if self is Format.GITHUB:
-            return render_table_dict(data)
-
-
 def compare(
     previous_path: str,
     current_path: str,
@@ -113,23 +94,40 @@ def compare(
     rows: list[tuple[str, str, str, str]] = []
     for name, entry in entries.items():
         if "previous" in entry and "current" in entry:
-            previous_bytes = float(entry["previous"]["value"]) * DIVISORS[entry["previous"]["unit"]]
-            current_bytes = float(entry["current"]["value"]) * DIVISORS[entry["current"]["unit"]]
-            unit = get_unit(min(previous_bytes, current_bytes))
-            div = get_divisor(unit)
-
-            abs_diff_bytes = abs(current_bytes - previous_bytes)
-            min_diff_bytes = previous_bytes * (threshold_pct / 100)
-            if abs_diff_bytes >= min_diff_bytes:
+            previous_unit = entry["previous"]["unit"]
+            current_unit = entry["current"]["unit"]
+
+            previous = float(entry["previous"]["value"])
+            current = float(entry["current"]["value"])
+
+            if previous_unit == current_unit:
+                div = 1
+                unit = previous_unit
+            else:
+                previous_divisor = DIVISORS.get(previous_unit, 1)
+                current_divisor = DIVISORS.get(current_unit, 1)
+
+                previous_bytes = previous * previous_divisor
+                current_bytes = current * current_divisor
+
                 previous = previous_bytes / div
                 current = current_bytes / div
-                change_pct = ((current_bytes - previous_bytes) / previous_bytes) * 100
+
+                unit = get_unit(min(previous_bytes, current_bytes))
+                div = get_divisor(unit)
+
+            change_pct = ((current - previous) / previous) * 100
+            if abs(change_pct) >= threshold_pct:
+                if unit in DIVISORS:
+                    change = f"{change_pct:+.2f}%"
+                else:
+                    change = f"{format_num(current - previous)} {unit}"
                 rows.append(
                     (
                         name,
-                        f"{previous:.2f} {unit}",
-                        f"{current:.2f} {unit}",
-                        f"{change_pct:+.2f}%",
+                        f"{format_num(previous)} {unit}",
+                        f"{format_num(current)} {unit}",
+                        change,
                     )
                 )
         elif "current" in entry:
@@ -148,85 +146,52 @@ def compare(
         sys.stdout.flush()
 
 
-def measure(files: list[str], format: Format) -> None:
-    output: list[dict[str, str]] = []
-    for arg in files:
-        parts = arg.split(":")
-        name = parts[0]
-        file = parts[1]
-        size = os.path.getsize(file)
-        unit = parts[2] if len(parts) > 2 else get_unit(size)
-        div = get_divisor(unit)
-
-        output.append(
-            {
-                "name": name,
-                "value": str(round(size / div, 2)),
-                "unit": unit,
-            }
-        )
+def format_num(num: float) -> str:
+    if num.is_integer():
+        return str(int(num))
+    return f"{num:.2f}"
 
-    sys.stdout.write(format.render(output))
-    sys.stdout.flush()
 
-
-def percentage(value: str) -> int:
+def percentage(value: str) -> float:
     value = value.replace("%", "")
-    return int(value)
+    return float(value)
 
 
 def main() -> None:
     parser = argparse.ArgumentParser(description="Generate a PR summary page")
-
-    cmds_parser = parser.add_subparsers(title="cmds", dest="cmd", help="Command")
-
-    compare_parser = cmds_parser.add_parser("compare", help="Compare results")
-    compare_parser.add_argument("before", type=str, help="Previous result .json file")
-    compare_parser.add_argument("after", type=str, help="Current result .json file")
-    compare_parser.add_argument(
+    parser.add_argument("before", type=str, help="Previous result .json file")
+    parser.add_argument("after", type=str, help="Current result .json file")
+    parser.add_argument(
         "--threshold",
         type=percentage,
         required=False,
         default=20,
         help="Only print row if value is N%% larger or smaller",
     )
-    compare_parser.add_argument(
+    parser.add_argument(
         "--before-header",
         type=str,
         required=False,
         default="Before",
         help="Header for before column",
     )
-    compare_parser.add_argument(
+    parser.add_argument(
         "--after-header",
         type=str,
         required=False,
         default="After",
         help="Header for after column",
     )
 
-    measure_parser = cmds_parser.add_parser("measure", help="Measure sizes")
-    measure_parser.add_argument(
-        "--format",
-        type=Format,
-        choices=list(Format),
-        default=Format.JSON,
-        help="Format to render",
-    )
-    measure_parser.add_argument("files", nargs="*", help="Entries to measure. Format: name:path[:unit]")
-
     args = parser.parse_args()
 
-    if args.cmd == "compare":
-        compare(
-            args.before,
-            args.after,
-            args.threshold,
-            args.before_header,
-            args.after_header,
-        )
-    elif args.cmd == "measure":
-        measure(args.files, args.format)
+    compare(
+        args.before,
+        args.after,
+        args.threshold,
+        args.before_header,
+        args.after_header,
+    )
 
 
 if __name__ == "__main__":

diff --git a/scripts/ci/count_bytes.py b/scripts/ci/count_bytes.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+"""
+Measure sizes of a list of files.
+
+This produces the format for use in https://github.com/benchmark-action/github-action-benchmark.
+
+Use the script:
+    python3 scripts/ci/count_bytes.py --help
+
+    python3 scripts/ci/count_bytes.py \
+        "Wasm":web_viewer/re_viewer_bg.wasm
+
+    python3 scripts/ci/count_bytes.py --format=github \
+        "Wasm":web_viewer/re_viewer_bg.wasm
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os.path
+import sys
+from enum import Enum
+from typing import Any
+
+
+def get_unit(size: int | float) -> str:
+    UNITS = ["B", "kiB", "MiB", "GiB", "TiB"]
+
+    unit_index = 0
+    while size > 1024:
+        size /= 1024
+        unit_index += 1
+
+    return UNITS[unit_index]
+
+
+DIVISORS = {
+    "B": 1,
+    "kiB": 1024,
+    "MiB": 1024 * 1024,
+    "GiB": 1024 * 1024 * 1024,
+    "TiB": 1024 * 1024 * 1024 * 1024,
+}
+
+
+def get_divisor(unit: str) -> int:
+    return DIVISORS[unit]
+
+
+def render_table_dict(data: list[dict[str, str]]) -> str:
+    keys = data[0].keys()
+    column_widths = [max(len(key), max(len(str(row[key])) for row in data)) for key in keys]
+    separator = "|" + "|".join("-" * (width + 2) for width in column_widths)
+    header_row = "|".join(f" {key.center(width)} " for key, width in zip(keys, column_widths))
+
+    table = f"|{header_row}|\n{separator}|\n"
+    for row in data:
+        row_str = "|".join(f" {str(row.get(key, '')).ljust(width)} " for key, width in zip(keys, column_widths))
+        table += f"|{row_str}|\n"
+
+    return table
+
+
+def render_table_rows(rows: list[Any], headers: list[str]) -> str:
+    column_widths = [max(len(str(item)) for item in col) for col in zip(*([tuple(headers)] + rows))]
+    separator = "|" + "|".join("-" * (width + 2) for width in column_widths)
+    header_row = "|".join(f" {header.center(width)} " for header, width in zip(headers, column_widths))
+
+    table = f"|{header_row}|\n{separator}|\n"
+    for row in rows:
+        row_str = "|".join(f" {str(item).ljust(width)} " for item, width in zip(row, column_widths))
+        table += f"|{row_str}|\n"
+
+    return table
+
+
+class Format(Enum):
+    JSON = "json"
+    GITHUB = "github"
+
+    def render(self, data: list[dict[str, str]]) -> str:
+        if self is Format.JSON:
+            return json.dumps(data)
+        if self is Format.GITHUB:
+            return render_table_dict(data)
+
+
+def measure(files: list[str], format: Format) -> None:
+    output: list[dict[str, str]] = []
+    for arg in files:
+        parts = arg.split(":")
+        name = parts[0]
+        file = parts[1]
+        size = os.path.getsize(file)
+        unit = parts[2] if len(parts) > 2 else get_unit(size)
+        div = get_divisor(unit)
+
+        output.append(
+            {
+                "name": name,
+                "value": str(round(size / div, 2)),
+                "unit": unit,
+            }
+        )
+
+    sys.stdout.write(format.render(output))
+    sys.stdout.flush()
+
+
+def percentage(value: str) -> int:
+    value = value.replace("%", "")
+    return int(value)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate a PR summary page")
+    parser.add_argument(
+        "--format",
+        type=Format,
+        choices=list(Format),
+        default=Format.JSON,
+        help="Format to render",
+    )
+    parser.add_argument("files", nargs="*", help="Entries to measure. Format: name:path[:unit]")
+
+    args = parser.parse_args()
+    measure(args.files, args.format)
+
+
+if __name__ == "__main__":
+    main()