Merge #232

232: Split and refactor the test outcomes page r=Veykril a=pietroalbini

This PR changes how the content of the test outcomes page is structured, splitting it into one page per target. This has two purposes:

* Reduce the clutter on the page, which would otherwise mix information from all qualified targets.
* Provide an accurate list of ignored tests, as each target now has the list of tests it ignored rather than just displaying a list of tests ignored by *all* targets.

To simplify the generation of the separate pages, I changed how the `ferrocene_test_outcomes` extension works. Rather than having `.. ignored-tests::` and `.. suite-summary::` directives that are rendered in Python code, the extension now gathers and organizes the data, and defers the rendering of the information to a Jinja2 rST template. This results in the whole test results page being rendered by the template, which eases the maintenance of those pages. For example, a note that should be displayed only on cross-compiled targets can now be gated behind `{% if host != target %}`.

Finally, while refactoring I removed all of the code that parsed the debug representation of steps, replacing it with the [structured test metadata](rust-lang/rust#111936) I added a while back. This will increase the robustness of the tool.

There are still some open issues with this that I'll address in a future PR:

* The list of crates for the bootstrap test suite is empty.
* The note for doc-tests not being executed is not present on aarch64.
* I'd like to add references to the test suite definitions in the evaluation plan.
* I'd like to see if I can make the information displayed in the page more concise.

Co-authored-by: Pietro Albini <[email protected]>
ferrocene · Feb 1, 2024 · cb0f681 · cb0f681
2 parents bc59de8 + f044821
commit cb0f681
Show file tree

Hide file tree

Showing 13 changed files with 491 additions and 631 deletions.
diff --git a/ferrocene/doc/qualification-report/exts/ferrocene_test_outcomes/__init__.py b/ferrocene/doc/qualification-report/exts/ferrocene_test_outcomes/__init__.py
@@ -1,12 +1,11 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
 # SPDX-FileCopyrightText: The Ferrocene Developers
 
-from . import ignored_tests, suite_summary, outcomes
+from . import outcomes, render_template
 
 def setup(app):
-    ignored_tests.setup(app)
-    suite_summary.setup(app)
     outcomes.setup(app)
+    render_template.setup(app)
 
     return {
         "version": "0",

diff --git a/ferrocene/doc/qualification-report/exts/ferrocene_test_outcomes/ignored_tests.py b/ferrocene/doc/qualification-report/exts/ferrocene_test_outcomes/ignored_tests.py
diff --git a/ferrocene/doc/qualification-report/exts/ferrocene_test_outcomes/outcomes.py b/ferrocene/doc/qualification-report/exts/ferrocene_test_outcomes/outcomes.py
@@ -1,24 +1,83 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
 # SPDX-FileCopyrightText: The Ferrocene Developers
 
-from .parse_debug_repr import DebugReprParser
 from collections import OrderedDict
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Dict, Optional, List, Union
 import json
 import os
+import re
 
 
 TEST_EXECUTED = object()
 TEST_IGNORED_NO_REASON = object()
 
 SUPPORTED_FORMAT_VERSION = 1
 
+_DOCTEST_RE = re.compile(r"^[a-zA-Z0-9\-_\.\/]+ - [a-zA-Z0-9_:'<>,]+ \(line [0-9]+\)$")
 
+
+@dataclass(order=True)
+class IgnoredTest:
+    name: str
+    reason: Optional[str]
+
+    def is_doc_test(self):
+        return _DOCTEST_RE.search(self.name) is not None
+
+
+@dataclass(order=True)
+class CompiletestInvocation:
+    suite: str
+    mode: Optional[str]
+
+
+@dataclass(order=True)
+class CargoPackageInvocation:
+    crates: list[str]
+
+
+@dataclass(order=True)
+class Invocation:
+    bootstrap_types: list[str]
+    host: str
+    target: str
+    stage: int
+    kind: Union[CompiletestInvocation, CargoPackageInvocation]
+
+    passed_tests: int = 0
+    failed_tests: int = 0
+    ignored_tests: int = 0
+
+    def total_tests(self):
+        return self.passed_tests + self.ignored_tests + self.failed_tests
+
+    def is_compiletest(self):
+        return isinstance(self.kind, CompiletestInvocation)
+
+    def is_cargo_package(self):
+        return isinstance(self.kind, CargoPackageInvocation)
+
+
+@dataclass
+class Platform:
+    invocations: list[Invocation] = field(default_factory=list)
+    ignored_tests: list[IgnoredTest] = field(default_factory=list)
+    ignored_doc_tests: list[IgnoredTest] = field(default_factory=list)
+
+    def filter_invocations(self, bootstrap_type, *, only_match_root_node=False):
+        for invocation in self.invocations:
+            if only_match_root_node:
+                if invocation.bootstrap_types[0] == bootstrap_type:
+                    yield invocation
+            elif bootstrap_type in invocation.bootstrap_types:
+                yield invocation
+
+
+@dataclass
 class Outcomes:
-    def __init__(self):
-        self.suites = OrderedDict()
+    platforms: OrderedDict[(str, str), Platform] = field(default_factory=OrderedDict)
 
     def load_file(self, file):
         with open(file) as f:
@@ -33,122 +92,79 @@ def load_file(self, file):
                 f"while only {SUPPORTED_FORMAT_VERSION} is supported",
             )
 
+        loader = FileLoader(self)
         for invocation in contents["invocations"]:
-            for step in invocation["children"]:
-                name = step["type"]
-                try:
-                    suite = self.suites[name]
-                except KeyError:
-                    suite = Suite(name=name)
-
-                collector = InvocationCollector(suite, step["debug_repr"])
-                collector.collect(step)
-
-                if collector.invocations:
-                    suite.invocations += collector.invocations
-                    self.suites[name] = suite
-
-    def finalize(self):
-        for suite in self.suites.values():
-            for name in list(suite.ignored_tests):
-                if suite.ignored_tests[name] is TEST_EXECUTED:
-                    del suite.ignored_tests[name]
-
-        self.suites = OrderedDict(sorted(self.suites.items(), key=lambda kv: kv[0]))
-        for suite in self.suites.values():
-            suite.invocations.sort(key=lambda invocation: invocation.debug_repr)
-            suite.ignored_tests = OrderedDict(
-                sorted(suite.ignored_tests.items(), key=lambda kv: kv[0])
-            )
+            loader.load_invocation(invocation)
 
+    def sort(self):
+        for platform in self.platforms.values():
+            platform.ignored_tests.sort()
+            platform.ignored_doc_tests.sort()
+            platform.invocations.sort()
 
-class InvocationCollector:
-    def __init__(self, suite, suite_debug_repr):
-        self._suite = suite
-        self._suite_debug_repr = suite_debug_repr
-        self._bootstrap_types = []
-        self.invocations = []
+    def platform(self, host, target):
+        key = (host, target)
+        if key not in self.platforms:
+            self.platforms[key] = Platform()
+        return self.platforms[key]
 
-    def collect(self, step):
-        pop_bootstrap_type = False
-        if step["kind"] == "rustbuild_step":
-            self._bootstrap_types.append(step["type"])
-            pop_bootstrap_type = True
-
-        invocation = Invocation(
-            bootstrap_types=list(self._bootstrap_types),
-            debug_repr=self._suite_debug_repr,
-            parsed_debug_repr=DebugReprParser(self._suite_debug_repr).parse_item()
-        )
-        for child in step["children"]:
-            if child["kind"] == "rustbuild_step":
-                self.collect(child)
-            elif child["kind"] == "test_suite":
-                for test in child["tests"]:
-                    outcome = test["outcome"]
-                    if outcome == "ignored":
-                        invocation.ignored_tests += 1
-                        if test["name"] not in self._suite.ignored_tests:
-                            reason = test["ignore_reason"]
-                            if reason is None:
-                                reason = TEST_IGNORED_NO_REASON
-                            self._suite.ignored_tests[test["name"]] = reason
-                    elif outcome == "failed":
-                        invocation.failed_tests += 1
-                        self._suite.ignored_tests[test["name"]] = TEST_EXECUTED
-                    elif outcome == "passed":
-                        invocation.passed_tests += 1
-                        self._suite.ignored_tests[test["name"]] = TEST_EXECUTED
-                    else:
-                        raise RuntimeError(f"unexpected outcome: {outcome}")
-
-        if invocation.total_tests() > 0:
-            self.invocations.append(invocation)
-
-        if pop_bootstrap_type:
-            self._bootstrap_types.pop()
 
+class FileLoader:
+    def __init__(self, outcomes):
+        self.outcomes = outcomes
+        self.bootstrap_type_stack = []
 
-@dataclass
-class Invocation:
-    bootstrap_types: List[str]
-    debug_repr: str
-    parsed_debug_repr: object
+    def load_invocation(self, invocation):
+        for step in invocation["children"]:
+            self.load_step(step)
 
-    passed_tests: int = 0
-    failed_tests: int = 0
-    ignored_tests: int = 0
+    def load_step(self, step):
+        if step["kind"] == "rustbuild_step":
+            self.bootstrap_type_stack.append(step["type"])
+            for child in step["children"]:
+                self.load_step(child)
+            self.bootstrap_type_stack.pop()
+        elif step["kind"] == "test_suite":
+            self.load_test_suite(step)
+        else:
+            raise RuntimeError(f"unknown step kind: {step['kind']}")
+
+    def load_test_suite(self, suite):
+        metadata = suite["metadata"]
+        platform = self.outcomes.platform(metadata["host"], metadata["target"])
+
+        if metadata["kind"] == "compiletest":
+            mode = metadata["mode"] if metadata["mode"] != metadata["suite"] else None
+            kind = CompiletestInvocation(suite=metadata["suite"], mode=mode)
+        elif metadata["kind"] == "cargo_package":
+            kind = CargoPackageInvocation(crates=metadata["crates"])
+        else:
+            raise RuntimeError(f"unknown test suite kind: {metadata['kind']}")
 
-    def total_tests(self):
-        return self.passed_tests + self.ignored_tests + self.failed_tests
+        invocation = Invocation(
+            bootstrap_types=list(self.bootstrap_type_stack),
+            host=metadata["host"],
+            target=metadata["target"],
+            stage=metadata["stage"],
+            kind=kind,
+        )
 
+        for test in suite["tests"]:
+            if test["outcome"] == "ignored":
+                invocation.ignored_tests += 1
+                ignored = IgnoredTest(name=test["name"], reason=test["ignore_reason"])
+                if ignored.is_doc_test():
+                    platform.ignored_doc_tests.append(ignored)
+                else:
+                    platform.ignored_tests.append(ignored)
+            elif test["outcome"] == "passed":
+                invocation.passed_tests += 1
+            elif test["outcome"] == "failed":
+                invocation.failed_tests += 1
+            else:
+                raise RuntimeError(f"unknown test outcome: {test['outcome']}")
 
-@dataclass
-class Suite:
-    name: str
-    invocations: List[Invocation] = field(default_factory=list)
-    # Store the list of ignored tests. Unfortunately, to properly process the
-    # data from multiple files, until the finalize method is called on the
-    # Outcomes class there will be multiple possible values for the items of
-    # this dictionary:
-    #
-    # - If the entry is missing, the test was not already executed. It can
-    #   either be set to TEST_EXECUTED if the test was executed,
-    #   TEST_IGNORED_NO_REASON if the test was ignored without a reason, or the
-    #   ignore reason if the test was ignored with a reason.
-    #
-    # - If the entry is TEST_EXECUTED, the test was executed at least once. It
-    #   can never be replaced by other states.
-    #
-    # - If the entry is a string or TEST_IGNORED_NO_REASON, the test was always
-    #   ignored, and the string the reason why the test was ignored. It can be
-    #   replaced by TEST_EXECUTED if another execution of the test did run the
-    #   test. That might happen for architecture-specific tests for example.
-    #
-    # I miss Rust enums :( -pietro
-    ignored_tests: OrderedDict[str, Union[str, object]] = field(
-        default_factory=OrderedDict
-    )
+        platform.invocations.append(invocation)
 
 
 def builder_inited(app):
@@ -172,7 +188,7 @@ def _load_outcomes():
     outcomes = Outcomes()
     for file_to_load in files_to_load:
         outcomes.load_file(file_to_load)
-    outcomes.finalize()
+    outcomes.sort()
 
     return outcomes