From de03fe8c8566d654be935072413757c659f980f2 Mon Sep 17 00:00:00 2001
From: Aaron Zuspan <50475791+aazuspan@users.noreply.github.com>
Date: Mon, 20 Jan 2025 23:34:04 -0800
Subject: [PATCH] Long list optimization (#52)

* Avoid unnecessary stringifying of long lists

Lists are truncated to "List (n elements)" if the stringified form is
too long, but string conversion is expensive for long lists. To avoid
that, we can calculate the minimum possible string length and skip
stringifying when even that minimum exceeds the inline length limit.
Based on benchmarking with the S2 image collection, this is a 10 - 20%
speedup for convert_to_html.

The minimum length formula takes brackets, delimiters, and whitespace
into account, so e.g. the shortest possible 3-element list is 9
characters: "[1, 1, 1]".

* Update changelog
---
 CHANGELOG.md   |  4 +++-
 eerepr/html.py | 13 ++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7b5305..f17bf36 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,9 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
-Nothing yet.
+### Performance
+
+- Avoid stringifying long lists that will definitely be truncated in the repr (~20% speedup when testing with a 25-image Sentinel-2 collection)
 
 ## [0.1.0] - 2025-01-10
 
diff --git a/eerepr/html.py b/eerepr/html.py
index 8dcabb0..95c5f2c 100644
--- a/eerepr/html.py
+++ b/eerepr/html.py
@@ -54,13 +54,20 @@ def convert_to_html(obj: Any, key: Hashable | None = None) -> str:
 
 def list_to_html(obj: list, key: Hashable | None = None) -> str:
     """Convert a Python list to an HTML <li> element."""
-    contents = str(obj)
     n = len(obj)
     noun = "element" if n == 1 else "elements"
     header = f"{key}: " if key is not None else ""
-    header += f"List ({n} {noun})" if len(contents) > MAX_INLINE_LENGTH else contents
-    children = [convert_to_html(item, key=i) for i, item in enumerate(obj)]
 
+    # Skip the expensive stringification for lists that are definitely too long to
+    # include inline: with brackets and ", " delimiters, an n-element list is at
+    # least 3n characters long. This is a substantial speedup for large collections.
+    min_length = 3 * (n - 1) + 3
+    if min_length > MAX_INLINE_LENGTH or len(contents := str(obj)) > MAX_INLINE_LENGTH:
+        header += f"List ({n} {noun})"
+    else:
+        header += contents
+
+    children = [convert_to_html(item, key=i) for i, item in enumerate(obj)]
     return _make_collapsible_li(header, children)
 
 