From 43371f018a61b8f1fc8a673e3c82e85439713dac Mon Sep 17 00:00:00 2001 From: Aaron Zuspan Date: Thu, 9 Jan 2025 22:49:36 -0800 Subject: [PATCH] Escape HTML to prevent injection --- eerepr/html.py | 12 ++++++++++++ eerepr/repr.py | 11 ++++++----- tests/test_reprs.py | 23 ++++++++++++++++++++--- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/eerepr/html.py b/eerepr/html.py index 9db9aeb..8dcabb0 100644 --- a/eerepr/html.py +++ b/eerepr/html.py @@ -1,5 +1,6 @@ from __future__ import annotations +import html from datetime import datetime, timezone from itertools import chain from typing import Any, Hashable @@ -20,6 +21,17 @@ DATE_FORMAT = "%Y-%m-%d %H:%M:%S" +def escape_object(obj: Any) -> Any: + """Recursively escape HTML strings in a Python object.""" + if isinstance(obj, str): + return html.escape(obj) + if isinstance(obj, list): + return [escape_object(element) for element in obj] + if isinstance(obj, dict): + return {escape_object(key): escape_object(value) for key, value in obj.items()} + return obj + + def convert_to_html(obj: Any, key: Hashable | None = None) -> str: """Converts a Python object (not list or dict) to an HTML
<li> element. diff --git a/eerepr/repr.py b/eerepr/repr.py index 71b18eb..7e4b67a 100644 --- a/eerepr/repr.py +++ b/eerepr/repr.py @@ -1,15 +1,15 @@ from __future__ import annotations +import html import uuid from functools import _lru_cache_wrapper, lru_cache -from html import escape from typing import Any, Literal, Union from warnings import warn import ee from eerepr.config import Config -from eerepr.html import convert_to_html +from eerepr.html import convert_to_html, escape_object REPR_HTML = "_repr_html_" EEObject = Union[ee.Element, ee.ComputedObject] @@ -64,7 +64,8 @@ def _is_nondeterministic(obj: EEObject) -> bool: @lru_cache(maxsize=None) def _repr_html_(obj: EEObject) -> str: """Generate an HTML representation of an EE object.""" - info = obj.getInfo() + # Escape all strings in object info to prevent injection + info = escape_object(obj.getInfo()) css = _load_css() body = convert_to_html(info) @@ -96,7 +97,7 @@ def _ee_repr(obj: EEObject) -> str: f"Getting info failed with: '{e}'. Falling back to string repr.", stacklevel=2, ) - return f"<pre>{escape(repr(obj))}</pre>" + return f"<pre>{html.escape(repr(obj))}</pre>" mbs = len(rep) / 1e6 if mbs > options.max_repr_mbs: @@ -109,7 +110,7 @@ def _ee_repr(obj: EEObject) -> str: ), stacklevel=2, ) - return f"<pre>{escape(repr(obj))}</pre>" + return f"<pre>{html.escape(repr(obj))}</pre>" return rep diff --git a/tests/test_reprs.py b/tests/test_reprs.py index cd22ff0..4c4b640 100644 --- a/tests/test_reprs.py +++ b/tests/test_reprs.py @@ -1,15 +1,16 @@ import ee import eerepr -from eerepr.repr import _repr_html_ def test_full_repr(data_regression): """Regression test the full HTML repr (with CSS and JS) of a nested EE object.""" from tests.test_html import get_test_objects - objects = get_test_objects().items() - rendered = _repr_html_(ee.List([obj[1] for obj in objects])) + eerepr.initialize() + + objects = get_test_objects().values() + rendered = ee.List([obj for obj in objects])._repr_html_() data_regression.check(rendered) @@ -38,3 +39,19 @@ def _repr_html_(self): # reset shouldn't remove the existing repr eerepr.reset() assert obj._repr_html_() == "foo" + + +def test_scripts_sanitized(): + """Test that scripts within objects are escaped.""" + eerepr.initialize() + + script_injection = "" + + obj = ee.String(script_injection) + assert "