Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitize HTML #51

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
```

If you're using `eerepr` through `geemap>=0.35.2`, this is [handled automatically](https://github.com/gee-community/geemap/pull/2183) by `geemap`.
- For security, HTML within Earth Engine objects is no longer rendered. This is consistent with the Code Editor.

### Added

Expand All @@ -37,6 +38,10 @@ All notable changes to this project will be documented in this file.
- Dropped Python 3.7 support
- Automatic `initialize` on import

### Security

- Escape HTML in all server-side data to prevent injection attacks

## [0.0.4] - 2022-11-30

### Added
Expand Down
12 changes: 12 additions & 0 deletions eerepr/html.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import html
from datetime import datetime, timezone
from itertools import chain
from typing import Any, Hashable
Expand All @@ -20,6 +21,17 @@
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def escape_object(obj: Any) -> Any:
    """Recursively escape HTML strings in a Python object.

    Strings are passed through ``html.escape``. Lists, tuples and dicts are
    rebuilt with every element (and, for dicts, every key) escaped
    recursively. Any other value is returned unchanged.

    Args:
        obj: The object to sanitize.

    Returns:
        The escaped string, a new list/tuple/dict holding escaped contents,
        or ``obj`` itself when it is none of those types.
    """
    if isinstance(obj, str):
        return html.escape(obj)
    if isinstance(obj, list):
        return [escape_object(element) for element in obj]
    if isinstance(obj, tuple):
        # Tuples can carry strings just like lists; leaving them untouched
        # would let unescaped markup slip through the sanitizer.
        return tuple(escape_object(element) for element in obj)
    if isinstance(obj, dict):
        # Keys are escaped as well as values; non-string keys pass through
        # unchanged via the fallback below.
        return {escape_object(key): escape_object(value) for key, value in obj.items()}
    return obj


def convert_to_html(obj: Any, key: Hashable | None = None) -> str:
"""Converts a Python object (not list or dict) to an HTML <li> element.

Expand Down
11 changes: 6 additions & 5 deletions eerepr/repr.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from __future__ import annotations

import html
import uuid
from functools import _lru_cache_wrapper, lru_cache
from html import escape
from typing import Any, Literal, Union
from warnings import warn

import ee

from eerepr.config import Config
from eerepr.html import convert_to_html
from eerepr.html import convert_to_html, escape_object

REPR_HTML = "_repr_html_"
EEObject = Union[ee.Element, ee.ComputedObject]
Expand Down Expand Up @@ -64,7 +64,8 @@ def _is_nondeterministic(obj: EEObject) -> bool:
@lru_cache(maxsize=None)
def _repr_html_(obj: EEObject) -> str:
"""Generate an HTML representation of an EE object."""
info = obj.getInfo()
# Escape all strings in object info to prevent injection
info = escape_object(obj.getInfo())
css = _load_css()
body = convert_to_html(info)

Expand Down Expand Up @@ -96,7 +97,7 @@ def _ee_repr(obj: EEObject) -> str:
f"Getting info failed with: '{e}'. Falling back to string repr.",
stacklevel=2,
)
return f"<pre>{escape(repr(obj))}</pre>"
return f"<pre>{html.escape(repr(obj))}</pre>"

mbs = len(rep) / 1e6
if mbs > options.max_repr_mbs:
Expand All @@ -109,7 +110,7 @@ def _ee_repr(obj: EEObject) -> str:
),
stacklevel=2,
)
return f"<pre>{escape(repr(obj))}</pre>"
return f"<pre>{html.escape(repr(obj))}</pre>"

return rep

Expand Down
23 changes: 20 additions & 3 deletions tests/test_reprs.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import ee

import eerepr
from eerepr.repr import _repr_html_


def test_full_repr(data_regression):
    """Regression test the full HTML repr (with CSS and JS) of a nested EE object."""
    from tests.test_html import get_test_objects

    # Initialize before rendering so the repr is exercised through the
    # `_repr_html_` method on the EE object rather than by calling the
    # private cached function directly.
    eerepr.initialize()

    objects = get_test_objects().values()
    rendered = ee.List(list(objects))._repr_html_()
    data_regression.check(rendered)


Expand Down Expand Up @@ -38,3 +39,19 @@ def _repr_html_(self):
# reset shouldn't remove the existing repr
eerepr.reset()
assert obj._repr_html_() == "foo"


def test_scripts_sanitized():
    """Check that a script tag inside an EE object never reaches the HTML repr."""
    eerepr.initialize()

    payload = "<script>alert('foo')</script>"

    # Each case is (EE constructor, argument). Objects are built one at a
    # time inside the loop, in the order String -> List -> Dictionary.
    cases = (
        (ee.String, payload),
        (ee.List, [payload]),
        (ee.Dictionary, {payload: payload, "type": payload}),
    )
    for constructor, argument in cases:
        rendered = constructor(argument)._repr_html_()
        assert "<script>" not in rendered
Loading