Skip to content

Commit

Permalink
Prevent cache hits on ee.List.shuffle(seed=False) (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
aazuspan committed Nov 9, 2022
1 parent d1e28d3 commit 84c1376
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 38 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,4 @@ Importing `eerepr` in a Jupyter notebook adds an HTML repr method to all Earth E

`eerepr` uses caching to improve performance. Server data will only be requested once for each unique Earth Engine object, and all subsequent requests will be retrieved from the cache until the Jupyter session is restarted.

When you import `eerepr`, it is automatically initialized with an unlimited cache size. You can manually set the number of unique objects to cache using `eerepr.initialize(max_cache_size=n)`. A value of `None` sets an unlimited cache while a value of `0` disables caching. You can also clear out the cache contents to free memory with `eerepr.clear_cache()`.

> **Warning**
> There is a known bug when calling `ee.List.shuffle(seed=False)`. Because the method returns non-deterministic results from the same seed value, the incorrect cached result will be displayed if called multiple times. All other random methods use deterministic seeds and should work as expected.
When you import `eerepr`, it is automatically initialized with an unlimited cache size. You can manually set the number of unique objects to cache using `eerepr.initialize(max_cache_size=n)`. A value of `None` sets an unlimited cache while a value of `0` disables caching.
2 changes: 1 addition & 1 deletion eerepr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import ee

from eerepr.repr import clear_cache, initialize
from eerepr.repr import initialize

__version__ = '0.0.1'
__all__ = ['clear_cache', 'initialize']
Expand Down
55 changes: 33 additions & 22 deletions eerepr/repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from importlib.resources import read_text
from typing import Callable, Type, Union
from warnings import warn
import uuid

import ee

Expand Down Expand Up @@ -34,7 +35,19 @@ def _attach_html_repr(cls: Type, repr: Callable) -> None:
setattr(cls, REPR_HTML, repr)


def _ee_repr(obj: Union[ee.Element, ee.ComputedObject]) -> str:
def _is_nondeterministic(obj):
    """Report whether ``obj`` may give a different result on every evaluation.

    Objects like that would break caching, because a stored repr could be
    shown in place of a fresh result. The check is a plain substring search
    over ``obj.serialize()``, and the only case it recognizes is
    ``ee.List.shuffle(seed=False)``.
    """
    serialized = obj.serialize()
    # Both markers must occur somewhere in the serialized text; the search
    # does not verify that they belong to the same call.
    return (
        "List.shuffle" in serialized
        and '"seed": {"constantValue": false}' in serialized
    )


@lru_cache(maxsize=None)
def _repr_html_(obj: Union[ee.Element, ee.ComputedObject]) -> str:
"""Generate an HTML representation of an EE object."""
try:
info = obj.getInfo()
Expand All @@ -58,8 +71,18 @@ def _ee_repr(obj: Union[ee.Element, ee.ComputedObject]) -> str:
)


def initialize(max_cache_size=None) -> _lru_cache_wrapper:
"""Attach HTML repr methods to EE objects.
def _ee_repr(obj: Union[ee.Element, ee.ComputedObject]) -> str:
    """Return the HTML repr of ``obj`` without reusing cached output for nondeterministic objects.

    Deterministic objects are passed straight to ``_repr_html_``. Objects
    flagged by ``_is_nondeterministic`` are first tagged with a fresh UUID.
    """
    if not _is_nondeterministic(obj):
        return _repr_html_(obj)

    # A unique attribute is attached so that ee.ComputedObject.__eq__ is
    # expected to return False against any previously seen object, which
    # prevents a cache hit for results that should not be reused.
    obj._eerepr_id = uuid.uuid4()
    return _repr_html_(obj)


def initialize(max_cache_size=None) -> None:
"""Attach HTML repr methods to EE objects and set the cache size.
Re-running this function will reset the cache.
Expand All @@ -68,25 +91,13 @@ def initialize(max_cache_size=None) -> _lru_cache_wrapper:
max_cache_size : int, optional
The maximum number of EE objects to cache. If None, the cache size is unlimited. Set to 0
to disable caching.
Returns
-------
_lru_cache_wrapper
The cache wrapper which can be used to inspect and clear the cache.
"""
rep = (
lru_cache(maxsize=max_cache_size)(_ee_repr) if max_cache_size != 0 else _ee_repr
)
global _repr_html_
if isinstance(_repr_html_, _lru_cache_wrapper):
_repr_html_ = _repr_html_.__wrapped__

if max_cache_size != 0:
_repr_html_ = lru_cache(maxsize=max_cache_size)(_repr_html_)

for cls in [ee.Element, ee.ComputedObject]:
_attach_html_repr(cls, rep)

return rep


def clear_cache() -> None:
"""Reset the cache."""
try:
ee.Element._repr_html_.cache_clear()
except AttributeError:
pass
_attach_html_repr(cls, _ee_repr)
24 changes: 13 additions & 11 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,21 @@
import eerepr


def test_clear_cache():
x = ee.Number(0)
x._repr_html_()
assert x._repr_html_.cache_info().currsize == 1

eerepr.clear_cache()
assert x._repr_html_.cache_info().currsize == 0


def test_disabled_cache():
eerepr.initialize(max_cache_size=0)
x = ee.Number(0)
assert not isinstance(x._repr_html_, _lru_cache_wrapper)

# This shouldn't break
eerepr.clear_cache()

def test_nondeterministic_caching():
    """Each repr of ``ee.List.shuffle(seed=False)`` must miss the cache, since it is nondeterministic."""
    eerepr.initialize(max_cache_size=None)
    cached_repr = eerepr.repr._repr_html_

    # Start from an empty cache so the miss counter begins at zero.
    cached_repr.cache_clear()
    assert cached_repr.cache_info().misses == 0

    shuffled = ee.List([0, 1, 2]).shuffle(seed=False)
    for _ in range(2):
        shuffled._repr_html_()

    # Two reprs of the same object should register as two separate misses.
    assert cached_repr.cache_info().misses == 2

0 comments on commit 84c1376

Please sign in to comment.