Skip to content

Commit

Permalink
Refactor sliderule io (#26)
Browse files Browse the repository at this point in the history
* refactor sliderule io

* better maxar authentication handling

* add docs

* fix maxar test logic

* add docs
  • Loading branch information
scottyhq authored Nov 22, 2024
1 parent 478116f commit 1034493
Show file tree
Hide file tree
Showing 14 changed files with 4,733 additions and 1,596 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ repos:
rev: "v4.6.0"
hooks:
- id: check-added-large-files
args: ["--maxkb=1000"]
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
Expand Down
13 changes: 13 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,16 @@ Datasets
general.Dataset
maxar.open_browse
maxar.plot_browse


IO
--

.. currentmodule:: coincident.io

.. autosummary::
:toctree: generated/

sliderule.subset_gedi02a
sliderule.subset_atl06
sliderule.sample_3dep
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,4 @@
always_document_param_types = True
# autodoc_typehints = "none"
nb_execution_mode = "auto" # off, on
nb_execution_excludepatterns = ["sliderule.ipynb"]
1 change: 1 addition & 0 deletions docs/examples/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ This section contains Jupyter Notebooks with narrative workflows using the
quickstart
cascading_search
sliderule
```
483 changes: 483 additions & 0 deletions docs/examples/sliderule.ipynb

Large diffs are not rendered by default.

5,590 changes: 4,049 additions & 1,541 deletions pixi.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ docs = [
"myst-nb",
"myst_parser>=0.13",
"pydata-sphinx-theme>=0.16.0,<0.17",
#"rpds-py>=0.21.0,<0.22",
"sphinx>=7.0",
"sphinx_autodoc_typehints",
"sphinx_copybutton",
Expand Down Expand Up @@ -120,7 +121,7 @@ report.exclude_also = [
]

[tool.codespell]
skip="pixi.lock"
skip="pixi.lock,docs/examples/sliderule.ipynb"

[tool.mypy]
files = ["src", "tests"]
Expand Down Expand Up @@ -220,6 +221,7 @@ rioxarray = "*"
#s3fs = "*"
stac-geoparquet = "*"
pyarrow = "*"
jsonschema = ">=4.23.0,<5"
#nbconvert = ">=7.16.4,<8"
#cloudpathlib-s3 = ">=0.20.0,<0.21"
#matplotlib-base = ">=3.9.2,<4"
Expand All @@ -236,6 +238,7 @@ mypy = "*"
# Testing additional dependencies
#rich = ">=13.8.1,<14" # Optional. convenient for rich.print(dataset)
#xvec = ">=0.3.0,<0.4"
sliderule = ">=4.7.1,<5"

[tool.pixi.pypi-dependencies]
coincident = { path = ".", editable = false }
Expand Down
1 change: 0 additions & 1 deletion src/coincident/datasets/maxar.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ async def download_item(
return item # noqa: RET504


@depends_on_optional("matplotlib")
def open_browse(item: pystac.Item, overview_level: int = 0) -> xr.DataArray:
"""
Open a browse image from a STAC item using the specified overview level.
Expand Down
4 changes: 2 additions & 2 deletions src/coincident/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

from __future__ import annotations

from coincident.io.sliderule import load_gedi, load_icesat2
from coincident.io import sliderule

__all__ = ["load_icesat2", "load_gedi"]
__all__ = ["sliderule"]
188 changes: 138 additions & 50 deletions src/coincident/io/sliderule.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,158 @@
"""
Read ICESat-2 & GEDI using Sliderule
Subset and Sample using Sliderule
https://slideruleearth.io
"""

from __future__ import annotations

import warnings
from pathlib import Path
from typing import Any

import geopandas as gpd
import pandas as pd

try:
from sliderule import gedi, icesat2, toregion
from sliderule import earthdata, gedi, icesat2, raster, toregion
except ImportError:
warnings.warn(
"'sliderule' package not found. Install for GEDI & ICESat2 functionality: https://slideruleearth.io/web/rtd/getting_started/Install.html",
stacklevel=2,
)
from coincident._utils import depends_on_optional

# TODO: make these configurable?
default_params = {
"ats": 20.0,
"cnt": 5,
"len": 40.0,
"res": 20.0,
"maxi": 6,
}


@depends_on_optional("sliderule.gedi")
def load_gedi(
aoi: gpd.GeoDataFrame,
start_datetime: pd.Timestamp,
end_datetime: pd.Timestamp,
quality_flags: bool = True,
**kwargs: Any,

def _gdf_to_sliderule_polygon(gf: gpd.GeoDataFrame) -> list[dict[str, float]]:
    """Convert a GeoDataFrame to the sliderule 'poly' region format.

    All geometries are dissolved into a single (multi)polygon first,
    since sliderule expects one region definition per request.
    """
    # toregion ships without type stubs, so mypy needs the ignore here
    merged = gf[["geometry"]].dissolve()
    region = toregion(merged)
    return region["poly"]  # type: ignore[no-any-return]


def _gdf_to_sliderule_params(gf: gpd.GeoDataFrame) -> dict[str, Any]:
    """Build baseline sliderule request parameters from a search GeoDataFrame.

    Returns a dict with the dissolved AOI polygon ('poly') and the overall
    time window ('t0'/'t1') spanned by the rows of ``gf``.
    """
    # Timestamps are made naive: sliderule expects ISO strings without offsets
    t0 = gf.start_datetime.min().tz_localize(None)
    t1 = gf.end_datetime.max().tz_localize(None)
    return {
        "poly": _gdf_to_sliderule_polygon(gf),
        "t0": t0.isoformat(),
        "t1": t1.isoformat(),
    }


def _granule_from_assets(assets: gpd.GeoDataFrame) -> str:
# NOTE: change to while loop in case tons of assets?
for _k, v in assets.items():
if v.get("roles") == "data":
granule = Path(v.get("href")).name

return granule


@depends_on_optional("sliderule")
def subset_gedi02a(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
# NOTE: investigate B006 best-practice
sliderule_params: dict[str, Any] = {}, # noqa: B006
) -> gpd.GeoDataFrame:
# NOTE: allow passing private cluster?
gedi.init("slideruleearth.io")
params = default_params.copy()
if quality_flags:
params["degrade_flag"] = 0
params["l2_quality_flag"] = 1
params["poly"] = toregion(aoi)["poly"]
params["t0"] = start_datetime.tz_localize(None).isoformat()
params["t1"] = end_datetime.tz_localize(None).isoformat()
params.update(kwargs)
return gedi.gedi02ap(params)


@depends_on_optional("sliderule.icesat2")
def load_icesat2(
aoi: gpd.GeoDataFrame,
start_datetime: pd.Timestamp,
end_datetime: pd.Timestamp,
**kwargs: Any,
"""
Subsets GEDI L2A data based on a given GeoDataFrame and optional area of interest (AOI).
Parameters
----------
gf : gpd.GeoDataFrame
A GeoDataFrame of results from coincident.search.search(dataset='gedi')
aoi : gpd.GeoDataFrame, optional
A GeoDataFrame with a POLYGON to subset. If not provided the union of geometries in gf is used.
sliderule_params : dict[Any], optional
A dictionary of additional parameters to be passed to the sliderule `gedi.gedi02ap`.
Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing the subsetted GEDI L2A data.
Notes
-----
The function sets `degrade_filter=True` and `l2_quality_filter=True` by default.
"""
params = _gdf_to_sliderule_params(gf)

granule_names = gf.assets.apply(_granule_from_assets).to_list()

if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)

params.update(
{
"degrade_filter": True,
"l2_quality_filter": True,
}
)

# User-provided parameters take precedence
params.update(sliderule_params)

return gedi.gedi02ap(params, resources=granule_names)


@depends_on_optional("sliderule")
def subset_atl06(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
dropna: bool = True,
sliderule_params: dict[str, Any] = {}, # noqa: B006
) -> gpd.GeoDataFrame:
# NOTE: allow passing private cluster?
# NOTE: deal with kwargs
icesat2.init("slideruleearth.io")
params = default_params.copy()
params["srt"] = icesat2.SRT_LAND
params["cnf"] = icesat2.CNF_SURFACE_HIGH
params["poly"] = toregion(aoi)["poly"]
params["t0"] = start_datetime.tz_localize(None).isoformat()
params["t1"] = end_datetime.tz_localize(None).isoformat()
params.update(kwargs)
return icesat2.atl06p(params)
params = _gdf_to_sliderule_params(gf)

    # Not necessary but avoids another CMR search
granule_names = gf.assets.apply(_granule_from_assets).to_list()

if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)

# User-provided parameters take precedence
params.update(sliderule_params)

data = icesat2.atl06sp(params, resources=granule_names)

# NOTE: add server-side filtering for NaNs in sliderule.atl06sp?
if dropna:
return data.dropna(subset=["h_li"])
return data


@depends_on_optional("sliderule")
def sample_worldcover(gf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Sample the ESA WorldCover 10 m land-cover raster at POINT geometries."""
    # sliderule expects [[lon, lat], ...] coordinate pairs
    coords = [list(xy) for xy in zip(gf.geometry.x, gf.geometry.y, strict=False)]
    return raster.sample("esa-worldcover-10meter", coords)


@depends_on_optional("sliderule")
def sample_3dep(gf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Sample 3DEP 1-meter DEM with POINT geometries in GeoDataFrame.

    Points should be (longitude, latitude) not UTM

    Parameters
    ----------
    gf : gpd.GeoDataFrame
        A GeoDataFrame containing POINT geometries

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame with sampled elevation data from the 3DEP 1-meter DEM.
    """
    # sliderule expects [[lon, lat], ...] coordinate pairs
    coords = [list(xy) for xy in zip(gf.geometry.x, gf.geometry.y, strict=False)]

    # Restrict the raster catalog to 1m DEM tiles intersecting the points' footprint
    region = _gdf_to_sliderule_polygon(gf)
    catalog = earthdata.tnm(
        short_name="Digital Elevation Model (DEM) 1 meter", polygon=region
    )

    # NOTE: make sliderule aware of 3dep polygons to not sample NaN regions for 1m grid?
    sampled = raster.sample("usgs3dep-1meter-dem", coords, {"catalog": catalog})

    # Drop points that fell outside available 1m DEM coverage
    return sampled[sampled.value.notna()]
5 changes: 5 additions & 0 deletions src/coincident/search/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ def search(
**kwargs,
)

# Keep track of dataset alias in geodataframe metadata
    # NOTE: attrs not always retained and not saved to file
# https://github.com/geopandas/geopandas/issues/3320
gf.attrs["dataset"] = dataset.alias

return gf


Expand Down
18 changes: 18 additions & 0 deletions src/coincident/search/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@
except maxar_platform.session.NoSessionCredentials:
msg_noauth = "Unable to authenticate with Maxar API. Please set MAXAR_API_KEY environment variable."
warnings.warn(msg_noauth, stacklevel=2)
except maxar_platform.exceptions.UnAuthorizedException:
msg_noauth = (
"Unable to authenticate with Maxar API. Please check MAXAR_API_KEY is valid."
)
warnings.warn(msg_noauth, stacklevel=2)


def _filter_assets(assets: gpd.GeoDataFrame) -> dict[str, str]:
"""Remove key:None pairs from assets"""
keep_keys = []
for k, v in assets.items():
if v is not None:
keep_keys.append(k)

return {key: assets[key] for key in keep_keys}


def to_geopandas(
Expand Down Expand Up @@ -59,6 +74,9 @@ def to_geopandas(
record_batch_reader = stac_geoparquet.arrow.parse_stac_items_to_arrow(collection)
gf = gpd.GeoDataFrame.from_arrow(record_batch_reader) # doesn't keep arrow dtypes

# Workaround stac-geoparquet limitation https://github.com/stac-utils/stac-geoparquet/issues/82
gf["assets"] = gf["assets"].apply(_filter_assets)

# Additional columns for convenience
# NOTE: these become entries under STAC properties
gf["dayofyear"] = gf["datetime"].dt.dayofyear
Expand Down
2 changes: 2 additions & 0 deletions src/coincident/search/wesm.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def stacify_column_names(gf: GeoDataFrame) -> GeoDataFrame:
"collect_end": "end_datetime",
}
gf = gf.rename(columns=name_map)
# Add collection name to match STAC (used to identify geodataframe contents in other functions)
gf["collection"] = "3DEP"
gf["start_datetime"] = pd.to_datetime(gf["start_datetime"])
gf["end_datetime"] = pd.to_datetime(gf["end_datetime"])
duration = gf.end_datetime - gf.start_datetime
Expand Down
Loading

0 comments on commit 1034493

Please sign in to comment.