Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add conversion between cf and shapely for lines #460

Merged
merged 14 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 149 additions & 6 deletions cf_xarray/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@
}
if types.issubset({"Point", "MultiPoint"}):
ds = points_to_cf(geometries)
elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset(
{"LineString", "MultiLineString"}
):
raise NotImplementedError("Only point geometries conversion is implemented.")
elif types.issubset({"LineString", "MultiLineString"}):
ds = lines_to_cf(geometries)
elif types.issubset({"Polygon", "MultiPolygon"}):
raise NotImplementedError("Polygon geometry conversion is not implemented.")
else:
raise ValueError(
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
Expand Down Expand Up @@ -165,8 +165,10 @@
geom_type = ds.geometry_container.attrs["geometry_type"]
if geom_type == "point":
geometries = cf_to_points(ds)
elif geom_type in ["line", "polygon"]:
raise NotImplementedError("Only point geometries conversion is implemented.")
elif geom_type == "line":
geometries = cf_to_lines(ds)
elif geom_type == "polygon":
raise NotImplementedError("Polygon geometry conversion is not implemented.")
else:
raise ValueError(
f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
Expand Down Expand Up @@ -295,3 +297,144 @@
j += n

return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)


def lines_to_cf(lines: xr.DataArray | Sequence):
"""Convert an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset.

Parameters
----------
lines : sequence of shapely.geometry.Line or MultiLine
The sequence of [multi]lines to translate to a CF dataset.

Returns
-------
xr.Dataset
A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
and optionally 'part_node_count'.
"""
from shapely import to_ragged_array

if isinstance(lines, xr.DataArray):
dim = lines.dims[0]
coord = lines[dim] if dim in lines.coords else None
lines_ = lines.values
else:
dim = "index"
coord = None
lines_ = np.array(lines)

_, arr, offsets = to_ragged_array(lines_)
x = arr[:, 0]
y = arr[:, 1]

node_count = np.diff(offsets[0])
if len(offsets) == 1:
indices = offsets[0]
part_node_count = node_count
else:
indices = np.take(offsets[0], offsets[1])
part_node_count = np.diff(indices)

geom_coords = arr.take(indices[:-1], 0)
crdX = geom_coords[:, 0]
crdY = geom_coords[:, 1]

ds = xr.Dataset(
data_vars={
"node_count": xr.DataArray(node_count, dims=("segment",)),
"part_node_count": xr.DataArray(part_node_count, dims=(dim,)),

Check warning on line 346 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L345-L346

Added lines #L345 - L346 were not covered by tests
"geometry_container": xr.DataArray(
attrs={
"geometry_type": "line",
"node_count": "node_count",
"part_node_count": "part_node_count",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
},
coords={
"x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}),
"y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}),
"crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}),
"crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}),

Check warning on line 361 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L358-L361

Added lines #L358 - L361 were not covered by tests
},
)

if coord is not None:
ds = ds.assign_coords({dim: coord})

# Special case when we have no MultiLines
if len(ds.part_node_count) == len(ds.node_count):
ds = ds.drop_vars("part_node_count")
del ds.geometry_container.attrs["part_node_count"]
return ds


def cf_to_lines(ds: xr.Dataset):
"""Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable.

Parameters
----------
ds : xr.Dataset
A dataset with CF-compliant line geometries.
Must have a "geometry_container" variable with at least a 'node_coordinates' attribute.
Must also have the two 1D variables listed by this attribute.

Returns
-------
geometry : xr.DataArray
A 1D array of shapely.geometry.[Multi]Line objects.
It has the same dimension as the ``part_node_count`` or the coordinates variables, or
``'features'`` if those were not present in ``ds``.
"""
from shapely import GeometryType, from_ragged_array

# Shorthand for convenience
geo = ds.geometry_container.attrs

# The features dimension name, defaults to the one of 'part_node_count'
# or the dimension of the coordinates, if present.
feat_dim = None
if "coordinates" in geo:
xcoord_name, _ = geo["coordinates"].split(" ")
(feat_dim,) = ds[xcoord_name].dims

x_name, y_name = geo["node_coordinates"].split(" ")
xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)

node_count_name = geo.get("node_count")
part_node_count_name = geo.get("part_node_count")
if node_count_name is None:
raise ValueError("'node_count' must be provided for line geometries")
else:
node_count = ds[node_count_name]

offset1 = np.insert(np.cumsum(node_count.values), 0, 0)
lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(offset1,))

if part_node_count_name is None:
# No part_node_count means there are no multilines
# And if we had no coordinates, then the dimension defaults to "index"
feat_dim = feat_dim or "index"
part_node_count = xr.DataArray(node_count.values, dims=(feat_dim,))
if feat_dim in ds.coords:
part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]})

geoms = lines
else:
part_node_count = ds[part_node_count_name]

# get index of offset1 values that are edges for part_node_count
offset2 = np.nonzero(
np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0))
)[0]
multilines = from_ragged_array(
GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2)

Check warning on line 434 in cf_xarray/geometry.py

View check run for this annotation

Codecov / codecov/patch

cf_xarray/geometry.py#L434

Added line #L434 was not covered by tests
)

# get items from lines or multilines depending on number of segments
geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines)

return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords)
155 changes: 148 additions & 7 deletions cf_xarray/tests/test_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,83 @@ def geometry_ds():
return cf_ds, shp_ds


@pytest.fixture
def geometry_line_ds():
from shapely.geometry import LineString, MultiLineString

# empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries.
geoms = np.empty(3, dtype=object)
geoms[:] = [
MultiLineString([[[0, 0], [1, 2]], [[4, 4], [5, 6]]]),
LineString([[0, 0], [1, 0], [1, 1]]),
LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]),
]

ds = xr.Dataset()
shp_da = xr.DataArray(geoms, dims=("index",), name="geometry")

cf_ds = ds.assign(
x=xr.DataArray(
[0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}
),
y=xr.DataArray(
[0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}
),
part_node_count=xr.DataArray([4, 3, 3], dims=("index",)),
node_count=xr.DataArray([2, 2, 3, 3], dims=("segment",)),
crd_x=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "x"}),
crd_y=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "y"}),
geometry_container=xr.DataArray(
attrs={
"geometry_type": "line",
"node_count": "node_count",
"part_node_count": "part_node_count",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
)

cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"])

return cf_ds, shp_da


@pytest.fixture
def geometry_line_without_multilines_ds():
from shapely.geometry import LineString

# empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries.
geoms = np.empty(2, dtype=object)
geoms[:] = [
LineString([[0, 0], [1, 0], [1, 1]]),
LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]),
]

ds = xr.Dataset()
shp_da = xr.DataArray(geoms, dims=("index",), name="geometry")

cf_ds = ds.assign(
x=xr.DataArray([0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}),
y=xr.DataArray([0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}),
node_count=xr.DataArray([3, 3], dims=("segment",)),
crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}),
crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}),
geometry_container=xr.DataArray(
attrs={
"geometry_type": "line",
"node_count": "node_count",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
)

cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"])

return cf_ds, shp_da


@requires_shapely
def test_shapely_to_cf(geometry_ds):
from shapely.geometry import Point
Expand Down Expand Up @@ -87,12 +164,45 @@ def test_shapely_to_cf(geometry_ds):
assert set(out.dims) == {"features", "node"}


@requires_shapely
def test_shapely_to_cf_for_lines_as_da(geometry_line_ds):
expected, in_da = geometry_line_ds

actual = cfxr.shapely_to_cf(in_da)
xr.testing.assert_identical(actual, expected)

in_da = in_da.assign_coords(index=["a", "b", "c"])
actual = cfxr.shapely_to_cf(in_da)
xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b", "c"]))


@requires_shapely
def test_shapely_to_cf_for_lines_as_sequence(geometry_line_ds):
expected, in_da = geometry_line_ds
actual = cfxr.shapely_to_cf(in_da.values)
xr.testing.assert_identical(actual, expected)


@requires_shapely
def test_shapely_to_cf_for_lines_without_multilines(
geometry_line_without_multilines_ds,
):
expected, in_da = geometry_line_without_multilines_ds
actual = cfxr.shapely_to_cf(in_da)
xr.testing.assert_identical(actual, expected)


@requires_shapely
def test_shapely_to_cf_errors():
from shapely.geometry import LineString, Point
from shapely.geometry import Point, Polygon

geoms = [LineString([[1, 2], [2, 3]]), LineString([[2, 3, 4], [4, 3, 2]])]
with pytest.raises(NotImplementedError, match="Only point geometries conversion"):
geoms = [
Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]),
Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]),
]
with pytest.raises(
NotImplementedError, match="Polygon geometry conversion is not implemented"
):
cfxr.shapely_to_cf(geoms)

geoms.append(Point(1, 2))
Expand Down Expand Up @@ -122,16 +232,47 @@ def test_cf_to_shapely(geometry_ds):


@requires_shapely
def test_cf_to_shapely_errors(geometry_ds):
in_ds, expected = geometry_ds
in_ds.geometry_container.attrs["geometry_type"] = "line"
with pytest.raises(NotImplementedError, match="Only point geometries conversion"):
def test_cf_to_shapely_for_lines(geometry_line_ds):
in_ds, expected = geometry_line_ds

actual = cfxr.cf_to_shapely(in_ds)
assert actual.dims == ("index",)
xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected)


@requires_shapely
def test_cf_to_shapely_for_lines_without_multilines(
geometry_line_without_multilines_ds,
):
in_ds, expected = geometry_line_without_multilines_ds
actual = cfxr.cf_to_shapely(in_ds)
assert actual.dims == ("index",)
xr.testing.assert_identical(actual, expected)

in_ds = in_ds.assign_coords(index=["b", "c"])
actual = cfxr.cf_to_shapely(in_ds)
assert actual.dims == ("index",)
xr.testing.assert_identical(
actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"])
)


@requires_shapely
def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds):
in_ds, _ = geometry_ds
in_ds.geometry_container.attrs["geometry_type"] = "polygon"
with pytest.raises(NotImplementedError, match="Polygon geometry"):
cfxr.cf_to_shapely(in_ds)

in_ds.geometry_container.attrs["geometry_type"] = "punkt"
with pytest.raises(ValueError, match="Valid CF geometry types are "):
cfxr.cf_to_shapely(in_ds)

in_ds, _ = geometry_line_ds
del in_ds.geometry_container.attrs["node_count"]
with pytest.raises(ValueError, match="'node_count' must be provided"):
cfxr.cf_to_shapely(in_ds)


@requires_shapely
def test_reshape_unique_geometries(geometry_ds):
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ module=[
"pint",
"matplotlib",
"pytest",
"shapely",
"shapely.geometry",
"xarray.core.pycompat",
]
Expand Down