From 0d41c8a5d60313d0126af2c0f5e6f21a02271331 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 20 Dec 2024 18:01:16 -0600 Subject: [PATCH 1/6] Fixed dropping the geometry column --- dask_geopandas/_expr.py | 24 ++++++++++++++++++++++++ dask_geopandas/expr.py | 20 ++++++++++++++++++++ dask_geopandas/tests/test_core.py | 12 ++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 dask_geopandas/_expr.py diff --git a/dask_geopandas/_expr.py b/dask_geopandas/_expr.py new file mode 100644 index 0000000..88a2004 --- /dev/null +++ b/dask_geopandas/_expr.py @@ -0,0 +1,24 @@ +from typing import Literal + +import dask_expr as dx + +import geopandas + + +def _drop(df: geopandas.GeoDataFrame, columns, errors): + return df.drop(columns=columns, errors=errors) + + +def _validate_axis(axis=0, none_is_zero: bool = True) -> None | Literal[0, 1]: + if axis not in (0, 1, "index", "columns", None): + raise ValueError(f"No axis named {axis}") + # convert to numeric axis + numeric_axis: dict[str | None, Literal[0, 1]] = {"index": 0, "columns": 1} + if none_is_zero: + numeric_axis[None] = 0 + + return numeric_axis.get(axis, axis) + + +class Drop(dx.expr.Drop): + operation = staticmethod(_drop) diff --git a/dask_geopandas/expr.py b/dask_geopandas/expr.py index dcdafa8..78f16ff 100644 --- a/dask_geopandas/expr.py +++ b/dask_geopandas/expr.py @@ -26,6 +26,7 @@ import dask_geopandas +from ._expr import Drop, _validate_axis from .geohash import _geohash from .hilbert_distance import _hilbert_distance from .morton_distance import _morton_distance @@ -868,6 +869,25 @@ def explode(self, column=None, ignore_index=False, index_parts=None): enforce_metadata=False, ) + @derived_from(geopandas.GeoDataFrame) + def drop(self, labels=None, axis=0, columns=None, errors="raise"): + # https://github.com/geopandas/dask-geopandas/issues/321 + # Override to avoid an inplace drop, since we need + # to convert from a GeoDataFrame to a DataFrame when dropping + # the geometry column. + if columns is None and labels is None: + raise TypeError("must either specify 'columns' or 'labels'") + + axis = _validate_axis(axis) + + if axis == 1: + columns = labels or columns + elif axis == 0 and columns is None: + raise NotImplementedError( + "Drop currently only works for axis=1 or when columns is not None" + ) + return new_collection(Drop(self, columns=columns, errors=errors)) + from_geopandas = dx.from_pandas diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py index e46ec7c..48f8a7a 100644 --- a/dask_geopandas/tests/test_core.py +++ b/dask_geopandas/tests/test_core.py @@ -1046,3 +1046,15 @@ def get_chunk(n): expected = geopandas.GeoDataFrame({"col": [1, 1], "geometry": [Point(1, 1)] * 2}) assert_geodataframe_equal(ddf.compute(), expected) + + +def test_drop(): + # https://github.com/geopandas/dask-geopandas/issues/321 + df = dask_geopandas.from_geopandas( + geopandas.GeoDataFrame({"col": [1], "geometry": [Point(1, 1)]}), npartitions=1 + ) + result = df.drop(columns="geometry") + assert type(result) is dd.DataFrame + + result = df.drop(columns="col") + assert type(result) is dask_geopandas.GeoDataFrame From 795a246030564b5c7c0d686728657f432efe6b6e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 4 Jan 2025 15:28:10 -0600 Subject: [PATCH 2/6] trigger ci From a9eb7c03215a29663f09b8665bd56e03c2cf79ed Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2025 13:26:27 -0700 Subject: [PATCH 3/6] fixup --- dask_geopandas/_expr.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dask_geopandas/_expr.py b/dask_geopandas/_expr.py index 88a2004..7c8989a 100644 --- a/dask_geopandas/_expr.py +++ b/dask_geopandas/_expr.py @@ -1,6 +1,6 @@ from typing import Literal -import dask_expr as dx +import dask.dataframe.dask_expr as dx import geopandas diff --git a/pyproject.toml b/pyproject.toml index c5190d0..4d56413 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -155,6 +155,6 @@ section-order = [ ] [tool.ruff.lint.isort.sections] -"dask" = ["dask", "dask_expr"] +"dask" = ["dask"] "geo" = ["geopandas", "shapely", "pyproj"] "testing" = ["pytest", "pandas.testing", "numpy.testing", "geopandas.tests", "geopandas.testing"] From 5736fb44f0e112f88489eb81d46bf451ecea2ac3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2025 13:35:18 -0700 Subject: [PATCH 4/6] changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f8e537..c5d81d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ Packaging: - `dask>=2025.1.0` is now required. - `python>=3.10` is now required. +Bug fixes: + +- Fixed `GeoDataFrame.drop` returning a `GeoDataFrame` + instead of a `DataFrame`, when dropping the geometry + column (#321). Version 0.4.2 (September 24, 2024) ---------------------------------- From 7ac4da31abe925f1fbfbe4becdf35015be7d6013 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2025 13:50:31 -0700 Subject: [PATCH 5/6] test coverage --- dask_geopandas/tests/test_core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py index 542f4cc..a760984 100644 --- a/dask_geopandas/tests/test_core.py +++ b/dask_geopandas/tests/test_core.py @@ -1045,6 +1045,9 @@ def test_drop(): result = df.drop(columns="col") assert type(result) is dask_geopandas.GeoDataFrame + with pytest.raises(ValueError, match="No axis named x"): + df.drop(axis="x") + def test_core_deprecated(): with pytest.warns(FutureWarning, match="dask_geopandas.core"): From d359cd039c34f27d1cddc2686e140e886e249579 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2025 13:57:03 -0700 Subject: [PATCH 6/6] fixup --- dask_geopandas/tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py index a760984..678f869 100644 --- a/dask_geopandas/tests/test_core.py +++ b/dask_geopandas/tests/test_core.py @@ -1046,7 +1046,7 @@ def test_drop(): assert type(result) is dask_geopandas.GeoDataFrame with pytest.raises(ValueError, match="No axis named x"): - df.drop(axis="x") + df.drop(labels="a", axis="x") def test_core_deprecated():