Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure _from_data accepts columns objects only #1415

Merged
merged 3 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/core/binpreds/contains.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.

from math import ceil, sqrt

Expand Down Expand Up @@ -110,7 +110,7 @@ def _brute_force_contains_properly(points, polygons):
width=len(polygons.polygons.part_offset) - 1,
)
)
-final_result = DataFrame._from_data(
+final_result = DataFrame(
{
name: result[name].astype("bool")
for name in reversed(result.columns)
Expand Down
33 changes: 20 additions & 13 deletions python/cuspatial/cuspatial/core/geodataframe.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION
-from typing import Dict, Tuple, TypeVar, Union
+from __future__ import annotations
+
+from typing import Any, Dict, TypeVar, Union

import pandas as pd
from geopandas import GeoDataFrame as gpGeoDataFrame
from geopandas.geoseries import is_geometry_type as gp_is_geometry_type

import cudf
+from cudf.core.column import as_column
from cudf.core.copy_types import BooleanMask, GatherMap

from cuspatial.core._column.geocolumn import GeoColumn, GeoMeta
Expand Down Expand Up @@ -41,7 +44,7 @@ def __init__(
column = GeoColumn(adapter._get_geotuple(), pandas_meta)
self._data[col] = column
else:
-self._data[col] = data[col]
+self._data[col] = as_column(data[col])
elif isinstance(data, dict):
for key in data.keys():
try:
Expand Down Expand Up @@ -137,7 +140,9 @@ def _copy_type_metadata(

return type_copied

-def _split_out_geometry_columns(self) -> Tuple:
+def _split_out_geometry_columns(
+self,
+) -> tuple[GeoDataFrame, cudf.DataFrame]:
"""
Break the geometry columns and non-geometry columns into
separate dataframes and return them separated.
Expand All @@ -154,18 +159,20 @@ def _split_out_geometry_columns(self) -> Tuple:
)
return (geo_columns, data_columns)

-def _recombine_columns(self, geo_columns, data_columns):
+def _recombine_columns(
+self, geo_columns: GeoDataFrame, data_columns: cudf.DataFrame
+) -> dict[Any, GeoSeries | cudf.Series]:
"""
Combine a GeoDataFrame of only geometry columns with a DataFrame
of non-geometry columns in the same order as the columns in `self`
"""
-columns_mask = pd.Series(self.columns)
-geocolumn_mask = pd.Series(
-[isinstance(self[col], GeoSeries) for col in self.columns]
+columns_mask = self.columns
+geocolumn_mask = (
+isinstance(self[col], GeoSeries) for col in columns_mask
)
return {
name: (geo_columns[name] if mask else data_columns[name])
-for name, mask in zip(columns_mask.values, geocolumn_mask.values)
+for name, mask in zip(columns_mask, geocolumn_mask)
}

def _slice(self: T, arg: slice) -> T:
Expand All @@ -190,15 +197,15 @@ def _apply_boolean_mask(self, mask: BooleanMask, keep_index=True) -> T:
{name: geo_columns[name][mask.column] for name in geo_columns}
)

-res = self.__class__._from_data(self._recombine_columns(geo, data))
+res = self.__class__(self._recombine_columns(geo, data))
if keep_index:
res.index = data.index
return res

def _gather(self, gather_map: GatherMap, keep_index=True):
-geo_data, cudf_data = self._split_out_geometry_columns()
+geo_data, df = self._split_out_geometry_columns()
# gather cudf columns
-df = cudf.DataFrame._from_data(data=cudf_data, index=self.index)
+df.index = self.index

cudf_gathered = df._gather(gather_map, keep_index=keep_index)

Expand All @@ -210,7 +217,7 @@ def _gather(self, gather_map: GatherMap, keep_index=True):
geo_gathered = GeoDataFrame(gathered)

# combine
-result = GeoDataFrame._from_data(
+result = GeoDataFrame(
self._recombine_columns(geo_gathered, cudf_gathered)
)
result.index = geo_gathered.index
Expand Down Expand Up @@ -294,7 +301,7 @@ def reset_index(
# Reset the index of the GeoDataFrame to match the
# cudf DataFrame and recombine.
geo_data.index = cudf_reindexed.index
-result = GeoDataFrame._from_data(
+result = GeoDataFrame(
recombiner._recombine_columns(geo_data, cudf_reindexed)
)
result.index = geo_data.index
Expand Down
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/core/spatial/join.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.

import warnings

Expand Down Expand Up @@ -87,7 +87,7 @@ def point_in_polygon(points: GeoSeries, polygons: GeoSeries):
)

result.columns = polygons.index[::-1]
-return DataFrame._from_data(
+return DataFrame(
{
name: result[name].astype("bool")
for name in reversed(result.columns)
Expand Down
11 changes: 6 additions & 5 deletions python/cuspatial/cuspatial/core/spatial/nearest_points.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-import cupy as cp
+# Copyright (c) 2024, NVIDIA CORPORATION.

import cudf
from cudf.core.column import as_column
Expand Down Expand Up @@ -57,7 +57,7 @@ def pairwise_point_linestring_nearest_points(
"segment_id": cudf.Series([], dtype="i4"),
"geometry": GeoSeries([]),
}
-return GeoDataFrame._from_data(data)
+return GeoDataFrame(data)

if not contains_only_points(points):
raise ValueError("`points` must contain only point geometries.")
Expand Down Expand Up @@ -97,11 +97,12 @@ def pairwise_point_linestring_nearest_points(
as_column(linestrings.lines.geometry_offset),
)

-point_on_linestring = GeoColumn._from_points_xy(point_on_linestring_xy)
-nearest_points_on_linestring = GeoSeries(point_on_linestring)
+nearest_points_on_linestring = GeoColumn._from_points_xy(
+point_on_linestring_xy
+)

if not point_geometry_id:
-point_geometry_id = cp.zeros(len(points), dtype=cp.int32)
+point_geometry_id = as_column(0, length=len(points), dtype="int32")

data = {
"point_geometry_id": point_geometry_id,
Expand Down
Loading