From c85e3f0eb43480d444078d4f32948ea54cd34bc6 Mon Sep 17 00:00:00 2001 From: David Blodgett Date: Tue, 1 Oct 2024 21:54:51 -0500 Subject: [PATCH 1/2] add specific id search to index_points_to_lines for #24 --- R/index_points_to_lines.R | 54 ++++++++++++++++++++++++++++++------ man/index_points_to_lines.Rd | 29 +++++++++++++++---- tests/testthat/test_index.R | 31 ++++++++++++++++++--- 3 files changed, 95 insertions(+), 19 deletions(-) diff --git a/R/index_points_to_lines.R b/R/index_points_to_lines.R index 4e87b95..e490572 100644 --- a/R/index_points_to_lines.R +++ b/R/index_points_to_lines.R @@ -166,6 +166,12 @@ interp_meas <- function(m, x1, y1, x2, y2) { #' @param precision numeric the resolution of measure precision in the output in meters. #' @param max_matches numeric the maximum number of matches to return if multiple are #' found in search_radius +#' @param ids vector of ids corresponding to flowline ids from `x` of the same length +#' as and order as `points`. If included, index searching will be constrained to one +#' and only one flowline per point. +#' +#' `search radius` is still used with this option but `max_matches` is overridden. +#' #' @returns data.frame with five columns, point_id, id, aggregate_id, #' aggregate_id_measure, and offset. point_id is the row or list element in the #' point input. @@ -193,6 +199,9 @@ interp_meas <- function(m, x1, y1, x2, y2) { #' if(require(nhdplusTools)) { #' source(system.file("extdata", "sample_flines.R", package = "nhdplusTools")) #' +#' if(!any(lengths(sf::st_geometry(sample_flines)) > 1)) +#' sample_flines <- sf::st_cast(sample_flines, "LINESTRING", warn = FALSE) +#' #' point <- sf::st_sfc(sf::st_point(c(-76.87479, 39.48233)), #' crs = 4326) #' @@ -205,21 +214,27 @@ interp_meas <- function(m, x1, y1, x2, y2) { #' #' index_points_to_lines(sample_flines, point, precision = 30) #' -#' index_points_to_lines(sample_flines, -#' sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), +#' points <- sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), #' sf::st_point(c(-76.91711, 39.40884)), #' sf::st_point(c(-76.88081, 39.36354))), -#' crs = 4326), +#' crs = 4326) +#' +#' index_points_to_lines(sample_flines, points), #' search_radius = units::set_units(0.2, "degrees"), #' max_matches = 10) #' +#' index_points_to_lines(sample_flines, points, +#' search_radius = units::set_units(0.2, "degrees"), +#' ids = c(11689926, 11690110, 11688990)) +#' #' } #' } #' index_points_to_lines <- function(x, points, search_radius = NULL, precision = NA, - max_matches = 1) { + max_matches = 1, + ids = NULL) { UseMethod("index_points_to_lines") @@ -230,14 +245,16 @@ index_points_to_lines <- function(x, points, index_points_to_lines.data.frame <- function(x, points, search_radius = NULL, precision = NA, - max_matches = 1) { + max_matches = 1, + ids = NULL) { x <- hy(x) matched <- index_points_to_lines(x, points, search_radius = search_radius, precision = precision, - max_matches = max_matches) + max_matches = max_matches, + ids = ids) rename_indexed(x, matched) @@ -248,7 +265,8 @@ index_points_to_lines.data.frame <- function(x, points, index_points_to_lines.hy <- function(x, points, search_radius = NULL, precision = NA, - max_matches = 1) { + max_matches = 1, + ids = NULL) { # TODO: handle for aggregate or not? check_names(x, c(id), "index_points_to_lines") @@ -266,6 +284,7 @@ index_points_to_lines.hy <- function(x, points, } else { point_buffer <- st_buffer(points, search_radius) } + } if(units(search_radius) == units(as_units("degrees"))) { @@ -274,6 +293,17 @@ index_points_to_lines.hy <- function(x, points, } } + # filter x to ids we need + if(!is.null(ids)) { + if(!all(ids %in% x$id)) stop("ids is not NULL and not all ids are in the id field of x") + + if(!length(ids) == length(points)) stop("ids input must be 1:1 with points") + + x <- filter(x, .data$id %in% ids) + + max_matches <- 50 + } + x <- match_crs(x, points, paste("crs of lines and points don't match.", "attempting st_transform of lines")) @@ -352,7 +382,6 @@ index_points_to_lines.hy <- function(x, points, # downstream to upstream order x <- st_coordinates(x) - matched <- matcher_dt(x, points, search_radius, max_matches = max_matches) |> left_join(select(fline_atts, id, "precision_index"), by = c("L1" = "precision_index")) @@ -363,7 +392,6 @@ index_points_to_lines.hy <- function(x, points, x <- st_coordinates(x) - matched <- matcher_dt(x, points, search_radius, max_matches = max_matches) |> left_join(select(fline_atts, id, "index"), by = c("L1" = "index")) @@ -373,6 +401,14 @@ index_points_to_lines.hy <- function(x, points, } + if(!is.null(ids)) { + ids <- data.frame(point_id = seq_len(length(ids)), check_ids = ids) + + matched <- left_join(matched, ids, by = "point_id") |> + filter(.data$id == .data$check_ids) |> + select(-all_of("check_ids")) + } + x <- x |> add_index() |> filter(.data$L1 %in% matched$L1) |> diff --git a/man/index_points_to_lines.Rd b/man/index_points_to_lines.Rd index 08dde33..3616d90 100644 --- a/man/index_points_to_lines.Rd +++ b/man/index_points_to_lines.Rd @@ -11,7 +11,8 @@ index_points_to_lines( points, search_radius = NULL, precision = NA, - max_matches = 1 + max_matches = 1, + ids = NULL ) \method{index_points_to_lines}{data.frame}( @@ -19,7 +20,8 @@ index_points_to_lines( points, search_radius = NULL, precision = NA, - max_matches = 1 + max_matches = 1, + ids = NULL ) \method{index_points_to_lines}{hy}( @@ -27,7 +29,8 @@ index_points_to_lines( points, search_radius = NULL, precision = NA, - max_matches = 1 + max_matches = 1, + ids = NULL ) } \arguments{ @@ -46,6 +49,12 @@ See RANN nn2 documentation for more details.} \item{max_matches}{numeric the maximum number of matches to return if multiple are found in search_radius} + +\item{ids}{vector of ids corresponding to flowline ids from \code{x} of the same length +as and order as \code{points}. If included, index searching will be constrained to one +and only one flowline per point. + +\verb{search radius} is still used with this option but \code{max_matches} is overridden.} } \value{ data.frame with five columns, point_id, id, aggregate_id, @@ -79,6 +88,9 @@ Note 5: "from" is downstream -- 0 is the outlet "to" is upstream -- 100 is the i if(require(nhdplusTools)) { source(system.file("extdata", "sample_flines.R", package = "nhdplusTools")) +if(!any(lengths(sf::st_geometry(sample_flines)) > 1)) + sample_flines <- sf::st_cast(sample_flines, "LINESTRING", warn = FALSE) + point <- sf::st_sfc(sf::st_point(c(-76.87479, 39.48233)), crs = 4326) @@ -91,14 +103,19 @@ index_points_to_lines(sample_flines, point, index_points_to_lines(sample_flines, point, precision = 30) -index_points_to_lines(sample_flines, - sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), +points <- sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), sf::st_point(c(-76.91711, 39.40884)), sf::st_point(c(-76.88081, 39.36354))), - crs = 4326), + crs = 4326) + +index_points_to_lines(sample_flines, points), search_radius = units::set_units(0.2, "degrees"), max_matches = 10) +index_points_to_lines(sample_flines, points, + search_radius = units::set_units(0.2, "degrees"), + ids = c(11689926, 11690110, 11688990)) + } } diff --git a/tests/testthat/test_index.R b/tests/testthat/test_index.R index c7951ef..91a0ea8 100644 --- a/tests/testthat/test_index.R +++ b/tests/testthat/test_index.R @@ -177,6 +177,27 @@ test_that("point indexing to for multiple points works", { expect_true(all(matches2$REACHCODE %in% matches$REACHCODE)) + expect_equal(index_points_to_lines(flines_in, point, + search_radius = units::set_units(0.2, "degrees"), + ids = c(11689926, 11690110, 11688990))$COMID, + c(11689926L, 11690110L, 11688990L)) + + # check that a large search radius still works + expect_equal(suppressWarnings(index_points_to_lines(flines_in, point, + search_radius = units::set_units(5, "degrees"), + ids = c(11689926, 11690110, 11688990))$COMID), + c(11689926L, 11690110L, 11688990L)) + + expect_error(index_points_to_lines(flines_in, point, + search_radius = units::set_units(0.2, "degrees"), + ids = c(11689926, 11690110, 11688992)), + "not all ids are in the id field of x") + + expect_error(index_points_to_lines(flines_in, point, + search_radius = units::set_units(0.2, "degrees"), + ids = c(11689926, 11690110)), + "ids input must be 1:1 with points") + }) test_that("multipart indexing", { @@ -232,11 +253,13 @@ test_that("disambiguate", { source(system.file("extdata", "sample_flines.R", package = "nhdplusTools")) + points <- sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), + sf::st_point(c(-76.91711, 39.40884)), + sf::st_point(c(-76.88081, 39.36354))), + crs = 4326) + hydro_location <- sf::st_sf(point_id = c(1, 2, 3), - geom = sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), - sf::st_point(c(-76.91711, 39.40884)), - sf::st_point(c(-76.88081, 39.36354))), - crs = 4326), + geom = points, totda = c(23.6, 7.3, 427.9), nameid = c("Patapsco", "", "Falls Run River")) From 24cf370f1368c820b52365a14250a69bcb2fc7df Mon Sep 17 00:00:00 2001 From: David Blodgett Date: Wed, 2 Oct 2024 08:29:06 -0500 Subject: [PATCH 2/2] check issues --- NAMESPACE | 2 ++ R/index_points_to_lines.R | 8 +++++--- man/index_points_to_lines.Rd | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index aacd8db..e5de3fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,8 @@ export(sort_network) export(st_compatibalize) export(to_flownetwork) importFrom(RANN,nn2) +importFrom(data.table,.N) +importFrom(data.table,.SD) importFrom(data.table,as.data.table) importFrom(data.table,copy) importFrom(data.table,data.table) diff --git a/R/index_points_to_lines.R b/R/index_points_to_lines.R index e490572..12d9c42 100644 --- a/R/index_points_to_lines.R +++ b/R/index_points_to_lines.R @@ -33,6 +33,8 @@ matcher <- function(coords, points, search_radius, max_matches = 1) { matched } +utils::globalVariables(c("L1", "N", "nn.dists")) +#' @importFrom data.table .N .SD matcher_dt <- function(coords, points, search_radius, max_matches = 1) { max_match_ <- ifelse(nrow(coords) < 1000, nrow(coords), 1000) @@ -56,10 +58,10 @@ matcher_dt <- function(coords, points, search_radius, max_matches = 1) { # First get rid of duplicate nodes on the same line. matched <- matched[, .SD[nn.dists == min(nn.dists)], - by = .(L1, point_id)] + by = list(L1, point_id)] # Now limit to max matches per point - matched <- matched[, N := seq_len(.N), by = .(point_id)] + matched <- matched[, N := seq_len(.N), by = list(point_id)] matched <- matched[N <= max_matches] @@ -219,7 +221,7 @@ interp_meas <- function(m, x1, y1, x2, y2) { #' sf::st_point(c(-76.88081, 39.36354))), #' crs = 4326) #' -#' index_points_to_lines(sample_flines, points), +#' index_points_to_lines(sample_flines, points, #' search_radius = units::set_units(0.2, "degrees"), #' max_matches = 10) #' diff --git a/man/index_points_to_lines.Rd b/man/index_points_to_lines.Rd index 3616d90..c97783a 100644 --- a/man/index_points_to_lines.Rd +++ b/man/index_points_to_lines.Rd @@ -108,7 +108,7 @@ points <- sf::st_sfc(list(sf::st_point(c(-76.86934, 39.49328)), sf::st_point(c(-76.88081, 39.36354))), crs = 4326) -index_points_to_lines(sample_flines, points), +index_points_to_lines(sample_flines, points, search_radius = units::set_units(0.2, "degrees"), max_matches = 10)