diff --git a/DESCRIPTION b/DESCRIPTION index 352db189..f30291e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: uwot Title: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction -Version: 0.0.0.9001 +Version: 0.0.0.9002 Authors@R: person("James", "Melville", email = "james.melville@gmail.com", role = c("aut", "cre")) Author: James Melville Maintainer: James Melville diff --git a/R/neighbors.R b/R/neighbors.R index 7932b064..a8149e2f 100644 --- a/R/neighbors.R +++ b/R/neighbors.R @@ -77,7 +77,7 @@ annoy_nn <- function(X, k = 10, include_self = TRUE, } annoy_build <- function(X, metric = "euclidean", n_trees = 50, - n_threads = + n_threads = max(1, RcppParallel::defaultNumThreads() / 2), grain_size = 1, verbose = FALSE) { nr <- nrow(X) @@ -111,28 +111,30 @@ annoy_build <- function(X, metric = "euclidean", n_trees = 50, # Search a pre-built Annoy index for neighbors of X annoy_search <- function(X, k = 10, ann, search_k = 100 * k, - n_threads = + n_threads = max(1, RcppParallel::defaultNumThreads() / 2), grain_size = 1, verbose = FALSE) { - ann_class <- class(ann) - if (endsWith(ann_class, "Cosine")) { - search_nn_func <- annoy_cosine_nns - } - else if (endsWith(ann_class, "Manhattan")) { - search_nn_func <- annoy_manhattan_nns - } - else { - search_nn_func <- annoy_euclidean_nns - } - - nr <- nrow(X) - if (n_threads > 0) { index_file <- tempfile() ann$save(index_file) tsmessage("Searching Annoy index using ", pluralize("thread", n_threads)) + + ann_class <- class(ann) + if (endsWith(ann_class, "Angular")) { + search_nn_func <- annoy_cosine_nns + } + else if (endsWith(ann_class, "Manhattan")) { + search_nn_func <- annoy_manhattan_nns + } + else if (endsWith(ann_class, "Euclidean")) { + search_nn_func <- annoy_euclidean_nns + } + else { + stop("BUG: Unknown ANN class '", ann_class, "'") + } + res <- search_nn_func(index_file, X, k, search_k, @@ -146,6 +148,7 @@ annoy_search <- function(X, k = 10, ann, } else { tsmessage("Searching Annoy index") + nr <- nrow(X) search_progress <- Progress$new(max = nr, display = verbose) idx <- matrix(nrow = nr, ncol = k) dist <- matrix(nrow = nr, ncol = k) @@ -222,7 +225,7 @@ sparse_nn <- function(X, k, include_self = TRUE) { is_nonzero <- dists != 0 dist_nonzero <- dists[is_nonzero] if (length(dist_nonzero) < k) { - stop("Row ", i, " of distance matrix has only ", length(dist_nonzero), + stop("Row ", i, " of distance matrix has only ", length(dist_nonzero), " defined distances") } diff --git a/tests/testthat/test_output.R b/tests/testthat/test_output.R index 9822dfe7..50ce414f 100644 --- a/tests/testthat/test_output.R +++ b/tests/testthat/test_output.R @@ -1,6 +1,7 @@ library(uwot) context("API output") +set.seed(1337) # No way to compare with the Python implementation due to differences in # random number implementations as well as floating point comparison # and various architecture differences. So we'll just check that the output @@ -25,6 +26,26 @@ res <- tumap(iris10, ) expect_ok_matrix(res) + +# UMAP and cosine metric n_threads = 1 issue #5 +res <- umap(iris10, + n_neighbors = 4, n_epochs = 2, alpha = 0.5, metric = "cosine", + init = "spectral", verbose = FALSE, n_threads = 1 +) +expect_ok_matrix(res) + +# metric = Manhattan +res <- umap(iris10, + n_neighbors = 4, n_epochs = 2, alpha = 0.5, metric = "manhattan", + init = "rand", verbose = FALSE, n_threads = 0 +) +expect_ok_matrix(res) +res <- umap(iris10, + n_neighbors = 4, n_epochs = 2, alpha = 0.5, metric = "manhattan", + init = "spca", verbose = FALSE, n_threads = 1 +) +expect_ok_matrix(res) + # lvish and force use of annoy res <- lvish(iris10, perplexity = 4, n_epochs = 2, alpha = 0.5, nn_method = "annoy",