don't ignore repulsion_strength when using t-UMAP

jlmelville · Dec 22, 2024 · f1f5f4d · f1f5f4d
1 parent a2e0506
commit f1f5f4d
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 7 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -24,6 +24,9 @@ reopening the issue (<https://github.com/jlmelville/uwot/issues/118>).
 * Relative paths could not be used to save a model. Thank you
 [Wouter van der Bijl](https://github.com/Ax3man) for the bug report
 (<https://github.com/jlmelville/uwot/issues/131>) and the suggested fix.
+* `repulsion_strength` was silently ignored when used with `tumap`, or with
+`umap2` when `a = 1, b = 1`. Ignoring the setting was intentional, but it was
+not documented anywhere. `repulsion_strength` now takes effect with these settings.
 
 # uwot 0.2.2
 

diff --git a/R/transform.R b/R/transform.R
@@ -803,6 +803,7 @@ umap_transform <- function(X = NULL, model = NULL,
 
     method_args <- switch(method,
       umap = list(a = a, b = b, gamma = gamma, approx_pow = approx_pow),
+      tumap = list(gamma = gamma),
       leopold2 = list(ai = ai, aj = aj, b = b, ndim = ndim),
       list()
     )

diff --git a/R/uwot.R b/R/uwot.R
@@ -3721,7 +3721,7 @@ uwot <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",
     method <- tolower(method)
     method_args <- switch(method,
       umap = list(a = a, b = b, gamma = gamma, approx_pow = approx_pow),
-      tumap = list(),
+      tumap = list(gamma = gamma),
       # a = 1 b = 10 for final phase of PaCMAP optimization
       pacmap = list(a = a, b = b),
       largevis = list(gamma = gamma),

diff --git a/inst/include/uwot/gradient.h b/inst/include/uwot/gradient.h
@@ -150,22 +150,24 @@ using apumap_gradient = base_umap_gradient<fastPrecisePow>;
 // distribution as used in t-SNE. This massively simplifies the gradient,
 // removing the pow calls, resulting in a noticeable speed increase (50% with
 // MNIST), although the resulting embedding has a larger spread than the
-// default. Also gamma is absent from this, because I believe it to be
-// un-necessary in the UMAP cost function.
+// default.
 class tumap_gradient {
 public:
-  tumap_gradient() = default;
+  tumap_gradient(float gamma) : gamma_2(gamma * 2.0){};
   auto grad_attr(float d2, std::size_t, std::size_t) const -> float {
     return -2.0 / (d2 + 1.0);
   }
   auto grad_rep(float d2, std::size_t, std::size_t) const -> float {
-    return 2.0 / ((0.001 + d2) * (d2 + 1.0));
+    return gamma_2 / ((0.001 + d2) * (d2 + 1.0));
   }
   auto clamp_grad(float grad_d) const -> float {
     return clamp(grad_d, clamp_lo, clamp_hi);
   }
   static const constexpr float clamp_hi = 4.0;
   static const constexpr float clamp_lo = -4.0;
+
+private:
+  float gamma_2;
 };
 
 // UMAP where a varies for each observation

diff --git a/src/r_uwot.cpp b/src/r_uwot.cpp
@@ -243,8 +243,12 @@ void create_umap(UmapFactory &umap_factory, List method_args) {
   }
 }
 
-void create_tumap(UmapFactory &umap_factory, List) {
-  const uwot::tumap_gradient gradient;
+void create_tumap(UmapFactory &umap_factory, List method_args) {
+  std::vector<std::string> arg_names = {"gamma"};
+  validate_args(method_args, arg_names);
+  float gamma = method_args["gamma"];
+
+  const uwot::tumap_gradient gradient(gamma);
   umap_factory.create(gradient);
 }