Skip to content

Commit

Permalink
Reblack everything
Browse files (browse the repository at this point in the history)
  • Loading branch information
lmcinnes committed Nov 26, 2020
1 parent 29e11fc commit 8b03c48
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 68 deletions.
14 changes: 7 additions & 7 deletions umap/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ def ll_dirichlet(data1, data2):


@numba.njit(fastmath=True)
def symmetric_kl(x, y, z=1e-11): # pragma: no cover
def symmetric_kl(x, y, z=1e-11): # pragma: no cover
"""
symmetrized KL divergence between two probability distributions
Expand Down Expand Up @@ -779,7 +779,7 @@ def symmetric_kl(x, y, z=1e-11): # pragma: no cover


@numba.njit(fastmath=True)
def symmetric_kl_grad(x, y, z=1e-11): # pragma: no cover
def symmetric_kl_grad(x, y, z=1e-11): # pragma: no cover
"""
symmetrized KL divergence and its gradient
Expand Down Expand Up @@ -848,7 +848,7 @@ def correlation_grad(x, y):
@numba.njit(fastmath=True)
def sinkhorn_distance(
x, y, M=_mock_identity, cost=_mock_cost, maxiter=64
): # pragma: no cover
): # pragma: no cover
p = (x / x.sum()).astype(np.float32)
q = (y / y.sum()).astype(np.float32)

Expand All @@ -872,7 +872,7 @@ def sinkhorn_distance(


@numba.njit(fastmath=True)
def spherical_gaussian_energy_grad(x, y): # pragma: no cover
def spherical_gaussian_energy_grad(x, y): # pragma: no cover
mu_1 = x[0] - y[0]
mu_2 = x[1] - y[1]

Expand All @@ -890,7 +890,7 @@ def spherical_gaussian_energy_grad(x, y): # pragma: no cover


@numba.njit(fastmath=True)
def diagonal_gaussian_energy_grad(x, y): # pragma: no cover
def diagonal_gaussian_energy_grad(x, y): # pragma: no cover
mu_1 = x[0] - y[0]
mu_2 = x[1] - y[1]

Expand Down Expand Up @@ -925,7 +925,7 @@ def diagonal_gaussian_energy_grad(x, y): # pragma: no cover


@numba.njit(fastmath=True)
def gaussian_energy_grad(x, y): # pragma: no cover
def gaussian_energy_grad(x, y): # pragma: no cover
mu_1 = x[0] - y[0]
mu_2 = x[1] - y[1]

Expand Down Expand Up @@ -998,7 +998,7 @@ def gaussian_energy_grad(x, y): # pragma: no cover


@numba.njit(fastmath=True)
def spherical_gaussian_grad(x, y): # pragma: no cover
def spherical_gaussian_grad(x, y): # pragma: no cover
mu_1 = x[0] - y[0]
mu_2 = x[1] - y[1]

Expand Down
6 changes: 5 additions & 1 deletion umap/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,13 @@ def aligned_iris_relations():
def iris_model(iris):
return UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(iris.data)


@pytest.fixture(scope="session")
def iris_subset_model(iris, iris_selection):
return UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(iris.data[iris_selection])
return UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(
iris.data[iris_selection]
)


@pytest.fixture(scope="session")
def supervised_iris_model(iris):
Expand Down
1 change: 1 addition & 0 deletions umap/tests/test_aligned_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_local_clustering(aligned_iris, aligned_iris_model):
ari = adjusted_rand_score(target[3], clusters)
assert_greater_equal(ari, 0.40)


def test_aligned_update(aligned_iris, aligned_iris_relations):
data, target = aligned_iris
small_aligned_model = AlignedUMAP()
Expand Down
15 changes: 8 additions & 7 deletions umap/tests/test_composite_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@

def test_composite_trustworthiness(nn_data, iris_model):
data = nn_data[:50]
model1 = UMAP(
n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=50
).fit(data)
model1 = UMAP(n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=50).fit(data)
model2 = UMAP(
n_neighbors=30, min_dist=0.01, random_state=42, n_epochs=50,
init=model1.embedding_
n_neighbors=30,
min_dist=0.01,
random_state=42,
n_epochs=50,
init=model1.embedding_,
).fit(data)
model3 = model1 * model2
trust = trustworthiness(data, model3.embedding_, 10)
Expand All @@ -49,8 +50,9 @@ def test_composite_trustworthiness(nn_data, iris_model):
with pytest.raises(ValueError):
model5 = model1 - iris_model


@SkipTest
def test_composite_trustworthiness_random_init(nn_data): # pragma: no cover
def test_composite_trustworthiness_random_init(nn_data): # pragma: no cover
data = nn_data[:50]
model1 = UMAP(
n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=50, init="random",
Expand All @@ -74,7 +76,6 @@ def test_composite_trustworthiness_random_init(nn_data): # pragma: no cover
)



def test_composite_trustworthiness_on_iris(iris):
iris_model1 = UMAP(
n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=100,
Expand Down
1 change: 1 addition & 0 deletions umap/tests/test_densmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def test_densmap_trustworthiness(nn_data):
"Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust),
)


@SkipTest
def test_densmap_trustworthiness_random_init(nn_data): # pragma: no cover
data = nn_data[:50]
Expand Down
27 changes: 18 additions & 9 deletions umap/tests/test_umap_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_nn_bad_metric_sparse_data(sparse_nn_data):
# -------------------------------------------------


def knn(indices, nn_data): # pragma: no cover
def knn(indices, nn_data): # pragma: no cover
tree = KDTree(nn_data)
true_indices = tree.query(nn_data, 10, return_distance=False)
num_correct = 0.0
Expand All @@ -65,8 +65,9 @@ def smooth_knn(nn_data, local_connectivity=1.0):
norms = np.sum(vals, axis=1)
return norms


@SkipTest
def test_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover
def test_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
nn_data, 10, "euclidean", {}, False, np.random
)
Expand All @@ -77,8 +78,9 @@ def test_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover
"NN-descent did not get 89% accuracy on nearest neighbors",
)


@SkipTest
def test_nn_descent_neighbor_accuracy_low_memory(nn_data): # pragma: no cover
def test_nn_descent_neighbor_accuracy_low_memory(nn_data): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
nn_data, 10, "euclidean", {}, False, np.random, low_memory=True
)
Expand All @@ -89,8 +91,9 @@ def test_nn_descent_neighbor_accuracy_low_memory(nn_data): # pragma: no cover
"NN-descent did not get 89% accuracy on nearest neighbors",
)


@SkipTest
def test_angular_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover
def test_angular_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
nn_data, 10, "cosine", {}, True, np.random
)
Expand All @@ -102,8 +105,9 @@ def test_angular_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover
"NN-descent did not get 89% accuracy on nearest neighbors",
)


@SkipTest
def test_sparse_nn_descent_neighbor_accuracy(sparse_nn_data): # pragma: no cover
def test_sparse_nn_descent_neighbor_accuracy(sparse_nn_data): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
sparse_nn_data, 20, "euclidean", {}, False, np.random
)
Expand All @@ -114,8 +118,11 @@ def test_sparse_nn_descent_neighbor_accuracy(sparse_nn_data): # pragma: no cover
"Sparse NN-descent did not get 90% accuracy on nearest neighbors",
)


@SkipTest
def test_sparse_nn_descent_neighbor_accuracy_low_memory(sparse_nn_data): # pragma: no cover
def test_sparse_nn_descent_neighbor_accuracy_low_memory(
sparse_nn_data,
): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
sparse_nn_data, 20, "euclidean", {}, False, np.random, low_memory=True
)
Expand All @@ -126,8 +133,9 @@ def test_sparse_nn_descent_neighbor_accuracy_low_memory(sparse_nn_data): # pragm
"Sparse NN-descent did not get 90% accuracy on nearest neighbors",
)


@SkipTest
def test_nn_descent_neighbor_accuracy_callable_metric(nn_data): # pragma: no cover
def test_nn_descent_neighbor_accuracy_callable_metric(nn_data): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
nn_data, 10, dist.euclidean, {}, False, np.random
)
Expand All @@ -142,7 +150,9 @@ def test_nn_descent_neighbor_accuracy_callable_metric(nn_data): # pragma: no cov


@SkipTest
def test_sparse_angular_nn_descent_neighbor_accuracy(sparse_nn_data): # pragma: no cover
def test_sparse_angular_nn_descent_neighbor_accuracy(
sparse_nn_data,
): # pragma: no cover
knn_indices, knn_dists, _ = nearest_neighbors(
sparse_nn_data, 20, "cosine", {}, True, np.random
)
Expand Down Expand Up @@ -174,4 +184,3 @@ def test_smooth_knn_dist_l1norms_w_connectivity(nn_data):
err_msg="Smooth knn-dists does not give expected"
"norms for local_connectivity=1.75",
)

55 changes: 40 additions & 15 deletions umap/tests/test_umap_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_multi_component_layout():

assert_less(error, 15.0, msg="Multi component embedding to far astray")


# Multi-components Layout
def test_multi_component_layout_precomputed():
data, labels = make_blobs(
Expand All @@ -81,7 +82,8 @@ def test_multi_component_layout_precomputed():
true_centroids = normalize(true_centroids, norm="l2")

embedding = UMAP(n_neighbors=4, metric="precomputed", n_epochs=100).fit_transform(
dmat)
dmat
)
embed_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)
embed_labels = KMeans(n_clusters=5).fit_predict(embedding)

Expand All @@ -94,6 +96,7 @@ def test_multi_component_layout_precomputed():

assert_less(error, 15.0, msg="Multi component embedding to far astray")


# ---------------
# Umap Transform
# --------------
Expand Down Expand Up @@ -142,6 +145,7 @@ def test_umap_transform_embedding_stability(iris, iris_subset_model, iris_select
_ = umap.transform(b)
assert_array_equal(u1_orig, umap.embedding_)


# -----------
# UMAP Update
# -----------
Expand All @@ -151,19 +155,18 @@ def test_umap_update(iris, iris_subset_model, iris_selection, iris_model):
new_model = iris_subset_model
new_model.update(new_data)

comparison_graph = scipy.sparse.vstack([
iris_model.graph_[iris_selection],
iris_model.graph_[~iris_selection]
])
comparison_graph = scipy.sparse.hstack([
comparison_graph[:, iris_selection],
comparison_graph[:, ~iris_selection]
])
comparison_graph = scipy.sparse.vstack(
[iris_model.graph_[iris_selection], iris_model.graph_[~iris_selection]]
)
comparison_graph = scipy.sparse.hstack(
[comparison_graph[:, iris_selection], comparison_graph[:, ~iris_selection]]
)

error = np.sum(np.abs((new_model.graph_ - comparison_graph).data))

assert_less(error, 1.0)


# -----------------
# UMAP Graph output
# -----------------
Expand All @@ -184,16 +187,38 @@ def test_umap_graph_layout():
# Component layout options
# ------------------------


def test_component_layout_options(nn_data):
dmat = pairwise_distances(nn_data[:1000])
n_components = 5
component_labels = np.repeat(np.arange(5), dmat.shape[0] // 5)
single = component_layout(dmat, n_components, component_labels, 2, np.random,
metric="precomputed", metric_kwds={"linkage": "single"})
average = component_layout(dmat, n_components, component_labels, 2, np.random,
metric="precomputed", metric_kwds={"linkage": "average"})
complete = component_layout(dmat, n_components, component_labels, 2, np.random,
metric="precomputed", metric_kwds={"linkage": "complete"})
single = component_layout(
dmat,
n_components,
component_labels,
2,
np.random,
metric="precomputed",
metric_kwds={"linkage": "single"},
)
average = component_layout(
dmat,
n_components,
component_labels,
2,
np.random,
metric="precomputed",
metric_kwds={"linkage": "average"},
)
complete = component_layout(
dmat,
n_components,
component_labels,
2,
np.random,
metric="precomputed",
metric_kwds={"linkage": "complete"},
)

assert single.shape[0] == 5
assert average.shape[0] == 5
Expand Down
Loading

0 comments on commit 8b03c48

Please sign in to comment.