Skip to content

Commit

Permalink
more
Browse files Browse the repository at this point in the history
  • Loading branch information
gagolews committed Jun 18, 2024
1 parent 5bbae1c commit dd08e73
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 110 deletions.
2 changes: 1 addition & 1 deletion .devel/pytest/test_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+#rpy2.robjects.numpy2ri.activate()
stats = importr("stats")
genie = importr("genie")
except:
Expand Down
2 changes: 1 addition & 1 deletion .devel/pytest/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+#rpy2.robjects.numpy2ri.activate()
stats = importr("stats")
genie = importr("genie")
except:
Expand Down
76 changes: 39 additions & 37 deletions .devel/pytest/test_cvi.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def test_cvi():
try:
import rpy2
from rpy2.robjects.packages import importr
-import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+from rpy2.robjects import numpy2ri
+from rpy2.robjects import default_converter

r_base = importr("base")
lib_loc = r_base.Sys_getenv("R_LIBS_USER")[0]
Expand All @@ -33,54 +33,56 @@ def test_cvi():
print("ImportError")
return

-    for dataset in ["s1", "Aggregation", "unbalance", "h2mg_64_50"]:
-        X = np.loadtxt("%s/%s.data.gz" % (path,dataset), ndmin=2)
-        labels = np.loadtxt("%s/%s.labels0.gz" % (path,dataset), dtype=np.intp)
+    np_cv_rules = default_converter + numpy2ri.converter
+    with np_cv_rules.context():
+        for dataset in ["s1", "Aggregation", "unbalance", "h2mg_64_50"]:
+            X = np.loadtxt("%s/%s.data.gz" % (path,dataset), ndmin=2)
+            labels = np.loadtxt("%s/%s.labels0.gz" % (path,dataset), dtype=np.intp)

-        X = (X-X.mean(axis=0))/X.std(axis=None, ddof=1)
-        X += np.random.normal(0, 0.0001, X.shape)
+            X = (X-X.mean(axis=0))/X.std(axis=None, ddof=1)
+            X += np.random.normal(0, 0.0001, X.shape)

-        i1 = r_genieclust.calinski_harabasz_index(X, labels)[0]
-        i2 = genieclust.cluster_validity.calinski_harabasz_index(X, labels-1)
-        assert i1 == i2
+            i1 = r_genieclust.calinski_harabasz_index(X, labels)[0]
+            i2 = genieclust.cluster_validity.calinski_harabasz_index(X, labels-1)
+            assert i1 == i2

-        i1 = r_genieclust.negated_ball_hall_index(X, labels)[0]
-        i2 = genieclust.cluster_validity.negated_ball_hall_index(X, labels-1)
-        assert i1 == i2
+            i1 = r_genieclust.negated_ball_hall_index(X, labels)[0]
+            i2 = genieclust.cluster_validity.negated_ball_hall_index(X, labels-1)
+            assert i1 == i2

-        i1 = r_genieclust.negated_davies_bouldin_index(X, labels)[0]
-        i2 = genieclust.cluster_validity.negated_davies_bouldin_index(X, labels-1)
-        assert i1 == i2
+            i1 = r_genieclust.negated_davies_bouldin_index(X, labels)[0]
+            i2 = genieclust.cluster_validity.negated_davies_bouldin_index(X, labels-1)
+            assert i1 == i2

-        i1 = r_genieclust.negated_wcss_index(X, labels)[0]
-        i2 = genieclust.cluster_validity.negated_wcss_index(X, labels-1)
-        assert i1 == i2
+            i1 = r_genieclust.negated_wcss_index(X, labels)[0]
+            i2 = genieclust.cluster_validity.negated_wcss_index(X, labels-1)
+            assert i1 == i2

-        i1 = r_genieclust.silhouette_index(X, labels)[0]
-        i2 = genieclust.cluster_validity.silhouette_index(X, labels-1)
-        assert i1 == i2
+            i1 = r_genieclust.silhouette_index(X, labels)[0]
+            i2 = genieclust.cluster_validity.silhouette_index(X, labels-1)
+            assert i1 == i2

-        i1 = r_genieclust.silhouette_w_index(X, labels)[0]
-        i2 = genieclust.cluster_validity.silhouette_w_index(X, labels-1)
-        assert i1 == i2
+            i1 = r_genieclust.silhouette_w_index(X, labels)[0]
+            i2 = genieclust.cluster_validity.silhouette_w_index(X, labels-1)
+            assert i1 == i2

-        i1 = r_genieclust.generalised_dunn_index(X, labels, 4, 2)[0]
-        i2 = genieclust.cluster_validity.generalised_dunn_index(X, labels-1, 4, 2)
-        assert i1 == i2
+            i1 = r_genieclust.generalised_dunn_index(X, labels, 4, 2)[0]
+            i2 = genieclust.cluster_validity.generalised_dunn_index(X, labels-1, 4, 2)
+            assert i1 == i2

-        i1 = r_genieclust.wcnn_index(X, labels, 5)[0]
-        i2 = genieclust.cluster_validity.wcnn_index(X, labels-1, 5)
-        assert i1 == i2
+            i1 = r_genieclust.wcnn_index(X, labels, 5)[0]
+            i2 = genieclust.cluster_validity.wcnn_index(X, labels-1, 5)
+            assert i1 == i2

-        i1 = r_genieclust.dunnowa_index(X, labels, 10, "SMin:5", "Max")[0]
-        i2 = genieclust.cluster_validity.dunnowa_index(X, labels-1, 10, "SMin:5", "Max")
-        assert i1 == i2
+            i1 = r_genieclust.dunnowa_index(X, labels, 10, "SMin:5", "Max")[0]
+            i2 = genieclust.cluster_validity.dunnowa_index(X, labels-1, 10, "SMin:5", "Max")
+            assert i1 == i2

-        #double c_dunnowa_index
+            #double c_dunnowa_index

-        #double c_generalised_dunn_index
+            #double c_generalised_dunn_index

-        #double c_wcnn_index
+            #double c_wcnn_index



Expand Down
133 changes: 68 additions & 65 deletions .devel/pytest/test_genie.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
try:
import rpy2
from rpy2.robjects.packages import importr
-import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+from rpy2.robjects import numpy2ri
+from rpy2.robjects import default_converter
+#rpy2.robjects.numpy2ri.activate()
stats = importr("stats")
genie = importr("genie")
except:
Expand Down Expand Up @@ -39,69 +40,71 @@


def __test_genie(metric='euclidean'):
-    for dataset in ["s1", "Aggregation", "unbalance", "h2mg_64_50", "bigger"]:#, "h2mg_1024_50", "t4_8k", "bigger"]:
-        if dataset == "bigger":
-            np.random.seed(123)
-            n = 10_000
-            d = 10
-            K = 2
-            X = np.random.normal(size=(n,d))
-            labels = np.random.choice(np.r_[0:K], n)
-        else:
-            X = np.loadtxt("%s/%s.data.gz" % (path,dataset), ndmin=2)
-            labels = np.loadtxt("%s/%s.labels0.gz" % (path,dataset), dtype=np.intp)-1
-
-        k = len(np.unique(labels[labels>=0]))
-
-        # center X + scale (NOT: standardize!)
-        X = (X-X.mean(axis=0))/X.std(axis=None, ddof=1)
-        X += np.random.normal(0, 0.0001, X.shape)
-
-        #t01 = time.time()
-        #hdbscan.RobustSingleLinkage().fit_predict(X)
-        #t11 = time.time()
-        #print("t_robustsl=%.3f" % (t11-t01), end="\t")
-
-        #t01 = time.time()
-        #hdbscan.HDBSCAN().fit_predict(X)
-        #t11 = time.time()
-        #print("t_hdbscan=%.3f" % (t11-t01), end="\t")
-
-        for g in [0.01, 0.3, 0.5, 0.7, 1.0]:
-            gc.collect()
-
-            #D = scipy.spatial.distance.pdist(X)
-            #D = scipy.spatial.distance.squareform(D)
-
-            print("%-20s g=%.2f n=%5d d=%2d"%(dataset,g,X.shape[0],X.shape[1]), end="\t")
-
-            t01 = time.time()
-            _res1 = genieclust.Genie(
-                k, gini_threshold=g, exact=True, affinity=metric, compute_full_tree=True)
-            res1 = _res1.fit_predict(X)+1
-            t11 = time.time()
-            print("t_py=%.3f" % (t11-t01), end="\t")
-
-            assert np.all(np.diff(_res1.distances_)>= 0.0)
-            assert len(np.unique(res1)) == k
-
-            if stats is not None and genie is not None and metric != 'cosine':
-                t02 = time.time()
-                res2 = stats.cutree(genie.hclust2(objects=X, d=metric, thresholdGini=g), k)
-                t12 = time.time()
-                print("t_r=%.3f" % (t12-t02), end="\t")
-                res2 = np.array(res2, np.intp)
-                assert len(np.unique(res2)) == k
-
-                ari = genieclust.compare_partitions.adjusted_rand_score(res1, res2)
-                print("ARI=%.3f" % ari, end="\t")
-                assert ari>1.0-1e-12
-
-                print("t_rel=%.3f" % ((t11-t01)/(t12-t02),), end="\t")
-
-
-            res1, res2 = None, None
-            print("")
+    np_cv_rules = default_converter + numpy2ri.converter
+    with np_cv_rules.context():
+        for dataset in ["s1", "Aggregation", "unbalance", "h2mg_64_50", "bigger"]:#, "h2mg_1024_50", "t4_8k", "bigger"]:
+            if dataset == "bigger":
+                np.random.seed(123)
+                n = 10_000
+                d = 10
+                K = 2
+                X = np.random.normal(size=(n,d))
+                labels = np.random.choice(np.r_[0:K], n)
+            else:
+                X = np.loadtxt("%s/%s.data.gz" % (path,dataset), ndmin=2)
+                labels = np.loadtxt("%s/%s.labels0.gz" % (path,dataset), dtype=np.intp)-1
+
+            k = len(np.unique(labels[labels>=0]))
+
+            # center X + scale (NOT: standardize!)
+            X = (X-X.mean(axis=0))/X.std(axis=None, ddof=1)
+            X += np.random.normal(0, 0.0001, X.shape)
+
+            #t01 = time.time()
+            #hdbscan.RobustSingleLinkage().fit_predict(X)
+            #t11 = time.time()
+            #print("t_robustsl=%.3f" % (t11-t01), end="\t")
+
+            #t01 = time.time()
+            #hdbscan.HDBSCAN().fit_predict(X)
+            #t11 = time.time()
+            #print("t_hdbscan=%.3f" % (t11-t01), end="\t")
+
+            for g in [0.01, 0.3, 0.5, 0.7, 1.0]:
+                gc.collect()
+
+                #D = scipy.spatial.distance.pdist(X)
+                #D = scipy.spatial.distance.squareform(D)
+
+                print("%-20s g=%.2f n=%5d d=%2d"%(dataset,g,X.shape[0],X.shape[1]), end="\t")
+
+                t01 = time.time()
+                _res1 = genieclust.Genie(
+                    k, gini_threshold=g, exact=True, affinity=metric, compute_full_tree=True)
+                res1 = _res1.fit_predict(X)+1
+                t11 = time.time()
+                print("t_py=%.3f" % (t11-t01), end="\t")
+
+                assert np.all(np.diff(_res1.distances_)>= 0.0)
+                assert len(np.unique(res1)) == k
+
+                if stats is not None and genie is not None and metric != 'cosine':
+                    t02 = time.time()
+                    res2 = stats.cutree(genie.hclust2(objects=X, d=metric, thresholdGini=g), k)
+                    t12 = time.time()
+                    print("t_r=%.3f" % (t12-t02), end="\t")
+                    res2 = np.array(res2, np.intp)
+                    assert len(np.unique(res2)) == k
+
+                    ari = genieclust.compare_partitions.adjusted_rand_score(res1, res2)
+                    print("ARI=%.3f" % ari, end="\t")
+                    assert ari>1.0-1e-12
+
+                    print("t_rel=%.3f" % ((t11-t01)/(t12-t02),), end="\t")
+
+
+                res1, res2 = None, None
+                print("")



Expand Down
6 changes: 3 additions & 3 deletions .devel/pytest/test_inequality.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,19 @@ def devergottini_ref(x):
x = np.random.random(n)*8 + 3

xg1 = gini_index(np.array(x))
-xg2 = gini_index(np.array(x, dtype=np.float_))
+xg2 = gini_index(np.array(x, dtype=np.float64))
xg3 = gini_ref(x)
assert abs(xg1 - xg2) < 1e-9
assert abs(xg1 - xg3) < 1e-9

xb1 = bonferroni_index(np.array(x))
-xb2 = bonferroni_index(np.array(x, dtype=np.float_))
+xb2 = bonferroni_index(np.array(x, dtype=np.float64))
xb3 = bonferroni_ref(x)
assert abs(xb1 - xb2) < 1e-9
assert abs(xb1 - xb3) < 1e-9

xv1 = devergottini_index(np.array(x))
-xv2 = devergottini_index(np.array(x, dtype=np.float_))
+xv2 = devergottini_index(np.array(x, dtype=np.float64))
xv3 = devergottini_ref(x)
assert abs(xv1 - xv2) < 1e-9
assert abs(xv1 - xv3) < 1e-9
Expand Down
2 changes: 1 addition & 1 deletion .devel/pytest/test_warnerr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+#rpy2.robjects.numpy2ri.activate()
stats = importr("stats")
genie = importr("genie")
except ImportError:
Expand Down
2 changes: 1 addition & 1 deletion .devel/sandbox_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
genie = importr("genie")
import numpy as np
import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+#rpy2.robjects.numpy2ri.activate()
import time
import gc
np.random.seed(12345)
Expand Down
2 changes: 1 addition & 1 deletion .devel/sandbox_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
genieclustr = importr("genieclust")
import numpy as np
import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+#rpy2.robjects.numpy2ri.activate()


verbose = False
Expand Down

0 comments on commit dd08e73

Please sign in to comment.