From 414a14614fe2a75ccf1f60c615fe798c1c4d662c Mon Sep 17 00:00:00 2001 From: Lampros Mouselimis Date: Tue, 5 Dec 2023 10:25:14 +0200 Subject: [PATCH] updated to CRAN version 1.3.2 --- .Rhistory | 0 DESCRIPTION | 4 +-- NEWS.md | 7 +++++ README.md | 2 +- inst/CITATION | 2 +- inst/include/affinity_propagation.h | 4 +-- src/Makevars.win | 2 +- src/init.c | 48 ++++++++++++++--------------- vignettes/the_clusterR_package.Rmd | 2 +- 9 files changed, 39 insertions(+), 32 deletions(-) delete mode 100644 .Rhistory diff --git a/.Rhistory b/.Rhistory deleted file mode 100644 index e69de29..0000000 diff --git a/DESCRIPTION b/DESCRIPTION index 4fcf90e..730abfd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ClusterR Type: Package Title: Gaussian Mixture Models, K-Means, Mini-Batch-Kmeans, K-Medoids and Affinity Propagation Clustering -Version: 1.3.1 -Date: 2023-05-14 +Version: 1.3.2 +Date: 2023-12-04 Authors@R: c( person(given = "Lampros", family = "Mouselimis", email = "mouselimislampros@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "https://orcid.org/0000-0002-8024-1546")), person(given = "Conrad", family = "Sanderson", role = "cph", comment = "Author of the C++ Armadillo library"), person(given = "Ryan", family = "Curtin", role = "cph", comment = "Author of the C++ Armadillo library"), person(given = "Siddharth", family = "Agrawal", role = "cph", comment = "Author of the C code of the Mini-Batch-Kmeans algorithm (https://github.com/siddharth-agrawal/Mini-Batch-K-Means)"), person(given = "Brendan", family = "Frey", email = "frey@psi.toronto.edu", role = "cph", comment = "Author of the matlab code of the Affinity propagation algorithm (for commercial use please contact the author of the matlab code)"), person(given = "Delbert", family = "Dueck", role = "cph", comment = "Author of the matlab code of the Affinity propagation algorithm"), person(given = "Vitalie", family = "Spinu", email = "spinuvit@gmail.com", role = "ctb", comment = c(Github = "Github Contributor")) ) Maintainer: Lampros Mouselimis BugReports: https://github.com/mlampros/ClusterR/issues diff --git a/NEWS.md b/NEWS.md index 686dc30..5725c53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,11 @@ +## Cluster 1.3.2 + +* I've fixed the CRAN *warning: format specifies type 'double' but the argument has type 'int''* in the following files & lines by replacing the `%g` expression with `%d`: + * /inst/include/affinity_propagation.h:474:37 *and* 476:58 +* I removed the `-mthreads` compilation option from the "Makevars.win" file + + ## Cluster 1.3.1 * I fixed a mistake related to a potential warning of the *'Optimal_Clusters_GMM()'* function (see issue: https://github.com/mlampros/ClusterR/issues/45) diff --git a/README.md b/README.md index 7075210..4123a5a 100644 --- a/README.md +++ b/README.md @@ -248,7 +248,7 @@ If you use the code of this repository in your paper or research please cite bot title = {{ClusterR}: Gaussian Mixture Models, K-Means, Mini-Batch-Kmeans, K-Medoids and Affinity Propagation Clustering}, author = {Lampros Mouselimis}, year = {2023}, - note = {R package version 1.3.1}, + note = {R package version 1.3.2}, url = {https://CRAN.R-project.org/package=ClusterR}, } ``` diff --git a/inst/CITATION b/inst/CITATION index 3789691..13ad96f 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -57,7 +57,7 @@ bibentry( author = person("Delbert", "Dueck"), year = "2009", school = "University of Toronto", - url = "http://old.psi.toronto.edu/~psi/pubs2/2009/DDueck-thesis_printable.pdf" + url = "https://hdl.handle.net/1807/17755" ) bibentry( diff --git a/inst/include/affinity_propagation.h b/inst/include/affinity_propagation.h index 19974ac..9564ab6 100644 --- a/inst/include/affinity_propagation.h +++ b/inst/include/affinity_propagation.h @@ -471,9 +471,9 @@ Rcpp::List Affinity_Propagation::affinity_propagation(arma::mat &s, std::vector< if (details) { Rprintf("\nNumber of exemplars identified: %d (for %d data points)\n", K, N); - Rprintf("Net similarity: %g\n", tmpnetsim); + Rprintf("Net similarity: %d\n", tmpnetsim); Rprintf(" Similarities of data points to exemplars: %g\n", dpsim(0,i)); - Rprintf(" Preferences of selected exemplars: %g\n", tmpexpref); + Rprintf(" Preferences of selected exemplars: %d\n", tmpexpref); Rprintf("Number of iterations: %d\n\n", i); } diff --git a/src/Makevars.win b/src/Makevars.win index 6fa9dbe..4bdf7f8 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,3 +1,3 @@ PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_64BIT_WORD -PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) -mthreads +PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) PKG_CPPFLAGS = -I../inst/include/ diff --git a/src/init.c b/src/init.c index 7171677..e1c68d6 100644 --- a/src/init.c +++ b/src/init.c @@ -8,29 +8,29 @@ */ /* .Call calls */ -extern SEXP _ClusterR_affinity_propagation(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_check_NaN_Inf(SEXP); -extern SEXP _ClusterR_ClaraMedoids(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_ClusterMedoids(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_cost_clusters_from_dis_meds(SEXP, SEXP); -extern SEXP _ClusterR_dissim_mat(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_dissim_MEDOIDS(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_evaluation_rcpp(SEXP, SEXP, SEXP); -extern SEXP _ClusterR_GMM_arma(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_GMM_arma_AIC_BIC(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_KMEANS_arma(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_KMEANS_rcpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_mini_batch_kmeans(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_OptClust(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_opt_clust_fK(SEXP, SEXP, SEXP); -extern SEXP _ClusterR_predict_medoids(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_predict_MGausDPDF(SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_Predict_mini_batch_kmeans(SEXP, SEXP, SEXP, SEXP); -extern SEXP _ClusterR_preferenceRange(SEXP, SEXP, SEXP); -extern SEXP _ClusterR_SCALE(SEXP, SEXP, SEXP); -extern SEXP _ClusterR_silhouette_clusters(SEXP, SEXP); -extern SEXP _ClusterR_split_rcpp_lst(SEXP); -extern SEXP _ClusterR_validate_centroids(SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP _ClusterR_affinity_propagation(void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_check_NaN_Inf(void *); +extern SEXP _ClusterR_ClaraMedoids(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_ClusterMedoids(void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_cost_clusters_from_dis_meds(void *, void *); +extern SEXP _ClusterR_dissim_mat(void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_dissim_MEDOIDS(void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_evaluation_rcpp(void *, void *, void *); +extern SEXP _ClusterR_GMM_arma(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_GMM_arma_AIC_BIC(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_KMEANS_arma(void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_KMEANS_rcpp(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_mini_batch_kmeans(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_opt_clust_fK(void *, void *, void *); +extern SEXP _ClusterR_OptClust(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_predict_medoids(void *, void *, void *, void *, void *, void *, void *); +extern SEXP _ClusterR_predict_MGausDPDF(void *, void *, void *, void *, void *); +extern SEXP _ClusterR_Predict_mini_batch_kmeans(void *, void *, void *, void *); +extern SEXP _ClusterR_preferenceRange(void *, void *, void *); +extern SEXP _ClusterR_SCALE(void *, void *, void *); +extern SEXP _ClusterR_silhouette_clusters(void *, void *); +extern SEXP _ClusterR_split_rcpp_lst(void *); +extern SEXP _ClusterR_validate_centroids(void *, void *, void *, void *, void *); static const R_CallMethodDef CallEntries[] = { {"_ClusterR_affinity_propagation", (DL_FUNC) &_ClusterR_affinity_propagation, 9}, @@ -46,8 +46,8 @@ static const R_CallMethodDef CallEntries[] = { {"_ClusterR_KMEANS_arma", (DL_FUNC) &_ClusterR_KMEANS_arma, 7}, {"_ClusterR_KMEANS_rcpp", (DL_FUNC) &_ClusterR_KMEANS_rcpp, 12}, {"_ClusterR_mini_batch_kmeans", (DL_FUNC) &_ClusterR_mini_batch_kmeans, 13}, - {"_ClusterR_OptClust", (DL_FUNC) &_ClusterR_OptClust, 12}, {"_ClusterR_opt_clust_fK", (DL_FUNC) &_ClusterR_opt_clust_fK, 3}, + {"_ClusterR_OptClust", (DL_FUNC) &_ClusterR_OptClust, 12}, {"_ClusterR_predict_medoids", (DL_FUNC) &_ClusterR_predict_medoids, 7}, {"_ClusterR_predict_MGausDPDF", (DL_FUNC) &_ClusterR_predict_MGausDPDF, 5}, {"_ClusterR_Predict_mini_batch_kmeans", (DL_FUNC) &_ClusterR_Predict_mini_batch_kmeans, 4}, diff --git a/vignettes/the_clusterR_package.Rmd b/vignettes/the_clusterR_package.Rmd index 7e36767..c72d9b1 100644 --- a/vignettes/the_clusterR_package.Rmd +++ b/vignettes/the_clusterR_package.Rmd @@ -243,7 +243,7 @@ Values below the fixed threshold (here fK_threshold = 0.85) could be recommended
-[Mini-batch-kmeans](http://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf) is a variation of the classical k-means algorithm. It is particularly useful for big data sets because rather than using the whole data (as k-means does) it uses mini-batches from random data samples to optimize the objective function. +Mini-batch-kmeans (`http://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf`) is a variation of the classical k-means algorithm. It is particularly useful for big data sets because rather than using the whole data (as k-means does) it uses mini-batches from random data samples to optimize the objective function.
The parameters of the **MiniBatchKmeans** algorithm are almost the same as for the KMeans_rcpp function in the ClusterR package. The most important differences are the *batch_size* (the size of the mini batches) and the *init_fraction* (the percentage of data to use for the initialized centroids, which applies if the initializer equals to 'kmeans++' or 'quantile_init').