From 3ee06a099caca07d817cd91dfa807d74589f517f Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 28 Nov 2023 18:50:58 +0100 Subject: [PATCH] solve CRAN complaints, refactor error printing --- DESCRIPTION | 2 +- LICENSE | 2 +- R/fit.R | 42 ++++++-------------------------------- cmfrec/wrapper_untyped.pxi | 4 ++-- man/fit.Rd | 42 ++++++-------------------------------- setup.cfg | 2 -- setup.py | 2 +- src/cmfrec.h | 13 +++++------- src/collective.c | 29 +++++++++----------------- src/common.c | 37 ++++++++++++++++----------------- src/helpers.c | 25 ++++++++--------------- src/offsets.c | 20 +++++++----------- 12 files changed, 66 insertions(+), 154 deletions(-) delete mode 100644 setup.cfg diff --git a/DESCRIPTION b/DESCRIPTION index 261cba3..879e034 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cmfrec Type: Package Title: Collective Matrix Factorization for Recommender Systems -Version: 3.5.1-1 +Version: 3.5.1-2 Authors@R: c( person(given="David", family="Cortes", role=c("aut", "cre", "cph"), email="david.cortes.rivera@gmail.com"), diff --git a/LICENSE b/LICENSE index 7f62643..27ca084 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020-2022 David Cortes +Copyright (c) 2020-2023 David Cortes All rights reserved. diff --git a/R/fit.R b/R/fit.R index c787ce0..50dd3d4 100644 --- a/R/fit.R +++ b/R/fit.R @@ -113,42 +113,12 @@ NULL #' of internal BLAS threads before entering a multi-threaded region, in order to avoid oversubscription of #' threads. This can become an issue when using OpenBLAS if it is the 'pthreads' variant. #' -#' This package relies heavily on BLAS and LAPACK functions. For better performance, it is -#' recommended to use an optimized backed for them, such as MKL or OpenBLAS. -#' -#' In Windows, the easiest way of getting MKL is to use Microsoft's -#' \href{https://mran.microsoft.com}{MRAN distribution of R}, while OpenBLAS can be obtained by -#' following \href{https://github.com/david-cortes/R-openblas-in-windows}{this tutorial} -#' (no new R installation required). -#' -#' In Linux, these can be installed through the system's package manager. In Debian and -#' Debian-based distributions such as Ubuntu, the default BLAS and LAPACK can be configured -#' through the alternatives system (see the -#' \href{https://wiki.debian.org/DebianScience/LinearAlgebraLibraries}{Debian docs} or -#' \href{https://stackoverflow.com/a/49842944/5941695}{this post for MKL}). -#' -#' By default, in a regular x86-64 CPU, R will compile all packages with generic options -#' `-msse2` and `-O2`, which misses lots of performance optimizations, and in particular, -#' `cmfrec` will not be able to achieve its maximum performance with them. -#' -#' It is recommended to use compilation options `-O3`, `-march=native`, -#' `-fno-math-errno`, `-fno-trapping-math`, and `-std=c99` or `-std=gnu99`. -#' These can be activated in multiple ways: \itemize{ -#' \item (On Linux) Creating an empty text file `~/.R/Makevars` and adding this line there: -#' `CFLAGS += -O3 -march=native -fno-math-errno -fno-trapping-math` -#' (plus an empty line at the end), then installing the usual -#' way with `install.packages("cmfrec")`. -#' \item Installing `cmfrec` from source, but modifying the `Makevars` file (it has lines -#' that can be uncommented in order to enable these optimizations). -#' \item Modifying the global `Makeconf` variable. This is a file which defines the default -#' compilation options for \bold{all} R packages, so be careful about it. In Debian, -#' this file will typically be under `/etc/R/`, but this can vary in other operating systems. -#' In this file, replace all occurences of `-O2` with `-O3`, and all occurrences of -#' `-msse2` with `-march=native -fno-math-errno -fno-trapping-math` (e.g. open them -#' in some text editor or in RStudio and -#' use the 'Replace All' functionality) (not recommended to edit this global file, it should be -#' preferred to edit the local user Makevars instead). -#' } +#' This package relies heavily on BLAS and LAPACK functions, and benefits from SIMD vectorization. +#' For better performance, it is recommended to use an optimized backed such as MKL or OpenBLAS, +#' and to enable additional compiler optimizations that are not used by default in R. +#' +#' See this guide for details: +#' \href{https://github.com/david-cortes/installing-optimized-libraries}{installing optimized libraries}. #' @param X The main matrix with interactions data to factorize (e.g. movie ratings by users, #' bag-of-words representations of texts, etc.). The package is built with #' recommender systems in mind, and will assume that `X` is a matrix in which users diff --git a/cmfrec/wrapper_untyped.pxi b/cmfrec/wrapper_untyped.pxi index 5355819..659e9d8 100644 --- a/cmfrec/wrapper_untyped.pxi +++ b/cmfrec/wrapper_untyped.pxi @@ -558,11 +558,11 @@ cdef extern from "cmfrec.h": # void py_set_threads(int) noexcept nogil # int py_get_threads() noexcept nogil -cdef public void cy_printf(char *msg) noexcept nogil: +cdef public void cy_printf(const char *msg) noexcept nogil: with gil: python_printmsg(msg) stdout.flush() -cdef public void cy_errprintf(char *msg) noexcept nogil: +cdef public void cy_errprintf(const char *msg) noexcept nogil: with gil: python_printerrmsg(msg) stderr.flush() diff --git a/man/fit.Rd b/man/fit.Rd index cb7ceae..8235ee7 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -980,42 +980,12 @@ package installed for better performance - if available, will be used to control of internal BLAS threads before entering a multi-threaded region, in order to avoid oversubscription of threads. This can become an issue when using OpenBLAS if it is the 'pthreads' variant. -This package relies heavily on BLAS and LAPACK functions. For better performance, it is -recommended to use an optimized backed for them, such as MKL or OpenBLAS. - -In Windows, the easiest way of getting MKL is to use Microsoft's -\href{https://mran.microsoft.com}{MRAN distribution of R}, while OpenBLAS can be obtained by -following \href{https://github.com/david-cortes/R-openblas-in-windows}{this tutorial} -(no new R installation required). - -In Linux, these can be installed through the system's package manager. In Debian and -Debian-based distributions such as Ubuntu, the default BLAS and LAPACK can be configured -through the alternatives system (see the -\href{https://wiki.debian.org/DebianScience/LinearAlgebraLibraries}{Debian docs} or -\href{https://stackoverflow.com/a/49842944/5941695}{this post for MKL}). - -By default, in a regular x86-64 CPU, R will compile all packages with generic options -`-msse2` and `-O2`, which misses lots of performance optimizations, and in particular, -`cmfrec` will not be able to achieve its maximum performance with them. - -It is recommended to use compilation options `-O3`, `-march=native`, -`-fno-math-errno`, `-fno-trapping-math`, and `-std=c99` or `-std=gnu99`. -These can be activated in multiple ways: \itemize{ -\item (On Linux) Creating an empty text file `~/.R/Makevars` and adding this line there: -`CFLAGS += -O3 -march=native -fno-math-errno -fno-trapping-math` -(plus an empty line at the end), then installing the usual -way with `install.packages("cmfrec")`. -\item Installing `cmfrec` from source, but modifying the `Makevars` file (it has lines -that can be uncommented in order to enable these optimizations). -\item Modifying the global `Makeconf` variable. This is a file which defines the default -compilation options for \bold{all} R packages, so be careful about it. In Debian, -this file will typically be under `/etc/R/`, but this can vary in other operating systems. -In this file, replace all occurences of `-O2` with `-O3`, and all occurrences of -`-msse2` with `-march=native -fno-math-errno -fno-trapping-math` (e.g. open them -in some text editor or in RStudio and -use the 'Replace All' functionality) (not recommended to edit this global file, it should be -preferred to edit the local user Makevars instead). -} +This package relies heavily on BLAS and LAPACK functions, and benefits from SIMD vectorization. +For better performance, it is recommended to use an optimized backed such as MKL or OpenBLAS, +and to enable additional compiler optimizations that are not used by default in R. + +See this guide for details: +\href{https://github.com/david-cortes/installing-optimized-libraries}{installing optimized libraries}. } \examples{ diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 224a779..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description-file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py index 26a1297..3c0d46e 100644 --- a/setup.py +++ b/setup.py @@ -348,7 +348,7 @@ def test_supports_clang_reassociate(self): setup( name = "cmfrec", packages = ["cmfrec"], - version = '3.5.1-6', + version = '3.5.1-7', description = 'Collective matrix factorization', author = 'David Cortes', url = 'https://github.com/david-cortes/cmfrec', diff --git a/src/cmfrec.h b/src/cmfrec.h index 7c96f02..bbd4304 100644 --- a/src/cmfrec.h +++ b/src/cmfrec.h @@ -131,14 +131,12 @@ typedef void (*sig_t_)(int); #endif extern IMPORTED_FUN void PySys_WriteStdout(const char *fmt, ...); extern IMPORTED_FUN void PySys_WriteStderr(const char *fmt, ...); - void python_printmsg(char *msg); - void python_printerrmsg(char *msg); + void python_printmsg(const char *msg); + void python_printerrmsg(const char *msg); void py_printf(const char *fmt, ...); - void py_errprintf(void *ignored, const char *fmt, ...); - extern void cy_printf(char *msg); - extern void cy_errprintf(char *msg); + extern void cy_printf(const char *msg); + extern void cy_errprintf(const char *msg); #define printf py_printf - #define fprintf py_errprintf #define fflush(arg) {} #elif defined(_FOR_R) #include @@ -150,8 +148,7 @@ typedef void (*sig_t_)(int); #include #define USE_DOUBLE #define printf Rprintf - #define fprintf(f, message) REprintf(message) - #define fflush(f) R_FlushConsole() + #define fflush(arg) R_FlushConsole() #elif defined(MKL_ILP64) #include "mkl.h" #endif diff --git a/src/collective.c b/src/collective.c index a258039..ea28f71 100644 --- a/src/collective.c +++ b/src/collective.c @@ -3325,8 +3325,7 @@ int_t collective_factors_cold ) { if (NA_as_zero_U && u_bin_vec != NULL) { - fprintf(stderr, "Cannot use 'NA_as_zero_U' when there is 'u_bin'\n"); - fflush(stderr); + print_err_msg("Cannot use 'NA_as_zero_U' when there is 'u_bin'\n"); return 2; } int_t retval = 0; @@ -3585,13 +3584,11 @@ int_t collective_factors_warm ) { if (u_bin_vec != NULL && (NA_as_zero_X || NA_as_zero_U)) { - fprintf(stderr, "Cannot use 'NA_as_zero' when there is 'u_bin'\n"); - fflush(stderr); + print_err_msg("Cannot use 'NA_as_zero' when there is 'u_bin'\n"); return 2; } if (u_bin_vec != NULL && add_implicit_features) { - fprintf(stderr, "Cannot use implicit features when there is 'u_bin'\n"); - fflush(stderr); + print_err_msg("Cannot use implicit features when there is 'u_bin'\n"); return 2; } @@ -7310,27 +7307,24 @@ int_t fit_collective_explicit_als if (k_user && U == NULL && nnz_U == 0) { if (verbose) - fprintf(stderr, "Cannot pass 'k_user' without U data.\n"); + print_err_msg("Cannot pass 'k_user' without U data.\n"); retval = 2; } if (k_item && II == NULL && nnz_I == 0) { if (verbose) - fprintf(stderr, "Cannot pass 'k_item' without I data.\n"); + print_err_msg("Cannot pass 'k_item' without I data.\n"); retval = 2; } if (k_main && Xfull == NULL && nnz == 0) { if (verbose) - fprintf(stderr, "Cannot pass 'k_main' without X data.\n"); + print_err_msg("Cannot pass 'k_main' without X data.\n"); retval = 2; } if (retval == 2) { - if (verbose) { - fflush(stderr); - } return retval; } @@ -9411,19 +9405,19 @@ int_t fit_collective_implicit_als int_t retval = 0; if (k_user && U == NULL && nnz_U == 0) { if (verbose) - fprintf(stderr, "Cannot pass 'k_user' without U data.\n"); + print_err_msg("Cannot pass 'k_user' without U data.\n"); retval = 2; } if (k_item && II == NULL && nnz_I == 0) { if (verbose) - fprintf(stderr, "Cannot pass 'k_item' without I data.\n"); + print_err_msg("Cannot pass 'k_item' without I data.\n"); retval = 2; } if (k_main && nnz == 0) { if (verbose) - fprintf(stderr, "Cannot pass 'k_main' without X data.\n"); + print_err_msg("Cannot pass 'k_main' without X data.\n"); retval = 2; } @@ -9431,15 +9425,12 @@ int_t fit_collective_implicit_als (II != NULL && NA_as_zero_I)) { if (verbose) - fprintf(stderr, "Cannot pass 'NA_as_zero' with dense data.\n"); + print_err_msg("Cannot pass 'NA_as_zero' with dense data.\n"); retval = 2; } if (retval == 2) { - if (verbose) { - fflush(stderr); - } return retval; } diff --git a/src/common.c b/src/common.c index 3d7f3c3..5ea3721 100644 --- a/src/common.c +++ b/src/common.c @@ -3488,7 +3488,7 @@ int_t calc_mean_and_center } if (!cnt) - fprintf(stderr, "Warning: 'X' has all entries missing.\n"); + print_err_msg("Warning: 'X' has all entries missing.\n"); } else @@ -3513,7 +3513,7 @@ int_t calc_mean_and_center } if (!xsum) - fprintf(stderr, "Warning: 'X' has only zeros.\n"); + print_err_msg("Warning: 'X' has only zeros.\n"); if (NA_as_zero) *glob_mean = (ldouble_safe)(*glob_mean) @@ -3593,7 +3593,7 @@ int_t calc_mean_and_center } if (wsum <= 0) - fprintf(stderr, "Warning: weights are not positive.\n"); + print_err_msg("Warning: weights are not positive.\n"); } if (nonneg) @@ -5139,19 +5139,19 @@ int_t topN { int_t retval = 0; if (include_ix != NULL && exclude_ix != NULL) { - fprintf(stderr, "Cannot pass both 'include_ix' and 'exclude_ix'.\n"); + print_err_msg("Cannot pass both 'include_ix' and 'exclude_ix'.\n"); retval = 2; } if (n_top == 0) { - fprintf(stderr, "'n_top' must be greater than zero.\n"); + print_err_msg("'n_top' must be greater than zero.\n"); retval = 2; } if (n_exclude > n-n_top) { - fprintf(stderr, "Number of rankeable entities is less than 'n_top'\n"); + print_err_msg("Number of rankeable entities is less than 'n_top'\n"); retval = 2; } if (n_include > n) { - fprintf(stderr, "Number of entities to include is larger than 'n'.\n"); + print_err_msg("Number of entities to include is larger than 'n'.\n"); retval = 2; } @@ -5160,7 +5160,7 @@ int_t topN for (int_t ix = 0; ix < n_include; ix++) if (include_ix[ix] < 0 || include_ix[ix] >= n) { - fprintf(stderr, "'include_ix' contains invalid entries\n"); + print_err_msg("'include_ix' contains invalid entries\n"); retval = 2; break; } @@ -5170,7 +5170,7 @@ int_t topN for (int_t ix = 0; ix < n_exclude; ix++) if (exclude_ix[ix] < 0 || exclude_ix[ix] >= n) { - fprintf(stderr, "'exclude_ix' contains invalid entries\n"); + print_err_msg("'exclude_ix' contains invalid entries\n"); retval = 2; break; } @@ -5178,19 +5178,18 @@ int_t topN for (int_t ix = 0; ix < k_user+k+k_main; ix++) { if (isnan(a_vec[ix])) { - fprintf(stderr, "The latent factors contain NAN values\n"); + print_err_msg("The latent factors contain NAN values\n"); retval = 2; break; } } if (isnan(biasA)) { - fprintf(stderr, "The bias is a NAN value\n"); + print_err_msg("The bias is a NAN value\n"); retval = 2; } if (retval == 2) { - fflush(stderr); return retval; } @@ -5389,25 +5388,25 @@ int_t fit_most_popular if (implicit) { if (NA_as_zero) { - fprintf(stderr, + print_err_msg( "Warning: 'NA_as_zero' ignored with 'implicit=true'.\n"); NA_as_zero = false; } if (scale_lam) { - fprintf(stderr, + print_err_msg( "Warning: 'scale_lam' ignored with 'implicit=true'.\n"); scale_lam = false; } if (weight != NULL) { - fprintf(stderr, + print_err_msg( "Warning: 'weight' ignored with 'implicit=true'.\n"); weight = NULL; } if (Xfull != NULL) { - fprintf(stderr, + print_err_msg( "Error: cannot pass dense 'X' with 'implicit=true'.\n"); return 2; } @@ -5416,13 +5415,13 @@ int_t fit_most_popular else { if (adjust_weight) { - fprintf(stderr, + print_err_msg( "Warning: 'adjust_weight' ignored with 'implicit=false'.\n"); adjust_weight = false; } if (apply_log_transf) { - fprintf(stderr, + print_err_msg( "Warning: 'apply_log_transf' ignored with 'implicit=false'.\n"); apply_log_transf = false; } @@ -5430,7 +5429,7 @@ int_t fit_most_popular if (biasB == NULL) { - fprintf(stderr, "Error: must pass 'biasB'.\n"); + print_err_msg("Error: must pass 'biasB'.\n"); return 2; } diff --git a/src/helpers.c b/src/helpers.c index 8ed2a6b..a4cfe3c 100644 --- a/src/helpers.c +++ b/src/helpers.c @@ -1630,12 +1630,15 @@ void fill_lower_triangle(real_t A[], size_t n, size_t lda) void print_err_msg(const char *msg) { - #ifndef _FOR_R - fprintf(stderr, "%s", msg); + #ifdef _FOR_PYTHON + cy_errprintf(msg); + #elif defined(_FOR_R) + REprintf("%s", msg); + R_FlushConsole(); #else - fprintf(stderr, msg); - #endif + fprintf(stderr, "%s", msg); fflush(stderr); + #endif } void print_oom_message(void) @@ -1655,22 +1658,12 @@ void py_printf(const char *fmt, ...) cy_printf(msg); } -void py_errprintf(void *ignored, const char *fmt, ...) -{ - char msg[PY_MSG_MAX_LENGTH]; - va_list args; - va_start(args, fmt); - vsnprintf(msg, PY_MSG_MAX_LENGTH, fmt, args); - va_end(args); - cy_errprintf(msg); -} - -void python_printmsg(char *msg) +void python_printmsg(const char *msg) { PySys_WriteStdout("%s", msg); } -void python_printerrmsg(char *msg) +void python_printerrmsg(const char *msg) { PySys_WriteStderr("%s", msg); } diff --git a/src/offsets.c b/src/offsets.c index 4e5dfbe..dcb92b0 100644 --- a/src/offsets.c +++ b/src/offsets.c @@ -1175,8 +1175,7 @@ int_t fit_offsets_explicit_lbfgs_internal if (k_sec > 0 && U == NULL && !nnz_U && II == NULL && !nnz_I) { if (verbose) { - fprintf(stderr, "Cannot pass 'k_sec' without 'U' or 'I'.\n"); - fflush(stderr); + print_err_msg("Cannot pass 'k_sec' without 'U' or 'I'.\n"); } return 2; } @@ -1444,8 +1443,7 @@ int_t fit_offsets_explicit_lbfgs_internal if (should_stop_procedure) { - fprintf(stderr, "Procedure terminated before starting optimization\n"); - fflush(stderr); + print_err_msg("Procedure terminated before starting optimization\n"); goto cleanup; } @@ -1805,29 +1803,26 @@ int_t fit_offsets_als if (p > m || q > n || k > m || k > n) { if (verbose) { if (k > m || k > n) - fprintf(stderr, "'k' cannot be greater than 'm' or 'n'.\n"); + print_err_msg("'k' cannot be greater than 'm' or 'n'.\n"); else - fprintf(stderr, "Side info has larger dimension than 'X'\n"); + print_err_msg("Side info has larger dimension than 'X'\n"); } retval = 2; } if (implicit && (NA_as_zero_X || weight != NULL || Xfull != NULL)) { if (verbose) { - fprintf(stderr, "Combination of inputs invalid for 'implicit'.\n"); + print_err_msg("Combination of inputs invalid for 'implicit'.\n"); } retval = 2; } if (NA_as_zero_X && Xfull != NULL) { if (verbose) { - fprintf(stderr, "Cannot use 'NA_as_zero' with dense inputs.\n"); + print_err_msg("Cannot use 'NA_as_zero' with dense inputs.\n"); } retval = 2; } if (retval != 0) { - if (verbose) { - fflush(stderr); - } return retval; } @@ -1923,8 +1918,7 @@ int_t fit_offsets_als } else if (retval != 0) { if (verbose) { - fprintf(stderr, "Unexpected error\n"); - fflush(stderr); + print_err_msg("Unexpected error\n"); } return retval; }