From 3ee06a099caca07d817cd91dfa807d74589f517f Mon Sep 17 00:00:00 2001
From: david-cortes <david.cortes.rivera@gmail.com>
Date: Tue, 28 Nov 2023 18:50:58 +0100
Subject: [PATCH] solve CRAN complaints, refactor error printing

---
 DESCRIPTION                |  2 +-
 LICENSE                    |  2 +-
 R/fit.R                    | 42 ++++++--------------------------------
 cmfrec/wrapper_untyped.pxi |  4 ++--
 man/fit.Rd                 | 42 ++++++--------------------------------
 setup.cfg                  |  2 --
 setup.py                   |  2 +-
 src/cmfrec.h               | 13 +++++-------
 src/collective.c           | 29 +++++++++-----------------
 src/common.c               | 37 ++++++++++++++++-----------------
 src/helpers.c              | 25 ++++++++---------------
 src/offsets.c              | 20 +++++++-----------
 12 files changed, 66 insertions(+), 154 deletions(-)
 delete mode 100644 setup.cfg

diff --git a/DESCRIPTION b/DESCRIPTION
index 261cba3..879e034 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cmfrec
 Type: Package
 Title: Collective Matrix Factorization for Recommender Systems
-Version: 3.5.1-1
+Version: 3.5.1-2
 Authors@R: c(
   person(given="David", family="Cortes", role=c("aut", "cre", "cph"),
          email="david.cortes.rivera@gmail.com"),
diff --git a/LICENSE b/LICENSE
index 7f62643..27ca084 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020-2022 David Cortes
+Copyright (c) 2020-2023 David Cortes
 
 All rights reserved.
 
diff --git a/R/fit.R b/R/fit.R
index c787ce0..50dd3d4 100644
--- a/R/fit.R
+++ b/R/fit.R
@@ -113,42 +113,12 @@ NULL
 #' of internal BLAS threads before entering a multi-threaded region, in order to avoid oversubscription of
 #' threads. This can become an issue when using OpenBLAS if it is the 'pthreads' variant.
 #' 
-#' This package relies heavily on BLAS and LAPACK functions. For better performance, it is
-#' recommended to use an optimized backed for them, such as MKL or OpenBLAS.
-#' 
-#' In Windows, the easiest way of getting MKL is to use Microsoft's
-#' \href{https://mran.microsoft.com}{MRAN distribution of R}, while OpenBLAS can be obtained by
-#' following \href{https://github.com/david-cortes/R-openblas-in-windows}{this tutorial}
-#' (no new R installation required).
-#' 
-#' In Linux, these can be installed through the system's package manager. In Debian and
-#' Debian-based distributions such as Ubuntu, the default BLAS and LAPACK can be configured
-#' through the alternatives system (see the
-#' \href{https://wiki.debian.org/DebianScience/LinearAlgebraLibraries}{Debian docs} or
-#' \href{https://stackoverflow.com/a/49842944/5941695}{this post for MKL}).
-#' 
-#' By default, in a regular x86-64 CPU, R will compile all packages with generic options
-#' `-msse2` and `-O2`, which misses lots of performance optimizations, and in particular,
-#' `cmfrec` will not be able to achieve its maximum performance with them.
-#' 
-#' It is recommended to use compilation options `-O3`, `-march=native`,
-#' `-fno-math-errno`, `-fno-trapping-math`, and `-std=c99` or `-std=gnu99`.
-#' These can be activated in multiple ways: \itemize{
-#' \item (On Linux) Creating an empty text file `~/.R/Makevars` and adding this line there:
-#' `CFLAGS += -O3 -march=native -fno-math-errno -fno-trapping-math`
-#' (plus an empty line at the end), then installing the usual
-#' way with `install.packages("cmfrec")`.
-#' \item Installing `cmfrec` from source, but modifying the `Makevars` file (it has lines
-#' that can be uncommented in order to enable these optimizations).
-#' \item Modifying the global `Makeconf` variable. This is a file which defines the default
-#' compilation options for \bold{all} R packages, so be careful about it. In Debian,
-#' this file will typically be under `/etc/R/`, but this can vary in other operating systems.
-#' In this file, replace all occurences of `-O2` with `-O3`, and all occurrences of
-#' `-msse2` with `-march=native -fno-math-errno -fno-trapping-math` (e.g. open them
-#' in some text editor or in RStudio and
-#' use the 'Replace All' functionality) (not recommended to edit this global file, it should be
-#' preferred to edit the local user Makevars instead).
-#' }
+#' This package relies heavily on BLAS and LAPACK functions, and benefits from SIMD vectorization.
+#' For better performance, it is recommended to use an optimized backed such as MKL or OpenBLAS,
+#' and to enable additional compiler optimizations that are not used by default in R.
+#' 
+#' See this guide for details:
+#' \href{https://github.com/david-cortes/installing-optimized-libraries}{installing optimized libraries}.
 #' @param X The main matrix with interactions data to factorize (e.g. movie ratings by users,
 #' bag-of-words representations of texts, etc.). The package is built with
 #' recommender systems in mind, and will assume that `X` is a matrix in which users
diff --git a/cmfrec/wrapper_untyped.pxi b/cmfrec/wrapper_untyped.pxi
index 5355819..659e9d8 100644
--- a/cmfrec/wrapper_untyped.pxi
+++ b/cmfrec/wrapper_untyped.pxi
@@ -558,11 +558,11 @@ cdef extern from "cmfrec.h":
     # void py_set_threads(int) noexcept nogil
     # int py_get_threads() noexcept nogil
 
-cdef public void cy_printf(char *msg) noexcept nogil:
+cdef public void cy_printf(const char *msg) noexcept nogil:
     with gil:
         python_printmsg(msg)
         stdout.flush()
-cdef public void cy_errprintf(char *msg) noexcept nogil:
+cdef public void cy_errprintf(const char *msg) noexcept nogil:
     with gil:
         python_printerrmsg(msg)
         stderr.flush()
diff --git a/man/fit.Rd b/man/fit.Rd
index cb7ceae..8235ee7 100644
--- a/man/fit.Rd
+++ b/man/fit.Rd
@@ -980,42 +980,12 @@ package installed for better performance - if available, will be used to control
 of internal BLAS threads before entering a multi-threaded region, in order to avoid oversubscription of
 threads. This can become an issue when using OpenBLAS if it is the 'pthreads' variant.
 
-This package relies heavily on BLAS and LAPACK functions. For better performance, it is
-recommended to use an optimized backed for them, such as MKL or OpenBLAS.
-
-In Windows, the easiest way of getting MKL is to use Microsoft's
-\href{https://mran.microsoft.com}{MRAN distribution of R}, while OpenBLAS can be obtained by
-following \href{https://github.com/david-cortes/R-openblas-in-windows}{this tutorial}
-(no new R installation required).
-
-In Linux, these can be installed through the system's package manager. In Debian and
-Debian-based distributions such as Ubuntu, the default BLAS and LAPACK can be configured
-through the alternatives system (see the
-\href{https://wiki.debian.org/DebianScience/LinearAlgebraLibraries}{Debian docs} or
-\href{https://stackoverflow.com/a/49842944/5941695}{this post for MKL}).
-
-By default, in a regular x86-64 CPU, R will compile all packages with generic options
-`-msse2` and `-O2`, which misses lots of performance optimizations, and in particular,
-`cmfrec` will not be able to achieve its maximum performance with them.
-
-It is recommended to use compilation options `-O3`, `-march=native`,
-`-fno-math-errno`, `-fno-trapping-math`, and `-std=c99` or `-std=gnu99`.
-These can be activated in multiple ways: \itemize{
-\item (On Linux) Creating an empty text file `~/.R/Makevars` and adding this line there:
-`CFLAGS += -O3 -march=native -fno-math-errno -fno-trapping-math`
-(plus an empty line at the end), then installing the usual
-way with `install.packages("cmfrec")`.
-\item Installing `cmfrec` from source, but modifying the `Makevars` file (it has lines
-that can be uncommented in order to enable these optimizations).
-\item Modifying the global `Makeconf` variable. This is a file which defines the default
-compilation options for \bold{all} R packages, so be careful about it. In Debian,
-this file will typically be under `/etc/R/`, but this can vary in other operating systems.
-In this file, replace all occurences of `-O2` with `-O3`, and all occurrences of
-`-msse2` with `-march=native -fno-math-errno -fno-trapping-math` (e.g. open them
-in some text editor or in RStudio and
-use the 'Replace All' functionality) (not recommended to edit this global file, it should be
-preferred to edit the local user Makevars instead).
-}
+This package relies heavily on BLAS and LAPACK functions, and benefits from SIMD vectorization.
+For better performance, it is recommended to use an optimized backed such as MKL or OpenBLAS,
+and to enable additional compiler optimizations that are not used by default in R.
+
+See this guide for details:
+\href{https://github.com/david-cortes/installing-optimized-libraries}{installing optimized libraries}.
 }
 
 \examples{
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 224a779..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[metadata]
-description-file = README.md
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 26a1297..3c0d46e 100644
--- a/setup.py
+++ b/setup.py
@@ -348,7 +348,7 @@ def test_supports_clang_reassociate(self):
 setup(
     name  = "cmfrec",
     packages = ["cmfrec"],
-    version = '3.5.1-6',
+    version = '3.5.1-7',
     description = 'Collective matrix factorization',
     author = 'David Cortes',
     url = 'https://github.com/david-cortes/cmfrec',
diff --git a/src/cmfrec.h b/src/cmfrec.h
index 7c96f02..bbd4304 100644
--- a/src/cmfrec.h
+++ b/src/cmfrec.h
@@ -131,14 +131,12 @@ typedef void (*sig_t_)(int);
     #endif
     extern IMPORTED_FUN void PySys_WriteStdout(const char *fmt, ...);
     extern IMPORTED_FUN void PySys_WriteStderr(const char *fmt, ...);
-    void python_printmsg(char *msg);
-    void python_printerrmsg(char *msg);
+    void python_printmsg(const char *msg);
+    void python_printerrmsg(const char *msg);
     void py_printf(const char *fmt, ...);
-    void py_errprintf(void *ignored, const char *fmt, ...);
-    extern void cy_printf(char *msg);
-    extern void cy_errprintf(char *msg);
+    extern void cy_printf(const char *msg);
+    extern void cy_errprintf(const char *msg);
     #define printf py_printf
-    #define fprintf py_errprintf
     #define fflush(arg) {}
 #elif defined(_FOR_R)
     #include <Rconfig.h>
@@ -150,8 +148,7 @@ typedef void (*sig_t_)(int);
     #include <R_ext/Visibility.h>
     #define USE_DOUBLE
     #define printf Rprintf
-    #define fprintf(f, message) REprintf(message)
-    #define fflush(f) R_FlushConsole()
+    #define fflush(arg) R_FlushConsole()
 #elif defined(MKL_ILP64)
     #include "mkl.h"
 #endif
diff --git a/src/collective.c b/src/collective.c
index a258039..ea28f71 100644
--- a/src/collective.c
+++ b/src/collective.c
@@ -3325,8 +3325,7 @@ int_t collective_factors_cold
 )
 {
     if (NA_as_zero_U && u_bin_vec != NULL) {
-        fprintf(stderr, "Cannot use 'NA_as_zero_U' when there is 'u_bin'\n");
-        fflush(stderr);
+        print_err_msg("Cannot use 'NA_as_zero_U' when there is 'u_bin'\n");
         return 2;
     }
     int_t retval = 0;
@@ -3585,13 +3584,11 @@ int_t collective_factors_warm
 )
 {
     if (u_bin_vec != NULL && (NA_as_zero_X || NA_as_zero_U)) {
-        fprintf(stderr, "Cannot use 'NA_as_zero' when there is 'u_bin'\n");
-        fflush(stderr);
+        print_err_msg("Cannot use 'NA_as_zero' when there is 'u_bin'\n");
         return 2;
     }
     if (u_bin_vec != NULL && add_implicit_features) {
-        fprintf(stderr, "Cannot use implicit features when there is 'u_bin'\n");
-        fflush(stderr);
+        print_err_msg("Cannot use implicit features when there is 'u_bin'\n");
         return 2;
     }
 
@@ -7310,27 +7307,24 @@ int_t fit_collective_explicit_als
 
     if (k_user && U == NULL && nnz_U == 0) {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'k_user' without U data.\n");
+            print_err_msg("Cannot pass 'k_user' without U data.\n");
         retval = 2;
     }
 
     if (k_item && II == NULL && nnz_I == 0) {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'k_item' without I data.\n");
+            print_err_msg("Cannot pass 'k_item' without I data.\n");
         retval = 2;
     }
 
     if (k_main && Xfull == NULL && nnz == 0) {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'k_main' without X data.\n");
+            print_err_msg("Cannot pass 'k_main' without X data.\n");
         retval = 2;
     }
 
     if (retval == 2)
     {
-        if (verbose) {
-            fflush(stderr);
-        }
         return retval;
     }
 
@@ -9411,19 +9405,19 @@ int_t fit_collective_implicit_als
     int_t retval = 0;
     if (k_user && U == NULL && nnz_U == 0) {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'k_user' without U data.\n");
+            print_err_msg("Cannot pass 'k_user' without U data.\n");
         retval = 2;
     }
 
     if (k_item && II == NULL && nnz_I == 0) {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'k_item' without I data.\n");
+            print_err_msg("Cannot pass 'k_item' without I data.\n");
         retval = 2;
     }
 
     if (k_main && nnz == 0) {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'k_main' without X data.\n");
+            print_err_msg("Cannot pass 'k_main' without X data.\n");
         retval = 2;
     }
 
@@ -9431,15 +9425,12 @@ int_t fit_collective_implicit_als
         (II != NULL && NA_as_zero_I))
     {
         if (verbose)
-            fprintf(stderr, "Cannot pass 'NA_as_zero' with dense data.\n");
+            print_err_msg("Cannot pass 'NA_as_zero' with dense data.\n");
         retval = 2;
     }
 
     if (retval == 2)
     {
-        if (verbose) {
-            fflush(stderr);
-        }
         return retval;
     }
 
diff --git a/src/common.c b/src/common.c
index 3d7f3c3..5ea3721 100644
--- a/src/common.c
+++ b/src/common.c
@@ -3488,7 +3488,7 @@ int_t calc_mean_and_center
             }
 
             if (!cnt)
-                fprintf(stderr, "Warning: 'X' has all entries missing.\n");
+                print_err_msg("Warning: 'X' has all entries missing.\n");
         }
 
         else
@@ -3513,7 +3513,7 @@ int_t calc_mean_and_center
             }
 
             if (!xsum)
-                fprintf(stderr, "Warning: 'X' has only zeros.\n");
+                print_err_msg("Warning: 'X' has only zeros.\n");
 
             if (NA_as_zero)
                 *glob_mean = (ldouble_safe)(*glob_mean)
@@ -3593,7 +3593,7 @@ int_t calc_mean_and_center
         }
 
         if (wsum <= 0)
-            fprintf(stderr, "Warning: weights are not positive.\n");
+            print_err_msg("Warning: weights are not positive.\n");
     }
 
     if (nonneg)
@@ -5139,19 +5139,19 @@ int_t topN
 {
     int_t retval = 0;
     if (include_ix != NULL && exclude_ix != NULL) {
-        fprintf(stderr, "Cannot pass both 'include_ix' and 'exclude_ix'.\n");
+        print_err_msg("Cannot pass both 'include_ix' and 'exclude_ix'.\n");
         retval = 2;
     }
     if (n_top == 0) {
-        fprintf(stderr, "'n_top' must be greater than zero.\n");
+        print_err_msg("'n_top' must be greater than zero.\n");
         retval = 2;
     }
     if (n_exclude > n-n_top) {
-        fprintf(stderr, "Number of rankeable entities is less than 'n_top'\n");
+        print_err_msg("Number of rankeable entities is less than 'n_top'\n");
         retval = 2;
     }
     if (n_include > n) {
-        fprintf(stderr, "Number of entities to include is larger than 'n'.\n");
+        print_err_msg("Number of entities to include is larger than 'n'.\n");
         retval = 2;
     }
 
@@ -5160,7 +5160,7 @@ int_t topN
         for (int_t ix = 0; ix < n_include; ix++)
             if (include_ix[ix] < 0 || include_ix[ix] >= n)
             {
-                fprintf(stderr, "'include_ix' contains invalid entries\n");
+                print_err_msg("'include_ix' contains invalid entries\n");
                 retval = 2;
                 break;
             }
@@ -5170,7 +5170,7 @@ int_t topN
         for (int_t ix = 0; ix < n_exclude; ix++)
             if (exclude_ix[ix] < 0 || exclude_ix[ix] >= n)
             {
-                fprintf(stderr, "'exclude_ix' contains invalid entries\n");
+                print_err_msg("'exclude_ix' contains invalid entries\n");
                 retval = 2;
                 break;
             }
@@ -5178,19 +5178,18 @@ int_t topN
     for (int_t ix = 0; ix < k_user+k+k_main; ix++)
     {
         if (isnan(a_vec[ix])) {
-            fprintf(stderr, "The latent factors contain NAN values\n");
+            print_err_msg("The latent factors contain NAN values\n");
             retval = 2;
             break;
         }
     }
     if (isnan(biasA)) {
-        fprintf(stderr, "The bias is a NAN value\n");
+        print_err_msg("The bias is a NAN value\n");
         retval = 2;
     }
 
     if (retval == 2)
     {
-        fflush(stderr);
         return retval;
     }
 
@@ -5389,25 +5388,25 @@ int_t fit_most_popular
     if (implicit)
     {
         if (NA_as_zero) {
-            fprintf(stderr,
+            print_err_msg(
                     "Warning: 'NA_as_zero' ignored with 'implicit=true'.\n");
             NA_as_zero = false;
         }
 
         if (scale_lam) {
-            fprintf(stderr,
+            print_err_msg(
                     "Warning: 'scale_lam' ignored with 'implicit=true'.\n");
             scale_lam = false;
         }
 
         if (weight != NULL) {
-            fprintf(stderr,
+            print_err_msg(
                     "Warning: 'weight' ignored with 'implicit=true'.\n");
             weight = NULL;
         }
 
         if (Xfull != NULL) {
-            fprintf(stderr,
+            print_err_msg(
                     "Error: cannot pass dense 'X' with 'implicit=true'.\n");
             return 2;
         }
@@ -5416,13 +5415,13 @@ int_t fit_most_popular
     else
     {
         if (adjust_weight) {
-            fprintf(stderr,
+            print_err_msg(
                 "Warning: 'adjust_weight' ignored with 'implicit=false'.\n");
             adjust_weight = false;
         }
 
         if (apply_log_transf) {
-            fprintf(stderr,
+            print_err_msg(
                 "Warning: 'apply_log_transf' ignored with 'implicit=false'.\n");
             apply_log_transf = false;
         }
@@ -5430,7 +5429,7 @@ int_t fit_most_popular
 
     if (biasB == NULL)
     {
-        fprintf(stderr, "Error: must pass 'biasB'.\n");
+        print_err_msg("Error: must pass 'biasB'.\n");
         return 2;
     }
 
diff --git a/src/helpers.c b/src/helpers.c
index 8ed2a6b..a4cfe3c 100644
--- a/src/helpers.c
+++ b/src/helpers.c
@@ -1630,12 +1630,15 @@ void fill_lower_triangle(real_t A[], size_t n, size_t lda)
 
 void print_err_msg(const char *msg)
 {
-    #ifndef _FOR_R
-    fprintf(stderr, "%s", msg);
+    #ifdef _FOR_PYTHON
+    cy_errprintf(msg);
+    #elif defined(_FOR_R)
+    REprintf("%s", msg);
+    R_FlushConsole();
     #else
-    fprintf(stderr, msg);
-    #endif
+    fprintf(stderr, "%s", msg);
     fflush(stderr);
+    #endif
 }
 
 void print_oom_message(void)
@@ -1655,22 +1658,12 @@ void py_printf(const char *fmt, ...)
     cy_printf(msg);
 }
 
-void py_errprintf(void *ignored, const char *fmt, ...)
-{
-    char msg[PY_MSG_MAX_LENGTH];
-    va_list args;
-    va_start(args, fmt);
-    vsnprintf(msg, PY_MSG_MAX_LENGTH, fmt, args);
-    va_end(args);
-    cy_errprintf(msg);
-}
-
-void python_printmsg(char *msg)
+void python_printmsg(const char *msg)
 {
     PySys_WriteStdout("%s", msg);
 }
 
-void python_printerrmsg(char *msg)
+void python_printerrmsg(const char *msg)
 {
     PySys_WriteStderr("%s", msg);
 }
diff --git a/src/offsets.c b/src/offsets.c
index 4e5dfbe..dcb92b0 100644
--- a/src/offsets.c
+++ b/src/offsets.c
@@ -1175,8 +1175,7 @@ int_t fit_offsets_explicit_lbfgs_internal
     if (k_sec > 0 && U == NULL && !nnz_U && II == NULL && !nnz_I)
     {
         if (verbose) {
-            fprintf(stderr, "Cannot pass 'k_sec' without 'U' or 'I'.\n");
-            fflush(stderr);
+            print_err_msg("Cannot pass 'k_sec' without 'U' or 'I'.\n");
         }
         return 2;
     }
@@ -1444,8 +1443,7 @@ int_t fit_offsets_explicit_lbfgs_internal
 
     if (should_stop_procedure)
     {
-        fprintf(stderr, "Procedure terminated before starting optimization\n");
-        fflush(stderr);
+        print_err_msg("Procedure terminated before starting optimization\n");
         goto cleanup;
     }
 
@@ -1805,29 +1803,26 @@ int_t fit_offsets_als
     if (p > m || q > n || k > m || k > n) {
         if (verbose) {
             if (k > m || k > n)
-                fprintf(stderr, "'k' cannot be greater than 'm' or 'n'.\n");
+                print_err_msg("'k' cannot be greater than 'm' or 'n'.\n");
             else
-                fprintf(stderr, "Side info has larger dimension than 'X'\n");
+                print_err_msg("Side info has larger dimension than 'X'\n");
         }
         retval = 2;
     }
     if (implicit && (NA_as_zero_X || weight != NULL || Xfull != NULL)) {
         if (verbose) {
-            fprintf(stderr, "Combination of inputs invalid for 'implicit'.\n");
+            print_err_msg("Combination of inputs invalid for 'implicit'.\n");
         }
         retval = 2;
     }
     if (NA_as_zero_X && Xfull != NULL) {
         if (verbose) {
-            fprintf(stderr, "Cannot use 'NA_as_zero' with dense inputs.\n");
+            print_err_msg("Cannot use 'NA_as_zero' with dense inputs.\n");
         }
         retval = 2;
     }
 
     if (retval != 0) {
-        if (verbose) {
-            fflush(stderr);
-        }
         return retval;
     }
 
@@ -1923,8 +1918,7 @@ int_t fit_offsets_als
     }
     else if (retval != 0) {
         if (verbose) {
-            fprintf(stderr, "Unexpected error\n");
-            fflush(stderr);
+            print_err_msg("Unexpected error\n");
         }
         return retval;
     }