diff --git a/R/fit.R b/R/fit.R
index 497cf10..5320971 100644
--- a/R/fit.R
+++ b/R/fit.R
@@ -512,6 +512,10 @@ NULL
 #' This might help to speed up the procedure by starting closer to an
 #' optimum. This option is not available when the side information is passed
 #' as sparse matrices.
+#'
+#' Note that this option will fail with an error if there are users or items
+#' without side information, or if the input data is otherwise problematic
+#' (e.g. users/items that are duplicates of each other).
 #' @param apply_log_transf Whether to apply a logarithm transformation on the values of `X`
 #' (i.e. `X := log(X)`)
 #' @param NA_as_zero Whether to take missing entries in the `X` matrix as zeros (only
diff --git a/cmfrec/__init__.py b/cmfrec/__init__.py
index 311d457..7f612b2 100644
--- a/cmfrec/__init__.py
+++ b/cmfrec/__init__.py
@@ -4395,7 +4395,7 @@ def from_model_matrices(A, B, glob_mean=0., precompute=True,
         if precompute:
             new_model.force_precompute_for_predictions()
         return new_model
-        
+
 
 class CMF_implicit(_CMF):
     """
@@ -7844,6 +7844,10 @@ class ContentBased(_OMF_Base):
         This might help to speed up the procedure by starting closer to an
         optimum. This option is not available when the side information is passed
         as sparse matrices.
+
+        Note that this option will fail with an error if there are users or
+        items without side information, or if the input data is otherwise
+        problematic (e.g. users/items that are duplicates of each other).
     nthreads : int
         Number of parallel threads to use. If passing a negative number, will
         use the same formula as joblib (maximum threads + 1 - nthreads).
diff --git a/include/cmfrec.h.in b/include/cmfrec.h.in
index 9b28807..79ba21a 100644
--- a/include/cmfrec.h.in
+++ b/include/cmfrec.h.in
@@ -1616,6 +1616,11 @@ CMFREC_EXPORTABLE int_t predict_X_old_most_popular
                            might reduce fitting times due to starting closer
                            to a local optimum. Sparse inputs for 'U' and 'I'
                            are not supported with this option.
+                           Note that this option will fail with an error if
+                           there are users or items without side information,
+                           or if the input data is otherwise problematic
+                           (e.g. users/items that are duplicates of each
+                           other).
                            Recommended value: true
 
     --- For prediction functionality ---
diff --git a/man/fit.Rd b/man/fit.Rd
index 7bf6a51..8f62699 100644
--- a/man/fit.Rd
+++ b/man/fit.Rd
@@ -701,7 +701,11 @@ Whether to add intercepts/biases to the user/item attribute matrices.}
 Whether to determine the initial coefficients through an ALS procedure.
 This might help to speed up the procedure by starting closer to an
 optimum. This option is not available when the side information is passed
-as sparse matrices.}
+as sparse matrices.
+
+Note that this option will fail with an error if there are users or items
+without side information, or if the input data is otherwise problematic
+(e.g. users/items that are duplicates of each other).}
 
 \item{k_sec}{(Only for `OMF_explicit`)
 Number of factors in the factorizing matrices which are determined