/*******************************************************************************
Collective Matrix Factorization
-------------------------------
This is a module for multi-way factorization of sparse and dense matrices
intended to be used for recommender systems with explicit feedback data plus
side information about users and/or items.
The reference papers are:
(a) Cortes, David.
"Cold-start recommendations in Collective Matrix Factorization."
arXiv preprint arXiv:1809.00366 (2018).
(b) Singh, Ajit P., and Geoffrey J. Gordon.
"Relational learning via collective matrix factorization."
Proceedings of the 14th ACM SIGKDD international conference on
Knowledge discovery and data mining. 2008.
(c) Hu, Yifan, Yehuda Koren, and Chris Volinsky.
"Collaborative filtering for implicit feedback datasets."
2008 Eighth IEEE International Conference on Data Mining.
Ieee, 2008.
(d) Takacs, Gabor, Istvan Pilaszy, and Domonkos Tikk.
"Applications of the conjugate gradient method for
implicit feedback collaborative filtering."
Proceedings of the fifth ACM conference on
Recommender systems. 2011.
(e) Rendle, Steffen, Li Zhang, and Yehuda Koren.
"On the difficulty of evaluating baselines:
A study on recommender systems."
arXiv preprint arXiv:1905.01395 (2019).
(f) Franc, Vojtech, Vaclav Hlavac, and Mirko Navara.
"Sequential coordinate-wise algorithm for the
non-negative least squares problem."
International Conference on Computer Analysis of Images
and Patterns. Springer, Berlin, Heidelberg, 2005.
(g) Zhou, Yunhong, et al.
"Large-scale parallel collaborative filtering for
the netflix prize."
International conference on algorithmic applications in management.
Springer, Berlin, Heidelberg, 2008.
For information about the models offered here and how they are fit to
the data, see the files 'collective.c' and 'offsets.c'.
Written for C99 standard and OpenMP version 2.0 or higher, and aimed to be
used either as a stand-alone program, or wrapped into scripting languages
such as Python and R.
<https://www.github.com/david-cortes/cmfrec>
MIT License:
Copyright (c) 2020-2022 David Cortes
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
*******************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
#cmakedefine USE_INT64
#ifdef USE_INT64
#include <inttypes.h>
#endif
#cmakedefine real_t @real_t@
#cmakedefine int_t @int_t@
#include <stddef.h>
#include <stdbool.h>
#ifdef _WIN32
#define CMFREC_EXPORTABLE __declspec(dllimport)
#else
#define CMFREC_EXPORTABLE
#endif
/*******************************************************************************
CMFREC library header
---------------------
********************************************************************************
Overview
--------
This library provides functionality for approximate low-rank matrix
factorization of a matrix "X", optionally aided by secondary matrices
"U" (rows matching to rows of "X") and "I" (rows matching to columns of "X")
which are also factorized along the way sharing the same latent factors or
components - i.e.
X ~ A*t(B)
U ~ A*t(C)
I ~ B*t(D)
Where "A", "B", "C", "D" are the obtained model parameters, with a
dimensionality lower than "X".
As an alternative to the main explicit-feedback model, it can also implicitly
generate features from "X" by converting it into a binary matrix in which
each entry is one when the value is not missing (as in reference (d)) - the
zeros in this case are taken into consideration in the loss/objective
function.
The matrix dimensions are referred to as follows:
* X -> [m, n]
* U -> [m_u, p]
* I -> [n_i, q]
The matrices "A", "B", "C", "D" are obtained by minimizing the squared error
with respect to the values of "X" obtained from their product - either with
missing entries in "X" being ignored for the error function, or taken as
zeros. The intended usage is for recommender systems, in which users
correspond to rows of "X", items to columns of "X", "U" and "I" to side
information about users and items, and values of "X" denoting interactions
such as users giving a certain rating to a given item.
The library provides different models, which are explained below:
* Collective explicit model: this is the model described in reference (a),
and is the most flexible model and the main feature of this library.
* Collective implicit model: is similar to the explicit model, but entries
of "X" are treated as either zero (if missing) or one (if not missing),
with the positive (non-missing) ones having a weight given by X+1.
* Most-Popular: is a model with only intercepts or only one parameter per
item (corresponding to k=0 plus biases), which can mimic either the
explicit or the implicit error functions. This is intended for comparison
purposes as a non-personalized recommender model.
* Content-Based: is a model in which instead of there being multiple
factorizations, there is only one factorization, but the matrices are
determined directly from the user and item attributes - that is:
A = U*C, B = I*D
With no free parameters per user or per item.
There is no "implicit" version for this model.
Alternatively, the library also offers the following experimental models:
* Offsets explicit: is similar to the content-based model, but an additional
offset per user and per item is added - that is:
A = A' + U*C, B = B' + I*D
* Offsets implicit: is the same as above, but taking "X" as weighted binary
entries just like in the collective implicit model.
The last two models are not recommended for use, and were implemented
only for research purposes; it is best not to rely on them.
For more details about the models, see the files "collective.c" and
"offsets.c"
********************************************************************************
Functionality
-------------
The following classes of functions are available:
* "fit" -> fits the given model to data.
* "precompute" -> pre-calculates matrices based on the fitted model which
might be used to speed up predictions on new data.
* "topN" -> determines top-N recommended items for a given users.
* "factors_single" -> determines the latent factors (row of A) for a user
given its data.
* "factors_multiple" -> determines the latent factors (rows of A) for
a series of users, given their data.
* "predict_X" -> predicts the value of given entries in the "X" matrix
given by a row and column index.
* "impute_X" -> replaces missing entries in rows of a new "X" matrix with
their predicted value.
The functions based on new data work for both warm-start and cold-start
scenarios - that is, they can work with "X" data alone, with "U" data alone,
or with both. All the functions that take new data are just wrappers around
the "factors" functions which then pass them to the corresponding function
for "old" data.
Note that the library is user/row oriented and all the prediction functions
for new data assume the items remain constant. The functions however can
also be used for new items or for the side information by substituting the
input matrices and sizes as follows:
* For items:
A -> B, B -> A, C -> D, X -> t(X)
m -> n, n -> m, p -> q, k_user -> k_item
* For user attributes:
A -> C, B -> A, C -> NULL, X -> t(U)
m -> p, n -> m, p -> 0, k_main -> 0
* For item attributes:
A -> D, B -> B, C -> NULL, X -> t(I)
m -> q, n -> n, p -> 0, k_main -> 0
All the matrices are assumed to be in row-major order (the opposite of
FORTRAN), unless they are sparse, in which case either COO or CSR are
assumed depending on the function. One should pass the data for a given
input "X","U","I" in only one format to each function, but it is possible
to mix dense/sparse-coo/sparse-csr inputs between "X"-"U"-"I".
Note: the documentation below uses Doxygen keywords, but it is NOT meant
to be compiled through doxygen - those are just for ease of reading.
Functions have the following return codes:
* 0 -> completed successfully.
* 1 -> ran out of memory.
* 2 -> received invalid inputs or an invalid input combination.
* 3 -> procedure was interrupted (when passing 'handle_interrupt=true').
*******************************************************************************/
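/* An illustrative sketch (not part of the library's API) of the accepted
   input formats: the same 2x3 "X" matrix passed once as dense row-major
   with missing entries coded as NAN, and once as sparse COO triplets.
   The variable names are hypothetical and the declarations are assumed
   to appear inside a function body. */
#if 0
/* Dense: row-major, missing entries coded as NAN (from <math.h>) */
real_t Xfull[2*3] = {
    5.0, NAN, 3.0,   /* row 0 */
    NAN, 1.0, NAN    /* row 1 */
};
/* Sparse COO: only the three non-missing entries are stored */
int_t  X_row[] = {0,   0,   1  };
int_t  X_col[] = {0,   2,   1  };
real_t X[]     = {5.0, 3.0, 1.0};
size_t nnz     = 3;
#endif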
/*******************************************************************************
Collective Model
----------------
Some notes:
* The "X" data must always be passed to "fit", whereas "U" and "I" are
optional.
* It's possible to pass partially disjoint sets of users/items - that is,
both the "X" and "U" matrices might have rows that the other doesn't, in
which case it will consider the missing rows to have all values missing.
Same thing for "X" and "I". However, the procedure will work faster if
the "X" matrix is the shorter one.
* When using the "NA_as_zero" options, the matrix with "NA_as_zero" should
not have a smaller dimension (fewer rows/columns) than its counterpart.
E.g. if passing "NA_as_zero_U", then 'm_u' >= 'm'.
* For the implicit model, it is assumed that 'm' >= 'm_u' and 'n' >= 'n_i'.
* For sparse matrices "X"-"U"-"I", these shape constraints are just nominal,
as it is possible to have entire rows or entire columns missing, in which
case the underlying data arrays do not change if empty rows or columns are
added (since the data is passed as COO).
* For dense inputs, missing values should be coded as NAN. The sparse inputs
should NOT have any NAN values among the non-zero/non-missing entries.
* For the optional inputs, one should pass NULL when they are array pointers
and are not given, or zero when they are integers and not given.
* m_max = max(m, m_u, m_ubin)
* n_max = max(n, n_i, n_ibin)
--- Model matrices ---
@param biasA (out)(optional) The user/row biases or intercepts. These are
only available for the explicit-feedback model. If not used,
should pass NULL here and pass 'user_bias'=false.
Dimension: [m_max]
@param biasB (out)(optional) The item/column biases or intercepts. These are
only available for the explicit-feedback model. If not used,
should pass NULL here and pass 'item_bias'=false.
Dimension: [n_max]
@param A (out) The user-factors matrix (2D).
Dimension: [m_max, k_user+k+k_main]
@param B (out) The item-factors matrix (2D).
Dimension: [n_max, k_item+k+k_main]
@param C (out)(optional) The user-attribute factors matrix (2D).
If passing "U" data, "C" must be passed.
Dimension: [p, k_user+k]
@param D (out)(optional) The item-attribute factors matrix (2D).
If passing "I" data, "D" must be passed.
Dimension: [q, k_item+k]
@param Cb (out)(optional) The binary-user-attribute factors matrix (2D).
If passing "Ub" data, "Cb" must be passed.
Dimension: [pbin, k_user+k]
@param Db (out)(optional) The binary-item-attribute factors matrix (2D).
If passing "Ib" data, "Db" must be passed.
Dimension: [qbin, k_item+k]
@param Ai (out)(optional) The implicit user-factors matrix. This is used
when passing 'add_implicit_features'=true.
Dimension: [m, k+k_main]
@param Bi (out)(optional) The implicit item-factors matrix. This is used
when passing 'add_implicit_features'=true.
If using 'include_all_X'=true, should have the full dimension,
but will be filled with zeros at the end.
Dimension: [n, k+k_main] or [n_max, k+k_main]
@param reset_values Whether to reset the values in the model matrices to
random normal numbers (for explicit-feedback models) or
random uniform numbers (for implicit-feedback models)
and the biases to a most-popular model. If the matrices
and biases are already initialized, should pass 'false'
here.
@param seed Random seed used to initialize the matrices, if passing
'reset_values'=true.
--- Other model parameters ---
@param glob_mean (out) The global mean of "X", used to center it when
passing 'center=true'. Even if not using centering, must
be passed, in which case it will be set to zero, and this
zero should be passed to the prediction functions.
Dimension: [1]
@param scaling_biasA (out)(optional) When passing 'scale_lam=true' along
with 'scale_bias_const=true' and using user biases,
this will hold the scaling that is applied to the
regularization of the user biases. Must be passed
when having user biases along with the two options
above.
Dimension: [1]
@param scaling_biasB (out)(optional) When passing 'scale_lam=true' along
with 'scale_bias_const=true' and using item biases,
this will hold the scaling that is applied to the
regularization of the item biases. Must be passed
when having item biases along with the two options
above.
Dimension: [1]
@param U_colmeans (out)(optional) The column means for the "U" matrix,
which are used to center it. If not passing it, will not
center the "U" matrix.
Dimension: [p]
@param I_colmeans (out)(optional) The column means for the "I" matrix,
which are used to center it. If not passing it, will not
center the "I" matrix.
Dimension: [q]
@param w_main_multiplier (out) Number to multiply 'w_main' with, if
passing 'adjust_weight=true'. Must be passed
regardless.
Dimension: [1]
--- Problem dimensions ---
@param m Number of rows in "X".
@param n Number of columns in "X".
@param k Number of shared latent factors.
Recommended values: 30 to 100
@param k_user Number of latent factors which are only used to factorize
"C" and "Cb". These will be the first columns in the results.
Recommended value: 0
@param k_item Number of latent factors which are only used to factorize
"D" and "Db". These will be the first columns in the results.
Recommended value: 0
@param k_main Number of latent factors which are only used to factorize
"X". These will be the last columns in the results.
Note that these are also used in the factorization of the
implicit features (when using 'add_implicit_features').
Recommended value: 0
--- More model hyperparameters ---
@param user_bias Whether to include user/row biases/intercepts in
the model.
Recommended value: true
@param item_bias Whether to include item/column biases/intercepts in
the model.
Recommended value: true
@param center Whether to center the 'X' data by subtracting its mean.
It is highly recommended to pass 'true' here, but for some
applications with 'NA_as_zero_X=false', it might make sense
to pass 'false'.
Recommended value: true
@param lam Regularization parameter applied to the model matrices.
Note that, unlike in other software, the loss/objective function
here is by default not scaled by the number of entries anywhere
(see parameter "scale_lam"), so the optimal regularization value
is likely going to be rather high - for example, for the
MovieLens10M, a value of 35 would perform well, while for the
LastFM-360K, a value of 5 performs better.
Will be ignored if passing 'lam_unique'.
Recommended values: from 10^-2 to 10^2
@param lam_unique (optional) Regularization parameter to apply to each
matrix of the model. Should specify the regularizations
in this order:
biasA, biasB, A, B, C/Cb, D/Db.
Note that setting too low a regularization parameter for
the item biases might result in recommending the items
with the highest "X" values regardless of their number
of occurrences.
Dimension: [6] (regardless of model)
@param l1_lam Regularization parameter applied to the L1 norm of the model
matrices. Note that, when this is non-zero, the model will
be fit through a coordinate descent procedure, and will be
significantly slower than the L2-regularized model solved
with Cholesky decomposition.
Recommended value: 0
@param l1_lam_unique (optional) L1 regularization to apply to each matrix
of the model. Follows the same order as "lam_unique".
Dimension: [6] (regardless of model)
@param scale_lam Whether to scale (increase) the regularization parameter
for each row of the model matrices (A, B, C, D) according
to the number of non-missing entries in the data for that
particular row, as proposed in reference (g). For the
A and B matrices, the regularization will only be scaled
according to the number of non-missing entries in "X"
(see also the "scale_lam_sideinfo" parameter). Note that,
when using the "NA_as_zero_*" options, all entries are
considered to be non-missing. If passing "true" here, the
optimal value for "lam" will be much smaller
(and likely below 0.1).
This option tends to give better results, but
requires more hyperparameter tuning.
When generating factors based on side information alone,
if passing 'scale_lam_sideinfo', will regularize assuming
there was one observation present. Be aware that using
this option **without** 'scale_lam_sideinfo=true' can lead
to bad cold-start recommendations, as it will set a very
small regularization for users who have no 'X' data.
Warning: in smaller datasets, using this option can result
in top-N recommendations having mostly items with very few
interactions (see parameter 'scale_bias_const').
Recommended value: false
@param scale_lam_sideinfo Whether to scale (increase) the regularization
parameter for each row of the "A" and "B"
matrices according to the number of non-missing
entries in both "X" and the side info matrices
"U" and "I". If passing "true" here, "scale_lam"
will also be assumed to be "true".
Recommended value: false
@param scale_bias_const When passing 'scale_lam=true' and using user or item
biases, whether to apply the same scaling to the
regularization of the biases to all users and items,
according to the average number of non-missing entries
rather than to the number of entries for each specific
user/item.
While this tends to result in worse RMSE, it tends to make
the top-N recommendations less likely to select items with
only a few interactions from only a few users.
Recommended value: false
@param w_main Weight to assign to the errors of the factorization of "X"
in the final objective. Note that, in the implicit model,
the sum of errors from "X" is typically much larger than for
the other matrices, and one usually wants to correct for
this by playing with the weights.
Recommended value: 1
@param w_user Weight to assign to the errors of the factorization of "U"
in the final objective.
Recommended value: 1
@param w_item Weight to assign to the errors of the factorization of "I"
in the final objective.
Recommended value: 1
@param w_implicit Weight to assign to the errors of the implicit features
factorizations, if using 'add_implicit_features'.
Note that, depending on the sparsity of the matrix, the
sum errors on the implicit features might be much larger
than for the "X" matrix of interest.
Recommended values: 0.01 to 0.5.
@param alpha Multiplier for the values in the "X" matrix.
Recommended value: 1 (but reference (c) suggests 40)
@param adjust_weight Whether to downweight (decrease 'w_main') the errors
in the "X" matrix according to the number of present
entries.
Recommended value: false
@param apply_log_transf Whether to apply a logarithm transformation to the
values in "X" (i.e. X := log(X)).
Recommended value: false
@param nonneg Whether to make the matrices A and B non-negative. The
matrix 'X' must also be non-negative in order for this to
work. Under this option, will use a coordinate descent
solver instead of Cholesky or CG, whose maximum number of
iterations is controlled through 'max_cd_steps'.
Recommended value: false
@param nonneg_C Whether to make the matrix C non-negative. The matrix 'U'
must also be non-negative in order for this to work.
Recommended value: false
@param nonneg_D Whether to make the matrix D non-negative. The matrix 'I'
must also be non-negative in order for this to work.
Recommended value: false
--- "X" data ---
@param X_row Row indices when passing "X" as a sparse COO matrix.
Dimension: [nnz]
@param X_col Column indices when passing "X" as a sparse COO matrix.
Dimension: [nnz]
@param X Non-missing values of the "X" matrix, when passing it as sparse
COO, with row and column indices given by 'X_row' and 'X_col'.
As the values can get overwritten, it should not point to the
same address as another input data array.
Dimension: [nnz]
@param nnz Number of non-missing entries in "X", when passing it as sparse.
@param Xfull "X" as a full 2D array, if passing it as dense.
Dimension: [m, n]
@param weight (optional) Observation weights for the entries in "X". Must
be of the same shape as the "X" data passed - that is, if
"X" is passed as sparse COO, must match with values of 'X',
if "X" is passed as dense, must match with values of 'Xfull'
Dimension: [nnz] or [m, n]
--- "U" and "I" data ---
@param U (optional) The "U" data, in dense format.
As the values can get overwritten, it should not point to the
same address as another input data array.
Dimension: [m_u, p]
@param m_u (optional) Number of rows in the "U" matrix. Pass zero if there
is no "U" matrix. Should be passed regardless of whether "U"
is passed as dense or sparse.
@param p (optional) Number of columns in the "U" matrix. Pass zero if there
is no "U" matrix. Should be passed regardless of whether "U"
is passed as dense or sparse.
@param II (optional) The "I" data, in dense format.
As the values can get overwritten, it should not point to the
same address as another input data array.
Note: variable with name "I" alone is already taken by the
C preprocessor in most compilers, hence the name "II".
Dimension: [n_i, q]
@param n_i (optional) Number of rows in the "I" matrix. Pass zero if there
is no "I" matrix. Should be passed regardless of whether "I"
is passed as dense or sparse.
@param q (optional) Number of columns in the "I" matrix. Pass zero if there
is no "I" matrix. Should be passed regardless of whether "I"
is passed as dense or sparse.
@param U_row (optional) The "U" data, in sparse COO format.
This array indicates the row indices.
Dimension: [nnz_U]
@param U_col (optional) The "U" data, in sparse COO format.
This array indicates the column indices.
Dimension: [nnz_U]
@param U_sp (optional) The "U" data, in sparse COO format.
This array indicates the non-missing values.
As the values can get overwritten, it should not point to the
same address as another input data array.
Dimension: [nnz_U]
@param nnz_U (optional) Number of non-zero entries in the "U" matrix, if
passing it as sparse COO. Pass zero if there is no "U" matrix
or if it is passed as dense.
@param I_row (optional) The "I" data, in sparse COO format.
This array indicates the row indices.
Dimension: [nnz_I]
@param I_col (optional) The "I" data, in sparse COO format.
This array indicates the column indices.
Dimension: [nnz_I]
@param I_sp (optional) The "I" data, in sparse COO format.
This array indicates the non-missing values.
As the values can get overwritten, it should not point to the
same address as another input data array.
Dimension: [nnz_I]
@param nnz_I (optional) Number of non-zero entries in the "I" matrix, if
passing it as sparse COO. Pass zero if there is no "I" matrix
or if it is passed as dense.
--- "U" and "I" with binary values ---
@param Ub (optional) Columns of the "U" matrix which have only binary (0/1)
values, for which a sigmoid transformation will be applied on
the predicted values. If passed, should always be dense.
Only supported with the L-BFGS method.
As the values can get overwritten, it should not point to the
same address as another input data array.
Dimension: [m_ubin, pbin]
@param m_ubin (optional) Number of rows in the "Ub" matrix. Pass zero here
if there is no "Ub" matrix.
@param pbin (optional) Number of columns in the "Ub" matrix. Pass zero
here if there is no "Ub" matrix.
@param Ib (optional) Columns of the "I" matrix which have only binary (0/1)
values, for which a sigmoid transformation will be applied on
the predicted values. If passed, should always be dense.
Only supported with the L-BFGS method.
As the values can get overwritten, it should not point to the
same address as another input data array.
Dimension: [n_ibin, qbin]
@param n_ibin (optional) Number of rows in the "Ib" matrix. Pass zero here
if there is no "Ib" matrix.
@param qbin (optional) Number of columns in the "Ib" matrix. Pass zero
here if there is no "Ib" matrix.
--- More options for the data ---
@param NA_as_zero_X Whether to consider the non-present entries in the
sparse "X" matrix as zeroes (they count towards the
errors) instead of as unknown (they don't count towards
the errors).
For recommender systems, one usually wants to pass
'false' here.
Ignored if "X" is passed as dense.
@param NA_as_zero_U Same but for the "U" matrix.
@param NA_as_zero_I Same but for the "I" matrix.
@param add_implicit_features Whether to generate implicit features from the
data as described in reference (d). If passing
'true' here, must also pass the matrices
'Ai' and 'Bi'.
For recommender systems with small data, one
usually wants to pass 'true' here.
--- For the ALS procedures ---
@param niter Number of ALS iterations to perform. Note that one iteration
denotes an update round over all the matrices.
Generally, the more iterations, the better the end results.
Recommended values: 6 to 30
@param use_cg Whether to use a Conjugate Gradient method to optimize the
model matrices - this is faster than the default Cholesky
method, but less precise, less numerically stable, and might
require more iterations to converge. Note that it will be
ignored in cases in which the Cholesky approach is faster,
such as when using missing-as-zero with no weights.
In general, the Cholesky solver tends to lead to better
results for the explicit-feedback models.
Recommended value: true
@param precondition_cg Whether to use Jacobi preconditioning for the CG
procedure. In general, this type of preconditioning
is not beneficial (makes the algorithm slower) as
the factor variables tend to be in the same scale,
but it might help when using non-shared factors.
Note that, when using preconditioning, the procedure
will not check for convergence, taking instead a
fixed number of steps (given by 'max_cg_steps') at
each iteration regardless of whether it has reached
the optimum already.
Ignored when passing 'use_cg=false'.
Recommended value: false
@param max_cg_steps Maximum number of CG updates to perform per iteration.
Recommended values: something much smaller than k, >=2
@param max_cd_steps Maximum number of coordinate descent updates to perform
per iteration when applying non-negativity constraints.
Pass zero for no maximum.
Recommended values: at least 2x higher than k
@param finalize_chol Whether to perform the last iteration with the
Cholesky solver. Note that, under the CG method,
the factors obtained by calling the prediction
functionality might differ slightly from those
obtained from the 'fit' functionality. This option
solves that problem. Ignored for matrices which have
non-negativity constraints.
Recommended value: true
--- For the L-BFGS procedures ---
@param n_corr_pairs Number of correction pairs to use for the L-BFGS solver
@param maxiter Maximum number of L-BFGS updates to perform.
@param prefer_onepass Whether to prefer a parallelization strategy that
iterates over the data only once. This uses a lot
more memory than a strategy that iterates first by
rows and then by columns, but might be faster when
the problem dimension is small.
@param niter (out) Number of L-BFGS updates that were performed.
Dimension: [1]
@param nfev (out) Number of function evaluations that were performed.
Dimension: [1]
--- Other ---
@param verbose Whether to print information about the optimization
procedure. This can be helpful for getting an idea of the
bottlenecks in ALS, and for determining if the
hyperparameters in L-BFGS make sense (e.g. if there are
many iterations with no or very small decrease in the loss
function, the regularization is probably too small, if it
converges in too few iterations, regularization is probably
too large).
Recommended value: true
@param print_every How often, in L-BFGS iterations, to print information.
Recommended value: 100
@param handle_interrupt Whether to handle interrupt signals (SIGINT) by
gracefully terminating the procedure earlier.
The results from the matrices will still be usable
if interrupted. Note however that, if passing
'true', any potential interrupt signal will not
trigger the current action set for SIGINT, if any
such action was set, which could be problematic
when calling the functions from another language.
--- When using the prediction API ---
@param precompute_for_predictions Whether to precompute some extra matrices
which can help to speed up calculations
of factors in new data.
If passing 'true', must also pass arrays
for the matrices below.
If passing 'false', these values can
still be obtained later through the
functions named 'precompute_*'.
Note that, when passing 'user_bias'=true,
the precomputed matrices are computed
from 'B_plus_bias', not from 'B'.
Recommended value: true
@param include_all_X When 'n' is less than 'n_i' or 'n_ibin' and passing
'precompute_for_predictions'=true, whether to
construct the precomputed matrices with all the
missing items/columns, so that it would be possible to
take in new "X" data about them.
Warning: if fitting the model with
'NA_as_zero_X=false' plus 'include_all_X=false' and
then calling the prediction functions for new data
with 'NA_as_zero_X=true', the precomputed matrices
will be invalid and should not be provided, unless
they are recomputed through the provided helper
function.
Recommended value: true
@param B_plus_bias (out)(optional) Precomputed matrix consisting of the
"B" matrix with an extra column of all-ones. Only used
when passing 'user_bias'=true.
Dimension: [n_max, k_item+k+k_main+1]
@param precomputedBtB (out)(optional) Precomputed matrix, containing
t(B)*B - for the explicit model, or
t(B)*B + diag(lambda') - for the implicit model
With lambda' = lambda_ / w_main
Dimension: [k+k_main+user_bias, k+k_main+user_bias]
@param precomputedTransBtBinvBt (out)(optional) Precomputed matrix,
containing:
t( inv(t(B)*B+diag(lambda'))*t(B) )
With lambda' = lambda_ / w_main
This is only used for warm-start
predictions with no "U" data and "X" data
with no missing values or with
"NA_as_zero_X". This matrix is ignored
when using implicit features or
non-negativity constraints.
This matrix is optional even if passing
'precompute_for_predictions' = true.
If passing 'include_all_X', will need to
have also the entries for potentially
missing "X" columns.
Dimension: [k+k_main+user_bias, n_max]
or
[k+k_main+user_bias, n]
@param precomputedBtXbias (out)(optional) Precomputed matrix, containing:
t(B)*(-biasB - glob_mean)
This is only used when using 'NA_as_zero_X=true'
along with centering or item biases. This matrix
is optional even if passing
'precompute_for_predictions=true'.
Dimension: [k+k_main+user_bias]
@param precomputedBeTBeChol (out)(optional) Precomputed matrix, containing:
Chol(t(Be)*Be)
Where Be is the extended block matrix from
reference (a), and will contain information
from 'Bi' if adding implicit features.
This matrix is ignored when using
non-negativity constraints.
This matrix is optional even if
passing 'precompute_for_predictions' = true.
Dimension: [k_user+k+k_main+user_bias,
k_user+k+k_main+user_bias]
@param precomputedBiTBi (out)(optional) Precomputed matrix, containing:
t(Bi)*Bi
Dimension: [k+k_main, k+k_main]
@param precomputedTransCtCinvCt (out)(optional) Precomputed matrix,
containing:
t( inv(t(C)*C+diag(lambda'))*t(C) )
with lambda_ here calculated as:
lambda' = lambda_ / (w_main * w_user)
This is only used for cold-start
predictions with no "X" data plus "U" data
with no missing values or with
"NA_as_zero_U". This matrix is ignored
when using implicit features or
non-negativity constraints.
This matrix is optional even if passing
'precompute_for_predictions' = true.
Dimension: [k_user+k, p]
@param precomputedCtCw (out)(optional) Precomputed matrix, containing
w_user*t(C)*C
Dimension: [k_user+k, k_user+k]
@param precomputedCtUbias (out)(optional) Precomputed matrix, containing
(-w_user)*t(C)*U_colmeans
Only used when centering 'U' (signaled by passing
'U_colmeans') and using 'NA_as_zero_U=true'.
Dimension: [k_user+k]
--- Parallelization ---
@param nthreads Number of parallel threads to use. Parallelization is only
available when compiling with OpenMP support. Note that,
(a) the more threads that are used, the higher the memory
consumption will be, and (b) parallelization occurs by rows
and by columns, thus values higher than 'm' or 'n' will not
result in a speed up.
--- Return Codes ---
@retval All the functions will return 0 upon successful termination.
In case a function receives an invalid combination of inputs,
it will return 2 (but note that one should still make sure that
e.g. the pointers are not NULL when they shouldn't be, as the
functions do not make such checks).
If a function runs out of memory, it will return 1.
*******************************************************************************/
/* For ease of reading - these structs/enums are not used by the library */
enum CmfReturnCode {CmfrecSuccess = 0,
CmfrecOutOfMemory = 1,
CmfrecInvalidInput = 2,
CmfrecReceivedInterrupt = 3};
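/* A minimal sketch of checking the return codes against the convenience
   enum above - 'ret' stands for the value returned by any of the
   "fit"/"precompute"/"factors"/"predict" functions. */
#if 0
switch (ret) {
    case CmfrecSuccess:           break; /* results are ready to use        */
    case CmfrecOutOfMemory:       break; /* allocation failed, outputs invalid */
    case CmfrecInvalidInput:      break; /* bad input combination           */
    case CmfrecReceivedInterrupt: break; /* SIGINT; results still usable    */
}
#endif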
struct CollectiveExplicitModel {
/* Model parameters and their dimensions */
real_t *A; real_t *biasA; real_t scaling_biasA; int_t m; int_t m_max;
real_t *B; real_t *biasB; real_t scaling_biasB; int_t n; int_t n_max;
real_t *C; int_t m_u; int_t p;
real_t *D; int_t n_i; int_t q;
real_t *Cb; int_t m_ubin; int_t pbin;
real_t *Db; int_t n_ibin; int_t qbin;
real_t *Ai; real_t *Bi;
int_t k; int_t k_user; int_t k_item; int_t k_main;
/* Regularization */
real_t lam; real_t *lam_unique;
real_t l1_lam; real_t *l1_lam_unique;
/* Options about the data and factors */
bool add_implicit_features; bool nonneg;
bool NA_as_zero_X; bool NA_as_zero_U;
bool scale_lam; bool scale_lam_sideinfo; bool scale_bias_const;
/* Mean used to center data */
real_t glob_mean;
/* Optional means to center side info */
real_t *U_colmeans; real_t *I_colmeans;
/* Optional (for predictions) */
real_t *B_plus_bias;
real_t *precomputedBtB;
real_t *precomputedTransBtBinvBt;
real_t *precomputedBtXbias;
real_t *precomputedBeTBeChol;
real_t *precomputedBiTBi;
real_t *precomputedTransCtCinvCt;
real_t *precomputedCtCw;
real_t *precomputedCtUbias;
};
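/* A sketch, under hypothetical sizes, of allocating the main matrices of
   the explicit model with the dimensions documented earlier, for a model
   with biases and no side information (hence m_max = m and n_max = n);
   'malloc' comes from <stdlib.h> and the declarations are assumed to
   appear inside a function body. */
#if 0
int_t m = 1000, n = 500;                          /* hypothetical "X" shape */
int_t k = 50, k_user = 0, k_item = 0, k_main = 0; /* recommended defaults   */
real_t *A = (real_t*)malloc((size_t)m * (size_t)(k_user+k+k_main) * sizeof(real_t));
real_t *B = (real_t*)malloc((size_t)n * (size_t)(k_item+k+k_main) * sizeof(real_t));
real_t *biasA = (real_t*)malloc((size_t)m * sizeof(real_t)); /* [m_max] */
real_t *biasB = (real_t*)malloc((size_t)n * sizeof(real_t)); /* [n_max] */
#endif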
struct CollectiveImplicitModel {
/* Model parameters and their dimensions */
real_t *A; real_t *biasA; real_t scaling_biasA; int_t m;
real_t *B; real_t *biasB; real_t scaling_biasB; int_t n;
real_t *C; int_t m_u; int_t p;
real_t *D; int_t n_i; int_t q;
int_t k; int_t k_user; int_t k_item; int_t k_main;
/* Regularization */
real_t lam; real_t *lam_unique;
real_t l1_lam; real_t *l1_lam_unique;
/* Optional means to center side info */
real_t *U_colmeans; real_t *I_colmeans;
/* Other parameters */
real_t w_main_multiplier; bool apply_log_transf;
bool nonneg; bool NA_as_zero_U;
/* Optional (for predictions) */
real_t *B_plus_bias;
real_t *precomputedBtB;
real_t *precomputedBeTBe;
real_t *precomputedBeTBeChol;
real_t *precomputedCtUbias;
};
CMFREC_EXPORTABLE int_t fit_collective_explicit_lbfgs
(
real_t *biasA, real_t *biasB,
real_t *A, real_t *B,
real_t *C, real_t *Cb,
real_t *D, real_t *Db,
bool reset_values, int_t seed,
real_t *glob_mean,
real_t *U_colmeans, real_t *I_colmeans,
int_t m, int_t n, int_t k,
int_t X_row[], int_t X_col[], real_t *X, size_t nnz,
real_t *Xfull,
real_t *weight,
bool user_bias, bool item_bias, bool center,
real_t lam, real_t *lam_unique,
real_t *U, int_t m_u, int_t p,
real_t *II, int_t n_i, int_t q,
real_t *Ub, int_t m_ubin, int_t pbin,
real_t *Ib, int_t n_ibin, int_t qbin,
int_t U_row[], int_t U_col[], real_t *U_sp, size_t nnz_U,
int_t I_row[], int_t I_col[], real_t *I_sp, size_t nnz_I,
int_t k_main, int_t k_user, int_t k_item,
real_t w_main, real_t w_user, real_t w_item,
int_t n_corr_pairs, size_t maxiter,
int nthreads, bool prefer_onepass,
bool verbose, int_t print_every, bool handle_interrupt,
int_t *niter, int_t *nfev,
bool precompute_for_predictions,
bool include_all_X,
real_t *B_plus_bias,
real_t *precomputedBtB,
real_t *precomputedTransBtBinvBt,
real_t *precomputedBeTBeChol,
real_t *precomputedTransCtCinvCt,
real_t *precomputedCtCw
);
CMFREC_EXPORTABLE int_t fit_collective_explicit_als
(
real_t *biasA, real_t *biasB,
real_t *A, real_t *B,
real_t *C, real_t *D,
real_t *Ai, real_t *Bi,
bool add_implicit_features,
bool reset_values, int_t seed,
real_t *glob_mean,
real_t *U_colmeans, real_t *I_colmeans,
int_t m, int_t n, int_t k,
int_t ixA[], int_t ixB[], real_t *X, size_t nnz,
real_t *Xfull,
real_t *weight,
bool user_bias, bool item_bias, bool center,
real_t lam, real_t *lam_unique,
real_t l1_lam, real_t *l1_lam_unique,
bool scale_lam, bool scale_lam_sideinfo, bool scale_bias_const,
real_t *scaling_biasA, real_t *scaling_biasB,
real_t *U, int_t m_u, int_t p,
real_t *II, int_t n_i, int_t q,
int_t U_row[], int_t U_col[], real_t *U_sp, size_t nnz_U,
int_t I_row[], int_t I_col[], real_t *I_sp, size_t nnz_I,
bool NA_as_zero_X, bool NA_as_zero_U, bool NA_as_zero_I,
int_t k_main, int_t k_user, int_t k_item,
real_t w_main, real_t w_user, real_t w_item, real_t w_implicit,
int_t niter, int nthreads,
bool verbose, bool handle_interrupt,
bool use_cg, int_t max_cg_steps, bool precondition_cg, bool finalize_chol,
bool nonneg, int_t max_cd_steps, bool nonneg_C, bool nonneg_D,
bool precompute_for_predictions,
bool include_all_X,
real_t *B_plus_bias,
real_t *precomputedBtB,
real_t *precomputedTransBtBinvBt,
real_t *precomputedBtXbias,
real_t *precomputedBeTBeChol,
real_t *precomputedBiTBi,
real_t *precomputedTransCtCinvCt,
real_t *precomputedCtCw,
real_t *precomputedCtUbias
);
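/* A sketch of a minimal call to the ALS fitting function above: sparse COO
   "X" only (as in the input-format sketch near the top), biases and
   centering enabled, no side information, and no precomputed prediction
   matrices. All sizes and arrays ('A', 'B', 'biasA', 'biasB', 'X_row',
   'X_col', 'X', 'm', 'n', 'nnz') are hypothetical and caller-provided. */
#if 0
real_t glob_mean;
int_t ret = fit_collective_explicit_als(
    biasA, biasB,                 /* user/item biases                       */
    A, B,                         /* factor matrices                        */
    NULL, NULL,                   /* C, D: no side info                     */
    NULL, NULL, false,            /* Ai, Bi, add_implicit_features          */
    true, 123,                    /* reset_values, seed                     */
    &glob_mean,
    NULL, NULL,                   /* U_colmeans, I_colmeans                 */
    m, n, 50,                     /* m, n, k                                */
    X_row, X_col, X, nnz,         /* "X" as sparse COO                      */
    NULL,                         /* Xfull: no dense "X"                    */
    NULL,                         /* weight                                 */
    true, true, true,             /* user_bias, item_bias, center           */
    35., NULL,                    /* lam, lam_unique                        */
    0., NULL,                     /* l1_lam, l1_lam_unique                  */
    false, false, false,          /* scale_lam(_sideinfo), scale_bias_const */
    NULL, NULL,                   /* scaling_biasA, scaling_biasB           */
    NULL, 0, 0,                   /* U, m_u, p                              */
    NULL, 0, 0,                   /* II, n_i, q                             */
    NULL, NULL, NULL, 0,          /* "U" as sparse COO                      */
    NULL, NULL, NULL, 0,          /* "I" as sparse COO                      */
    false, false, false,          /* NA_as_zero_X/_U/_I                     */
    0, 0, 0,                      /* k_main, k_user, k_item                 */
    1., 1., 1., 0.,               /* w_main, w_user, w_item, w_implicit     */
    10, 1,                        /* niter, nthreads                        */
    true, true,                   /* verbose, handle_interrupt              */
    true, 3, false, true,         /* use_cg, max_cg_steps, precondition_cg,
                                     finalize_chol                          */
    false, 0, false, false,       /* nonneg, max_cd_steps, nonneg_C/_D      */
    false, false,                 /* precompute_for_predictions,
                                     include_all_X                          */
    NULL, NULL, NULL, NULL, NULL, /* B_plus_bias, BtB, TransBtBinvBt,
                                     BtXbias, BeTBeChol                     */
    NULL, NULL, NULL, NULL        /* BiTBi, TransCtCinvCt, CtCw, CtUbias    */
);
#endif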
CMFREC_EXPORTABLE int_t fit_collective_implicit_als
(
real_t *A, real_t *B,
real_t *C, real_t *D,
bool reset_values, int_t seed,
real_t *U_colmeans, real_t *I_colmeans,
int_t m, int_t n, int_t k,
int_t X_row[], int_t X_col[], real_t *X, size_t nnz,
real_t lam, real_t *lam_unique,
real_t l1_lam, real_t *l1_lam_unique,
real_t *U, int_t m_u, int_t p,
real_t *II, int_t n_i, int_t q,
int_t U_row[], int_t U_col[], real_t *U_sp, size_t nnz_U,
int_t I_row[], int_t I_col[], real_t *I_sp, size_t nnz_I,
bool NA_as_zero_U, bool NA_as_zero_I,
int_t k_main, int_t k_user, int_t k_item,
real_t w_main, real_t w_user, real_t w_item,
real_t *w_main_multiplier,
real_t alpha, bool adjust_weight, bool apply_log_transf,
int_t niter, int nthreads,
bool verbose, bool handle_interrupt,
bool use_cg, int_t max_cg_steps, bool precondition_cg, bool finalize_chol,
bool nonneg, int_t max_cd_steps, bool nonneg_C, bool nonneg_D,
bool precompute_for_predictions,
real_t *precomputedBtB,
real_t *precomputedBeTBe,
real_t *precomputedBeTBeChol,
real_t *precomputedCtUbias
);
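/* A corresponding sketch for the implicit-feedback model, which takes "X"
   only as sparse COO and has no biases or centering; same assumptions
   about caller-provided arrays as in the explicit sketch above. */
#if 0
real_t w_main_multiplier;
int_t ret = fit_collective_implicit_als(
    A, B,                   /* factor matrices                         */
    NULL, NULL,             /* C, D: no side info                      */
    true, 123,              /* reset_values, seed                      */
    NULL, NULL,             /* U_colmeans, I_colmeans                  */
    m, n, 50,               /* m, n, k                                 */
    X_row, X_col, X, nnz,   /* "X" as sparse COO                       */
    5., NULL,               /* lam, lam_unique                         */
    0., NULL,               /* l1_lam, l1_lam_unique                   */
    NULL, 0, 0,             /* U, m_u, p                               */
    NULL, 0, 0,             /* II, n_i, q                              */
    NULL, NULL, NULL, 0,    /* "U" as sparse COO                       */
    NULL, NULL, NULL, 0,    /* "I" as sparse COO                       */
    false, false,           /* NA_as_zero_U, NA_as_zero_I              */
    0, 0, 0,                /* k_main, k_user, k_item                  */
    1., 1., 1.,             /* w_main, w_user, w_item                  */
    &w_main_multiplier,     /* must be passed regardless               */
    1., false, false,       /* alpha, adjust_weight, apply_log_transf  */
    10, 1,                  /* niter, nthreads                         */
    true, true,             /* verbose, handle_interrupt               */
    true, 3, false, true,   /* use_cg, max_cg_steps, precondition_cg,
                               finalize_chol                           */
    false, 0, false, false, /* nonneg, max_cd_steps, nonneg_C/_D       */
    false,                  /* precompute_for_predictions              */
    NULL, NULL, NULL, NULL  /* BtB, BeTBe, BeTBeChol, CtUbias          */
);
#endif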
CMFREC_EXPORTABLE int_t precompute_collective_explicit
(
real_t *B, int_t n, int_t n_max, bool include_all_X,
real_t *C, int_t p,
real_t *Bi, bool add_implicit_features,
real_t *biasB, real_t glob_mean, bool NA_as_zero_X,
real_t *U_colmeans, bool NA_as_zero_U,
int_t k, int_t k_user, int_t k_item, int_t k_main,
bool user_bias,
bool nonneg,
real_t lam, real_t *lam_unique,
bool scale_lam, bool scale_lam_sideinfo,
real_t w_main, real_t w_user, real_t w_implicit,
real_t *B_plus_bias,
real_t *BtB,
real_t *TransBtBinvBt,
real_t *BtXbias,
real_t *BeTBeChol,
real_t *BiTBi,
real_t *TransCtCinvCt,
real_t *CtCw,
real_t *CtUbias
);
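/* If 'precompute_for_predictions=false' was used at fitting time, the
   prediction helper matrices can still be obtained afterwards. A sketch
   matching the explicit ALS call above ('B', 'biasB', 'glob_mean', 'n'
   as left by the fit; 'B_plus_bias' and 'BtB' are caller-allocated with
   the dimensions documented earlier, here with k=50 and user_bias=true): */
#if 0
real_t *B_plus_bias = (real_t*)malloc((size_t)n * 51 * sizeof(real_t));
real_t *BtB = (real_t*)malloc((size_t)51 * 51 * sizeof(real_t));
int_t ret = precompute_collective_explicit(
    B, n, n, false,          /* B, n, n_max, include_all_X          */
    NULL, 0,                 /* C, p: no side info                  */
    NULL, false,             /* Bi, add_implicit_features           */
    biasB, glob_mean, false, /* biasB, glob_mean, NA_as_zero_X      */
    NULL, false,             /* U_colmeans, NA_as_zero_U            */
    50, 0, 0, 0,             /* k, k_user, k_item, k_main           */
    true,                    /* user_bias (matches the fit)         */
    false,                   /* nonneg                              */
    35., NULL,               /* lam, lam_unique                     */
    false, false,            /* scale_lam, scale_lam_sideinfo       */
    1., 1., 0.,              /* w_main, w_user, w_implicit          */
    B_plus_bias,
    BtB,
    NULL, NULL, NULL,        /* TransBtBinvBt, BtXbias, BeTBeChol   */
    NULL, NULL, NULL, NULL   /* BiTBi, TransCtCinvCt, CtCw, CtUbias */
);
#endif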
CMFREC_EXPORTABLE int_t precompute_collective_implicit
(
real_t *B, int_t n,
real_t *C, int_t p,
real_t *U_colmeans, bool NA_as_zero_U,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t lam, real_t w_main, real_t w_user, real_t w_main_multiplier,
bool nonneg,
bool extra_precision,
real_t *BtB,
real_t *BeTBe,
real_t *BeTBeChol,
real_t *CtUbias
);
/*******************************************************************************