Skip to content

Commit

Permalink
- Added support for negative test in slim_predict.
Browse files Browse the repository at this point in the history
- Re-organized the information that is displayed by the command-line programs.
- Fixed mini-help typos.
  • Loading branch information
karypis committed Nov 8, 2019
1 parent 50f5eba commit e09cc70
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 38 deletions.
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ gklib_path = not-set
bcls_path = not-set
shared = not-set
with_mkl = not-set
cc = /usr/bin/gcc
cxx = /usr/bin/g++
#cc = gcc-mp-4.9
#cxx = g++-mp-4.9
cc = not-set
cxx = not-set
#cc = /usr/bin/gcc
#cxx = /usr/bin/g++

#===============================================================
# There should be no need to modify beyond this point
Expand Down
2 changes: 1 addition & 1 deletion include/slim.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typedef void slim_t;
* Constant definitions
*-------------------------------------------------------------------------*/
/* SLIM's version number */
#define SLIM_VERSION "2.0pre1"
#define SLIM_VERSION "2.0"

/* The maximum length of the options[] array */
#define SLIM_NOPTIONS 40
Expand Down
23 changes: 9 additions & 14 deletions src/programs/cmdline_learn.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,24 +73,18 @@ static char helpstr[][512] = {
" csrnv - CSR format without ratings.",
" cluto - Format used by CLUTO.",
" ijv - One (row#, col#, val) per line.",
" "
" ",
" -binarize",
" Specifies that the ratings should be binarized.",
" ",
" -l1r=double",
" Specifies the L1 regularization parameter. The default value is "
"1.0.",
" ",
" -ipmdlfile=string",
" Specifies the file used to initialize the model.",
" Specifies the L1 regularization parameter. The default value is 1.0.",
" ",
" -l2r=double",
" Specifies the L2 regularization parameter. The default value is "
"1.0.",
" Specifies the L2 regularization parameter. The default value is 1.0.",
" ",
" -nnbrs=int",
" Selects FSLIM model and specifies the number of item nearest "
"neighbors",
" Selects FSLIM model and specifies the number of item nearest neighbors",
" to be used. The default value is 0.",
" ",
" -simtype=string",
Expand All @@ -116,12 +110,13 @@ static char helpstr[][512] = {
" ",
" -nthreads=int",
" Specifies the number of threads to be used for estimation.",
" The default value is maximum number of threads available in the "
"machine.",
" The default value is maximum number of threads available in the machine.",
" ",
" -ipmdlfile=string",
" Specifies the file used to initialize the model.",
" ",
" -dbglvl=int",
" Specifies the debug level. The default value turns on info and "
"timing.",
" Specifies the debug level. The default value turns on info and timing.",
" ",
" -help",
" Prints this message.",
Expand Down
27 changes: 22 additions & 5 deletions src/programs/cmdline_predict.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,27 @@ static gk_StringMap_t ifmt_options[] = {
static char helpstr[][512] = {
" ",
" Usage:",
" slim_predict [options] model-file old-file [test-file]",
" slim_predict [options] model-file old-file [test-file] [neg-file]",
" ",
" Parameters:",
" model-file",
" The file that stores the model that was generated by slim_learn.",
" ",
" old-file",
" The file that stores the historical information for each user.",
" The file that stores the historical information for the users",
" for which recommendations are generated.",
" ",
" test-file",
" The file that stores the hidden items for each user.",
" It is only used to evaluate the quality of the recommendations",
" and it should contain a row for each of the users in the old-file.",
" ",
" neg-file",
" The file that stores the negative items for each user.",
" It is used for evaluation purposes as follows: The hidden items",
" and the negative items are predicted, and the nrcmds highest",
" highest scoring items among them are returned as the recommendations.",
" This is list is then used to evaluate the performance.",
" ",
" Options:",
" -ifmt=string",
Expand All @@ -55,7 +65,7 @@ static char helpstr[][512] = {
" csrnv - CSR format without ratings.",
" cluto - Format used by CLUTO.",
" ijv - One (row#, col#, val) per line.",
" "
" ",
" -binarize",
" Specifies that the ratings should be binarized.",
" ",
Expand Down Expand Up @@ -98,6 +108,7 @@ params_t *parse_cmdline(int argc, char *argv[]) {
params->binarize = 0;
params->outfile = NULL;
params->tstfile = NULL;
params->negfile = NULL;
params->nrcmds = 10;
params->dbglvl = 0;

Expand Down Expand Up @@ -145,7 +156,7 @@ params_t *parse_cmdline(int argc, char *argv[]) {
}

/* get the datafile */
if (argc - gk_optind < 1 || argc - gk_optind > 3) {
if (argc - gk_optind < 1 || argc - gk_optind > 4) {
for (int i = 0; strlen(shorthelpstr[i]) > 0; i++)
printf("%s\n", shorthelpstr[i]);
exit(0);
Expand All @@ -159,11 +170,17 @@ params_t *parse_cmdline(int argc, char *argv[]) {
if (!gk_fexists(params->trnfile))
errexit("Input old file %s does not exist.\n", params->trnfile);

if (argc - gk_optind == 1) {
if (argc - gk_optind >= 1) {
params->tstfile = gk_strdup(argv[gk_optind++]);
if (!gk_fexists(params->tstfile))
errexit("Input test file %s does not exist.\n", params->tstfile);
}

if (argc - gk_optind >= 1) {
params->negfile = gk_strdup(argv[gk_optind++]);
if (!gk_fexists(params->negfile))
errexit("Input negative file %s does not exist.\n", params->negfile);
}

return params;
}
14 changes: 8 additions & 6 deletions src/programs/slim_learn.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ int main(int argc, char *argv[]) {
"------------------------------------------------------------------\n");
printf(" trnfile: %s, nrows: %d, ncols: %d, nnz: %zd\n", params->trnfile,
tmat->nrows, tmat->ncols, tmat->rowptr[tmat->nrows]);
printf(" l1r: %.2le, l2r: %.2le, optTol: %.2le, niters: %d\n", params->l1r,
params->l2r, params->optTol, params->niters);
printf(" binarize: %d, nnbrs: %d, nthreads: %d, dbglvl: %d\n",
params->binarize, params->nnbrs, params->nthreads, params->dbglvl);
printf(" simtype: %s, mdlfile: %s\n", slim_simtypenames[params->simtype],
params->mdlfile);
printf(" l1r: %.2le, l2r: %.2le, binarize: %s\n", params->l1r,
params->l2r, (params->binarize == 0 ? "No" : "Yes"));
printf(" solver: %s, optTol: %.2le, niters: %d\n",
slim_algonames[params->algo], params->optTol, params->niters);
printf(" mdlfile: %s, nthreads: %d, dbglvl: %d\n",
params->mdlfile, params->nthreads, params->dbglvl);
printf(" simtype: %s, nnbrs: %d\n",
slim_simtypenames[params->simtype], params->nnbrs);
printf("\nEstimating model...\n");

/* free any user-supplied ratings if set to be ignored */
Expand Down
90 changes: 82 additions & 8 deletions src/programs/slim_predict.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
/*************************************************************************/
int main(int argc, char *argv[]) {
ssize_t zI;
int32_t iU, iR, nrcmds, nhits[3], ntrue[2];
int32_t i, iU, iR, nrcmds, ask_nrcmds, ncands, nhits[3], ntrue[2];
int32_t nvalid, nvalid_head, nvalid_tail;
float all_hr, head_hr, tail_hr;
int is_tail_u, is_head_u;
int32_t *rids, *rmarker, *fmarker;
gk_fkv_t *rcands, cand;
float *rscores, hr[3], arhr, larhr, baseline;
params_t *params;
gk_csr_t *oldmat, *tstmat = NULL, *model;
gk_csr_t *oldmat, *tstmat = NULL, *negmat = NULL, *model;
int32_t ioptions[SLIM_NOPTIONS];
FILE *fpout = NULL;

Expand All @@ -34,6 +35,8 @@ int main(int argc, char *argv[]) {
oldmat = gk_csr_Read(params->trnfile, params->ifmt, params->readvals, 0);
if (params->tstfile)
tstmat = gk_csr_Read(params->tstfile, params->ifmt, params->readvals, 0);
if (params->negfile)
negmat = gk_csr_Read(params->negfile, params->ifmt, params->readvals, 0);

printf(
"------------------------------------------------------------------\n");
Expand All @@ -47,6 +50,9 @@ int main(int argc, char *argv[]) {
if (tstmat)
printf(" tstfile: %s, nrows: %d, ncols: %d, nnz: %zd\n", params->tstfile,
tstmat->nrows, tstmat->ncols, tstmat->rowptr[tstmat->nrows]);
if (negmat)
printf(" negfile: %s, nrows: %d, ncols: %d, nnz: %zd\n", params->negfile,
negmat->nrows, negmat->ncols, negmat->rowptr[negmat->nrows]);
if (params->outfile)
printf(" outfile: %s\n",
(params->outfile ? params->outfile : "No output"));
Expand All @@ -62,18 +68,25 @@ int main(int argc, char *argv[]) {
gk_free((void **)&oldmat->rowval, LTERM);
if (tstmat)
gk_free((void **)&tstmat->rowval, LTERM);
if (negmat)
gk_free((void **)&negmat->rowval, LTERM);
}

SLIM_iSetDefaults(ioptions);
ioptions[SLIM_OPTION_DBGLVL] = params->dbglvl;

/* predict for each row in oldmat */
if (params->outfile)
fpout = gk_fopen(params->outfile, "w", "outfile");

rids = gk_i32malloc(params->nrcmds, "rids");
rscores = gk_fmalloc(params->nrcmds, "rscores");
/* if we are using a negative test, ask for a score for all non-supplied items */
ask_nrcmds = (negmat ? model->nrows : params->nrcmds);

/* allocate necessary arrays */
rids = gk_i32malloc(ask_nrcmds, "rids");
rscores = gk_fmalloc(ask_nrcmds, "rscores");
rmarker = (tstmat ? gk_i32smalloc(model->ncols, -1, "rmarker") : NULL);
rcands = (negmat ? gk_fkvmalloc(model->ncols, "rcands") : NULL);

// get head and tail columns, mark 0 for head items and 1 for items in tail
fmarker = (tstmat ? SLIM_DetermineHeadAndTail(
oldmat->nrows, gk_max(oldmat->ncols, tstmat->ncols),
Expand All @@ -84,12 +97,72 @@ int main(int argc, char *argv[]) {
arhr = 0.0;
nvalid = nvalid_head = nvalid_tail = 0;


/* predict for each row in oldmat */
for (iU = 0; iU < oldmat->nrows; iU++) {
nrcmds = SLIM_GetTopN(
model, oldmat->rowptr[iU + 1] - oldmat->rowptr[iU],
oldmat->rowind + oldmat->rowptr[iU],
(oldmat->rowval ? oldmat->rowval + oldmat->rowptr[iU] : NULL), ioptions,
params->nrcmds, rids, rscores);
(oldmat->rowval ? oldmat->rowval + oldmat->rowptr[iU] : NULL),
ioptions, ask_nrcmds, rids, rscores);

/* if negative test items, select the params->nrcmds from neg+pos test */
if (negmat && nrcmds != SLIM_ERROR) {
for (zI = tstmat->rowptr[iU]; zI < tstmat->rowptr[iU + 1]; zI++)
rmarker[tstmat->rowind[zI]] = -2;
for (zI = negmat->rowptr[iU]; zI < negmat->rowptr[iU + 1]; zI++)
rmarker[negmat->rowind[zI]] = -2;

/* select the neg+pos that were in the recommended list */
for (ncands=0, iR=0; iR<nrcmds; iR++) {
if (rmarker[rids[iR]] == -2) {
rmarker[rids[iR]] = -3;
rcands[ncands].val = rids[iR];
rcands[ncands].key = rscores[iR];
ncands++;
}
}

//printf("u: %5d, ncands: %5d, ", iU, ncands);

/* add the neg+pos that were not in the recommended list */
for (zI = tstmat->rowptr[iU]; zI < tstmat->rowptr[iU + 1]; zI++) {
if (rmarker[tstmat->rowind[zI]] != -3) {
rcands[ncands].val = tstmat->rowind[zI];
rcands[ncands].key = 0.0;
ncands++;
}
rmarker[tstmat->rowind[zI]] = -1;
}
for (zI = negmat->rowptr[iU]; zI < negmat->rowptr[iU + 1]; zI++) {
if (rmarker[negmat->rowind[zI]] != -3) {
rcands[ncands].val = negmat->rowind[zI];
rcands[ncands].key = 0.0;
ncands++;
}
rmarker[negmat->rowind[zI]] = -1;
}
//printf("ncands: %5d,", ncands);


/* shuffle prior to sorting */
for (iR=0; iR<ncands; iR++) {
i = gk_irandInRange(ncands);
gk_SWAP(rcands[iR], rcands[i], cand);
}
for (iR=0; iR<ncands; iR++) {
i = gk_irandInRange(ncands);
gk_SWAP(rcands[iR], rcands[i], cand);
}

gk_fkvsortd(ncands, rcands);
nrcmds = gk_min(nrcmds, params->nrcmds);
for (iR=0; iR<nrcmds; iR++) {
rids[iR] = rcands[iR].val;
rscores[iR] = rcands[iR].key;
}
//printf(" nrcmds: %5d,", nrcmds);
}

nvalid += (nrcmds != SLIM_ERROR ? 1 : 0);
is_tail_u = is_head_u = 0;
Expand Down Expand Up @@ -140,6 +213,7 @@ int main(int argc, char *argv[]) {
larhr += 1.0 / (1.0 + iR);
}
}
//printf(" hit: %d\n", nhits[2]);

// head hit rate in test data
hr[0] += (nhits[0] > 0 ? 1.0 * nhits[0] / ntrue[0] : 0.0);
Expand Down Expand Up @@ -171,7 +245,7 @@ int main(int argc, char *argv[]) {
"------------------------------------------------------------------\n");

/* clean up */
gk_free((void **)&rids, &rscores, &rmarker, &fmarker, LTERM);
gk_free((void **)&rids, &rscores, &rmarker, &fmarker, &rcands, LTERM);
SLIM_FreeModel((slim_t **)&model);
gk_csr_Free(&oldmat);
if (tstmat)
Expand Down
1 change: 1 addition & 0 deletions src/programs/struct.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ the University of Minnesota
typedef struct {
char *trnfile; /*!< the file of historical preferences */
char *tstfile; /*!< the file to validate the recommendations */
char *negfile; /*!< the file containing the negative test instances */
char *l12file; /*!< the file that contains the regularization values over
which to search */
char *mdlfile; /*!< the model file during prediction */
Expand Down

0 comments on commit e09cc70

Please sign in to comment.