Skip to content

Commit

Permalink
Merge pull request #348 from mixOmicsTeam/issue-304-streamline-tune
Browse files Browse the repository at this point in the history
Issue 304 streamline tune
  • Loading branch information
evaham1 authored Nov 28, 2024
2 parents b61605b + dcda5b5 commit ea9bc05
Show file tree
Hide file tree
Showing 57 changed files with 4,366 additions and 1,508 deletions.
16 changes: 10 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@ S3method(circosPlot,block.plsda)
S3method(circosPlot,block.spls)
S3method(circosPlot,block.splsda)
S3method(image,tune.rcc)
S3method(perf,assess.mint.plsda)
S3method(perf,assess.mint.splsda)
S3method(perf,assess.mixo_pls)
S3method(perf,assess.mixo_plsda)
S3method(perf,assess.mixo_spls)
S3method(perf,assess.mixo_splsda)
S3method(perf,mint.pls)
S3method(perf,mint.plsda)
S3method(perf,mint.spls)
Expand All @@ -31,6 +25,12 @@ S3method(perf,mixo_plsda)
S3method(perf,mixo_spls)
S3method(perf,mixo_splsda)
S3method(perf,sgccda)
S3method(perf.assess,mint.plsda)
S3method(perf.assess,mint.splsda)
S3method(perf.assess,mixo_pls)
S3method(perf.assess,mixo_plsda)
S3method(perf.assess,mixo_spls)
S3method(perf.assess,mixo_splsda)
S3method(perf.assess,sgccda)
S3method(plot,pca)
S3method(plot,perf.mint.plsda.mthd)
Expand Down Expand Up @@ -169,9 +169,13 @@ export(spls)
export(splsda)
export(study_split)
export(tune)
export(tune.block.plsda)
export(tune.block.splsda)
export(tune.mint.plsda)
export(tune.mint.splsda)
export(tune.pca)
export(tune.pls)
export(tune.plsda)
export(tune.rcc)
export(tune.spca)
export(tune.spls)
Expand Down
2 changes: 1 addition & 1 deletion R/image.tune.rcc.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' Y <- nutrimouse$gene
#'
#' ## this can take some seconds
#' cv.score <- tune.rcc(X, Y, validation = "Mfold", plot = FALSE)
#' cv.score <- tune.rcc(X, Y, validation = "Mfold")
#' plot(cv.score)
#'
#' # image(cv.score) # same result as plot()
Expand Down
29 changes: 19 additions & 10 deletions R/perf.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
#' Not recommended during exploratory analysis. Note if RNGseed is set in 'BPPARAM', this will be overwritten by 'seed'.
#' Note 'seed' is not required or used in perf.mint.plsda as this method uses loo cross-validation
#' @param ... not used
#'
#' @return For PLS and sPLS models, \code{perf} produces a list with the
#' following components for every repeat:
#' \item{MSEP}{Mean Square Error Prediction for each \eqn{Y} variable, only
Expand Down Expand Up @@ -141,19 +142,25 @@
#' \item{cor.tpred, cor.upred}{Correlation between the
#' predicted and actual components for X (t) and Y (u)}
#' \item{RSS.tpred, RSS.upred}{Residual Sum of Squares between the
#' predicted and actual components for X (t) and Y (u)}
#' \item{error.rate}{ For
#' PLS-DA and sPLS-DA models, \code{perf} produces a matrix of classification
#' predicted and actual components for X (t) and Y (u)}
#'
#'
#'
#' For PLS-DA and sPLS-DA models, \code{perf} produces a matrix of classification
#' error rate estimation. The dimensions correspond to the components in the
#' model and to the prediction method used, respectively. Note that error rates
#' reported in any component include the performance of the model in earlier
#' components for the specified \code{keepX} parameters (e.g. error rate
#' reported for component 3 for \code{keepX = 20} already includes the fitted
#' model on components 1 and 2 for \code{keepX = 20}). For more advanced usage
#' of the \code{perf} function, see \url{www.mixomics.org/methods/spls-da/} and
#' consider using the \code{predict} function.}
#' \item{auc}{Averaged AUC values
#' over the \code{nrepeat}}
#' model on components 1 and 2 for \code{keepX = 20}).
#' \item{error.rate}{Prediction error rate for each dist and measure}
#' \item{auc}{AUC values per component averaged over the \code{nrepeat}}
#' \item{auc.all}{AUC values per component per repeat}
#' \item{predict}{A list of length ncomp that contains the predicted values of each sample for each class}
#' \item{features}{a list of features selected across the folds ($stable.X) for the keepX parameters from the input object.}
#' \item{choice.ncomp}{Optimal number of components for the model for each prediction distance using one-sided t-tests that test
#' for a significant difference in the mean error rate (gain in prediction) when components are added to the model.}
#' \item{class}{A list which gives the predicted class of each sample for each dist and each of the ncomp components}
#'
#' For mint.splsda models, \code{perf} produces the following outputs:
#' \item{study.specific.error}{A list that gives BER, overall error rate and
Expand All @@ -166,7 +173,7 @@
#' \item{auc}{AUC values} \item{auc.study}{AUC values for each study in mint
#' models}
#'
#' For sgccda models, \code{perf} produces the following outputs:
#' For sgccda models (i.e. block (s)PLS-DA models), \code{perf} produces the following outputs:
#' \item{error.rate}{Prediction error rate for each block of \code{object$X}
#' and each \code{dist}} \item{error.rate.per.class}{Prediction error rate for
#' each block of \code{object$X}, each \code{dist} and each class}
Expand Down Expand Up @@ -197,12 +204,14 @@
#' \item{WeightedVote.error.rate}{if more than one block, returns the error
#' rate of the \code{WeightedVote} output} \item{weights}{Returns the weights
#' of each block used for the weighted predictions, for each nrepeat and each
#' fold} \item{choice.ncomp}{For supervised models; returns the optimal number
#' fold}
#' \item{choice.ncomp}{For supervised models; returns the optimal number
#' of components for the model for each prediction distance using one-sided
#' t-tests that test for a significant difference in the mean error rate (gain
#' in prediction) when components are added to the model. See more details in
#' Rohart et al 2017 Suppl. For more than one block, an optimal ncomp is
#' returned for each prediction framework.}
#'
#' @author Ignacio González, Amrit Singh, Kim-Anh Lê Cao, Benoit Gautier,
#' Florian Rohart, Al J Abadi
#' @seealso \code{\link{predict}}, \code{\link{nipals}},
Expand Down
102 changes: 43 additions & 59 deletions R/perf.assess.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,6 @@
#' that for PLS and sPLS objects, perf is performed on the pre-processed data
#' after log ratio transform and multilevel analysis, if any.
#'
#' Sparse methods. The sPLS, sPLS-DA and sgccda functions are run on several
#' and different subsets of data (the cross-folds) and will certainly lead to
#' different subset of selected features. Those are summarised in the output
#' \code{features$stable} (see output Value below) to assess how often the
#' variables are selected across all folds. Note that for PLS-DA and sPLS-DA
#' objects, perf is performed on the original data, i.e. before the
#' pre-processing step of the log ratio transform and multilevel analysis, if
#' any. In addition for these methods, the classification error rate is
#' averaged across all folds.
#'
#' The mint.sPLS-DA function estimates errors based on Leave-one-group-out
#' cross validation (where each levels of object$study is left out (and
#' predicted) once) and provides study-specific outputs
Expand All @@ -63,8 +53,7 @@
#' threshold based on distances (see \code{predict}) that optimally determine
#' class membership of the samples tested. As such AUC and ROC are not needed
#' to estimate the performance of the model. We provide those outputs as
#' complementary performance measures. See more details in our mixOmics
#' article.
#' complementary performance measures.
#'
#' Prediction distances. See details from \code{?predict}, and also our
#' supplemental material in the mixOmics article.
Expand All @@ -87,20 +76,20 @@
#' More details about the PLS modes in \code{?pls}.
#'
#' @param object object of class inherited from \code{"pls"}, \code{"plsda"},
#' \code{"spls"}, \code{"splsda"} or \code{"mint.splsda"}. The function will
#' \code{"spls"}, \code{"splsda"}, \code{"sgccda"} or \code{"mint.splsda"}. The function will
#' retrieve some key parameters stored in that object.
#' @param validation a character string. What kind of (internal) validation to use,
#' matching one of \code{"Mfold"} or \code{"loo"} (see below). Default is
#' \code{"Mfold"}. For MINT methods only \code{"loo"} will be used.
#' @param folds numeric. Number of folds in the Mfold cross-validation. See Details.
#' @param nrepeat numeric. Number of times the Cross-Validation process is repeated.
#' This is an important argument to ensure the estimation of the performance to
#' be as accurate as possible. Default is 1.
#' @param dist only applies to an object inheriting from \code{"plsda"},
#' \code{"splsda"} or \code{"mint.splsda"} to evaluate the classification
#' performance of the model. Should be a subset of \code{"max.dist"},
#' \code{"centroids.dist"}, \code{"mahalanobis.dist"}. Default is \code{"all"}.
#' See \code{\link{predict}}.
#' @param validation character. What kind of (internal) validation to use,
#' matching one of \code{"Mfold"} or \code{"loo"} (see below). Default is
#' \code{"Mfold"}.
#' @param folds the folds in the Mfold cross-validation. See Details.
#' @param nrepeat Number of times the Cross-Validation process is repeated.
#' This is an important argument to ensure the estimation of the performance to
#' be as accurate as possible.
#' @param auc if \code{TRUE} calculate the Area Under the Curve (AUC)
#' performance of the model.
#' @param progressBar by default set to \code{FALSE} to output the progress bar
Expand All @@ -113,57 +102,61 @@
#' Not recommended during exploratory analysis. Note if RNGseed is set in 'BPPARAM', this will be overwritten by 'seed'.
#' Note 'seed' is not required or used in perf.mint.plsda as this method uses loo cross-validation
#' @param ... not used
#' @return For PLS and sPLS models, \code{perf} produces a list with the
#' following components for every repeat:

#' @return For PLS and sPLS models:
#' \item{MSEP}{Mean Square Error Prediction for each \eqn{Y} variable, only
#' applies to object inherited from \code{"pls"}, and \code{"spls"}. Only
#' available when in regression (s)PLS.}
#' \item{RMSEP}{Root Mean Square Error Prediction for each \eqn{Y} variable, only
#' applies to object inherited from \code{"pls"}, and \code{"spls"}. Only
#' available when in regression (s)PLS.}
#' \item{R2}{a matrix of \eqn{R^2} values of the \eqn{Y}-variables. Only applies to object
#' \item{R2}{a matrix of \eqn{R^2} values of the \eqn{Y}-variables for models
#' with \eqn{1, \ldots ,}\code{ncomp} components, only applies to object
#' inherited from \code{"pls"}, and \code{"spls"}. Only available when in
#' regression (s)PLS.}
#' \item{Q2}{if \eqn{Y} contains one variable, a vector of \eqn{Q^2} values
#' else a list with a matrix of \eqn{Q^2} values for each \eqn{Y}-variable.
#' Note that in the specific case of an sPLS model, it is better to have a look
#' at the Q2.total criterion, only applies to object inherited from
#' \code{"pls"}, and \code{"spls"}. Only available when in regression (s)PLS.}
#' \item{Q2.total}{a vector of \eqn{Q^2}-total values for model, only applies to object inherited from
#' \item{Q2.total}{a vector of \eqn{Q^2}-total values for models with \eqn{1,
#' \ldots ,}\code{ncomp} components, only applies to object inherited from
#' \code{"pls"}, and \code{"spls"}. Available in both (s)PLS modes.}
#' \item{RSS}{Residual Sum of Squares across all selected features.}
#' \item{RSS}{Residual Sum of Squares across all selected features}
#' \item{PRESS}{Predicted Residual Error Sum of Squares across all selected features}
#' \item{features}{a list of features selected across the
#' folds (\code{$stable.X} and \code{$stable.Y}) for the \code{keepX} and
#' \code{keepY} parameters from the input object. Note, this will be \code{NULL}
#' if using standard (non-sparse) PLS.}
#' \item{cor.tpred, cor.upred}{Correlation between the
#' predicted and actual components for X (t) and Y (u)}
#' \item{RSS.tpred, RSS.upred}{Residual Sum of Squares between the
#' predicted and actual components for X (t) and Y (u)}
#' \item{error.rate}{ For
#' PLS-DA and sPLS-DA models, \code{perf} produces a matrix of classification
#' error rate estimation using overall and BER error rates across different distance methods.
#' Although error rates are only reported for the number of components used in the final model,
#' Note that are calculated including the performance of the model in a smaller number of
#' components for the specified \code{keepX} parameters (e.g. error rate
#' reported for component 3 for \code{keepX = 20} already includes the fitted
#' model on components 1 and 2 for \code{keepX = 20}). For more advanced usage
#' of the \code{perf} function, see \url{www.mixomics.org/methods/spls-da/} and
#' consider using the \code{predict} function.}
#' \item{auc}{Averaged AUC values
#' over the \code{nrepeat}}
#'
#' #' For sgccda models, \code{perf} produces the following outputs:
#' predicted and actual components for X (t) and Y (u)}
#'
#'
#'
#' For PLS-DA and sPLS-DA models:
#' \item{error.rate}{Prediction error rate for each dist and measure}
#' \item{auc}{AUC value averaged over the \code{nrepeat}}
#' \item{auc.all}{AUC values per repeat}
#' \item{predict}{Predicted values of each sample for each class}
#' \item{class}{A list which gives the predicted class of each sample for each dist and each of the ncomp components}
#'
#' For mint.splsda models:
#' \item{study.specific.error}{A list that gives BER, overall error rate and
#' error rate per class, for each study}
#' \item{global.error}{A list that gives
#' BER, overall error rate and error rate per class for all samples}
#' \item{predict}{A list of length \code{ncomp} that produces the predicted
#' values of each sample for each class}
#' \item{class}{A list which gives the
#' predicted class of each sample for each \code{dist}.}
#' \item{auc}{AUC values} \item{auc.study}{AUC values for each study in mint models}
#'
#' For sgccda models (i.e. block (s)PLS-DA models):
#' \item{error.rate}{Prediction error rate for each block of \code{object$X}
#' and each \code{dist}}
#' \item{error.rate.per.class}{Prediction error rate for
#' each block of \code{object$X}, each \code{dist} and each class}
#' \item{predict}{Predicted values of each sample for each class and each block.}
#' \item{class}{Predicted class of each sample for each block, each \code{dist}, and each nrepeat}
#' \item{features}{a list of features selected across the folds (\code{$stable.X} and
#' \code{$stable.Y}) for the \code{keepX} and \code{keepY} parameters from the
#' input object.}
#' \item{predict}{Predicted values of each sample for each class and each block}
#' \item{class}{Predicted class of each sample for each
#' block, each \code{dist}, and each nrepeat}
#' \item{AveragedPredict.class}{if more than one block, returns
#' the average predicted class over the blocks (averaged of the \code{Predict}
#' output and prediction using the \code{max.dist} distance)}
Expand All @@ -187,15 +180,6 @@
#' rate of the \code{WeightedVote} output}
#' \item{weights}{Returns the weights of each block used for the weighted predictions, for each nrepeat and each
#' fold}
#'
#' For mint.splsda models, \code{perf} produces the following outputs:
#' \item{study.specific.error}{A list that gives BER, overall error rate and
#' error rate per class, for each study}
#' \item{global.error}{A list that gives BER, overall error rate and error rate per class for all samples}
#' \item{predict}{A list of the predicted values of each sample for each class}
#' \item{class}{A list which gives the predicted class of each sample for each \code{dist}. Directly obtained from the \code{predict} output.}
#' \item{auc}{AUC values} \item{auc.study}{AUC values for each study}
#'

#' @author Ignacio González, Amrit Singh, Kim-Anh Lê Cao, Benoit Gautier,
#' Florian Rohart, Al J Abadi
Expand Down Expand Up @@ -247,7 +231,7 @@
#' 4:e1845.
#' @keywords regression multivariate
#' @export
#' @example ./examples/perf-examples.R
#' @example ./examples/perf.assess-examples.R
## ------------------------------- Generic -------------------------------- ##
perf.assess <- function(object, ...) {
    # S3 generic: dispatch on the class of `object`; any further arguments
    # are forwarded untouched to the selected perf.assess.<class> method.
    UseMethod("perf.assess")
}
1 change: 0 additions & 1 deletion R/perf.assess.diablo.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
# folds - number of folds if validation = "Mfold"
# ----------------------------------------------------------------------------------------------------------
#' @rdname perf.assess
#' @importFrom utils relist
#' @method perf.assess sgccda
#' @export
perf.assess.sgccda <-
Expand Down
39 changes: 38 additions & 1 deletion R/perf.assess.mint.plsda.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,41 @@
## -------------------------- perf.mint(s)plsda --------------------------- ##
#############################################################################################################
# Authors:
# Amrit Singh, University of British Columbia, Vancouver.
# Florian Rohart, The University of Queensland, The University of Queensland Diamantina Institute, Translational Research Institute, Brisbane, QLD
# Kim-Anh Le Cao, The University of Queensland, The University of Queensland Diamantina Institute, Translational Research Institute, Brisbane, QLD
#
# created: 01-04-2015
# last modified: 27-05-2016
#
# Copyright (C) 2015
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#############################################################################################################


# ----------------------------------------------------------------------------------------------------------
# perf.assess.mint.plsda - Function to evaluate the performance of the fitted PLS (cross-validation)
# inputs: object - object obtained from running mint.plsda
# dist - to evaluate the classification performance
# validation - type of validation
# folds - number of folds if validation = "Mfold"
# ----------------------------------------------------------------------------------------------------------
## -------------------------- perf.mint(s)plsda --------------------------- ##

#' @rdname perf.assess
#' @method perf.assess mint.plsda
#' @export
perf.assess.mint.plsda <- function (object,
dist = c("all", "max.dist", "centroids.dist", "mahalanobis.dist"),
Expand Down Expand Up @@ -259,5 +295,6 @@ perf.assess.mint.plsda <- function (object,
}

#' @rdname perf.assess
#' @method perf.assess mint.splsda
#' @export
perf.assess.mint.splsda <- perf.assess.mint.plsda # mint.splsda objects are handled by the very same function as mint.plsda
9 changes: 8 additions & 1 deletion R/perf.assess.pls.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

## -------------------------------- (s)PLS -------------------------------- ##
#' @rdname perf.assess
#' @method perf.assess mixo_pls
#' @export
perf.assess.mixo_pls <- function(object,
validation = c("Mfold", "loo"),
Expand All @@ -41,6 +42,7 @@ perf.assess.mixo_pls <- function(object,
seed = NULL,
...)
{

# checking args and initialize params
ncomp = object$ncomp
spls.model <- is(object, 'mixo_spls')
Expand All @@ -65,7 +67,7 @@ perf.assess.mixo_pls <- function(object,
measures <- as.data.frame(measures)

# Add this line to remove rows with NAs that correspond to components < ncomp
measures <- dplyr::filter(measures, comp == ncomp)
measures <- dplyr::filter(measures, .data$comp == ncomp)

## R CMD check stuff
measure <- feature <- comp <- block <- stability <- value <- NULL
Expand Down Expand Up @@ -132,6 +134,7 @@ perf.assess.mixo_pls <- function(object,
}

#' @rdname perf.assess
#' @method perf.assess mixo_spls
#' @export
perf.assess.mixo_spls <- perf.assess.mixo_pls # mixo_spls objects are handled by the very same function as mixo_pls

Expand All @@ -145,6 +148,10 @@ perf.assess.mixo_spls <- perf.assess.mixo_pls

{
# changes to bypass the loop for the Q2

## R CMD check stuff
measure <- feature <- comp <- block <- stability <- value <- NULL
lower <- upper <- keepX <- keepY <- NULL

## -------- checks -------- ##
if (object$mode == 'invariant')
Expand Down
Loading

0 comments on commit ea9bc05

Please sign in to comment.