Skip to content

Commit

Permalink
Merge branch 'devel' into alr
Browse files Browse the repository at this point in the history
  • Loading branch information
warrenmcg authored Nov 17, 2017
2 parents 5d09823 + 725197c commit 1e84a3a
Show file tree
Hide file tree
Showing 9 changed files with 903 additions and 259 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
Package: sleuth
Title: Tools for investigating RNA-Seq
Version: 0.29.0
Authors@R: c(person("Harold", "Pimentel", , "[email protected]", role = c("aut", "cre")))
Authors@R: c(
person("Harold", "Pimentel", , "[email protected]", role = c("aut", "cre")),
person("Warren", "McGee", , "[email protected]", role = "aut"))
Description: Investigate transcript abundance from "kallisto" and differential
expression analysis from RNA-Seq data.
License: GPL-3
Expand All @@ -21,6 +23,7 @@ Imports:
parallel,
lazyeval,
matrixStats,
pheatmap,
shiny
Suggests:
MASS,
Expand Down
9 changes: 5 additions & 4 deletions R/bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,12 @@ get_bootstrap_summary <- function(obj, target_id, units = 'est_counts') {
stop(paste0("'", units, "' is invalid for 'units'. please see documentation"))
}

if (is.null(obj$bs_quants)) {
if (units == 'est_counts') {
stop("bootstrap summary missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE'")
if (is.null(obj$bs_quants) | is.null(obj$bs_quants[[1]][[units]])) {
if (units %in% c('est_counts', 'scaled_reads_per_base')) {
stop("bootstrap summary appears to be missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE'")
} else {
stop("bootstrap summary missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE' and 'read_bootstrap_tpm = TRUE'")
stop("bootstrap summary appears to be missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE' ",
"and 'read_bootstrap_tpm = TRUE'")
}
}

Expand Down
10 changes: 9 additions & 1 deletion R/matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,18 @@ sleuth_to_matrix <- function(obj, which_df, which_units) {
if ( !(which_df %in% c("obs_norm", "obs_raw")) ) {
stop("Invalid object")
}
if ( !(which_units %in% c("tpm", "est_counts")) ) {
if ( !(which_units %in% c("tpm", "est_counts", "scaled_reads_per_base")) ) {
stop("Invalid units")
}

which_units <- check_quant_mode(obj, which_units)

if (obj$gene_mode && which_df == "obs_raw") {
warning("This object is in gene mode, and the raw values are ",
"transcripts. Using 'obs_norm' instead.")
which_df <- "obs_norm"
}

data <- as.data.frame(obj[[which_df]])

res <- list()
Expand Down
44 changes: 42 additions & 2 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,48 @@ sliding_window_grouping <- function(data, x_col, y_col,
shrink_df <- function(data, shrink_formula, filter_var) {
data <- as.data.frame(data)
s_formula <- substitute(shrink_formula)
fit <- eval(loess(s_formula, data[data[, filter_var], ]))
data.frame(data, shrink = predict(fit, data))
fit <- eval(loess(s_formula, data[data[, filter_var], ], model = T))
shrink_data <- data.frame(data, shrink = predict(fit, data))

# include a column to report any target IDs that failed shrinkage estimation
# 'failed_ise' is short for 'failed initial shrinkage estimation'
shrink_data$failed_ise <- is.na(shrink_data$shrink)
if (any(shrink_data$failed_ise)) {
na_rows <- which(shrink_data$failed_ise)
num_na <- length(na_rows)
which_na <- shrink_data$target_id[na_rows]
max_mean_obs <- summary(fit$model$mean_obs)["Max."]
min_mean_obs <- summary(fit$model$mean_obs)["Min."]
# check for mean_obs values that fall outside of the window that
# was used to fit the LOESS curve. If any values do fall outside,
# repeat the LOESS fit using "surface = direct" to get an exact surface fit,
# which can then be used for extrapolation of these values.
if (any(shrink_data$mean_obs > max_mean_obs |
shrink_data$mean_obs < min_mean_obs)) {
message(num_na, " NA values were found during variance shrinkage estimation",
" due to mean observation values outside of the range used for the LOESS fit.\n",
"The LOESS fit will be repeated using exact computation of the fitted ",
"surface to extrapolate the missing values.\n",
"These are the target ids with NA values: ", paste(which_na, collapse = ", "))
direct_fit <- eval(loess(s_formula, data[data[, filter_var], ], model = T,
control = loess.control(surface = "direct")))
na_data <- data[na_rows, ]
na_shrink_data <- data.frame(na_data, shrink = predict(direct_fit, na_data))
shrink_data[na_rows, "shrink"] <- na_shrink_data$shrink
}
# repeat the check for NA values; if there were no values outside of the mean_obs window used
# (first check above) or if there were new NA values from the direct fit, then
# the cause of the NA values is unknown. The user will be warned that this will cause
# all downstream testing to fail for these target IDs.
if (any(is.na(shrink_data$shrink))) {
stop(num_na, " NA values were found during variance shrinkage estimation",
" due to an unknown cause. These values will result in NAs ",
"with any downstream testing for these target ids.\n",
"Please submit a bug report at the Sleuth Github website.\n",
"These are the target ids with NA values: ", paste(which_na, collapse = ", "))
}
}
shrink_data
}

msg <- function(..., nl = TRUE) {
Expand Down
46 changes: 35 additions & 11 deletions R/model.R
Original file line number Diff line number Diff line change
Expand Up @@ -271,30 +271,37 @@ tests.sleuth <- function(obj, lrt = TRUE, wt = TRUE) {

}

#' Extract Wald test results from a sleuth object
#' Extract Wald or Likelihood Ratio test results from a sleuth object
#'
#' This function extracts Wald test results from a sleuth object.
#' This function extracts Wald or Likelihood Ratio test results from a sleuth object.
#'
#' @param obj a \code{sleuth} object
#' @param test a character string denoting the test to extract. Possible tests can be found by using \code{models(obj)}.
#' @param which_model a character string denoting the model. If extracting a wald test, use the model name. If extracting a likelihood ratio test, use 'lrt'.
#' @param test_type 'wt' for Wald test or 'lrt' for Likelihood Ratio test.
#' @param which_model a character string denoting the model. If extracting a wald test, use the model name.
#' Not used if extracting a likelihood ratio test.
#' @param rename_cols if \code{TRUE} will rename some columns to be shorter and
#' consistent with vignette
#' consistent with vignette
#' @param show_all if \code{TRUE} will show all transcripts (not only the ones
#' passing filters). The transcripts that do not pass filters will have
#' \code{NA} values in most columns.
#' @return a \code{data.frame} with the following columns:
#' @return target_id: transcript name, e.g. "ENSXX#####" (dependent on the transcriptome used in kallisto)
#' @return ...: if there is a target mapping data frame, all of the annotations columns are added from \code{obj$target_mapping} before the other columns.
#' @return pval: p-value of the chosen model
#' @return qval: false discovery rate adjusted p-value, using Benjamini-Hochberg (see \code{\link{p.adjust}})
#' @return b: 'beta' value (effect size). Technically a biased estimator of the fold change
#' @return se_b: standard error of the beta
#' @return test_stat (LRT only): Chi-squared test statistic (likelihood ratio test). Only seen with Likelihood Ratio test results.
#' @return rss (LRT only): the residual sum of squares under the "null model". Only seen with Likelihood Ratio test results.
#' @return degrees_free (LRT only): the degrees of freedom (equal to difference between the two models). Only seen with Likelihood Ratio test results.
#' @return b (Wald only): 'beta' value (effect size). Technically a biased estimator of the fold change. Only seen with Wald test results.
#' @return se_b (Wald only): standard error of the beta. Only seen with Wald test results.
#' @return mean_obs: mean of natural log counts of observations
#' @return var_obs: variance of observation
#' @return tech_var: technical variance of observation from the bootstraps
#' @return tech_var: technical variance of observation from the bootstraps (named 'sigma_q_sq' if rename_cols is \code{FALSE})
#' @return sigma_sq: raw estimator of the variance once the technical variance has been removed
#' @return smooth_sigma_sq: smooth regression fit for the shrinkage estimation
#' @return final_simga_sq: max(sigma_sq, smooth_sigma_sq); used for covariance estimation of beta
#' (named 'smooth_sigma_sq_pmax' if rename_cols is \code{FALSE})
#' @seealso \code{\link{sleuth_wt}} and \code{\link{sleuth_lrt}} to compute tests, \code{\link{models}} to
#' view which models, \code{\link{tests}} to view which tests were performed (and can be extracted)
#' @examples
Expand Down Expand Up @@ -327,6 +334,20 @@ sleuth_results <- function(obj, test, test_type = 'wt',
res <- NULL
if (test_type == 'lrt') {
res <- get_test(obj, test, type = 'lrt')
res <- dplyr::select(res,
target_id,
pval,
qval,
test_stat,
rss,
degrees_free,
mean_obs,
var_obs,
sigma_q_sq,
sigma_sq,
smooth_sigma_sq,
smooth_sigma_sq_pmax
)
} else {
res <- get_test(obj, test, 'wt', which_model)
res <- dplyr::select(res,
Expand Down Expand Up @@ -362,8 +383,8 @@ sleuth_results <- function(obj, test, test_type = 'wt',

if ( !is.null(obj$target_mapping) && !obj$gene_mode) {
res <- dplyr::left_join(
data.table::as.data.table(res),
data.table::as.data.table(obj$target_mapping),
data.table::as.data.table(res),
by = 'target_id')
}

Expand All @@ -377,9 +398,12 @@ sleuth_results <- function(obj, test, test_type = 'wt',
# this line uses dplyr's "left_join" syntax for "by"
# to match "target_id" from the "res" table,
# and the gene_column from the target_mapping table.
res <- dplyr::left_join(data.table::as.data.table(res),
data.table::as.data.table(target_mapping),
by = c("target_id" = obj$gene_column))
by_col <- "target_id"
names(by_col) <- obj$gene_column
res <- dplyr::left_join(data.table::as.data.table(target_mapping),
data.table::as.data.table(res),
by = by_col)
names(res)[1] <- "target_id"
}

res <- as_df(res)
Expand Down
Loading

0 comments on commit 1e84a3a

Please sign in to comment.