Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issues #80 and #81: reduce 'mclapply' memory footprint; add "num_cores" option #94

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ aggregate_bootstrap <- function(kal, mapping, split_by = "gene_id",

if ( any(!complete.cases(mapping)) ) {
warning("Found some NAs in mapping. Removing them.")
mapping <- mapping[complete.cases(mapping),]
mapping <- mapping[complete.cases(mapping), ]
}

m_bs <- melt_bootstrap(kal, column)
Expand Down Expand Up @@ -275,8 +275,8 @@ sample_bootstrap <- function(obj, n_samples = 100L) {
# matrix sample
for (s in 1:n_samples) {
for (idx in 1:nrow(which_samp)) {
b <- which_samp[idx,s]
sample_mat[[s]][,idx] <- obj$kal[[idx]]$bootstrap[[b]]$est_counts
b <- which_samp[idx, s]
sample_mat[[s]][, idx] <- obj$kal[[idx]]$bootstrap[[b]]$est_counts
}
}

Expand Down Expand Up @@ -318,7 +318,7 @@ dcast_bootstrap.kallisto <- function(obj, units, nsamples = NULL) {
mat <- matrix(NA_real_, nrow = n_features, ncol = length(which_bs))

for (j in seq_along(which_bs)) {
mat[ ,j] <- obj[[ "bootstrap" ]][[which_bs[j]]][[ units ]]
mat[, j] <- obj[[ "bootstrap" ]][[which_bs[j]]][[ units ]]
}
rownames(mat) <- obj[["bootstrap"]][[1]][["target_id"]]

Expand Down
4 changes: 2 additions & 2 deletions R/gene_analysis.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
propagate_transcript_filter <- function(filter_df, target_mapping,
grouping_column) {

filtered_target_mapping <- dplyr::inner_join(as.data.table(filter_df),
as.data.table(target_mapping), by = 'target_id')
filtered_target_mapping <- dplyr::inner_join(as.data.table(filter_df), # nolint
as.data.table(target_mapping), by = 'target_id') # nolint

filtered_target_mapping <- dplyr::select_(filtered_target_mapping,
grouping_column)
Expand Down
20 changes: 13 additions & 7 deletions R/measurement_error.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
msg('computing variance of betas')
beta_covars <- lapply(1:nrow(l_smooth),
function(i) {
row <- l_smooth[i,]
row <- l_smooth[i, ]
with(row,
covar_beta(smooth_sigma_sq_pmax + sigma_q_sq, X, A)
)
Expand Down Expand Up @@ -267,7 +267,7 @@ me_model_by_row <- function(obj, design, bs_summary) {

models <- lapply(1:nrow(bs_summary$obs_counts),
function(i) {
me_model(design, bs_summary$obs_counts[i,], bs_summary$sigma_q_sq[i])
me_model(design, bs_summary$obs_counts[i, ], bs_summary$sigma_q_sq[i])
})
names(models) <- rownames(bs_summary$obs_counts)

Expand Down Expand Up @@ -303,7 +303,7 @@ me_heteroscedastic_by_row <- function(obj, design, samp_bs_summary, obs_counts)

models <- lapply(1:nrow(bs_summary$obs_counts),
function(i) {
res <- me_white_model(design, obs_counts[i,], sigma_q_sq[i,], A)
res <- me_white_model(design, obs_counts[i, ], sigma_q_sq[i, ], A)
res$df$target_id <- rownames(obs_counts)[i]
res
})
Expand Down Expand Up @@ -429,7 +429,8 @@ reads_per_base_transform <- function(reads_table, scale_factor_input,
as_df(reads_table)
}

gene_summary <- function(obj, which_column, transform = identity, norm_by_length = TRUE) {
gene_summary <- function(obj, which_column, transform = identity,
norm_by_length = TRUE, num_cores=2) {
# stopifnot(is(obj, 'sleuth'))
msg(paste0('aggregating by column: ', which_column))
obj_mod <- obj
Expand All @@ -454,16 +455,21 @@ gene_summary <- function(obj, which_column, transform = identity, norm_by_length
obs_counts <- obs_to_matrix(obj_mod, "scaled_reads_per_base")
obs_counts <- transform(obs_counts)

obj_mod$kal <- parallel::mclapply(seq_along(obj_mod$kal),
# NEW CODE: Switched mclapply to be on second lapply
# This means that it's applied on the order of one bootstrap
# This creates a much smaller memory footprint
msg("starting mclapply process now")
obj_mod$kal <- lapply(seq_along(obj_mod$kal),
function(i) {
k <- obj_mod$kal[[i]]
current_sample <- obj_mod$sample_to_covariates$sample[i]
msg(paste('aggregating across sample: ', current_sample))
k$bootstrap <- lapply(k$bootstrap, function(b) {
k$bootstrap <- parallel::mclapply(seq_along(k$bootstrap), function(j) {
b <- k$bootstrap[[j]]
b <- dplyr::mutate(b, sample = current_sample)
reads_per_base_transform(b, scale_factor, which_column,
obj$target_mapping, norm_by_length)
})
}, mc.cores = num_cores)

k
})
Expand Down
20 changes: 10 additions & 10 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ get_quantile <- function(data, which_col, lwr, upr, ignore_zeroes = TRUE) {
data$iqr <- valid

if (ignore_zeroes) {
valid <- data[,which_col] > 0
valid <- data[, which_col] > 0
if (sum(valid) == 0) {
# in this case, everything in this bin is zero, so we just return the
# entire bin
Expand All @@ -36,7 +36,7 @@ get_quantile <- function(data, which_col, lwr, upr, ignore_zeroes = TRUE) {
data$iqr[!valid] <- FALSE
}

data$iqr[valid] <- data[valid,which_col] %>%
data$iqr[valid] <- data[valid, which_col] %>%
ecdf(.)(.) %>%
ifelse(lwr <= . & . <= upr)

Expand All @@ -49,7 +49,7 @@ sliding_window_grouping <- function(data, x_col, y_col,

data <- as.data.frame(data)

data <- mutate(data,x_ecdf = ecdf(data[,x_col])(data[,x_col]))
data <- mutate(data, x_ecdf = ecdf(data[, x_col])(data[, x_col]))
data <- mutate(data, x_group = cut(x_ecdf, n_bins))
data <- group_by(data, x_group)

Expand All @@ -63,7 +63,7 @@ sliding_window_grouping <- function(data, x_col, y_col,
shrink_df <- function(data, shrink_formula, filter_var) {
data <- as.data.frame(data)
s_formula <- substitute(shrink_formula)
fit <- eval(loess(s_formula, data[data[,filter_var],]))
fit <- eval(loess(s_formula, data[data[, filter_var], ]))
data.frame(data, shrink = predict(fit, data))
}

Expand Down Expand Up @@ -119,17 +119,17 @@ apply_all_pairs <- function(mat, fun) {

all_pairs <- utils::combn(ids, 2)
for (i in 1:ncol(all_pairs)) {
j <- all_pairs[1,i]
k <- all_pairs[2,i]
j <- all_pairs[1, i]
k <- all_pairs[2, i]

cur <- fun(mat[,j], mat[,k])
cur <- fun(mat[, j], mat[, k])

res[j,k] <- cur
res[k,j] <- cur
res[j, k] <- cur
res[k, j] <- cur
}

for (i in 1:length(ids)) {
res[i,i] <- fun(mat[,i], mat[,i])
res[i, i] <- fun(mat[, i], mat[, i])
}


Expand Down
2 changes: 1 addition & 1 deletion R/model.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ models.sleuth <- function(obj, verbose = TRUE) {
# TODO: output a new in between models for readability
if (verbose) {
for (x in names(obj$fits)) {
cat('[ ', x,' ]\n')
cat('[ ', x, ' ]\n')
models(obj$fits[[x]])
}
}
Expand Down
28 changes: 14 additions & 14 deletions R/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ plot_loadings <- function(obj,
#given a sample
if (!is.null(sample)) {
toggle <- TRUE
loadings <- pca_calc$x[sample,]
loadings <- pca_calc$x[sample, ]
if (pca_loading_abs) {
loadings <- abs(loadings)
loadings <- sort(loadings, decreasing = TRUE)
Expand All @@ -185,7 +185,7 @@ plot_loadings <- function(obj,

#given a PC, which samples contribute the most?
if (!toggle) {
loadings <- pca_calc$x[,pc_input]
loadings <- pca_calc$x[, pc_input]
if (pca_loading_abs) {
loadings <- abs(loadings)
loadings <- sort(loadings, decreasing = TRUE)
Expand Down Expand Up @@ -272,14 +272,14 @@ plot_pc_variance <- function(obj,
}
pc_df <- data.frame(PC_count = 1:colsize, var = var_explained) #order here matters

if(!is.null(PC_relative)) {
if (!is.null(PC_relative)) {
pc_df <- data.frame(PC_count = 1:length(eigenvalues), var = var_explained2)
pc_df <- pc_df[PC_relative:nrow(pc_df),]
pc_df <- pc_df[PC_relative:nrow(pc_df), ]

if (!is.null(pca_number) && (PC_relative + pca_number <= length(eigenvalues))) {
pc_df <- pc_df[1:pca_number,]
pc_df <- pc_df[1:pca_number, ]
} else if (PC_relative + 5 >= length(eigenvalues)) {
pc_df <- pc_df[1:nrow(pc_df),]
pc_df <- pc_df[1:nrow(pc_df), ]
}
}

Expand Down Expand Up @@ -874,15 +874,15 @@ plot_transcript_heatmap <- function(obj,
units = 'tpm',
trans = 'log',
offset = 1) {
if(!all(transcripts %in% obj$obs_norm$target_id)) {
if (!all(transcripts %in% obj$obs_norm$target_id)) {
stop("Couldn't find the following transcripts: ",
paste(transcripts[!(transcripts %in% obj$obs_norm$target_id)], collapse = ", "),
"\n\tIt is highly likely that some of them were filtered out.")
}

tabd_df <- obj$obs_norm[obj$obs_norm$target_id %in% transcripts,]
tabd_df <- obj$obs_norm[obj$obs_norm$target_id %in% transcripts, ]

if(units == 'tpm') {
if (units == 'tpm') {
tabd_df <- dplyr::select(tabd_df, target_id, sample, tpm)
tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'tpm')
} else if (units == 'est_counts') {
Expand Down Expand Up @@ -948,16 +948,16 @@ ggPlotExpression <- function(exMat, clustRows = TRUE, clustCols = TRUE,
legend.direction = 'vertical',
legend.position = 'top',
legend.background = element_rect(fill = "gray95", colour = "black", size = 0.5, linetype = 1),
axis.title=element_blank())
axis.title = element_blank())
if (rowNames)
p <- p + theme(axis.text.x=element_text(angle = 90, size=14))
p <- p + theme(axis.text.x = element_text(angle = 90, size = 14))
else
p <- p + theme(axis.text.x=element_text(size=0))
p <- p + theme(axis.text.x = element_text(size = 0))

if (colNames)
p <- p + theme(axis.text.y=element_text(size=14))
p <- p + theme(axis.text.y = element_text(size = 14))
else
p <- p + theme(axis.text.y=element_text(size=0))
p <- p + theme(axis.text.y = element_text(size = 0))

p
#list(plot = p, rowOrder = rowOrder, colOrder = colOrder)
Expand Down
2 changes: 1 addition & 1 deletion R/read_write.R
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ gtf_gene_names <- function(gtf_attr) {

for (i in 1:length(all_attr)) {
j <- 1
while ((nchar(gene_id[i]) < 1 || nchar(trans_id[i]) < 1) &&
while ( (nchar(gene_id[i]) < 1 || nchar(trans_id[i]) < 1) &&
j <= length(all_attr[[i]]) ) {
if (all_attr[[i]][j] == "gene_id") {
gene_id[i] <- all_attr[[i]][j + 1] %>%
Expand Down
32 changes: 16 additions & 16 deletions R/shiny.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
textInput('hm_transcripts', label = 'enter target ids: ', value = '')
),
column(3,
selectInput('hm_units', label = 'units:', choices = c('est_counts','tpm'), selected = 'tpm')
selectInput('hm_units', label = 'units:', choices = c('est_counts', 'tpm'), selected = 'tpm')
),
column(3,
textInput('hm_trans', label = 'tranform: ', value = 'log')
Expand All @@ -161,7 +161,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
actionButton('hm_go', 'view')
)
),
tags$style(type='text/css', "#hm_go {margin-top: 25px}"),
tags$style(type = 'text/css', "#hm_go {margin-top: 25px}"),
fluidRow(plotOutput('hm_plot')),
fluidRow(uiOutput("download_hm_plt_button"))
),
Expand Down Expand Up @@ -1235,15 +1235,15 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
result
})
output$table_type <- renderUI({
if(!is.null(obj$target_mapping)) {
if (!is.null(obj$target_mapping)) {
selectInput('pop_genes', label = 'table type: ',
choices = list('target table' = 1, 'gene table' = 2),
selected = 1)
}
})

output$group_by <- renderUI({
if(!is.null(input$pop_genes) && input$pop_genes == 2) {
if (!is.null(input$pop_genes) && input$pop_genes == 2) {
selectInput('mappingGroup',
label = 'group by: ',
choices = names(obj$target_mapping)[2:length(names(obj$target_mapping))])
Expand All @@ -1258,7 +1258,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
wb <- possible_tests[1]
}

if(!is.null(input$mappingGroup) && (input$pop_genes == 2)) {
if (!is.null(input$mappingGroup) && (input$pop_genes == 2)) {
mg <- input$mappingGroup
test_table <- sleuth_gene_table(obj, wb, input$settings_test_type,
input$which_model_de, mg)
Expand Down Expand Up @@ -1291,7 +1291,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
possible_tests <- list_tests(obj, input$settings_test_type)
which_test <- possible_tests[1]
}
if(!is.null(input$mapping_group_lrt) && (input$pop_genes_lrt == 2)) {
if (!is.null(input$mapping_group_lrt) && (input$pop_genes_lrt == 2)) {
mg <- input$mapping_group_lrt
sleuth_gene_table(obj, which_test, input$settings_test_type,
which_group = mg)
Expand All @@ -1301,14 +1301,14 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
})

output$table_type_lrt <- renderUI({
if(!is.null(obj$target_mapping)) {
if (!is.null(obj$target_mapping)) {
selectInput('pop_genes_lrt', label = 'table type: ',
choices = list('transcript table' = 1, 'gene table' = 2),
selected = 1)
}
})
output$group_by_lrt <- renderUI({
if(!is.null(input$pop_genes_lrt) && input$pop_genes_lrt == 2) {
if (!is.null(input$pop_genes_lrt) && input$pop_genes_lrt == 2) {
selectInput('mapping_group_lrt',
label = 'group by: ',
choices = names(obj$target_mapping)[2:length(names(obj$target_mapping))])
Expand Down Expand Up @@ -1345,13 +1345,13 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
### Gene Viewer
# the name of the gene supplied
gv_var_text <- eventReactive(input$gv_go, {
if(!is.null(obj$target_mapping)) {
if (!is.null(obj$target_mapping)) {
input$gv_var_input
}
})

output$gv_gene_column <- renderUI({
if(!is.null(obj$target_mapping)) {
if (!is.null(obj$target_mapping)) {
selectInput('gv_gene_colname',
label = 'genes from: ',
choices = names(obj$target_mapping)[2:length(names(obj$target_mapping))])
Expand Down Expand Up @@ -1388,7 +1388,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
output$gv_var_plts <- renderUI({
gv_plot_list <- lapply(1:input$gv_maxplots,
function(i) {
gv_plotname <- paste("plot", i, sep="")
gv_plotname <- paste("plot", i, sep = "")
plotOutput(gv_plotname)
})

Expand All @@ -1398,9 +1398,9 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
for (i in 1:15) {
local({
my_i <- i
gv_plotname <- paste("plot", my_i, sep="")
gv_plotname <- paste("plot", my_i, sep = "")
output[[gv_plotname]] <- renderPlot({
if(!is.null(obj$target_mapping) && !is.na(gv_var_list()[my_i])) {
if (!is.null(obj$target_mapping) && !is.na(gv_var_list()[my_i])) {
plot_bootstrap(obj,
gv_var_list()[my_i],
units = input$gv_var_units,
Expand All @@ -1411,7 +1411,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
}

output$no_genes_message <- renderUI({
if(is.null(obj$target_mapping)) {
if (is.null(obj$target_mapping)) {
HTML('&nbsp&nbsp&nbsp&nbspYou need to add genes to your sleuth object to use the gene viewer.<br>',
'&nbsp&nbsp&nbsp&nbspTo add genes to your sleuth object, see the ',
'<a href = "http://pachterlab.github.io/sleuth/starting.html">sleuth getting started guide</a>.')
Expand All @@ -1426,7 +1426,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
default_hm_plot_height <- 400

hm_plot_height <- function() {
if(length(hm_transcripts()) > 5) {
if (length(hm_transcripts()) > 5) {
length(hm_transcripts()) * 60
} else {
default_hm_plot_height
Expand Down Expand Up @@ -1566,7 +1566,7 @@ enclosed_brush <- function(df, brush) {
xbool <- brush$xmin <= df[[xvar]] & df[[xvar]] <= brush$xmax
ybool <- brush$ymin <= df[[yvar]] & df[[yvar]] <= brush$ymax

df[xbool & ybool,]
df[xbool & ybool, ]
}

#' settings for sleuth_live
Expand Down
Loading