pachterlab · warrenmcg · Mar 13, 2017 · Mar 13, 2017 · Mar 13, 2017 · Mar 13, 2017
diff --git a/R/bootstrap.R b/R/bootstrap.R
@@ -145,7 +145,7 @@ aggregate_bootstrap <- function(kal, mapping, split_by = "gene_id",
 
   if ( any(!complete.cases(mapping)) ) {
     warning("Found some NAs in mapping. Removing them.")
-    mapping <- mapping[complete.cases(mapping),]
+    mapping <- mapping[complete.cases(mapping), ]
   }
 
   m_bs <- melt_bootstrap(kal, column)
@@ -275,8 +275,8 @@ sample_bootstrap <- function(obj, n_samples = 100L) {
   # matrix sample
   for (s in 1:n_samples) {
     for (idx in 1:nrow(which_samp)) {
-      b <- which_samp[idx,s]
-      sample_mat[[s]][,idx] <- obj$kal[[idx]]$bootstrap[[b]]$est_counts
+      b <- which_samp[idx, s]
+      sample_mat[[s]][, idx] <- obj$kal[[idx]]$bootstrap[[b]]$est_counts
     }
   }
 
@@ -318,7 +318,7 @@ dcast_bootstrap.kallisto <- function(obj, units, nsamples = NULL) {
   mat <- matrix(NA_real_, nrow = n_features, ncol = length(which_bs))
 
   for (j in seq_along(which_bs)) {
-    mat[ ,j] <- obj[[ "bootstrap" ]][[which_bs[j]]][[ units ]]
+    mat[, j] <- obj[[ "bootstrap" ]][[which_bs[j]]][[ units ]]
   }
   rownames(mat) <- obj[["bootstrap"]][[1]][["target_id"]]
 

diff --git a/R/gene_analysis.R b/R/gene_analysis.R
@@ -1,8 +1,8 @@
 propagate_transcript_filter <- function(filter_df, target_mapping,
   grouping_column) {
 
-  filtered_target_mapping <- dplyr::inner_join(as.data.table(filter_df),
-    as.data.table(target_mapping), by = 'target_id')
+  filtered_target_mapping <- dplyr::inner_join(as.data.table(filter_df), # nolint
+    as.data.table(target_mapping), by = 'target_id') # nolint
 
   filtered_target_mapping <- dplyr::select_(filtered_target_mapping,
     grouping_column)

diff --git a/R/measurement_error.R b/R/measurement_error.R
@@ -113,7 +113,7 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
   msg('computing variance of betas')
   beta_covars <- lapply(1:nrow(l_smooth),
     function(i) {
-      row <- l_smooth[i,]
+      row <- l_smooth[i, ]
       with(row,
           covar_beta(smooth_sigma_sq_pmax + sigma_q_sq, X, A)
         )
@@ -267,7 +267,7 @@ me_model_by_row <- function(obj, design, bs_summary) {
 
   models <- lapply(1:nrow(bs_summary$obs_counts),
     function(i) {
-      me_model(design, bs_summary$obs_counts[i,], bs_summary$sigma_q_sq[i])
+      me_model(design, bs_summary$obs_counts[i, ], bs_summary$sigma_q_sq[i])
     })
   names(models) <- rownames(bs_summary$obs_counts)
 
@@ -303,7 +303,7 @@ me_heteroscedastic_by_row <- function(obj, design, samp_bs_summary, obs_counts)
 
   models <- lapply(1:nrow(bs_summary$obs_counts),
     function(i) {
-      res <- me_white_model(design, obs_counts[i,], sigma_q_sq[i,], A)
+      res <- me_white_model(design, obs_counts[i, ], sigma_q_sq[i, ], A)
       res$df$target_id <- rownames(obs_counts)[i]
       res
     })
@@ -429,7 +429,8 @@ reads_per_base_transform <- function(reads_table, scale_factor_input,
   as_df(reads_table)
 }
 
-gene_summary <- function(obj, which_column, transform = identity, norm_by_length = TRUE) {
+gene_summary <- function(obj, which_column, transform = identity,
+                         norm_by_length = TRUE, num_cores=2) {
   # stopifnot(is(obj, 'sleuth'))
   msg(paste0('aggregating by column: ', which_column))
   obj_mod <- obj
@@ -454,16 +455,21 @@ gene_summary <- function(obj, which_column, transform = identity, norm_by_length
   obs_counts <- obs_to_matrix(obj_mod, "scaled_reads_per_base")
   obs_counts <- transform(obs_counts)
 
-  obj_mod$kal <- parallel::mclapply(seq_along(obj_mod$kal),
+  # NEW CODE: Switched mclapply to be on second lapply
+  # This means that it's applied on the order of one bootstrap
+  # This creates a much smaller memory footprint
+  msg("starting mclapply process now")
+  obj_mod$kal <- lapply(seq_along(obj_mod$kal),
     function(i) {
       k <- obj_mod$kal[[i]]
       current_sample <- obj_mod$sample_to_covariates$sample[i]
       msg(paste('aggregating across sample: ', current_sample))
-      k$bootstrap <- lapply(k$bootstrap, function(b) {
+      k$bootstrap <- parallel::mclapply(seq_along(k$bootstrap), function(j) {
+        b <- k$bootstrap[[j]]
         b <- dplyr::mutate(b, sample = current_sample)
         reads_per_base_transform(b, scale_factor, which_column,
           obj$target_mapping, norm_by_length)
-      })
+      }, mc.cores = num_cores)
 
       k
     })

diff --git a/R/misc.R b/R/misc.R
@@ -25,7 +25,7 @@ get_quantile <- function(data, which_col, lwr, upr, ignore_zeroes = TRUE) {
   data$iqr <- valid
 
   if (ignore_zeroes) {
-    valid <- data[,which_col] > 0
+    valid <- data[, which_col] > 0
     if (sum(valid) == 0) {
       # in this case, everything in this bin is zero, so we just return the
       # entire bin
@@ -36,7 +36,7 @@ get_quantile <- function(data, which_col, lwr, upr, ignore_zeroes = TRUE) {
     data$iqr[!valid] <- FALSE
   }
 
-  data$iqr[valid] <- data[valid,which_col] %>%
+  data$iqr[valid] <- data[valid, which_col] %>%
     ecdf(.)(.) %>%
     ifelse(lwr <= . & . <= upr)
 
@@ -49,7 +49,7 @@ sliding_window_grouping <- function(data, x_col, y_col,
 
   data <- as.data.frame(data)
 
-  data <- mutate(data,x_ecdf = ecdf(data[,x_col])(data[,x_col]))
+  data <- mutate(data, x_ecdf = ecdf(data[, x_col])(data[, x_col]))
   data <- mutate(data, x_group = cut(x_ecdf, n_bins))
   data <- group_by(data, x_group)
 
@@ -63,7 +63,7 @@ sliding_window_grouping <- function(data, x_col, y_col,
 shrink_df <- function(data, shrink_formula, filter_var) {
   data <- as.data.frame(data)
   s_formula <- substitute(shrink_formula)
-  fit <- eval(loess(s_formula, data[data[,filter_var],]))
+  fit <- eval(loess(s_formula, data[data[, filter_var], ]))
   data.frame(data, shrink = predict(fit, data))
 }
 
@@ -119,17 +119,17 @@ apply_all_pairs <- function(mat, fun) {
 
   all_pairs <- utils::combn(ids, 2)
   for (i in 1:ncol(all_pairs)) {
-    j <- all_pairs[1,i]
-    k <- all_pairs[2,i]
+    j <- all_pairs[1, i]
+    k <- all_pairs[2, i]
 
-    cur <- fun(mat[,j], mat[,k])
+    cur <- fun(mat[, j], mat[, k])
 
-    res[j,k] <- cur
-    res[k,j] <- cur
+    res[j, k] <- cur
+    res[k, j] <- cur
   }
 
   for (i in 1:length(ids)) {
-    res[i,i] <- fun(mat[,i], mat[,i])
+    res[i, i] <- fun(mat[, i], mat[, i])
   }
 
 

diff --git a/R/model.R b/R/model.R
@@ -54,7 +54,7 @@ models.sleuth <- function(obj, verbose = TRUE) {
   # TODO: output a new in between models for readability
   if (verbose) {
     for (x in names(obj$fits)) {
-      cat('[ ', x,' ]\n')
+      cat('[ ', x, ' ]\n')
       models(obj$fits[[x]])
     }
   }

diff --git a/R/plots.R b/R/plots.R
@@ -174,7 +174,7 @@ plot_loadings <- function(obj,
   #given a sample
   if (!is.null(sample)) {
     toggle <- TRUE
-    loadings <- pca_calc$x[sample,]
+    loadings <- pca_calc$x[sample, ]
     if (pca_loading_abs) {
       loadings <- abs(loadings)
       loadings <- sort(loadings, decreasing = TRUE)
@@ -185,7 +185,7 @@ plot_loadings <- function(obj,
 
   #given a PC, which samples contribute the most?
   if (!toggle) {
-    loadings <- pca_calc$x[,pc_input]
+    loadings <- pca_calc$x[, pc_input]
     if (pca_loading_abs) {
       loadings <- abs(loadings)
       loadings <- sort(loadings, decreasing = TRUE)
@@ -272,14 +272,14 @@ plot_pc_variance <- function(obj,
   }
   pc_df <- data.frame(PC_count = 1:colsize, var = var_explained) #order here matters
 
-  if(!is.null(PC_relative)) {
+  if (!is.null(PC_relative)) {
     pc_df <- data.frame(PC_count = 1:length(eigenvalues), var = var_explained2)
-    pc_df <- pc_df[PC_relative:nrow(pc_df),]
+    pc_df <- pc_df[PC_relative:nrow(pc_df), ]
 
     if (!is.null(pca_number) && (PC_relative + pca_number <= length(eigenvalues))) {
-      pc_df <- pc_df[1:pca_number,]
+      pc_df <- pc_df[1:pca_number, ]
     } else if (PC_relative + 5 >= length(eigenvalues)) {
-      pc_df <- pc_df[1:nrow(pc_df),]
+      pc_df <- pc_df[1:nrow(pc_df), ]
     }
   }
 
@@ -874,15 +874,15 @@ plot_transcript_heatmap <- function(obj,
   units = 'tpm',
   trans = 'log',
   offset = 1) {
-  if(!all(transcripts %in% obj$obs_norm$target_id)) {
+  if (!all(transcripts %in% obj$obs_norm$target_id)) {
     stop("Couldn't find the following transcripts: ",
       paste(transcripts[!(transcripts %in% obj$obs_norm$target_id)], collapse = ", "),
       "\n\tIt is highly likely that some of them were filtered out.")
   }
 
-  tabd_df <- obj$obs_norm[obj$obs_norm$target_id %in% transcripts,]
+  tabd_df <- obj$obs_norm[obj$obs_norm$target_id %in% transcripts, ]
 
-  if(units == 'tpm') {
+  if (units == 'tpm') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, tpm)
     tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'tpm')
   } else if (units == 'est_counts') {
@@ -948,16 +948,16 @@ ggPlotExpression <- function(exMat, clustRows = TRUE, clustCols = TRUE,
      legend.direction = 'vertical',
      legend.position = 'top',
      legend.background = element_rect(fill = "gray95", colour = "black", size = 0.5, linetype = 1),
-     axis.title=element_blank())
+     axis.title = element_blank())
     if (rowNames)
-        p <- p + theme(axis.text.x=element_text(angle = 90, size=14))
+        p <- p + theme(axis.text.x = element_text(angle = 90, size = 14))
     else
-        p <- p + theme(axis.text.x=element_text(size=0))
+        p <- p + theme(axis.text.x = element_text(size = 0))
 
     if (colNames)
-        p <- p + theme(axis.text.y=element_text(size=14))
+        p <- p + theme(axis.text.y = element_text(size = 14))
     else
-        p <- p + theme(axis.text.y=element_text(size=0))
+        p <- p + theme(axis.text.y = element_text(size = 0))
 
     p
     #list(plot = p, rowOrder = rowOrder, colOrder = colOrder)

diff --git a/R/read_write.R b/R/read_write.R
@@ -273,7 +273,7 @@ gtf_gene_names <- function(gtf_attr) {
 
   for (i in 1:length(all_attr)) {
     j <- 1
-    while ((nchar(gene_id[i]) < 1 || nchar(trans_id[i]) < 1) &&
+    while ( (nchar(gene_id[i]) < 1 || nchar(trans_id[i]) < 1) &&
       j <= length(all_attr[[i]]) ) {
       if (all_attr[[i]][j] == "gene_id") {
         gene_id[i] <- all_attr[[i]][j + 1] %>%

diff --git a/R/shiny.R b/R/shiny.R
@@ -150,7 +150,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
             textInput('hm_transcripts', label = 'enter target ids: ', value = '')
               ),
           column(3,
-            selectInput('hm_units', label = 'units:', choices = c('est_counts','tpm'), selected = 'tpm')
+            selectInput('hm_units', label = 'units:', choices = c('est_counts', 'tpm'), selected = 'tpm')
               ),
           column(3,
             textInput('hm_trans', label = 'tranform: ', value = 'log')
@@ -161,7 +161,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
             actionButton('hm_go', 'view')
           )
         ),
-        tags$style(type='text/css', "#hm_go {margin-top: 25px}"),
+        tags$style(type = 'text/css', "#hm_go {margin-top: 25px}"),
         fluidRow(plotOutput('hm_plot')),
         fluidRow(uiOutput("download_hm_plt_button"))
     ),
@@ -1235,15 +1235,15 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
       result
     })
     output$table_type <- renderUI({
-      if(!is.null(obj$target_mapping)) {
+      if (!is.null(obj$target_mapping)) {
         selectInput('pop_genes', label = 'table type: ',
           choices = list('target table' = 1, 'gene table' = 2),
           selected = 1)
       }
     })
 
     output$group_by <- renderUI({
-      if(!is.null(input$pop_genes) && input$pop_genes == 2) {
+      if (!is.null(input$pop_genes) && input$pop_genes == 2) {
         selectInput('mappingGroup',
           label = 'group by: ',
           choices = names(obj$target_mapping)[2:length(names(obj$target_mapping))])
@@ -1258,7 +1258,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
         wb <- possible_tests[1]
       }
 
-      if(!is.null(input$mappingGroup) && (input$pop_genes == 2)) {
+      if (!is.null(input$mappingGroup) && (input$pop_genes == 2)) {
           mg <- input$mappingGroup
           test_table <- sleuth_gene_table(obj, wb, input$settings_test_type,
             input$which_model_de, mg)
@@ -1291,7 +1291,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
         possible_tests <- list_tests(obj, input$settings_test_type)
         which_test <- possible_tests[1]
       }
-      if(!is.null(input$mapping_group_lrt) && (input$pop_genes_lrt == 2)) {
+      if (!is.null(input$mapping_group_lrt) && (input$pop_genes_lrt == 2)) {
           mg <- input$mapping_group_lrt
           sleuth_gene_table(obj, which_test, input$settings_test_type,
             which_group = mg)
@@ -1301,14 +1301,14 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
     })
 
     output$table_type_lrt <- renderUI({
-      if(!is.null(obj$target_mapping)) {
+      if (!is.null(obj$target_mapping)) {
         selectInput('pop_genes_lrt', label = 'table type: ',
           choices = list('transcript table' = 1, 'gene table' = 2),
           selected = 1)
       }
     })
     output$group_by_lrt <- renderUI({
-      if(!is.null(input$pop_genes_lrt) && input$pop_genes_lrt == 2) {
+      if (!is.null(input$pop_genes_lrt) && input$pop_genes_lrt == 2) {
         selectInput('mapping_group_lrt',
           label = 'group by: ',
           choices = names(obj$target_mapping)[2:length(names(obj$target_mapping))])
@@ -1345,13 +1345,13 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
     ### Gene Viewer
     # the name of the gene supplied
     gv_var_text <- eventReactive(input$gv_go, {
-      if(!is.null(obj$target_mapping)) {
+      if (!is.null(obj$target_mapping)) {
           input$gv_var_input
       }
     })
 
     output$gv_gene_column <- renderUI({
-      if(!is.null(obj$target_mapping)) {
+      if (!is.null(obj$target_mapping)) {
         selectInput('gv_gene_colname',
           label = 'genes from: ',
           choices = names(obj$target_mapping)[2:length(names(obj$target_mapping))])
@@ -1388,7 +1388,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
     output$gv_var_plts <- renderUI({
       gv_plot_list <- lapply(1:input$gv_maxplots,
         function(i) {
-          gv_plotname <- paste("plot", i, sep="")
+          gv_plotname <- paste("plot", i, sep = "")
           plotOutput(gv_plotname)
         })
 
@@ -1398,9 +1398,9 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
       for (i in 1:15) {
         local({
           my_i <- i
-          gv_plotname <- paste("plot", my_i, sep="")
+          gv_plotname <- paste("plot", my_i, sep = "")
             output[[gv_plotname]] <- renderPlot({
-               if(!is.null(obj$target_mapping) && !is.na(gv_var_list()[my_i])) {
+               if (!is.null(obj$target_mapping) && !is.na(gv_var_list()[my_i])) {
                  plot_bootstrap(obj,
                    gv_var_list()[my_i],
                    units = input$gv_var_units,
@@ -1411,7 +1411,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
         }
 
     output$no_genes_message <- renderUI({
-      if(is.null(obj$target_mapping)) {
+      if (is.null(obj$target_mapping)) {
         HTML('&nbsp&nbsp&nbsp&nbspYou need to add genes to your sleuth object to use the gene viewer.<br>',
         '&nbsp&nbsp&nbsp&nbspTo add genes to your sleuth object, see the ',
         '<a href = "http://pachterlab.github.io/sleuth/starting.html">sleuth getting started guide</a>.')
@@ -1426,7 +1426,7 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
     default_hm_plot_height <- 400
 
     hm_plot_height <- function() {
-        if(length(hm_transcripts()) > 5) {
+        if (length(hm_transcripts()) > 5) {
             length(hm_transcripts()) * 60
         } else {
             default_hm_plot_height
@@ -1566,7 +1566,7 @@ enclosed_brush <- function(df, brush) {
   xbool <- brush$xmin <= df[[xvar]] & df[[xvar]] <= brush$xmax
   ybool <- brush$ymin <= df[[yvar]] & df[[yvar]] <= brush$ymax
 
-  df[xbool & ybool,]
+  df[xbool & ybool, ]
 }
 
 #' settings for sleuth_live