From 522b03b388fa98562e04e905351598c653275454 Mon Sep 17 00:00:00 2001 From: drosofff Date: Thu, 7 Nov 2024 22:36:52 +0100 Subject: [PATCH 1/3] update gsc-scran_normalize tool --- tools/gsc_scran_normalize/.shed.yml | 1 + tools/gsc_scran_normalize/scran_normalize.xml | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/gsc_scran_normalize/.shed.yml b/tools/gsc_scran_normalize/.shed.yml index 73e992753..6cd0b1e8c 100644 --- a/tools/gsc_scran_normalize/.shed.yml +++ b/tools/gsc_scran_normalize/.shed.yml @@ -6,6 +6,7 @@ long_description: | Normalizes raw counts expression matrix using deconvolution size factors categories: - Transcriptomics + - Single Cell homepage_url: http://artbio.fr remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_scran_normalize toolshed: diff --git a/tools/gsc_scran_normalize/scran_normalize.xml b/tools/gsc_scran_normalize/scran_normalize.xml index bf8f5b6d6..c00f70b7e 100644 --- a/tools/gsc_scran_normalize/scran_normalize.xml +++ b/tools/gsc_scran_normalize/scran_normalize.xml @@ -1,5 +1,8 @@ - + Normalize raw counts expression values using deconvolution size factors + + galaxy_single_cell_suite + bioconductor-scran r-dynamictreecut From 743cf253c9576bf543c2e17d11d0980abb85d6a0 Mon Sep 17 00:00:00 2001 From: Christophe Antoniewski Date: Thu, 7 Nov 2024 22:39:03 +0100 Subject: [PATCH 2/3] reindent R code --- tools/gsc_scran_normalize/scran-normalize.R | 109 ++++++++++---------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/tools/gsc_scran_normalize/scran-normalize.R b/tools/gsc_scran_normalize/scran-normalize.R index e81e97c1d..c6c41bc0c 100644 --- a/tools/gsc_scran_normalize/scran-normalize.R +++ b/tools/gsc_scran_normalize/scran-normalize.R @@ -1,8 +1,9 @@ -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") warnings() @@ -13,63 +14,63 @@ library(dynamicTreeCut) # Arguments option_list <- list( - make_option( - c("-d", "--data"), - default = NA, - type = "character", - help = "Input file that contains count values to transform" - ), - make_option( - "--cluster", - default = FALSE, - action = "store_true", - type = "logical", - help = "Whether to calculate the size factor per cluster or on all cell" - ), - make_option( - c("-m", "--method"), - default = "hclust", - type = "character", - help = "The clustering method to use for grouping cells into cluster : hclust or igraph [default : '%default' ]" - ), - make_option( - "--size", - default = 100, - type = "integer", - help = "Minimal number of cells in each cluster : hclust or igraph [default : '%default' ]" - ), - make_option( - c("-o", "--out"), - default = "res.tab", - type = "character", - help = "Output name [default : '%default' ]" - ) + make_option( + c("-d", "--data"), + default = NA, + type = "character", + help = "Input file that contains count values to transform" + ), + make_option( + "--cluster", + default = FALSE, + action = "store_true", + type = "logical", + help = "Whether to calculate the size factor per cluster or on all cell" + ), + make_option( + c("-m", "--method"), + default = "hclust", + type = "character", + help = "The clustering method to use for grouping cells into cluster : hclust or igraph [default : '%default' ]" + ), + make_option( + "--size", + default = 100, + type = "integer", + help = "Minimal number of cells in each cluster : hclust or igraph [default : '%default' ]" + ), + make_option( + c("-o", "--out"), + default = "res.tab", + type = "character", + help = "Output name [default : '%default' ]" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) data <- read.table( - opt$data, - check.names = FALSE, - header = TRUE, - row.names = 1, - sep = "\t" + opt$data, + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t" ) ## Import data as a SingleCellExperiment object sce <- SingleCellExperiment(list(counts = as.matrix(data))) if (opt$cluster) { - clusters <- quickCluster(sce, min.size = opt$size, method = opt$method) + clusters <- quickCluster(sce, min.size = opt$size, method = opt$method) - ## Compute sum factors - sce <- computeSumFactors(sce, cluster = clusters) + ## Compute sum factors + sce <- computeSumFactors(sce, cluster = clusters) } else { - - ## Compute sum factors - sce <- computeSumFactors(sce) + ## Compute sum factors + sce <- computeSumFactors(sce) } sce <- logNormCounts(sce) @@ -78,10 +79,10 @@ logcounts <- data.frame(genes = rownames(sce), round(logcounts(sce), digits = 5) write.table( - logcounts, - opt$out, - col.names = TRUE, - row.names = FALSE, - quote = FALSE, - sep = "\t" + logcounts, + opt$out, + col.names = TRUE, + row.names = FALSE, + quote = FALSE, + sep = "\t" ) From 0d1ea42abc7883634e146f0067b696c8a0df6ae6 Mon Sep 17 00:00:00 2001 From: drosofff Date: Thu, 7 Nov 2024 22:50:12 +0100 Subject: [PATCH 3/3] Update scran_normalize.xml --- tools/gsc_scran_normalize/scran_normalize.xml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/gsc_scran_normalize/scran_normalize.xml b/tools/gsc_scran_normalize/scran_normalize.xml index c00f70b7e..5930aa6a3 100644 --- a/tools/gsc_scran_normalize/scran_normalize.xml +++ b/tools/gsc_scran_normalize/scran_normalize.xml @@ -74,8 +74,10 @@ expression across the majority of genes represents some technical bias that shou Cell-specific biases are normalized using the computeSumFactors method, which implements the deconvolution strategy for scaling normalization (A. T. Lun, Bach, and Marioni 2016). It creates a reference : - - if no clustering step : the average count of all transcriptomes - - if you choose to cluster your cells : the average count of each cluster. + +- if no clustering step : the average count of all transcriptomes +- if you choose to cluster your cells : the average count of each cluster. + Then it pools cells and then sum their expression profiles. The size factor is described as the median ration between the count sums and the average across all genes. Finally it constructs a linear distribution (deconvolution method) of size factors by taking multiple pools of cells. @@ -83,9 +85,8 @@ of size factors by taking multiple pools of cells. You can apply this method on cell cluster instead of your all set of cells by using quickCluster. It defines cluster using distances based on Spearman correlation on counts between cells, there is two available methods : - - *hclust* : hierarchical clustering on the distance matrix and dynamic tree cut. - - *igraph* : constructs a Shared Nearest Neighbor graph (SNN) on the distance matrix and identifies highly connected communities. - +- *hclust* : hierarchical clustering on the distance matrix and dynamic tree cut. +- *igraph* : constructs a Shared Nearest Neighbor graph (SNN) on the distance matrix and identifies highly connected communities. Note: First header row must NOT start with a '#' comment character