diff --git a/tools/ez_histograms/ez_histograms.R b/tools/ez_histograms/ez_histograms.R index 2533170d2..3dc4e61b8 100644 --- a/tools/ez_histograms/ez_histograms.R +++ b/tools/ez_histograms/ez_histograms.R @@ -5,121 +5,125 @@ library(scales) library(psych) library(optparse) -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") warnings() option_list <- list( - make_option( - c("-f", "--file"), - default = NA, - type = "character", - help = "Input file that contains count values to transform" - ), - make_option( - c("-d", "--profile"), - default = "count", - type = "character", - help = "Whether y-axis shows absolute counts or density: 'count' or 'density' [default : '%default' ]" - ), - make_option( - "--xscale", - default = "cartesian", - type = "character", - help = "Whether x-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" - ), - make_option( - "--yscale", - default = "cartesian", - type = "character", - help = "Whether y-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" - ), - make_option( - c("-p", "--pdf"), - default = "histograms.pdf", - type = "character", - help = "Output pdf file name [default : '%default' ]" - ), - make_option( - c("-s", "--summary"), - default = "summary.tsv", - type = "character", - help = "statistics summary file name [default : '%default' ]" - ) + make_option( + c("-f", "--file"), + default = NA, + type = "character", + help = "Input file that contains count values to transform" + ), + make_option( + c("-d", "--profile"), + default = "count", + type = "character", + help = "Whether y-axis shows absolute counts or density: 'count' or 'density' [default : '%default' ]" + ), + make_option( + "--xscale", + default = "cartesian", + type = "character", + help = "Whether x-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" + ), + make_option( + "--yscale", + default = "cartesian", + type = "character", + help = "Whether y-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" + ), + make_option( + c("-p", "--pdf"), + default = "histograms.pdf", + type = "character", + help = "Output pdf file name [default : '%default' ]" + ), + make_option( + c("-s", "--summary"), + default = "summary.tsv", + type = "character", + help = "statistics summary file name [default : '%default' ]" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) plot_histograms <- function(mdata, profile = "count", xscale = "cartesian", yscale = "cartesian", bins = 30) { - if (profile == "count") { - # count histogram - p <- ggplot(mdata, aes(x = value, fill = variable, color = variable, y = after_stat(count)), show.legend = FALSE) + - geom_histogram(bins = bins) + theme(legend.position = "none") - if (xscale == "cartesian") { - if (yscale == "log2") { - p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) - } else { - if (yscale == "log10") { - p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + if (profile == "count") { + # count histogram + p <- ggplot(mdata, aes(x = value, fill = variable, color = variable, y = after_stat(count)), show.legend = FALSE) + + geom_histogram(bins = bins) + + theme(legend.position = "none") + if (xscale == "cartesian") { + if (yscale == "log2") { + p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) + } else { + if (yscale == "log10") { + p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + } + } } - } - } - if (xscale == "log2") { - p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) - if (yscale == "log2") { - p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) - } else { - if (yscale == "log10") { - p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + if (xscale == "log2") { + p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) + if (yscale == "log2") { + p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) + } else { + if (yscale == "log10") { + p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + } + } } - } - } - if (xscale == "log10") { - p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) - if (yscale == "log2") { - p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) - } else { - if (yscale == "log10") { - p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + if (xscale == "log10") { + p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + if (yscale == "log2") { + p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) + } else { + if (yscale == "log10") { + p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + } + } } - } } - } - if (profile == "density") { - # density histogram - p <- ggplot(mdata, aes(x = value, fill = variable, color = variable)) + - geom_density() + theme(legend.position = "none") - if (xscale == "log2") { - p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) - } - if (xscale == "log10") { - p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + if (profile == "density") { + # density histogram + p <- ggplot(mdata, aes(x = value, fill = variable, color = variable)) + + geom_density() + + theme(legend.position = "none") + if (xscale == "log2") { + p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) + } + if (xscale == "log10") { + p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) + } } - } - return(p) + return(p) } test_header <- function(file) { - data <- read.delim(file = file, header = FALSE, row.names = 1, nrows = 2) - if (all(is.na(as.numeric(data[1, seq_len(ncol(data))])))) { - return(TRUE) - } else { - return(FALSE) - } + data <- read.delim(file = file, header = FALSE, row.names = 1, nrows = 2) + if (all(is.na(as.numeric(data[1, seq_len(ncol(data))])))) { + return(TRUE) + } else { + return(FALSE) + } } ##### prepare input data data <- read.delim(file = opt$file, header = test_header(opt$file)) -data <- data %>% select(where(is.numeric)) # remove non numeric columns +data <- data %>% select(where(is.numeric)) # remove non numeric columns mdata <- melt(data) ##### main @@ -132,23 +136,23 @@ p <- plot_histograms(mdata, profile = opt$profile, xscale = opt$xscale, bins = b # determine optimal width for the graph width <- length(data) width <- case_when( - width == 1 ~ 14 / 3, - width == 2 ~ (2 / 3) * 14, - TRUE ~ 14 + width == 1 ~ 14 / 3, + width == 2 ~ (2 / 3) * 14, + TRUE ~ 14 ) # determine optimal height for the graph height <- length(data) height <- case_when( - height <= 3 ~ 3, - height <= 6 ~ 6, - TRUE ~ (floor(height / 3) + 1) * 3 + height <= 3 ~ 3, + height <= 6 ~ 6, + TRUE ~ (floor(height / 3) + 1) * 3 ) # determine optimal number of col for the graph ncol <- length(data) ncol <- case_when( - ncol == 1 ~ 1, - ncol == 2 ~ 2, - TRUE ~ 3 + ncol == 1 ~ 1, + ncol == 2 ~ 2, + TRUE ~ 3 ) pdf(opt$pdf, width = width, height = height) print(p + facet_wrap(~variable, ncol = ncol, scales = "free")) diff --git a/tools/ez_histograms/ez_histograms.xml b/tools/ez_histograms/ez_histograms.xml index e388afa3d..e45c574f1 100755 --- a/tools/ez_histograms/ez_histograms.xml +++ b/tools/ez_histograms/ez_histograms.xml @@ -1,9 +1,14 @@ 3.4.4 - 1 + 2 23.0 + + + ez_histograms + + r-ggplot2 r-reshape2