From c55e91bc6716a71ee3a3f30da3f4c915f465c1d4 Mon Sep 17 00:00:00 2001 From: Christophe Antoniewski Date: Sun, 11 Feb 2024 01:53:03 +0100 Subject: [PATCH] Update mutational_patterns (#667) * Update mutational_patterns.xml * styleR R script * Update mutational_patterns.R * Bump wrapper version to 1 --- scripts/styleR/styler.R | 0 .../mutational_patterns/mutational_patterns.R | 545 ++++++++++-------- .../mutational_patterns.xml | 2 +- 3 files changed, 298 insertions(+), 249 deletions(-) mode change 100644 => 100755 scripts/styleR/styler.R diff --git a/scripts/styleR/styler.R b/scripts/styleR/styler.R old mode 100644 new mode 100755 diff --git a/tools/mutational_patterns/mutational_patterns.R b/tools/mutational_patterns/mutational_patterns.R index 73dd9cc5b..74073c2ea 100644 --- a/tools/mutational_patterns/mutational_patterns.R +++ b/tools/mutational_patterns/mutational_patterns.R @@ -1,10 +1,11 @@ # load packages that are provided in the conda env -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } - ) +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") warnings() library(optparse) @@ -16,124 +17,125 @@ library(RColorBrewer) # Arguments option_list <- list( - make_option( - "--inputs", - default = NA, - type = "character", - help = "json formatted dictionary of datasets and their paths" - ), - make_option( - "--genome", - default = NA, - type = "character", - help = "genome name in the BSgenome bioconductor package" - ), - make_option( - "--levels", - default = NA, - type = "character", - help = "path to the tab separated file describing the levels in function of datasets" - ), - make_option( - "--cosmic_version", - default = NA, - type = "character", - help = "Version of the Cosmic Signature set to be used to express mutational profiles" - ), - make_option( - "--own_signatures", - default = NA, - type = "character", - help = "Path to the user-defined signature matrix" - ), - make_option( - "--signum", - default = 2, - type = "integer", - help = "selects the N most significant signatures in samples to express mutational profiles" - ), - make_option( - "--nrun", - default = 2, - type = "integer", - help = "Number of runs to fit signatures" - ), - make_option( - "--rank", - default = 2, - type = "integer", - help = "number of ranks to display for parameter optimization" - ), make_option( - "--newsignum", - default = 2, - type = "integer", - help = "Number of new signatures to be captured" - ), + "--inputs", + default = NA, + type = "character", + help = "json formatted dictionary of datasets and their paths" + ), make_option( - "--cosmic_id_threshold", - default = 0.85, - type = "double", - help = "minimu cosine similarity to rename a new signature according to cosmic v3.2" - ), - make_option( - "--output_spectrum", - default = NA, - type = "character", - help = "path to output dataset" - ), - make_option( - "--output_denovo", - default = NA, - type = "character", - help = "path to output dataset" - ), - make_option( - "--sigmatrix", - default = NA, - type = "character", - help = "path to signature matrix" - ), - make_option( - "--output_sigpattern", - default = NA, - type = "character", - help = "path to output dataset" - ), - make_option( - "--display_signatures", - default = NA, - type = "character", - help = "display input signature profiles if set to yes" - ), - make_option( - "--sig_contrib_matrix", - default = NA, - type = "character", - help = "path to signature contribution matrix" - ), - make_option( - "--colors", - default = NA, - type = "character", - help = "color palette to display signatures" - ), - make_option( - c("-r", "--rdata"), - type = "character", - default = NULL, - help = "Path to RData output file" - ), - make_option( - c("-t", "--tooldir"), - type = "character", - default = NULL, - help = "Path to tool directory, where tool data are stored") - + "--genome", + default = NA, + type = "character", + help = "genome name in the BSgenome bioconductor package" + ), + make_option( + "--levels", + default = NA, + type = "character", + help = "path to the tab separated file describing the levels in function of datasets" + ), + make_option( + "--cosmic_version", + default = NA, + type = "character", + help = "Version of the Cosmic Signature set to be used to express mutational profiles" + ), + make_option( + "--own_signatures", + default = NA, + type = "character", + help = "Path to the user-defined signature matrix" + ), + make_option( + "--signum", + default = 2, + type = "integer", + help = "selects the N most significant signatures in samples to express mutational profiles" + ), + make_option( + "--nrun", + default = 2, + type = "integer", + help = "Number of runs to fit signatures" + ), + make_option( + "--rank", + default = 2, + type = "integer", + help = "number of ranks to display for parameter optimization" + ), + make_option( + "--newsignum", + default = 2, + type = "integer", + help = "Number of new signatures to be captured" + ), + make_option( + "--cosmic_id_threshold", + default = 0.85, + type = "double", + help = "minimu cosine similarity to rename a new signature according to cosmic v3.2" + ), + make_option( + "--output_spectrum", + default = NA, + type = "character", + help = "path to output dataset" + ), + make_option( + "--output_denovo", + default = NA, + type = "character", + help = "path to output dataset" + ), + make_option( + "--sigmatrix", + default = NA, + type = "character", + help = "path to signature matrix" + ), + make_option( + "--output_sigpattern", + default = NA, + type = "character", + help = "path to output dataset" + ), + make_option( + "--display_signatures", + default = NA, + type = "character", + help = "display input signature profiles if set to yes" + ), + make_option( + "--sig_contrib_matrix", + default = NA, + type = "character", + help = "path to signature contribution matrix" + ), + make_option( + "--colors", + default = NA, + type = "character", + help = "color palette to display signatures" + ), + make_option( + c("-r", "--rdata"), + type = "character", + default = NULL, + help = "Path to RData output file" + ), + make_option( + c("-t", "--tooldir"), + type = "character", + default = NULL, + help = "Path to tool directory, where tool data are stored" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) ################ Manage input data #################### json_dict <- opt$inputs @@ -152,12 +154,16 @@ library(ggplot2) # Load the VCF files into a GRangesList: vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) library(plyr) -if (!is.na(opt$levels)[1]) { # manage levels if there are - levels_table <- read.delim(opt$levels, header = FALSE, - col.names = c("element_identifier", "level")) - } else { - levels_table <- data.frame(element_identifier = vcf_table$element_identifier, - level = rep("nolabels", length(vcf_table$element_identifier))) +if (!is.na(opt$levels)[1]) { # manage levels if there are + levels_table <- read.delim(opt$levels, + header = FALSE, + col.names = c("element_identifier", "level") + ) +} else { + levels_table <- data.frame( + element_identifier = vcf_table$element_identifier, + level = rep("nolabels", length(vcf_table$element_identifier)) + ) } metadata_table <- join(vcf_table, levels_table, by = "element_identifier") tissue <- as.vector(metadata_table$level) @@ -181,7 +187,7 @@ if (!is.na(opt$output_spectrum)[1]) { p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1)) - } + } plot_96_profile(mut_mat, condensed = TRUE) dev.off() } @@ -190,13 +196,12 @@ if (!is.na(opt$output_spectrum)[1]) { # opt$rank cannot be higher than the number of samples and # likewise, opt$signum cannot be higher thant the number of samples if (!is.na(opt$output_denovo)[1]) { - if (opt$rank > length(element_identifiers)) { opt$rank <- length(element_identifiers) - } + } if (opt$signum > length(element_identifiers)) { opt$signum <- length(element_identifiers) - } + } pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix # Use the NMF package to generate an estimate rank plot library("NMF") @@ -221,9 +226,11 @@ if (!is.na(opt$output_denovo)[1]) { # write matrix of deno signatures for user new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) - newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), - "[", new_sig_matrix$substitution, "]", - gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) + newcol <- paste0( + gsub("\\..", "", new_sig_matrix$context, perl = TRUE), + "[", new_sig_matrix$substitution, "]", + gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE) + ) new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") # Visualize the contribution of the signatures in a barplot @@ -241,14 +248,14 @@ if (!is.na(opt$output_denovo)[1]) { pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE) # Plot signature contribution as a heatmap without sample clustering: pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) - #Combine the plots into one figure: + # Combine the plots into one figure: grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) # Compare the reconstructed mutational profile with the original mutational profile: pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95) grid.arrange(pch3) dev.off() - } +} ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### @@ -257,9 +264,10 @@ if (!is.na(opt$output_sigpattern)[1]) { if (!is.na(opt$cosmic_version)) { cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & - cosmic_urls$cosmic_version == opt$cosmic_version] + cosmic_urls$cosmic_version == opt$cosmic_version] sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), - sep = "\t", header = TRUE) + sep = "\t", header = TRUE + ) tag <- paste(gsub("BSgenome.Hsapiens.UCSC.", "", opt$genome), "COSMIC", opt$cosmic_version, sep = " ") } # Prepare user-defined signatures @@ -274,38 +282,44 @@ if (!is.na(opt$output_sigpattern)[1]) { sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ] # arrange signature colors if (opt$colors == "intense") { - signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", - "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", - "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", - "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", - "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", - "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", - "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", - "#6e0028", "#717653", "#6c1000", "#693600") + signature_colors <- c( + "#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", + "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", + "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", + "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", + "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", + "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", + "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", + "#6e0028", "#717653", "#6c1000", "#693600" + ) } else { - signature_colors <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02", - "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", - "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4", - "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE", - "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5", - "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", - "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500", - "#6e0028", "#717653", "#6c1000", "#693600") + signature_colors <- c( + "#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02", + "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", + "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4", + "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE", + "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5", + "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", + "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500", + "#6e0028", "#717653", "#6c1000", "#693600" + ) } - names(signature_colors) <- c("SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d", - "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14", - "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23", - "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34", - "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45", - "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56", - "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90", - "SBS91", "SBS92", "SBS93", "SBS94") + names(signature_colors) <- c( + "SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d", + "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14", + "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23", + "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34", + "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45", + "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56", + "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90", + "SBS91", "SBS92", "SBS93", "SBS94" + ) # if signature names provided are not compliant with cosmic nomenclature, # we attribute these names to the active signature_colors vector, adjusted to the length # of the signature names. - if (! all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names + if (!all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names signature_colors <- signature_colors[seq_along(sbs_signatures)] names(signature_colors) <- colnames(sbs_signatures) } @@ -320,15 +334,24 @@ if (!is.na(opt$output_sigpattern)[1]) { if (opt$display_signatures == "yes") { for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) { p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3) - grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ", - trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), - gp = gpar(fontsize = 12, font = 3))) + grid.arrange(p6, top = textGrob( + paste0( + tag, " profiles (", trunc((i + 1) / 20) + 1, " of ", + trunc(ncol(sbs_signatures) / 20) + 1, " pages)" + ), + gp = gpar(fontsize = 12, font = 3) + )) } p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))], - condensed = TRUE, ymax = 0.3) - grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ", - trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), - gp = gpar(fontsize = 12, font = 3))) + condensed = TRUE, ymax = 0.3 + ) + grid.arrange(p6, top = textGrob( + paste0( + tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ", + trunc(ncol(sbs_signatures) / 20) + 1, " pages)" + ), + gp = gpar(fontsize = 12, font = 3) + )) } @@ -339,72 +362,88 @@ if (!is.na(opt$output_sigpattern)[1]) { # Plot contribution barplots pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") - if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs + if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs pc3_data <- pc3$data pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + - geom_bar(stat = "identity", position = "stack") + - coord_flip() + - scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + - labs(x = "Samples", y = "Absolute contribution") + theme_bw() + - theme(panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - legend.position = "right", - text = element_text(size = 8), - axis.text.x = element_text(angle = 90, hjust = 1)) + geom_bar(stat = "identity", position = "stack") + + coord_flip() + + scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + + labs(x = "Samples", y = "Absolute contribution") + + theme_bw() + + theme( + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + legend.position = "right", + text = element_text(size = 8), + axis.text.x = element_text(angle = 90, hjust = 1) + ) pc4_data <- pc4$data pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + - geom_bar(stat = "identity", position = "fill") + - coord_flip() + - scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + - scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + - labs(x = "Samples", y = "Relative contribution") + theme_bw() + - theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", - text = element_text(size = 8), - axis.text.x = element_text(angle = 90, hjust = 1)) + geom_bar(stat = "identity", position = "fill") + + coord_flip() + + scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + + labs(x = "Samples", y = "Relative contribution") + + theme_bw() + + theme( + panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", + text = element_text(size = 8), + axis.text.x = element_text(angle = 90, hjust = 1) + ) } ##### # ggplot2 alternative - if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs + if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs pc3_data <- pc3$data pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + - geom_bar(stat = "identity", position = "stack") + - scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + - labs(x = "Samples", y = "Absolute contribution") + theme_bw() + - theme(panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - legend.position = "right", - text = element_text(size = 8), - axis.text.x = element_text(angle = 90, hjust = 1)) + - facet_grid(~level, scales = "free_x", space = "free") + geom_bar(stat = "identity", position = "stack") + + scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + + labs(x = "Samples", y = "Absolute contribution") + + theme_bw() + + theme( + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + legend.position = "right", + text = element_text(size = 8), + axis.text.x = element_text(angle = 90, hjust = 1) + ) + + facet_grid(~level, scales = "free_x", space = "free") pc4_data <- pc4$data pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + - geom_bar(stat = "identity", position = "fill") + - scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + - scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + - labs(x = "Samples", y = "Relative contribution") + theme_bw() + - theme(panel.grid.minor.x = element_blank(), - panel.grid.major.x = element_blank(), - legend.position = "right", - text = element_text(size = 8), - axis.text.x = element_text(angle = 90, hjust = 1)) + - facet_grid(~level, scales = "free_x", space = "free") + geom_bar(stat = "identity", position = "fill") + + scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + + labs(x = "Samples", y = "Relative contribution") + + theme_bw() + + theme( + panel.grid.minor.x = element_blank(), + panel.grid.major.x = element_blank(), + legend.position = "right", + text = element_text(size = 8), + axis.text.x = element_text(angle = 90, hjust = 1) + ) + + facet_grid(~level, scales = "free_x", space = "free") } # Combine the two plots: grid.arrange(pc3, pc4, - top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles", - gp = gpar(fontsize = 12, font = 3))) + top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles", + gp = gpar(fontsize = 12, font = 3) + ) + ) #### pie charts of comic signatures contributions in samples ### library(reshape2) library(dplyr) if (length(levels(factor(levels_table$level))) < 2) { fit_res_contrib <- as.data.frame(fit_res$contribution) - worklist <- cbind(signature = rownames(fit_res$contribution), - level = rep("nolabels", length(fit_res_contrib[, 1])), - fit_res_contrib, - sum = rowSums(fit_res_contrib)) + worklist <- cbind( + signature = rownames(fit_res$contribution), + level = rep("nolabels", length(fit_res_contrib[, 1])), + fit_res_contrib, + sum = rowSums(fit_res_contrib) + ) worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] worklist <- worklist[1:opt$signum, ] worklist <- worklist[, -length(worklist[1, ])] @@ -413,12 +452,12 @@ if (!is.na(opt$output_sigpattern)[1]) { } else { worklist <- list() for (i in levels(factor(levels_table$level))) { - worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] - sum <- rowSums(as.data.frame(worklist[[i]])) - worklist[[i]] <- cbind(worklist[[i]], sum) - worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] - worklist[[i]] <- worklist[[i]][1:opt$signum, ] - worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] + worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] + sum <- rowSums(as.data.frame(worklist[[i]])) + worklist[[i]] <- cbind(worklist[[i]], sum) + worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] + worklist[[i]] <- worklist[[i]][1:opt$signum, ] + worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] } worklist <- as.data.frame(melt(worklist)) worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) @@ -429,17 +468,22 @@ if (!is.na(opt$output_sigpattern)[1]) { worklist$pos <- cumsum(worklist$value) - worklist$value / 2 worklist$label <- factor(gsub("SBS", "", worklist$signature)) worklist$signature <- factor(worklist$signature) - p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + - geom_bar(width = 1, stat = "identity") + - geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + - coord_polar("y", start = 0) + facet_wrap(. ~ sample) + - labs(x = "", y = "Samples", fill = tag) + - scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), - values = signature_colors[levels(worklist$signature)], - labels = names(signature_colors[levels(worklist$signature)])) + - theme(axis.text = element_blank(), - axis.ticks = element_blank(), - panel.grid = element_blank()) + p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + + geom_bar(width = 1, stat = "identity") + + geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + + coord_polar("y", start = 0) + + facet_wrap(. ~ sample) + + labs(x = "", y = "Samples", fill = tag) + + scale_fill_manual( + name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), + values = signature_colors[levels(worklist$signature)], + labels = names(signature_colors[levels(worklist$signature)]) + ) + + theme( + axis.text = element_blank(), + axis.ticks = element_blank(), + panel.grid = element_blank() + ) grid.arrange(p7) # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering @@ -452,11 +496,15 @@ if (!is.na(opt$output_sigpattern)[1]) { if (!is.na(opt$sig_contrib_matrix)) { output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) if (length(levels(factor(levels_table$level))) > 1) { - output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), - 3], "-", colnames(fit_res$contribution)), - output_table) + output_table <- data.frame( + sample = paste0(metadata_table[ + metadata_table$element_identifier == colnames(fit_res$contribution), + 3 + ], "-", colnames(fit_res$contribution)), + output_table + ) colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) - } else { + } else { output_table <- data.frame(sample = rownames(output_table), output_table) colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) } @@ -480,20 +528,21 @@ if (!is.na(opt$output_sigpattern)[1]) { cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec) # Make barplot p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) + - geom_bar(stat = "identity", fill = "skyblue4") + - coord_cartesian(ylim = c(0.8, 1)) + - # coord_flip(ylim=c(0.8,1)) + - ylab("Cosine similarity\n original VS reconstructed") + - xlab("") + - # Reverse order of the samples such that first is up - # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + - theme_bw() + - theme(panel.grid.minor.y = element_blank(), - panel.grid.major.y = element_blank(), - axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) - ) + - # Add cut.off line - geom_hline(aes(yintercept = .95)) + geom_bar(stat = "identity", fill = "skyblue4") + + coord_cartesian(ylim = c(0.8, 1)) + + # coord_flip(ylim=c(0.8,1)) + + ylab("Cosine similarity\n original VS reconstructed") + + xlab("") + + # Reverse order of the samples such that first is up + # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + + theme_bw() + + theme( + panel.grid.minor.y = element_blank(), + panel.grid.major.y = element_blank(), + axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) + ) + + # Add cut.off line + geom_hline(aes(yintercept = .95)) grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3))) dev.off() } @@ -501,5 +550,5 @@ if (!is.na(opt$output_sigpattern)[1]) { # Output RData file if (!is.null(opt$rdata)) { - save.image(file = opt$rdata) + save.image(file = opt$rdata) } diff --git a/tools/mutational_patterns/mutational_patterns.xml b/tools/mutational_patterns/mutational_patterns.xml index fcc1bae75..9280ecb86 100644 --- a/tools/mutational_patterns/mutational_patterns.xml +++ b/tools/mutational_patterns/mutational_patterns.xml @@ -2,7 +2,7 @@ from genomic variations in vcf files 3.12.0 - 0 + 1 23.0