diff --git a/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_CNV48_Signatures.txt b/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_CNV48_Signatures.txt index e65d60d..92e00a6 100644 --- a/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_CNV48_Signatures.txt +++ b/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_CNV48_Signatures.txt @@ -1,4 +1,4 @@ -MutationType CN1 CN2 CN9 CN20 CNV48B CNV48D +MutationType CN1 CN2 CN9 CN20 CN3 CN4 0:homdel:0-100kb 0.00179505 0.00223767 0.00818184 0.00004078 0.00000295 0.00000075 0:homdel:100kb-1Mb 0.00380710 0.00719579 0.02099919 0.00017845 0.00039561 0.02703219 0:homdel:>1Mb 0.00243675 0.00383711 0.00900922 0.00011722 0.00122668 0.02703635 diff --git a/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_SV32_Signatures.txt b/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_SV32_Signatures.txt new file mode 100644 index 0000000..7518c78 --- /dev/null +++ b/SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_SV32_Signatures.txt @@ -0,0 +1,33 @@ +MutationType SV1 SV2 SV3 SV4 SV5 SV6 SV7 SV9 +clustered_del_1-10Kb 0.00001830 0.00001750 0.00000831 0.00558463 0.00162643 0.00484161 0.00036457 0.01689226 +clustered_del_10-100Kb 0.00000013 0.00000500 0.00000000 0.00883802 0.00395326 0.00397105 0.00493403 0.02027149 +clustered_del_100Kb-1Mb 0.00004260 0.00000257 0.00000253 0.01342056 0.00264242 0.01567818 0.00175481 0.04559844 +clustered_del_1Mb-10Mb 0.00000003 0.00000557 0.00000000 0.01941683 0.00079517 0.06315566 0.00000135 0.08553749 +clustered_del_>10Mb 0.00000003 0.00000224 0.00000009 0.01212996 0.00000205 0.13914616 0.00000121 0.00004090 +clustered_tds_1-10Kb 0.00000029 0.00002000 0.00233538 0.00119602 0.00044748 0.00173254 0.00023931 0.00407692 +clustered_tds_10-100Kb 0.00080029 0.00003770 0.00137651 0.00406629 0.00092090 0.00411956 0.00031235 0.00799563 +clustered_tds_100Kb-1Mb 0.00217807 0.00001180 0.00018608 0.00927806 0.00180766 0.01359699 
0.00000078 0.04878886 +clustered_tds_1Mb-10Mb 0.00006350 0.00000161 0.00000004 0.01669456 0.00008410 0.06927579 0.00000076 0.07607224 +clustered_tds_>10Mb 0.00000003 0.00000314 0.00000010 0.00950079 0.00003530 0.14468700 0.00000187 0.00019873 +clustered_inv_1-10Kb 0.00003510 0.00009050 0.00000000 0.01188233 0.01068676 0.00333129 0.00000116 0.00901519 +clustered_inv_10-100Kb 0.00000113 0.00002000 0.00000415 0.01283746 0.00421801 0.00778075 0.00000166 0.03440818 +clustered_inv_100Kb-1Mb 0.00062736 0.00000829 0.00000000 0.01893295 0.00104651 0.02657075 0.00000052 0.08534106 +clustered_inv_1Mb-10Mb 0.00000637 0.00000052 0.00000003 0.03875053 0.00018613 0.13269196 0.00000021 0.15018331 +clustered_inv_>10Mb 0.00000001 0.00000229 0.00000003 0.02250855 0.00000130 0.26682614 0.00000158 0.00004010 +clustered_trans 0.00005860 0.00000182 0.00000829 0.74672823 0.01500836 0.00000039 0.00000036 0.00000269 +non-clustered_del_1-10Kb 0.00016322 0.01077137 0.03108973 0.00000143 0.27051044 0.00049070 0.11080824 0.03271109 +non-clustered_del_10-100Kb 0.01062551 0.00189318 0.00026854 0.00000001 0.04236249 0.00047467 0.40908574 0.03762502 +non-clustered_del_100Kb-1Mb 0.01031305 0.00009900 0.00051531 0.00110315 0.02494482 0.00056581 0.34862452 0.05284083 +non-clustered_del_1Mb-10Mb 0.00810848 0.04772402 0.00323839 0.00114858 0.04036508 0.00908681 0.02776373 0.04842356 +non-clustered_del_>10Mb 0.00460793 0.05938062 0.00861972 0.00280099 0.02737192 0.01709695 0.01575099 0.01781587 +non-clustered_tds_1-10Kb 0.00000013 0.00004480 0.42763680 0.00000015 0.08730244 0.00000010 0.01078117 0.01195956 +non-clustered_tds_10-100Kb 0.04599380 0.00002460 0.32675443 0.00000001 0.12292716 0.00000054 0.00652355 0.01970948 +non-clustered_tds_100Kb-1Mb 0.45491853 0.01413625 0.01600693 0.00008590 0.06600995 0.00067638 0.00545841 0.01400244 +non-clustered_tds_1Mb-10Mb 0.35062795 0.04029646 0.00096917 0.00013931 0.01815304 0.00881413 0.00239992 0.02583554 +non-clustered_tds_>10Mb 0.00580397 0.03967873 
0.00649925 0.00298543 0.01284238 0.01668428 0.00007000 0.00744539 +non-clustered_inv_1-10Kb 0.00440008 0.00000085 0.00411229 0.00018744 0.04638068 0.00022783 0.00000043 0.01662502 +non-clustered_inv_10-100Kb 0.01048762 0.00459802 0.00275734 0.00116401 0.03219888 0.00171438 0.03025930 0.01859228 +non-clustered_inv_100Kb-1Mb 0.01700822 0.05558915 0.00038638 0.00198348 0.03887257 0.00243843 0.01706313 0.03888396 +non-clustered_inv_1Mb-10Mb 0.01075404 0.09313637 0.00482722 0.00198335 0.03699181 0.01686964 0.00553730 0.04676524 +non-clustered_inv_>10Mb 0.01304926 0.09976361 0.01624505 0.01157483 0.04637754 0.02742841 0.00186541 0.02399899 +non-clustered_trans 0.04930641 0.53263233 0.14615191 0.02307616 0.04292692 0.00002510 0.00039160 0.00230227 diff --git a/SigProfilerAssignment/DecompositionPlots/ExampleSample/SV32_De-Novo_Signatures.txt b/SigProfilerAssignment/DecompositionPlots/ExampleSample/SV32_De-Novo_Signatures.txt new file mode 100644 index 0000000..ec57a57 --- /dev/null +++ b/SigProfilerAssignment/DecompositionPlots/ExampleSample/SV32_De-Novo_Signatures.txt @@ -0,0 +1,33 @@ +MutationType SV32A SV32B SV32C +clustered_del_1-10Kb 0.00662723 0.00177044 0.00000007 +clustered_del_10-100Kb 0.00972409 0.00167625 0.00000003 +clustered_del_100Kb-1Mb 0.02672959 0.00069852 0.00000004 +clustered_del_1Mb-10Mb 0.05108407 0.00033823 0.00000009 +clustered_del_>10Mb 0.03969927 0.00000047 0.00000040 +clustered_tds_1-10Kb 0.00166860 0.00057069 0.00011002 +clustered_tds_10-100Kb 0.00695235 0.00099348 0.00000008 +clustered_tds_100Kb-1Mb 0.02396912 0.00136236 0.00000000 +clustered_tds_1Mb-10Mb 0.04294237 0.00000963 0.00000009 +clustered_tds_>10Mb 0.04211685 0.00000072 0.00000016 +clustered_inv_1-10Kb 0.01459632 0.00452079 0.00000000 +clustered_inv_10-100Kb 0.01751233 0.00170391 0.00000000 +clustered_inv_100Kb-1Mb 0.03681359 0.00037369 0.00000012 +clustered_inv_1Mb-10Mb 0.10623513 0.00000047 0.00000013 +clustered_inv_>10Mb 0.07525245 0.00108337 0.00000002 +clustered_trans 0.23293450 
0.00000023 0.00000022 +non-clustered_del_1-10Kb 0.00016344 0.11881046 0.04395760 +non-clustered_del_10-100Kb 0.00447025 0.09378964 0.00000052 +non-clustered_del_100Kb-1Mb 0.01586304 0.03619608 0.00000016 +non-clustered_del_1Mb-10Mb 0.02913311 0.02808863 0.00010709 +non-clustered_del_>10Mb 0.01433401 0.03214281 0.00690197 +non-clustered_tds_1-10Kb 0.00022787 0.00000001 0.34090417 +non-clustered_tds_10-100Kb 0.00000028 0.01990271 0.34120828 +non-clustered_tds_100Kb-1Mb 0.00938708 0.10950006 0.06107216 +non-clustered_tds_1Mb-10Mb 0.02086042 0.02455653 0.02874254 +non-clustered_tds_>10Mb 0.01097690 0.02568377 0.01287658 +non-clustered_inv_1-10Kb 0.00947122 0.06278640 0.00888667 +non-clustered_inv_10-100Kb 0.00786742 0.03894469 0.00344490 +non-clustered_inv_100Kb-1Mb 0.01588333 0.02367522 0.00125801 +non-clustered_inv_1Mb-10Mb 0.03501238 0.04148272 0.00366291 +non-clustered_inv_>10Mb 0.03495364 0.06231916 0.01299224 +non-clustered_trans 0.05653771 0.26701783 0.13387279 diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition.py index be96a0f..1a87c08 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition.py @@ -26,14 +26,15 @@ import SigProfilerAssignment import SigProfilerAssignment.DecompositionPlots from SigProfilerAssignment.DecompositionPlots import SigProfilerPlottingMatrix as mPlt -from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_SBS96 as spd_96 -from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_SBS288 as spd_288 from SigProfilerAssignment.DecompositionPlots import ( + PlotDecomposition_SBS96 as spd_96, + PlotDecomposition_SBS288 as spd_288, PlotDecomposition_SBS1536 as spd_1536, + PlotDecomposition_DBS78 as spd_78, + PlotDecomposition_ID83 as spd_83, + PlotDecomposition_CNV48 as cnv_48, + PlotDecomposition_SV32 as sv_32, ) -from 
SigProfilerAssignment.DecompositionPlots import PlotDecomposition_DBS78 as spd_78 -from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_ID83 as spd_83 -from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_CNV48 as cnv_48 from SigProfilerAssignment import decompose_subroutines as sub # imports for working with plots in memory @@ -43,11 +44,13 @@ import json import base64 + # Global Variables SBS_CONTEXTS = ["6", "24", "96", "288", "384", "1536", "6144"] DBS_CONTEXTS = ["78", "186", "1248", "2976"] ID_CONTEXTS = ["28", "83", "415"] CNV_CONTEXTS = ["48"] +SV_CONTEXTS = ["32"] MTYPE_OPTIONS = [ "6", "24", @@ -62,6 +65,7 @@ "186", "1248", "2976", + "32", ] DECOMPOSITION_PATH = SigProfilerAssignment.DecompositionPlots.__path__[0] REFSIG_PATH = os.path.join( @@ -69,7 +73,6 @@ ) TEMPLATE_PATH = os.path.join(DECOMPOSITION_PATH, "CosmicTemplates") - # Remove templates so that they can be rebuilt def remove_cosmic_templates(): if not os.path.exists(TEMPLATE_PATH): @@ -84,10 +87,11 @@ def remove_cosmic_templates(): def install_cosmic_plots( context_type="96", genome_build="GRCh37", cosmic_version="3.4", exome=False ): + if not os.path.exists(TEMPLATE_PATH): os.mkdir(TEMPLATE_PATH) - # determine if context is from SBS, ID, DBS, or CNV + # determine if context is from SBS, ID, DBS, CNV or SV context_type_str = "" if context_type in SBS_CONTEXTS: context_type_str = "SBS" @@ -102,6 +106,9 @@ def install_cosmic_plots( elif context_type in CNV_CONTEXTS: context_type_str = "CNV" cosmic_mtype = "48" + elif context_type in SV_CONTEXTS: + context_type_str = "SV" + cosmic_mtype = "32" else: raise ValueError("ERROR: context", context_type, "not in context lists.") @@ -138,6 +145,20 @@ def install_cosmic_plots( genome_build = "GRCh37" exome_str = "" + # CNV signatures exome=False, genome_build=GRCh37 + if context_type in CNV_CONTEXTS: + cosmic_file_name = "COSMIC_v" + str(cosmic_version) + "_CN_GRCh37.txt" + json_file_name = "COSMIC_v" + 
str(cosmic_version) + "_CN_GRCh37.json" + genome_build = "GRCh37" + exome_str = "" + + # SV signatures exome=False, genome_build=GRCh38 + if context_type in SV_CONTEXTS: + cosmic_file_name = "COSMIC_v" + str(cosmic_version) + "_SV_GRCh38.txt" + json_file_name = "COSMIC_v" + str(cosmic_version) + "_SV_GRCh38.json" + genome_build = "GRCh38" + exome_str = "" + # Load cosmic plots if they exist filename = os.path.join(TEMPLATE_PATH, json_file_name) if os.path.exists(filename): @@ -165,7 +186,6 @@ def install_cosmic_plots( + exome_str, "now...", ) - # Create the respective plots if context_type_str == "SBS": cosmic_buff_plots = sigPlt.plotSBS( @@ -195,6 +215,25 @@ def install_cosmic_plots( percentage=True, savefig_format="PIL_Image", ) + elif context_type_str == "CNV": + cosmic_buff_plots = sigPlt.plotCNV( + cosmic_file_path, + "buffer", + "buffer", + percentage=True, + aggregate=False, + read_from_file=False, + savefig_format="PIL_Image", + ) + elif context_type_str == "SV": + cosmic_buff_plots = sigPlt.plotSV( + cosmic_file_path, + "buffer", + "buffer", + percentage=True, + aggregate=False, + savefig_format="PIL_Image", + ) # Process the plots to be stored in JSON file cosmic_img_dict = {} @@ -413,8 +452,33 @@ def genCNV_pngs(denovo_mtx, basis_mtx, output_path, project, mtype): return denovo_plots, basis_plots +def genSV_pngs(denovo_mtx, basis_mtx, output_path, project, mtype): + denovo_plots = dict() + basis_plots = dict() + denovo_plots = sigPlt.plotSV( + denovo_mtx, + output_path, + project, + percentage=True, + aggregate=False, + savefig_format="PIL_Image", + ) + + if basis_mtx is not None: + basis_plots = sigPlt.plotSV( + basis_mtx, + output_path, + project, + percentage=True, + aggregate=False, + savefig_format="PIL_Image", + ) + return denovo_plots, basis_plots + + # signames, weights def gen_sub_plots(denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp): + # Make output directory if not os.path.exists(output_path): os.makedirs(output_path) @@ -442,6
+506,11 @@ def gen_sub_plots(denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp) denovo_mtx, basis_mtx, output_path, project, mtype ) return denovo_plots, basis_plots + elif mtype in SV_CONTEXTS: + denovo_plots, basis_plots = genSV_pngs( + denovo_mtx, basis_mtx, output_path, project, mtype + ) + return denovo_plots, basis_plots else: print("ERROR: mtype is " + mtype + " and is not yet supported.") @@ -510,6 +579,15 @@ def gen_reconstructed_png_percent( read_from_file=False, savefig_format="PIL_Image", ) + elif mtype in SV_CONTEXTS: + reconstruction_plot = sigPlt.plotSV( + reconstruction_mtx, + output_path, + "reconstruction_" + project, + percentage=True, + aggregate=False, + savefig_format="PIL_Image", + ) else: print("ERROR: mtype is " + mtype + " and is not yet supported.") @@ -591,6 +669,16 @@ def gen_reconstructed_png_numerical( read_from_file=False, savefig_format="PIL_Image", ) + elif mtype in SV_CONTEXTS: + reconstruction_plot = sigPlt.plotSV( + reconstruction_mtx, + output_path, + "reconstruction_" + project, + percentage=True, + aggregate=False, + read_from_file=False, + savefig_format="PIL_Image", + ) else: print("ERROR: mtype is " + mtype + " and is not yet supported.") @@ -612,6 +700,7 @@ def gen_decomposition( cosmic_version=None, custom_text=None, ): + """ Generate the correct plot based on mtype. @@ -744,6 +833,22 @@ def gen_decomposition( custom_text, ) return byte_plot + elif mtype == "32": + byte_plot = sv_32.gen_decomposition( + denovo_name, + basis_names, + weights, + output_path, + project, + denovo_plots_dict, + basis_plots_dict, + reconstruction_plot_dict, + reconstruction, + statistics, + cosmic_version, + custom_text, + ) + return byte_plot def run_PlotDecomposition( @@ -773,8 +878,7 @@ def run_PlotDecomposition( basis_mtx: Pandas Dataframe. This format represents the catalog of mutations seperated by tab. - basis_names: List of Strings. 
The names of the samples in denovo_mtx that - the denovo_name sample from denovo_mtx is decomposed into. + basis_names: List of Strings. The names of the samples in denovo_mtx that the denovo_name sample from denovo_mtx is decomposed into. ie. basis_names=["SBS1", "SBS5", "SBS15", "SBS20"] weights: List of Strings. The percentile weight corresponding to each basis @@ -796,17 +900,9 @@ def run_PlotDecomposition( None. """ # Create the denovo plots and load basis plots - if mtype != "48": - denovo_plots_dict = gen_sub_plots( - denovo_mtx, None, output_path, project, mtype, ss_decomp=False - ) - denovo_plots_dict = denovo_plots_dict[0] - else: - # cnv basis plots need to be generated and not loaded - denovo_plots_dict, basis_plots_dict = gen_sub_plots( - denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp=False - ) - # Create the matrix and plot for the reconstructed matrix + denovo_plots_dict, basis_plots_dict = gen_sub_plots( + denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp=False + ) reconstructed_mtx, reconstruction_plot_dict = gen_reconstructed_png_percent( denovo_name, basis_mtx, basis_names, weights, output_path, project, mtype ) @@ -816,14 +912,13 @@ def run_PlotDecomposition( # Convert dictionary of bytes to dictionary of images denovo_plots_dict = convert_to_imgReaderDict(denovo_plots_dict) # Load in the COSMIC plots - if mtype != "48": - basis_plots_dict = install_cosmic_plots( - context_type=mtype, - genome_build=genome_build, - cosmic_version=cosmic_version, - exome=exome, - ) - basis_plots_dict = {key: basis_plots_dict[key] for key in basis_names} + basis_plots_dict = install_cosmic_plots( + context_type=mtype, + genome_build=genome_build, + cosmic_version=cosmic_version, + exome=exome, + ) + basis_plots_dict = {key: basis_plots_dict[key] for key in basis_names} basis_plots_dict = convert_to_imgReaderDict(basis_plots_dict) # Generate the reconstruction plot reconstruction_plot_dict = 
convert_to_imgReaderDict(reconstruction_plot_dict) diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py new file mode 100644 index 0000000..c253c3e --- /dev/null +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py @@ -0,0 +1,530 @@ +#!/usr/bin/env python3 +""" +Created: February 21, 2020 +@author: Mark Barnes +""" +import reportlab +import os +from reportlab.pdfgen import canvas +from reportlab.lib.pagesizes import letter, A4, landscape, portrait +from reportlab.lib import utils +from reportlab.pdfbase.ttfonts import TTFont +from reportlab.pdfbase import pdfmetrics +import SigProfilerAssignment as spa_path +from PyPDF2 import PdfWriter, PdfReader + +# imports for saving plots to memory +import io +from PIL import Image + +# imports for dashed line +from reportlab.lib.colors import black + +PATHS = spa_path.__path__[0] + +# USING LETTER PORTRAIT DIMENSIONS +WIDTH_LETTER = 965 +HEIGHT_LETTER = 755 +MID_WIDTH_LETTER = WIDTH_LETTER / 2 +MID_HEIGHT_LETTER = HEIGHT_LETTER / 2 + +# SV32 plots have a dimension of 16" width, 10" height +WIDTH_GRAPH = 290 +HEIGHT_GRAPH = 214.89 + +# Layout Formatting +HEIGHT_GAP = 26 +WIDTH_GAP = 6 + +# For indexing the layout variables +X_COORD = 0 +Y_COORD = 1 + +# Coordinates for graphs on right side of plot +GRAPH_X_COORD = (WIDTH_LETTER) - WIDTH_GRAPH +GRAPH_Y_COORD = HEIGHT_LETTER - HEIGHT_GAP +TEXT_X_COORD = GRAPH_X_COORD + WIDTH_GRAPH - 50 +TEXT_Y_COORD = (HEIGHT_LETTER - HEIGHT_GAP) + 55.75 + +FONTS_DIR = os.path.join(PATHS, "DecompositionPlots/ReferenceFiles/Fonts/") +BRACKET_PATH = os.path.join( + PATHS, "DecompositionPlots/ReferenceFiles/Accolade_fermante.png" +) +reportlab.rl_config.TTFSearchPath.append(FONTS_DIR) +pdfmetrics.registerFont(TTFont("Arial-Bold", "Arial Bold.ttf")) + +LAYOUT_6_GRAPH = [ + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH / 2), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 
124.78 - HEIGHT_GRAPH / 2), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 564.56 - HEIGHT_GRAPH / 2), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 564.56 - HEIGHT_GRAPH / 2), +] +LAYOUT_6_TEXT = [(600, 700), (900, 700), (600, 480), (900, 480), (600, 260), (900, 260)] + +LAYOUT_5_GRAPH = [ + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH / 2), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH / 2), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 564.56 - HEIGHT_GRAPH / 2), +] +LAYOUT_5_TEXT = [(600, 700), (900, 700), (600, 480), (900, 480), (600, 260)] + +LAYOUT_4_GRAPH = [ + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 349.6 - HEIGHT_GRAPH), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 349.6 - HEIGHT_GRAPH), +] +LAYOUT_4_TEXT = [ + (600, 700 - HEIGHT_GRAPH / 2), + (900, 700 - HEIGHT_GRAPH / 2), + (600, 480 - HEIGHT_GRAPH / 2), + (900, 480 - HEIGHT_GRAPH / 2), +] + +LAYOUT_3_GRAPH = [ + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 124.78 - HEIGHT_GRAPH), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 349.6 - HEIGHT_GRAPH), +] +LAYOUT_3_TEXT = [ + (600, 700 - HEIGHT_GRAPH / 2), + (900, 700 - HEIGHT_GRAPH / 2), + (600, 480 - HEIGHT_GRAPH / 2 - 4.9), +] + + +LAYOUT_2_GRAPH = [ + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2), + (784 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2), +] +LAYOUT_2_TEXT = [(600, 480), (900, 480)] + +LAYOUT_2_GRAPH_RECON = [ + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 265.055 - 74.61), + (485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 489.945 - 
74.61), +] +LAYOUT_2_TEXT_RECON = [ + (600, 700 - HEIGHT_GRAPH / 2), + (600, 480 - HEIGHT_GRAPH / 2 - 4.9), +] + + +LAYOUT_1_GRAPH = [(485 - WIDTH_GRAPH / 2, HEIGHT_LETTER - 344.67 - HEIGHT_GRAPH / 2)] +LAYOUT_1_TEXT = [(600, 480)] + +# Pairs (position, height), organized from 1 plot to 5 plus plots +BRACKET_SIZES = [ + (HEIGHT_LETTER * (3 / 8) + 32.75, HEIGHT_LETTER * (2 / 8)), + (HEIGHT_LETTER - 564.64, 439.785), + (HEIGHT_LETTER - 564.64, 439.785), + (HEIGHT_LETTER - 564.64, 439.785), + (HEIGHT_LETTER - 672, 654.67), +] + +PLOT_NAME = 0 +CONTRIBUTION = 1 + +# Helper functions for plotting the layout of a graph with 1 to 6 (or more) basis signatures +# Parameters: +# bases - (List of Lists) [signature name, contribution percentage] pairs, one per basis signature +# basis_plots_dict - (Dictionary) Signature names are keys, the images passed to drawImage are values. +# project - (String) The name of the project (not used by these helpers). +# c_draw - (Canvas) The canvas to draw the graph decomposition on. +def plot_1(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + for i in range(0, 1): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_1_GRAPH[i][X_COORD], + LAYOUT_1_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c_draw.drawString( + LAYOUT_1_TEXT[i][X_COORD], LAYOUT_1_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + +def plot_2(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + for i in range(0, 2): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_2_GRAPH[i][X_COORD], + LAYOUT_2_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c_draw.drawString( + LAYOUT_2_TEXT[i][X_COORD], LAYOUT_2_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + +def plot_3(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + + for i in range(0, 3): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_3_GRAPH[i][X_COORD], + LAYOUT_3_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) +
c_draw.drawString( + LAYOUT_3_TEXT[i][X_COORD], LAYOUT_3_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + +def plot_4(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + for i in range(0, 4): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_4_GRAPH[i][X_COORD], + LAYOUT_4_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c_draw.drawString( + LAYOUT_4_TEXT[i][X_COORD], LAYOUT_4_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + +def plot_5(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + for i in range(0, 5): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_5_GRAPH[i][X_COORD], + LAYOUT_5_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c_draw.drawString( + LAYOUT_5_TEXT[i][X_COORD], LAYOUT_5_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + +def plot_6(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + for i in range(0, 6): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_6_GRAPH[i][X_COORD], + LAYOUT_6_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c_draw.drawString( + LAYOUT_6_TEXT[i][X_COORD], LAYOUT_6_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + +def plot_7_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): + for i in range(0, 6): + image = basis_plots_dict[bases[i][0]] + c_draw.drawImage( + image, + LAYOUT_6_GRAPH[i][X_COORD], + LAYOUT_6_GRAPH[i][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c_draw.drawString( + LAYOUT_6_TEXT[i][X_COORD], LAYOUT_6_TEXT[i][Y_COORD], str(bases[i][1]) + "%" + ) + + extra_sigs = "* " + for i in range(6, len(bases) - 1): + extra_sigs += str(bases[i][0]) + " (" + str(bases[i][1]) + "%), " + + extra_sigs += bases[len(bases) - 1][0] + " (" + str(bases[len(bases) - 1][1]) + "%)" + c_draw.drawString( + GRAPH_X_COORD - 330, (TEXT_Y_COORD - HEIGHT_GRAPH * 6) + 557, extra_sigs + ) + + +# Helper function to add calculations 
to layout +# Parameters: +# c_draw - (Canvas) The canvas to draw the graph decomposition on. +# statistics - (Pandas Dataframe) Dataframe w/ calculations +def draw_statistics(c_draw, statistics, sig_version, custom_text): + cos_sim = statistics["Cosine Similarity"][0] + cor_coeff = statistics["Correlation Coefficient"][0] + l1_norm_percent = statistics["L1 Norm %"][0] + l2_norm_percent = statistics["L2 Norm %"][0] + kl_divergence = statistics["KL Divergence"][0] + + c_draw.drawString( + WIDTH_GAP + 15, + LAYOUT_2_TEXT_RECON[1][Y_COORD] - 195, + "Cosine Similarity: " + str(cos_sim), + ) + c_draw.drawString( + WIDTH_GAP + 15, + LAYOUT_2_TEXT_RECON[1][Y_COORD] - 205, + "Correlation: " + str(cor_coeff), + ) + c_draw.drawString( + WIDTH_GAP + 105, + LAYOUT_2_TEXT_RECON[1][Y_COORD] - 195, + "L1 Error %: " + str(l1_norm_percent) + "%", + ) + c_draw.drawString( + WIDTH_GAP + 105, + LAYOUT_2_TEXT_RECON[1][Y_COORD] - 205, + "L2 Error %: " + str(l2_norm_percent) + "%", + ) + c_draw.drawString( + WIDTH_GAP + 195, + LAYOUT_2_TEXT_RECON[1][Y_COORD] - 195, + "KL Divergence: " + str(kl_divergence), + ) + + if sig_version is not None: + c_draw.drawString( + WIDTH_GAP + 195, + LAYOUT_2_TEXT_RECON[1][Y_COORD] - 205, + "Signature Version: " + str(sig_version), + ) + if custom_text is not None: + c_draw.drawString( + GRAPH_X_COORD - 330, LAYOUT_2_TEXT_RECON[1][Y_COORD] - 205, str(custom_text) + ) + + +# Helper function to resize bracket depending on number of bases plotted +# Parameters: +# num_bases - (Integer) The number of bases to be plotted. +# c_draw - (Canvas) The canvas to draw the graph decomposition on. 
+def draw_bracket(num_bases, c_draw): + num_plts = num_bases - 1 + + if num_bases >= 5: + num_plts = 4 + + c_draw.drawImage( + BRACKET_PATH, + 310, + BRACKET_SIZES[num_plts][0], + width=20, + height=BRACKET_SIZES[num_plts][1], + mask="auto", + ) + + +# Helper function to remove the margins from the PlotDecomposition pdf +# Parameters: +# pdf_to_edit - (BytesIO) In-memory buffer holding the uncropped decomposition plot +# num_bases - (Integer) The number of signatures the sample is composed of +def crop_margins(pdf_to_edit, num_bases): + pdf_to_edit.seek(0) + pdf_file = PdfReader(pdf_to_edit, "rb") + page = pdf_file.pages[0] + writer = PdfWriter() + output_plot_buff = io.BytesIO() + + if num_bases == 1: + page.mediabox.lower_right = (935 - WIDTH_GRAPH, 155) + page.mediabox.lower_left = (0, 155) + page.mediabox.upper_right = (935 - WIDTH_GRAPH, 640) + page.mediabox.upper_left = (0, 640) + writer.add_page(page) + writer.write(output_plot_buff) + elif num_bases == 2: + page.mediabox.lower_right = (935, 155) + page.mediabox.lower_left = (0, 155) + page.mediabox.upper_right = (935, 640) + page.mediabox.upper_left = (0, 640) + writer.add_page(page) + writer.write(output_plot_buff) + elif num_bases == 3: + page.mediabox.lower_right = (935, 155) + page.mediabox.lower_left = (0, 155) + page.mediabox.upper_right = (935, 640) + page.mediabox.upper_left = (0, 640) + writer.add_page(page) + writer.write(output_plot_buff) + elif num_bases == 4: + page.mediabox.lower_right = (935, 155) + page.mediabox.lower_left = (0, 155) + page.mediabox.upper_right = (935, 640) + page.mediabox.upper_left = (0, 640) + writer.add_page(page) + writer.write(output_plot_buff) + elif num_bases == 5: + page.mediabox.lower_right = (935, 75) + page.mediabox.lower_left = (0, 75) + page.mediabox.upper_right = (935, 745) + page.mediabox.upper_left = (0, 745) + writer.add_page(page) + writer.write(output_plot_buff) + elif num_bases == 6: + page.mediabox.lower_right = (935, 75) + page.mediabox.lower_left = (0, 75) +
page.mediabox.upper_right = (935, 745) + page.mediabox.upper_left = (0, 745) + writer.add_page(page) + writer.write(output_plot_buff) + elif num_bases > 6: + page.mediabox.lower_right = (935, 35) + page.mediabox.lower_left = (0, 35) + page.mediabox.upper_right = (935, 745) + page.mediabox.upper_left = (0, 745) + writer.add_page(page) + writer.write(output_plot_buff) + return output_plot_buff + + +# Parameters: +# de_novo_name (String) The name of the denovo signature. +# basis_names (List of Strings) The names of the basis signatures +# output_path (String) Path to where to save the output. +# project (String) The project name that is appended to file names. +# c (Canvas) The canvas that is being drawn on. +# reconstruction (Boolean) True to create reconstruction +# statistics (Pandas Dataframe) If reconstructing, then include statistics. +# sig_version (String) The version of the Cosmic Signatures used +# denovo_plots_dict (Dictionary) Signatures are keys, ByteIO plots are values +# basis_plots_dict (Dictionary) Signatures are keys, ByteIO plots are values +# reconstruction_plot_dict (Dictionary) Signatures are keys, ByteIO plots are values +# +# Output: +# A graph of the de_novo signature's decomposition. 
+def gen_plot( + de_novo_name, + bases, + output_path, + project, + c, + reconstruction, + statistics, + sig_version, + custom_text, + denovo_plots_dict, + basis_plots_dict, + reconstruction_plot_dict, +): + + # THIS IS THE RIGHT SIDE, IT CHANGES + num_bases = len(bases) + if num_bases == 1: + plot_1(bases, project, c, denovo_plots_dict, basis_plots_dict) + elif num_bases == 2: + plot_2(bases, project, c, denovo_plots_dict, basis_plots_dict) + elif num_bases == 3: + plot_3(bases, project, c, denovo_plots_dict, basis_plots_dict) + elif num_bases == 4: + plot_4(bases, project, c, denovo_plots_dict, basis_plots_dict) + elif num_bases == 5: + plot_5(bases, project, c, denovo_plots_dict, basis_plots_dict) + elif num_bases == 6: + plot_6(bases, project, c, denovo_plots_dict, basis_plots_dict) + elif num_bases > 6: + plot_7_plus(bases, project, c, denovo_plots_dict, basis_plots_dict) + + recon_image = reconstruction_plot_dict[de_novo_name] + denovo_image = denovo_plots_dict[de_novo_name] + # THIS IS THE LEFT SIDE + if reconstruction: + c.drawImage( + denovo_image, + WIDTH_GAP, + LAYOUT_2_GRAPH_RECON[0][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c.drawString( + WIDTH_GRAPH - WIDTH_GAP - 25, LAYOUT_2_TEXT_RECON[0][Y_COORD], "Original" + ) + c.drawImage( + recon_image, + WIDTH_GAP, + LAYOUT_2_GRAPH_RECON[1][Y_COORD], + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + c.drawString( + WIDTH_GRAPH - WIDTH_GAP - 45, + LAYOUT_2_TEXT_RECON[1][Y_COORD], + "Reconstructed", + ) + draw_statistics(c, statistics, sig_version, custom_text) + + # Draw dashed line + c.setLineWidth(2) + c.setDash(25, 5) + c.setStrokeColor(black) + c.setFillColorRGB(255, 255, 255) + p = c.beginPath() + p.moveTo(WIDTH_GAP, HEIGHT_LETTER - 377.75 + 33) + p.lineTo(WIDTH_GRAPH + 2, HEIGHT_LETTER - 377.75 + 33) + c.drawPath(p, stroke=1, fill=1) + else: + c.drawImage( + denovo_image, + WIDTH_GAP, + MID_HEIGHT_LETTER - HEIGHT_GRAPH / 2, + width=WIDTH_GRAPH, + height=HEIGHT_GRAPH, + ) + 
draw_bracket(num_bases, c) + + c.showPage() + + +def gen_decomposition( + denovo_name, + basis_names, + weights, + output_path, + project, + denovo_plots_dict, + basis_plots_dict, + reconstruction_plot_dict, + reconstruction, + statistics, + sig_version=None, + custom_text=None, +): + + buff = io.BytesIO() + c = canvas.Canvas(buff, pagesize=(WIDTH_LETTER, HEIGHT_LETTER)) + c.setFont("Arial-Bold", 5.24) + + basis_plots = [] + for i in range(0, len(basis_names)): + basis_plots.append([basis_names[i], weights[i]]) + + # create for loop to iterate through list, then change second value in list of lists + # Otherwise sorts strings and then 5.14% > 48.54% + for j in range(0, len(basis_names)): + basis_plots[j][1] = float(basis_plots[j][1].strip("%")) + sorted_list = sorted(basis_plots, key=lambda tup: tup[1], reverse=True) + + gen_plot( + denovo_name, + sorted_list, + output_path, + project, + c, + reconstruction, + statistics, + sig_version, + custom_text, + denovo_plots_dict, + basis_plots_dict, + reconstruction_plot_dict, + ) + + c.save() + + # Take the plot and crop the margins + byte_plot = crop_margins(buff, len(basis_names)) + buff.close() + return byte_plot diff --git a/SigProfilerAssignment/data/tests/txt_input/SV32_S3_Signatures.txt b/SigProfilerAssignment/data/tests/txt_input/SV32_S3_Signatures.txt index b29ffae..f41e48f 100644 --- a/SigProfilerAssignment/data/tests/txt_input/SV32_S3_Signatures.txt +++ b/SigProfilerAssignment/data/tests/txt_input/SV32_S3_Signatures.txt @@ -1,33 +1,33 @@ -MutationType 0 1 2 -clustered_del_1-10Kb 0.00591623 0.00000119 0.00003543 -clustered_del_10-100Kb 0.00754999 0.00000101 0.00599576 -clustered_del_100Kb-1Mb 0.01723822 0.00000066 0.02294156 -clustered_del_1Mb-10Mb 0.02895137 0.00000094 0.03714988 -clustered_del_>10Mb 0.02151931 0.00000096 0.00003624 -clustered_tds_1-10Kb 0.00138182 0.00000152 0.00000141 -clustered_tds_10-100Kb 0.00363530 0.00277485 0.00528629 -clustered_tds_100Kb-1Mb 0.00751767 0.00000075 0.03179513 
-clustered_tds_1Mb-10Mb 0.02024984 0.00000084 0.00387149 -clustered_tds_>10Mb 0.01728936 0.00000073 0.01635656 -clustered_inv_1-10Kb 0.01599352 0.00000107 0.06660425 -clustered_inv_10-100Kb 0.01473596 0.00000077 0.04657534 -clustered_inv_100Kb-1Mb 0.02195676 0.00000093 0.07297037 -clustered_inv_1Mb-10Mb 0.05721262 0.00000059 0.02610903 -clustered_inv_>10Mb 0.03575116 0.00000068 0.01932889 -clustered_trans 0.31742983 0.00000000 0.11931097 -non-clustered_del_1-10Kb 0.01235744 0.10028469 0.03929744 -non-clustered_del_10-100Kb 0.01642868 0.08869502 0.06827200 -non-clustered_del_100Kb-1Mb 0.04314541 0.02361266 0.01049556 -non-clustered_del_1Mb-10Mb 0.02307427 0.02360570 0.02154467 -non-clustered_del_>10Mb 0.01346200 0.02884193 0.02113914 -non-clustered_tds_1-10Kb 0.00098514 0.00559656 0.00000127 -non-clustered_tds_10-100Kb 0.00011906 0.01948073 0.02651685 -non-clustered_tds_100Kb-1Mb 0.01252569 0.03134159 0.00405312 -non-clustered_tds_1Mb-10Mb 0.02074378 0.00844506 0.01205615 -non-clustered_tds_>10Mb 0.01391367 0.02407620 0.01162608 -non-clustered_inv_1-10Kb 0.01775605 0.04985384 0.05993695 -non-clustered_inv_10-100Kb 0.00886354 0.01882036 0.02325057 -non-clustered_inv_100Kb-1Mb 0.02501168 0.04010485 0.02112356 -non-clustered_inv_1Mb-10Mb 0.03438851 0.04624524 0.01434552 -non-clustered_inv_>10Mb 0.04593804 0.10010973 0.00383835 -non-clustered_trans 0.11695809 0.38809832 0.18813424 +MutationType SV32A SV32B SV32C +clustered_del_1-10Kb 0.00591623 0.00000119 0.00003543 +clustered_del_10-100Kb 0.00754999 0.00000101 0.00599576 +clustered_del_100Kb-1Mb 0.01723822 0.00000066 0.02294156 +clustered_del_1Mb-10Mb 0.02895137 0.00000094 0.03714988 +clustered_del_>10Mb 0.02151931 0.00000096 0.00003624 +clustered_tds_1-10Kb 0.00138182 0.00000152 0.00000141 +clustered_tds_10-100Kb 0.0036353 0.00277485 0.00528629 +clustered_tds_100Kb-1Mb 0.00751767 0.00000075 0.03179513 +clustered_tds_1Mb-10Mb 0.02024984 0.00000084 0.00387149 +clustered_tds_>10Mb 0.01728936 0.00000073 0.01635656 
+clustered_inv_1-10Kb 0.01599352 0.00000107 0.06660425 +clustered_inv_10-100Kb 0.01473596 0.00000077 0.04657534 +clustered_inv_100Kb-1Mb 0.02195676 0.00000093 0.07297037 +clustered_inv_1Mb-10Mb 0.05721262 0.00000059 0.02610903 +clustered_inv_>10Mb 0.03575116 0.00000068 0.01932889 +clustered_trans 0.31742983 0 0.11931097 +non-clustered_del_1-10Kb 0.01235744 0.10028469 0.03929744 +non-clustered_del_10-100Kb 0.01642868 0.08869502 0.068272 +non-clustered_del_100Kb-1Mb 0.04314541 0.02361266 0.01049556 +non-clustered_del_1Mb-10Mb 0.02307427 0.0236057 0.02154467 +non-clustered_del_>10Mb 0.013462 0.02884193 0.02113914 +non-clustered_tds_1-10Kb 0.00098514 0.00559656 0.00000127 +non-clustered_tds_10-100Kb 0.00011906 0.01948073 0.02651685 +non-clustered_tds_100Kb-1Mb 0.01252569 0.03134159 0.00405312 +non-clustered_tds_1Mb-10Mb 0.02074378 0.00844506 0.01205615 +non-clustered_tds_>10Mb 0.01391367 0.0240762 0.01162608 +non-clustered_inv_1-10Kb 0.01775605 0.04985384 0.05993695 +non-clustered_inv_10-100Kb 0.00886354 0.01882036 0.02325057 +non-clustered_inv_100Kb-1Mb 0.02501168 0.04010485 0.02112356 +non-clustered_inv_1Mb-10Mb 0.03438851 0.04624524 0.01434552 +non-clustered_inv_>10Mb 0.04593804 0.10010973 0.00383835 +non-clustered_trans 0.11695809 0.38809832 0.18813424 \ No newline at end of file diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py index 3be2de4..b4427e2 100644 --- a/SigProfilerAssignment/decompose_subroutines.py +++ b/SigProfilerAssignment/decompose_subroutines.py @@ -63,7 +63,6 @@ def getProcessAvg( + ". COSMIC signatures are available only for GRCh37/38, mm9/10 and rn6 genomes. So, the genome build is reset to GRCh37." 
) genome_build = "GRCh37" - if samples.shape[0] == 96: if exome == False: sigDatabase = pd.read_csv( @@ -181,6 +180,7 @@ def getProcessAvg( ) signames = sigDatabase.columns connected_sigs = False + elif samples.shape[0] == 32: if cosmic_version < 3.4: print( @@ -205,6 +205,7 @@ def getProcessAvg( sigDatabase.index = sigDatabase.index.astype(str) signames = sigDatabase.columns connected_sigs = False + return sigDatabase, signames, connected_sigs, genome_build if signature_database != None: # pd.core.frame.DataFrame: @@ -572,6 +573,8 @@ def signature_decomposition( mtype_par = "83" elif mtype == "CNV" or mtype == "48": mtype_par = "48" + elif mtype == "SV" or mtype == "32": + mtype_par = "32" else: mtype_par = "none" # only create decomposition plots for COSMIC signatures @@ -667,6 +670,7 @@ def signature_decomposition( "78": "DBS78", "83": "ID83", "48": "CNV", + "32": "SV", } merger.write( directory + "/" + contexts[mtype_par] + "_Decomposition_Plots.pdf" diff --git a/setup.py b/setup.py index aaedb32..5b1c1b5 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if os.path.exists("dist"): shutil.rmtree("dist") -VERSION = "0.1.3" +VERSION = "0.1.4" def write_version_py(filename="SigProfilerAssignment/version.py"): @@ -15,7 +15,7 @@ def write_version_py(filename="SigProfilerAssignment/version.py"): # THIS FILE IS GENERATED FROM SigProfilerAssignment SETUP.PY short_version = '%(version)s' version = '%(version)s' -Update = 'v0.1.3: Modify how ndarrays are processed' +Update = 'v0.1.4: Add SV decomposition plotting.' 
 """
diff --git a/test.py b/test.py
index df0b20d..d0a6a54 100644
--- a/test.py
+++ b/test.py
@@ -573,7 +573,8 @@ def gen_CNV48():
     mtype = "48"
 
     denovo_name = "CNV48A"
-    basis_names = ["CN1", "CN2", "CN9", "CN20", "CNV48B", "CNV48D"]
+    # basis_names = ["CN1", "CN2", "CN9", "CN20", "CNV48B", "CNV48C"]
+    basis_names = ["CN1", "CN2", "CN9", "CN20", "CN3", "CN4"]
     weights = ["0.94%", "48.72%", "28.44%", "8.42%", "13.48%", "0%"]
     denovo_cols = ["MutationType", "CNV48A"]
     basis_cols = basis_names.copy()
@@ -617,6 +618,63 @@ def gen_CNV48():
     return time.time() - s
 
 
+def gen_SV32():  # plot the SV32A example decompositions; returns elapsed seconds
+    np.random.seed(1234567)  # fixed seed so the random exposures below repeat
+    s = time.time()
+    merger = PdfMerger()
+    file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/SV32_De-Novo_Signatures.txt"
+    file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_SV32_Signatures.txt"
+    denovo_mtx = pd.read_csv(file1, sep="\t")
+    basis_mtx = pd.read_csv(file2, sep="\t")
+    output_path = "TestOutput/Results/"
+    project = "test_run"
+    mtype = "32"
+
+    denovo_name = "SV32A"
+    basis_names = ["SV1", "SV2", "SV3", "SV4", "SV5", "SV6", "SV7", "SV9"]
+    weights = ["10%", "10%", "10%", "10%", "10%", "10%", "10%", "20%"]  # one percentage string per basis name
+    denovo_cols = ["MutationType", "SV32A"]
+    basis_cols = basis_names.copy()
+    basis_cols.insert(0, "MutationType")
+    nonzero_exposures = np.random.uniform(size=len(basis_names))  # uniform random values, one per basis name
+    result = sp.run_PlotDecomposition(
+        denovo_mtx[denovo_cols],
+        denovo_name,
+        basis_mtx[basis_cols],
+        basis_names,
+        weights,
+        nonzero_exposures,
+        output_path,
+        project,
+        mtype,
+    )
+    # sp.run_PlotDecomposition(denovo_mtx, basis_names, weights, output_path, project, mtype, True, statistics, "COSMICv3-GRCh37", "This is where a custom message would go.")
+    merger.append(result)
+    # Plot again with only the first 5, then 4, 3, 2 and 1 basis signatures.
+    for ind in range(5, 0, -1):
+        basis_names = basis_names[:ind]  # shrinks the already-shortened list each pass
+        weights = weights[:ind]
+        denovo_cols = ["MutationType", "SV32A"]
+        basis_cols = basis_names.copy()
+        basis_cols.insert(0, "MutationType")
+        nonzero_exposures = np.random.uniform(size=len(basis_names))
+        result = sp.run_PlotDecomposition(
+            denovo_mtx[denovo_cols],
+            denovo_name,
+            basis_mtx[basis_cols],
+            basis_names,
+            weights,
+            nonzero_exposures,
+            output_path,
+            project,
+            mtype,
+        )
+        merger.append(result)
+    # Write everything appended above to one file inside output_path.
+    merger.write(os.path.join(output_path, "Result_Decomposition_Plots_SV32.pdf"))
+    return time.time() - s  # seconds elapsed since s was taken
+
+
 if __name__ == "__main__":
     print("Running SBS matrix input tests...")
     cosmic_fit_SBS_matrix_test()
@@ -661,3 +719,5 @@ def gen_CNV48():
     print("Completed ID83 plots in", time_83, "seconds.")
     time_48 = gen_CNV48()
     print("Completed CNV48 plots in", time_48, "seconds.")
+    time_32 = gen_SV32()
+    print("Completed SV32 plots in", time_32, "seconds.")