Skip to content

Commit

Permalink
Merge pull request #117 from AlexandrovLab/U57
Browse files Browse the repository at this point in the history
U57
  • Loading branch information
mdbarnesUCSD authored Mar 7, 2024
2 parents 486a8d8 + 14acaa0 commit 23e9b3d
Show file tree
Hide file tree
Showing 9 changed files with 822 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MutationType CN1 CN2 CN9 CN20 CNV48B CNV48D
MutationType CN1 CN2 CN9 CN20 CN3 CN4
0:homdel:0-100kb 0.00179505 0.00223767 0.00818184 0.00004078 0.00000295 0.00000075
0:homdel:100kb-1Mb 0.00380710 0.00719579 0.02099919 0.00017845 0.00039561 0.02703219
0:homdel:>1Mb 0.00243675 0.00383711 0.00900922 0.00011722 0.00122668 0.02703635
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
MutationType SV1 SV2 SV3 SV4 SV5 SV6 SV7 SV9
clustered_del_1-10Kb 0.00001830 0.00001750 0.00000831 0.00558463 0.00162643 0.00484161 0.00036457 0.01689226
clustered_del_10-100Kb 0.00000013 0.00000500 0.00000000 0.00883802 0.00395326 0.00397105 0.00493403 0.02027149
clustered_del_100Kb-1Mb 0.00004260 0.00000257 0.00000253 0.01342056 0.00264242 0.01567818 0.00175481 0.04559844
clustered_del_1Mb-10Mb 0.00000003 0.00000557 0.00000000 0.01941683 0.00079517 0.06315566 0.00000135 0.08553749
clustered_del_>10Mb 0.00000003 0.00000224 0.00000009 0.01212996 0.00000205 0.13914616 0.00000121 0.00004090
clustered_tds_1-10Kb 0.00000029 0.00002000 0.00233538 0.00119602 0.00044748 0.00173254 0.00023931 0.00407692
clustered_tds_10-100Kb 0.00080029 0.00003770 0.00137651 0.00406629 0.00092090 0.00411956 0.00031235 0.00799563
clustered_tds_100Kb-1Mb 0.00217807 0.00001180 0.00018608 0.00927806 0.00180766 0.01359699 0.00000078 0.04878886
clustered_tds_1Mb-10Mb 0.00006350 0.00000161 0.00000004 0.01669456 0.00008410 0.06927579 0.00000076 0.07607224
clustered_tds_>10Mb 0.00000003 0.00000314 0.00000010 0.00950079 0.00003530 0.14468700 0.00000187 0.00019873
clustered_inv_1-10Kb 0.00003510 0.00009050 0.00000000 0.01188233 0.01068676 0.00333129 0.00000116 0.00901519
clustered_inv_10-100Kb 0.00000113 0.00002000 0.00000415 0.01283746 0.00421801 0.00778075 0.00000166 0.03440818
clustered_inv_100Kb-1Mb 0.00062736 0.00000829 0.00000000 0.01893295 0.00104651 0.02657075 0.00000052 0.08534106
clustered_inv_1Mb-10Mb 0.00000637 0.00000052 0.00000003 0.03875053 0.00018613 0.13269196 0.00000021 0.15018331
clustered_inv_>10Mb 0.00000001 0.00000229 0.00000003 0.02250855 0.00000130 0.26682614 0.00000158 0.00004010
clustered_trans 0.00005860 0.00000182 0.00000829 0.74672823 0.01500836 0.00000039 0.00000036 0.00000269
non-clustered_del_1-10Kb 0.00016322 0.01077137 0.03108973 0.00000143 0.27051044 0.00049070 0.11080824 0.03271109
non-clustered_del_10-100Kb 0.01062551 0.00189318 0.00026854 0.00000001 0.04236249 0.00047467 0.40908574 0.03762502
non-clustered_del_100Kb-1Mb 0.01031305 0.00009900 0.00051531 0.00110315 0.02494482 0.00056581 0.34862452 0.05284083
non-clustered_del_1Mb-10Mb 0.00810848 0.04772402 0.00323839 0.00114858 0.04036508 0.00908681 0.02776373 0.04842356
non-clustered_del_>10Mb 0.00460793 0.05938062 0.00861972 0.00280099 0.02737192 0.01709695 0.01575099 0.01781587
non-clustered_tds_1-10Kb 0.00000013 0.00004480 0.42763680 0.00000015 0.08730244 0.00000010 0.01078117 0.01195956
non-clustered_tds_10-100Kb 0.04599380 0.00002460 0.32675443 0.00000001 0.12292716 0.00000054 0.00652355 0.01970948
non-clustered_tds_100Kb-1Mb 0.45491853 0.01413625 0.01600693 0.00008590 0.06600995 0.00067638 0.00545841 0.01400244
non-clustered_tds_1Mb-10Mb 0.35062795 0.04029646 0.00096917 0.00013931 0.01815304 0.00881413 0.00239992 0.02583554
non-clustered_tds_>10Mb 0.00580397 0.03967873 0.00649925 0.00298543 0.01284238 0.01668428 0.00007000 0.00744539
non-clustered_inv_1-10Kb 0.00440008 0.00000085 0.00411229 0.00018744 0.04638068 0.00022783 0.00000043 0.01662502
non-clustered_inv_10-100Kb 0.01048762 0.00459802 0.00275734 0.00116401 0.03219888 0.00171438 0.03025930 0.01859228
non-clustered_inv_100Kb-1Mb 0.01700822 0.05558915 0.00038638 0.00198348 0.03887257 0.00243843 0.01706313 0.03888396
non-clustered_inv_1Mb-10Mb 0.01075404 0.09313637 0.00482722 0.00198335 0.03699181 0.01686964 0.00553730 0.04676524
non-clustered_inv_>10Mb 0.01304926 0.09976361 0.01624505 0.01157483 0.04637754 0.02742841 0.00186541 0.02399899
non-clustered_trans 0.04930641 0.53263233 0.14615191 0.02307616 0.04292692 0.00002510 0.00039160 0.00230227
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
MutationType SV32A SV32B SV32C
clustered_del_1-10Kb 0.00662723 0.00177044 0.00000007
clustered_del_10-100Kb 0.00972409 0.00167625 0.00000003
clustered_del_100Kb-1Mb 0.02672959 0.00069852 0.00000004
clustered_del_1Mb-10Mb 0.05108407 0.00033823 0.00000009
clustered_del_>10Mb 0.03969927 0.00000047 0.00000040
clustered_tds_1-10Kb 0.00166860 0.00057069 0.00011002
clustered_tds_10-100Kb 0.00695235 0.00099348 0.00000008
clustered_tds_100Kb-1Mb 0.02396912 0.00136236 0.00000000
clustered_tds_1Mb-10Mb 0.04294237 0.00000963 0.00000009
clustered_tds_>10Mb 0.04211685 0.00000072 0.00000016
clustered_inv_1-10Kb 0.01459632 0.00452079 0.00000000
clustered_inv_10-100Kb 0.01751233 0.00170391 0.00000000
clustered_inv_100Kb-1Mb 0.03681359 0.00037369 0.00000012
clustered_inv_1Mb-10Mb 0.10623513 0.00000047 0.00000013
clustered_inv_>10Mb 0.07525245 0.00108337 0.00000002
clustered_trans 0.23293450 0.00000023 0.00000022
non-clustered_del_1-10Kb 0.00016344 0.11881046 0.04395760
non-clustered_del_10-100Kb 0.00447025 0.09378964 0.00000052
non-clustered_del_100Kb-1Mb 0.01586304 0.03619608 0.00000016
non-clustered_del_1Mb-10Mb 0.02913311 0.02808863 0.00010709
non-clustered_del_>10Mb 0.01433401 0.03214281 0.00690197
non-clustered_tds_1-10Kb 0.00022787 0.00000001 0.34090417
non-clustered_tds_10-100Kb 0.00000028 0.01990271 0.34120828
non-clustered_tds_100Kb-1Mb 0.00938708 0.10950006 0.06107216
non-clustered_tds_1Mb-10Mb 0.02086042 0.02455653 0.02874254
non-clustered_tds_>10Mb 0.01097690 0.02568377 0.01287658
non-clustered_inv_1-10Kb 0.00947122 0.06278640 0.00888667
non-clustered_inv_10-100Kb 0.00786742 0.03894469 0.00344490
non-clustered_inv_100Kb-1Mb 0.01588333 0.02367522 0.00125801
non-clustered_inv_1Mb-10Mb 0.03501238 0.04148272 0.00366291
non-clustered_inv_>10Mb 0.03495364 0.06231916 0.01299224
non-clustered_trans 0.05653771 0.26701783 0.13387279
153 changes: 124 additions & 29 deletions SigProfilerAssignment/DecompositionPlots/PlotDecomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@
import SigProfilerAssignment
import SigProfilerAssignment.DecompositionPlots
from SigProfilerAssignment.DecompositionPlots import SigProfilerPlottingMatrix as mPlt
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_SBS96 as spd_96
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_SBS288 as spd_288
from SigProfilerAssignment.DecompositionPlots import (
PlotDecomposition_SBS96 as spd_96,
PlotDecomposition_SBS288 as spd_288,
PlotDecomposition_SBS1536 as spd_1536,
PlotDecomposition_DBS78 as spd_78,
PlotDecomposition_ID83 as spd_83,
PlotDecomposition_CNV48 as cnv_48,
PlotDecomposition_SV32 as sv_32,
)
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_DBS78 as spd_78
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_ID83 as spd_83
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition_CNV48 as cnv_48
from SigProfilerAssignment import decompose_subroutines as sub

# imports for working with plots in memory
Expand All @@ -43,11 +44,13 @@
import json
import base64


# Global Variables
# Context identifiers (kept as strings) grouped by mutation class. Functions in
# this module test ``context_type`` / ``mtype`` for membership in these lists
# to decide which class-specific plotting path to take.
SBS_CONTEXTS = ["6", "24", "96", "288", "384", "1536", "6144"]
DBS_CONTEXTS = ["78", "186", "1248", "2976"]
ID_CONTEXTS = ["28", "83", "415"]
CNV_CONTEXTS = ["48"]
SV_CONTEXTS = ["32"]
MTYPE_OPTIONS = [
"6",
"24",
Expand All @@ -62,14 +65,14 @@
"186",
"1248",
"2976",
"32",
]
# Directory of the installed DecompositionPlots sub-package.
DECOMPOSITION_PATH = SigProfilerAssignment.DecompositionPlots.__path__[0]
# Reference signature directory inside the installed SigProfilerAssignment package.
REFSIG_PATH = os.path.join(
SigProfilerAssignment.__path__[0], "data/Reference_Signatures"
)
# Directory holding the COSMIC plot template JSON files; created on demand by
# install_cosmic_plots when it does not exist.
TEMPLATE_PATH = os.path.join(DECOMPOSITION_PATH, "CosmicTemplates")


# Remove templates so that they can be rebuilt
def remove_cosmic_templates():
if not os.path.exists(TEMPLATE_PATH):
Expand All @@ -84,10 +87,11 @@ def remove_cosmic_templates():
def install_cosmic_plots(
context_type="96", genome_build="GRCh37", cosmic_version="3.4", exome=False
):

if not os.path.exists(TEMPLATE_PATH):
os.mkdir(TEMPLATE_PATH)

# determine if context is from SBS, ID, DBS, or CNV
# determine if context is from SBS, ID, DBS, CNV or SV
context_type_str = ""
if context_type in SBS_CONTEXTS:
context_type_str = "SBS"
Expand All @@ -102,6 +106,9 @@ def install_cosmic_plots(
elif context_type in CNV_CONTEXTS:
context_type_str = "CNV"
cosmic_mtype = "48"
elif context_type in SV_CONTEXTS:
context_type_str = "SV"
cosmic_mtype = "32"
else:
raise ValueError("ERROR: context", context_type, "not in context lists.")

Expand Down Expand Up @@ -138,6 +145,20 @@ def install_cosmic_plots(
genome_build = "GRCh37"
exome_str = ""

# CNV signatures exome=False, genome_build=GRCh37
if context_type in CNV_CONTEXTS:
cosmic_file_name = "COSMIC_v" + str(cosmic_version) + "_CN_GRCh37.txt"
json_file_name = "COSMIC_v" + str(cosmic_version) + "_CN_GRCh37.json"
genome_build = "GRCh37"
exome_str = ""

# SV signatures exome=False, genome_build=GRCh38
if context_type in SV_CONTEXTS:
cosmic_file_name = "COSMIC_v" + str(cosmic_version) + "_SV_GRCh38.txt"
json_file_name = "COSMIC_v" + str(cosmic_version) + "_SV_GRCh38.json"
genome_build = "GRCh38"
exome_str = ""

# Load cosmic plots if they exist
filename = os.path.join(TEMPLATE_PATH, json_file_name)
if os.path.exists(filename):
Expand Down Expand Up @@ -165,7 +186,6 @@ def install_cosmic_plots(
+ exome_str,
"now...",
)

# Create the respective plots
if context_type_str == "SBS":
cosmic_buff_plots = sigPlt.plotSBS(
Expand Down Expand Up @@ -195,6 +215,25 @@ def install_cosmic_plots(
percentage=True,
savefig_format="PIL_Image",
)
elif context_type_str == "CNV":
cosmic_buff_plots = sigPlt.plotCNV(
cosmic_file_path,
"buffer",
"buffer",
percentage=True,
aggregate=False,
read_from_file=False,
savefig_format="PIL_Image",
)
elif context_type_str == "SV":
cosmic_buff_plots = sigPlt.plotSV(
cosmic_file_path,
"buffer",
"buffer",
percentage=True,
aggregate=False,
savefig_format="PIL_Image",
)

# Process the plots to be stored in JSON file
cosmic_img_dict = {}
Expand Down Expand Up @@ -413,8 +452,33 @@ def genCNV_pngs(denovo_mtx, basis_mtx, output_path, project, mtype):
return denovo_plots, basis_plots


def genSV_pngs(denovo_mtx, basis_mtx, output_path, project, mtype):
    """Plot the de novo and, optionally, the basis SV signatures.

    Both matrices are drawn with ``sigPlt.plotSV`` using identical settings
    (``percentage=True``, ``aggregate=False``, ``savefig_format="PIL_Image"``).

    Args:
        denovo_mtx: De novo signature matrix, forwarded to ``sigPlt.plotSV``.
        basis_mtx: Basis signature matrix, or ``None`` to skip basis plots.
        output_path: Output location forwarded to ``sigPlt.plotSV``.
        project: Project name forwarded to ``sigPlt.plotSV``.
        mtype: Mutation context; unused here, kept so the signature matches
            the call made from ``gen_sub_plots``.

    Returns:
        Tuple ``(denovo_plots, basis_plots)``. ``basis_plots`` is an empty
        dict when ``basis_mtx`` is ``None``.
    """
    # Shared keyword arguments so the two plotSV calls cannot drift apart.
    plot_options = dict(
        percentage=True,
        aggregate=False,
        savefig_format="PIL_Image",
    )

    denovo_plots = sigPlt.plotSV(denovo_mtx, output_path, project, **plot_options)

    # Default to an empty mapping so callers always receive a two-tuple.
    basis_plots = {}
    if basis_mtx is not None:
        basis_plots = sigPlt.plotSV(basis_mtx, output_path, project, **plot_options)

    return denovo_plots, basis_plots


# signames, weights
def gen_sub_plots(denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp):

# Make output directory
if not os.path.exists(output_path):
os.makedirs(output_path)
Expand Down Expand Up @@ -442,6 +506,11 @@ def gen_sub_plots(denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp)
denovo_mtx, basis_mtx, output_path, project, mtype
)
return denovo_plots, basis_plots
elif mtype in SV_CONTEXTS:
denovo_plots, basis_plots = genSV_pngs(
denovo_mtx, basis_mtx, output_path, project, mtype
)
return denovo_plots, basis_plots

else:
print("ERROR: mtype is " + mtype + " and is not yet supported.")
Expand Down Expand Up @@ -510,6 +579,15 @@ def gen_reconstructed_png_percent(
read_from_file=False,
savefig_format="PIL_Image",
)
elif mtype in SV_CONTEXTS:
reconstruction_plot = sigPlt.plotSV(
reconstruction_mtx,
output_path,
"reconstruction_" + project,
percentage=True,
aggregate=False,
savefig_format="PIL_Image",
)
else:
print("ERROR: mtype is " + mtype + " and is not yet supported.")

Expand Down Expand Up @@ -591,6 +669,16 @@ def gen_reconstructed_png_numerical(
read_from_file=False,
savefig_format="PIL_Image",
)
elif mtype in SV_CONTEXTS:
reconstruction_plot = sigPlt.plotSV(
reconstruction_mtx,
output_path,
"reconstruction_" + project,
percentage=True,
aggregate=False,
read_from_file=False,
savefig_format="PIL_Image",
)
else:
print("ERROR: mtype is " + mtype + " and is not yet supported.")

Expand All @@ -612,6 +700,7 @@ def gen_decomposition(
cosmic_version=None,
custom_text=None,
):

"""
Generate the correct plot based on mtype.
Expand Down Expand Up @@ -744,6 +833,22 @@ def gen_decomposition(
custom_text,
)
return byte_plot
elif mtype == "32":
byte_plot = sv_32.gen_decomposition(
denovo_name,
basis_names,
weights,
output_path,
project,
denovo_plots_dict,
basis_plots_dict,
reconstruction_plot_dict,
reconstruction,
statistics,
cosmic_version,
custom_text,
)
return byte_plot


def run_PlotDecomposition(
Expand Down Expand Up @@ -773,8 +878,7 @@ def run_PlotDecomposition(
basis_mtx: Pandas Dataframe. This format represents the catalog of mutations separated by tab.
basis_names: List of Strings. The names of the samples in denovo_mtx that
the denovo_name sample from denovo_mtx is decomposed into.
basis_names: List of Strings. The names of the samples in denovo_mtx that the denovo_name sample from denovo_mtx is decomposed into.
ie. basis_names=["SBS1", "SBS5", "SBS15", "SBS20"]
weights: List of Strings. The percentile weight corresponding to each basis
Expand All @@ -796,17 +900,9 @@ def run_PlotDecomposition(
None.
"""
# Create the denovo plots and load basis plots
if mtype != "48":
denovo_plots_dict = gen_sub_plots(
denovo_mtx, None, output_path, project, mtype, ss_decomp=False
)
denovo_plots_dict = denovo_plots_dict[0]
else:
# cnv basis plots need to be generated and not loaded
denovo_plots_dict, basis_plots_dict = gen_sub_plots(
denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp=False
)
# Create the matrix and plot for the reconstructed matrix
denovo_plots_dict, basis_plots_dict = gen_sub_plots(
denovo_mtx, basis_mtx, output_path, project, mtype, ss_decomp=False
)
reconstructed_mtx, reconstruction_plot_dict = gen_reconstructed_png_percent(
denovo_name, basis_mtx, basis_names, weights, output_path, project, mtype
)
Expand All @@ -816,14 +912,13 @@ def run_PlotDecomposition(
# Convert dictionary of bytes to dictionary of images
denovo_plots_dict = convert_to_imgReaderDict(denovo_plots_dict)
# Load in the COSMIC plots
if mtype != "48":
basis_plots_dict = install_cosmic_plots(
context_type=mtype,
genome_build=genome_build,
cosmic_version=cosmic_version,
exome=exome,
)
basis_plots_dict = {key: basis_plots_dict[key] for key in basis_names}
basis_plots_dict = install_cosmic_plots(
context_type=mtype,
genome_build=genome_build,
cosmic_version=cosmic_version,
exome=exome,
)
basis_plots_dict = {key: basis_plots_dict[key] for key in basis_names}
basis_plots_dict = convert_to_imgReaderDict(basis_plots_dict)
# Generate the reconstruction plot
reconstruction_plot_dict = convert_to_imgReaderDict(reconstruction_plot_dict)
Expand Down
Loading

0 comments on commit 23e9b3d

Please sign in to comment.