diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index dc49ed74e..8ce64b100 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -16,7 +16,7 @@ env: GALAXY_FORK: galaxyproject GALAXY_BRANCH: release_23.2 MAX_CHUNKS: 4 - MAX_FILE_SIZE: 1M + MAX_FILE_SIZE: 2M concurrency: # Group runs by PR, but keep runs on the default branch separate # because we do not want to cancel ToolShed uploads diff --git a/deprecated/lumpy_sv/test-data/output_extended.vcf b/deprecated/lumpy_sv/test-data/output_extended.vcf deleted file mode 100644 index aedd9d8b8..000000000 --- a/deprecated/lumpy_sv/test-data/output_extended.vcf +++ /dev/null @@ -1,75 +0,0 @@ -##fileformat=VCFv4.2 -##source=LUMPY -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##ALT= -##ALT= -##ALT= -##ALT= -##ALT= -##ALT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sr_input_bam - Evidence: M00860:26:000000000-A6UGV:1:1101:22421:6659_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1885 1932 0x17456f0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:21503:6234_2 hg38_gold_U07000.1 13 52 hg38_gold_U07000.1 1885 1933 0x17451d0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:14108:4338_2 hg38_gold_U07000.1 8 53 hg38_gold_U07000.1 1879 1932 0x1747410 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:22281:3587_2 hg38_gold_U07000.1 8 52 hg38_gold_U07000.1 1879 1944 0x174d920 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:9129:3504_2 hg38_gold_U07000.1 13 50 hg38_gold_U07000.1 1872 1932 0x1748e60 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:10154:5887_2 hg38_gold_U07000.1 10 41 hg38_gold_U07000.1 1872 1931 0x17499d0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:9382:5932_2 hg38_gold_U07000.1 8 53 hg38_gold_U07000.1 1871 1921 0x174d580 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:22747:6442_2 hg38_gold_U07000.1 8 52 hg38_gold_U07000.1 1870 1933 0x1748cc0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:6411:6050_2 hg38_gold_U07000.1 13 49 hg38_gold_U07000.1 1868 1932 0x17486a0 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:23865:5544_2 hg38_gold_U07000.1 13 56 hg38_gold_U07000.1 1868 1923 0x1748480 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:20349:5252_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1868 1931 0x174ce80 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:22571:4203_2 hg38_gold_U07000.1 13 50 hg38_gold_U07000.1 1868 1926 0x1745050 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:16719:4051_2 hg38_gold_U07000.1 8 51 hg38_gold_U07000.1 1868 1932 0x1747b70 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:8961:3182_2 hg38_gold_U07000.1 15 51 hg38_gold_U07000.1 1868 1917 0x1744130 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:18427:2937_2 hg38_gold_U07000.1 8 41 hg38_gold_U07000.1 1868 1932 0x174b760 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:21139:5913_2 hg38_gold_U07000.1 12 50 hg38_gold_U07000.1 1868 1924 0x174d640 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:10523:5853_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1868 1935 0x174de60 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:16433:5445_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1868 1932 0x174d870 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:12490:4165_2 hg38_gold_U07000.1 13 58 hg38_gold_U07000.1 1868 1917 0x174d9f0 0 - + id:1 weight:1 -hg38_gold_U07000.1 14 1_1 N [hg38_gold_U07000.1:1876[N . . SVTYPE=BND;STRANDS=--:19;EVENT=1;MATEID=1_2;CIPOS=0,0;CIEND=0,2;CIPOS95=0,0;CIEND95=0,0;SU=19;SR=19;PRPOS=1;PREND=0.99996,3.98091e-05,1.58483e-09 GT:SU:SR ./.:19:19 -hg38_gold_U07000.1 1876 1_2 N [hg38_gold_U07000.1:14[N . . SVTYPE=BND;STRANDS=--:19;SECONDARY;EVENT=1;MATEID=1_1;CIPOS=0,2;CIEND=0,0;CIPOS95=0,0;CIEND95=0,0;SU=19;SR=19;PRPOS=0.99996,3.98091e-05,1.58483e-09;PREND=1 GT:SU:SR ./.:19:19 - Evidence: M00860:26:000000000-A6UGV:1:1101:7043:5583_2 hg38_gold_U07000.1 13 56 hg38_gold_U07000.1 1899 1935 0x1742010 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:19472:2185_2 hg38_gold_U07000.1 0 56 hg38_gold_U07000.1 1898 1945 0x1744270 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:9324:6462_2 hg38_gold_U07000.1 0 51 hg38_gold_U07000.1 1898 1933 0x1748fd0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:23764:6273_2 hg38_gold_U07000.1 13 48 hg38_gold_U07000.1 1898 1945 0x17490a0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:7772:5906_2 hg38_gold_U07000.1 0 54 hg38_gold_U07000.1 1898 1942 0x1747240 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:6971:4906_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1898 1935 0x1746170 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:10511:4776_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1898 1934 0x174a840 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:19677:3538_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1898 1940 0x174c190 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:22109:4922_2 hg38_gold_U07000.1 13 53 hg38_gold_U07000.1 1897 1933 0x1743ac0 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:14027:4470_2 hg38_gold_U07000.1 0 51 hg38_gold_U07000.1 1896 1933 0x174b500 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:22736:5607_2 hg38_gold_U07000.1 13 57 hg38_gold_U07000.1 1893 1945 0x174cf30 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:10768:5508_2 hg38_gold_U07000.1 13 56 hg38_gold_U07000.1 1893 1935 0x17480b0 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:14449:3437_2 hg38_gold_U07000.1 13 48 hg38_gold_U07000.1 1893 1933 0x174b150 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:18009:3360_2 hg38_gold_U07000.1 13 47 hg38_gold_U07000.1 1893 1925 0x1749390 0 + - id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:16615:6513_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1893 1931 0x174b860 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:19235:5076_2 hg38_gold_U07000.1 13 45 hg38_gold_U07000.1 1893 1932 0x174a790 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:14629:4828_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1893 1932 0x174b360 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:22780:4994_2 hg38_gold_U07000.1 13 50 hg38_gold_U07000.1 1892 1924 0x174aaf0 0 - + id:1 weight:1 - Evidence: M00860:26:000000000-A6UGV:1:1101:12387:3929_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1892 1931 0x174c000 0 - + id:1 weight:1 -hg38_gold_U07000.1 10 2_1 N [hg38_gold_U07000.1:1897[N . . SVTYPE=BND;STRANDS=--:19;EVENT=2;MATEID=2_2;CIPOS=-1,0;CIEND=-7,5;CIPOS95=0,1;CIEND95=-2,1;IMPRECISE;SU=19;SR=19;PRPOS=9.99999e-13,9.99999e-07;PREND=4.89496e-31,1.94872e-23,7.75799e-16,3.08851e-08,0.0308851,0.0775799,0.194872,0.489496,0.194872,0.0122956,1.94872e-09,4.89496e-17,1.22956e-24 GT:SU:SR ./.:19:19 -hg38_gold_U07000.1 1897 2_2 N [hg38_gold_U07000.1:10[N . . SVTYPE=BND;STRANDS=--:19;SECONDARY;EVENT=2;MATEID=2_1;CIPOS=-7,5;CIEND=-1,0;CIPOS95=-2,1;CIEND95=0,1;IMPRECISE;SU=19;SR=19;PRPOS=4.89496e-31,1.94872e-23,7.75799e-16,3.08851e-08,0.0308851,0.0775799,0.194872,0.489496,0.194872,0.0122956,1.94872e-09,4.89496e-17,1.22956e-24;PREND=9.99999e-13,9.99999e-07 GT:SU:SR ./.:19:19 diff --git a/deprecated/lumpy_sv/.shed.yml b/tools/lumpy_sv/.shed.yml similarity index 100% rename from deprecated/lumpy_sv/.shed.yml rename to tools/lumpy_sv/.shed.yml diff --git a/deprecated/lumpy_sv/extractSplitReads_BwaMem.py b/tools/lumpy_sv/extractSplitReads_BwaMem.py similarity index 97% rename from deprecated/lumpy_sv/extractSplitReads_BwaMem.py rename to tools/lumpy_sv/extractSplitReads_BwaMem.py index 2853460f5..2dbc7461e 100644 --- a/deprecated/lumpy_sv/extractSplitReads_BwaMem.py +++ b/tools/lumpy_sv/extractSplitReads_BwaMem.py @@ -21,7 +21,7 @@ def extractSplitsFromBwaMem(inFile, numSplits, includeDups, minNonOverlap): continue for el in sam.tags: if "SA:" in el: - if(len(el.split(";"))) <= numSplits: + if (len(el.split(";"))) <= numSplits: split = 1 mate = el.split(",") mateCigar = mate[3] @@ -103,7 +103,6 @@ def extractCigarOps(cigar, flag): cigarOps = [] for opString in cigarOpStrings: cigarOpList = atomicCigarSearch.findall(opString) -# print cigarOpList # "struct" for the op and it's length cigar = cigarOp(cigarOpList[0][0], cigarOpList[0][1]) # add to the list of cigarOps @@ -120,8 +119,7 @@ def extractCigarOps(cigar, flag): cigar = cigarOp(cigarOpList[0][0], cigarOpList[0][1]) # add to the list of cigarOps cigarOps.append(cigar) -# cigarOps = cigarOps - return(cigarOps) + return cigarOps def calcQueryPosFromCigar(cigarOps): @@ -202,7 +200,8 @@ def main(): help='''Include alignments marked as duplicates. Default=False''') parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", - default=20, type="int", help='''minimum non-overlap between + default=20, type="int", + help='''minimum non-overlap between split alignments on the query (default=20)''', metavar="INT") (opts, args) = parser.parse_args() diff --git a/deprecated/lumpy_sv/lumpy.xml b/tools/lumpy_sv/lumpy.xml similarity index 90% rename from deprecated/lumpy_sv/lumpy.xml rename to tools/lumpy_sv/lumpy.xml index 3201b8130..1462159dc 100644 --- a/deprecated/lumpy_sv/lumpy.xml +++ b/tools/lumpy_sv/lumpy.xml @@ -1,9 +1,9 @@ - + find structural variants - lumpy-sv - samtools - numpy + lumpy-sv + samtools + numpy @@ -33,7 +33,7 @@ |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt && mean=\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) && stdev=\$(cat meandev.txt | sed -r s/mean:.+stdev://) && - lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt + lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt #if $output_format == "BEDPE": -b #end if @@ -41,7 +41,7 @@ -sr id:'$one_sample_bam',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > '$vcf_call' #elif $seq_method.seq_method_list == "single-read": samtools view -@ \${GALAXY_SLOTS:-4} -h '$one_sample_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -@ \${GALAXY_SLOTS:-4} -O bam -o input.splitters.bam && - lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt + lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt #if $output_format == "BEDPE": -b #end if @@ -61,7 +61,7 @@ meanB=\$(cat meandevB.txt | sed s/mean:// | sed -r s/stdev:.+//) && stdevA=\$(cat meandevA.txt | sed -r s/mean:.+stdev://) && stdevB=\$(cat meandevB.txt | sed -r s/mean:.+stdev://) && - lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt + lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt #if $output_format == "BEDPE": -b #end if @@ -72,7 +72,7 @@ #elif $seq_method.seq_method_list == "single-read": samtools view -@ \${GALAXY_SLOTS:-4} -h '$sample_a_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -@ \${GALAXY_SLOTS:-4} -O bam -o input.splitters.bam && samtools view -@ \${GALAXY_SLOTS:-4} -h '$sample_b_bam' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -@ \${GALAXY_SLOTS:-4} -O bam -o input.B.splitters.bam && - lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt + lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt #if $output_format == "BEDPE": -b #end if @@ -116,7 +116,6 @@ - @@ -127,7 +126,6 @@ - @@ -139,21 +137,21 @@ - + seq_method['seq_method_list'] == "paired-end" - + seq_method['seq_method_list'] == "paired-end" analysis_type['analysis_type_list'] == "two_sample" - + analysis_type['analysis_type_list'] == "two_sample" seq_method['seq_method_list'] == "paired-end" - + seq_method['seq_method_list'] == "paired-end" analysis_type['analysis_type_list'] == "two_sample" @@ -165,7 +163,7 @@ - + @@ -177,7 +175,7 @@ - + @@ -188,7 +186,7 @@ - + @@ -197,11 +195,10 @@ - - + diff --git a/deprecated/lumpy_sv/pairend_distro.py b/tools/lumpy_sv/pairend_distro.py similarity index 100% rename from deprecated/lumpy_sv/pairend_distro.py rename to tools/lumpy_sv/pairend_distro.py diff --git a/deprecated/lumpy_sv/test-data/output.vcf b/tools/lumpy_sv/test-data/output.vcf similarity index 100% rename from deprecated/lumpy_sv/test-data/output.vcf rename to tools/lumpy_sv/test-data/output.vcf diff --git a/tools/lumpy_sv/test-data/output_extended.vcf b/tools/lumpy_sv/test-data/output_extended.vcf new file mode 100644 index 000000000..e94addd6c --- /dev/null +++ b/tools/lumpy_sv/test-data/output_extended.vcf @@ -0,0 +1,37 @@ +##fileformat=VCFv4.2 +##source=LUMPY +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sr_input_bam +hg38_gold_U07000.1 14 1_1 N [hg38_gold_U07000.1:1876[N . . SVTYPE=BND;STRANDS=--:19;EVENT=1;MATEID=1_2;CIPOS=0,0;CIEND=0,2;CIPOS95=0,0;CIEND95=0,0;SU=19;SR=19;PRPOS=1;PREND=0.99996,3.98091e-05,1.58483e-09 GT:SU:SR ./.:19:19 +hg38_gold_U07000.1 1876 1_2 N [hg38_gold_U07000.1:14[N . . SVTYPE=BND;STRANDS=--:19;SECONDARY;EVENT=1;MATEID=1_1;CIPOS=0,2;CIEND=0,0;CIPOS95=0,0;CIEND95=0,0;SU=19;SR=19;PRPOS=0.99996,3.98091e-05,1.58483e-09;PREND=1 GT:SU:SR ./.:19:19 +hg38_gold_U07000.1 10 2_1 N [hg38_gold_U07000.1:1897[N . . SVTYPE=BND;STRANDS=--:19;EVENT=2;MATEID=2_2;CIPOS=-1,0;CIEND=-7,5;CIPOS95=0,1;CIEND95=-2,1;IMPRECISE;SU=19;SR=19;PRPOS=9.99999e-13,9.99999e-07;PREND=4.89496e-31,1.94872e-23,7.75799e-16,3.08851e-08,0.0308851,0.0775799,0.194872,0.489496,0.194872,0.0122956,1.94872e-09,4.89496e-17,1.22956e-24 GT:SU:SR ./.:19:19 +hg38_gold_U07000.1 1897 2_2 N [hg38_gold_U07000.1:10[N . . SVTYPE=BND;STRANDS=--:19;SECONDARY;EVENT=2;MATEID=2_1;CIPOS=-7,5;CIEND=-1,0;CIPOS95=-2,1;CIEND95=0,1;IMPRECISE;SU=19;SR=19;PRPOS=4.89496e-31,1.94872e-23,7.75799e-16,3.08851e-08,0.0308851,0.0775799,0.194872,0.489496,0.194872,0.0122956,1.94872e-09,4.89496e-17,1.22956e-24;PREND=9.99999e-13,9.99999e-07 GT:SU:SR ./.:19:19 diff --git a/deprecated/lumpy_sv/test-data/output_two.paired_end.vcf b/tools/lumpy_sv/test-data/output_two.paired_end.vcf similarity index 100% rename from deprecated/lumpy_sv/test-data/output_two.paired_end.vcf rename to tools/lumpy_sv/test-data/output_two.paired_end.vcf diff --git a/deprecated/lumpy_sv/test-data/output_two.vcf b/tools/lumpy_sv/test-data/output_two.vcf similarity index 100% rename from deprecated/lumpy_sv/test-data/output_two.vcf rename to tools/lumpy_sv/test-data/output_two.vcf diff --git a/tools/lumpy_sv/test-data/sample_Del.bam b/tools/lumpy_sv/test-data/sample_Del.bam new file mode 100644 index 000000000..3981f36c9 Binary files /dev/null and b/tools/lumpy_sv/test-data/sample_Del.bam differ diff --git a/tools/lumpy_sv/test-data/sample_cle.bam b/tools/lumpy_sv/test-data/sample_cle.bam new file mode 100644 index 000000000..b33e18d1c Binary files /dev/null and b/tools/lumpy_sv/test-data/sample_cle.bam differ diff --git a/deprecated/lumpy_sv/test-data/sr.input.bam b/tools/lumpy_sv/test-data/sr.input.bam similarity index 100% rename from deprecated/lumpy_sv/test-data/sr.input.bam rename to tools/lumpy_sv/test-data/sr.input.bam