Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
fabio-cunial committed Feb 4, 2024
1 parent 5a403d1 commit bc0dd7b
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 8 deletions.
3 changes: 2 additions & 1 deletion docker/truvari_intrasample/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ RUN wget https://github.com/samtools/bcftools/releases/download/${bcftools_versi
&& bcftools --help

# TRUVARI
RUN pip3 install git+https://github.com/acenglish/truvari[email protected] \
RUN pip3 install truvari \
&& truvari --help
COPY ./resolve.py ${work_dir}
COPY ./inversion_guesser.py ${work_dir}
17 changes: 16 additions & 1 deletion docker/truvari_intrasample/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import truvari

REF_CLEAN = True # Set to false if you're working with the right reference
MAX_SV = 100_000 # Filter things smaller than this
MAX_SV = 100_000_000 # Filter things smaller than this

RC = str.maketrans("ATCG", "TAGC")
def do_rc(s):
Expand Down Expand Up @@ -41,6 +41,7 @@ def resolve(entry, ref):
entry.alts = [seq]
entry.stop = entry.start + 1
entry.qual = 1

return entry

if __name__ == '__main__':
Expand All @@ -61,7 +62,13 @@ def resolve(entry, ref):
n_header.contigs[ctg].remove_header()

out = pysam.VariantFile("/dev/stdout", 'w', header=n_header)
seen = set()
for entry in vcf:
key = truvari.entry_to_hash(entry)
if key in seen:
continue
seen.add(key)

if REF_CLEAN and entry.chrom not in ref.references:
continue

Expand All @@ -74,9 +81,17 @@ def resolve(entry, ref):

if entry is None or set(entry.alts[0]) == {'N'}:
continue
if entry.info['SVTYPE'] != 'INV':
entry.info['SVLEN'] = abs(len(entry.ref) - len(entry.alts[0]))
else:
entry.info['SVLEN'] = len(entry.ref)
# No more blank genotypes
n_gt = tuple([_ if _ is not None else 0 for _ in entry.samples[0]['GT']])
# Preserve phasing information
is_phased = entry.samples[0].phased
entry.samples[0]['GT'] = n_gt
entry.samples[0].phased = is_phased

entry.translate(n_header)
try:
out.write(entry)
Expand Down
14 changes: 8 additions & 6 deletions wdl/pipelines/TruvariIntrasample.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,13 @@ task TruvariIntrasampleImpl {
mkdir -p preprocessed
for in_vcf in ~{pav_vcf_gz} ~{pbsv_vcf_gz} ~{sniffles_vcf_gz}
do
prename=preprocessed/pre_inv_$(basename $in_vcf)
python ~{docker_dir}/resolve.py ${in_vcf} $ref \
| bcftools norm --check-ref s --fasta-ref $ref -N -m-any \
| bcftools view -i "SVTYPE != 'BND'" -O z -o ${prename}
tabix $prename
outname=preprocessed/$(basename $in_vcf)
python ~{docker_dir}/resolve.py ${in_vcf} ~{reference_fa} \
| bcftools norm --check-ref s --fasta-ref ~{reference_fa} -N -m-any \
| bcftools view -i "SVTYPE != 'BND'" -O z -o ${outname}
tabix $outname
python ~{docker_dir}/inversion_guesser.py -i $prename -o $outname
done

# Step 2 - merge
Expand All @@ -108,8 +110,8 @@ task TruvariIntrasampleImpl {
tabix ~{sample_id}.bcftools_merged.vcf.gz

# Step 3 - collapse
truvari collapse -i ~{sample_id}.bcftools_merged.vcf.gz -c removed.vcf.gz -k maxqual --gt --intra \
--pctseq 0.90 --pctsize 0.90 --refdist 500 \
truvari collapse -i ~{sample_id}.bcftools_merged.vcf.gz -c removed.vcf.gz \
--sizemin 0 --sizemax 1000000 -k maxqual --gt --intra --pctseq 0.90 --pctsize 0.90 --refdist 500 \
| bcftools sort -O z -o ~{sample_id}.truvari_collapsed.vcf.gz
tabix ~{sample_id}.truvari_collapsed.vcf.gz
>>>
Expand Down

0 comments on commit bc0dd7b

Please sign in to comment.