Skip to content

Commit

Permalink
Can specify sample name
Browse files Browse the repository at this point in the history
  • Loading branch information
kvg committed Dec 5, 2024
1 parent bc12551 commit e40daff
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 50 deletions.
4 changes: 2 additions & 2 deletions src/hidive/src/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use skydive::wmec::*;

pub fn start(
output: &PathBuf,
sample_name: &String,
loci_list: &Vec<String>,
reference_fasta_path: &PathBuf,
bam_path: &PathBuf,
Expand Down Expand Up @@ -42,9 +43,8 @@ pub fn start(

let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
vcf_header.push_record(header_gt_line.as_bytes());
vcf_header.push_sample("test_sample".as_bytes());
vcf_header.push_sample(sample_name.as_bytes());

// let mut vcf = Writer::from_stdout(&vcf_header, true, Format::Vcf).unwrap();
let mut vcf = Writer::from_path(output, &vcf_header, true, Format::Vcf).unwrap();

for (chr, start, stop, name) in loci {
Expand Down
6 changes: 6 additions & 0 deletions src/hidive/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,10 @@ enum Commands {
#[clap(short, long, value_parser, default_value = "/dev/stdout")]
output: PathBuf,

/// Sample name.
#[clap(short, long, required = true, value_parser)]
sample_name: String,

/// One or more genomic loci ("contig:start-stop[|name]", or BED format) to extract from WGS BAM files.
#[clap(short, long, value_parser, required = true)]
loci: Vec<String>,
Expand Down Expand Up @@ -597,12 +601,14 @@ fn main() {
}
Commands::Call {
output,
sample_name,
loci,
reference_fasta_path,
bam_path,
} => {
call::start(
&output,
&sample_name,
&loci,
&reference_fasta_path,
&bam_path
Expand Down
82 changes: 34 additions & 48 deletions wdl/HidiveCorrect.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -42,25 +42,30 @@ workflow Hidive {
input:
locus = locus,
reference = reference,
sample_name = sample_name,
aligned_reads_bam = Correct.corrected_bam,
aligned_reads_csi = Correct.corrected_csi,
}

call Phase {
call Consensus {
input:
locus = locus,
reference = reference,
called_bam = Call.called_bam,
called_csi = Call.called_csi,
sample_name = sample_name,
calls_vcf = Call.calls_vcf,
calls_tbi = Call.calls_tbi,
}

output {
File corrected_bam = Correct.corrected_bam
File corrected_csi = Correct.corrected_csi
File called_bam = Call.called_bam
File called_csi = Call.called_csi
File phased_gvcf = Phase.phased_gvcf
File phased_gvcf_tbi = Phase.phased_gvcf_tbi

File calls_vcf = Call.calls_vcf
File calls_tbi = Call.calls_tbi

File h1_bam = Consensus.h1_bam
File h1_csi = Consensus.h1_csi
File h2_bam = Consensus.h2_bam
File h2_csi = Consensus.h2_csi
}
}

Expand Down Expand Up @@ -136,7 +141,7 @@ task Rescue {
memory: "~{memory_gb} GB"
cpu: num_cpus
disks: "local-disk ~{disk_size_gb} SSD"
# maxRetries: 1
maxRetries: 2
}
}

Expand Down Expand Up @@ -180,11 +185,13 @@ task Correct {

task Call {
input {
String locus
File reference
String sample_name

File aligned_reads_bam
File aligned_reads_csi

String locus
String prefix = "out"

Int num_cpus = 8
Expand All @@ -196,70 +203,49 @@ task Call {
command <<<
set -x

hidive call -l "~{locus}" -r ~{reference} ~{aligned_reads_bam} | \
minimap2 -ayYL -x map-hifi ~{reference} - | \
samtools sort --write-index -O BAM -o ~{prefix}.bam
hidive call -s "~{sample_name}" -l "~{locus}" -r ~{reference} ~{aligned_reads_bam} | bgzip -c > ~{prefix}.vcf.bgz
tabix -p vcf ~{prefix}.vcf.bgz
>>>

output {
File called_bam = "~{prefix}.bam"
File called_csi = "~{prefix}.bam.csi"
File calls_vcf = "~{prefix}.vcf.bgz"
File calls_tbi = "~{prefix}.vcf.bgz.tbi"
}

runtime {
docker: "us.gcr.io/broad-dsp-lrma/lr-hidive:0.1.98"
docker: "us.gcr.io/broad-dsp-lrma/lr-hidive:kvg_call"
memory: "~{memory_gb} GB"
cpu: num_cpus
disks: "local-disk ~{disk_size_gb} SSD"
}
}

task Phase {
task Consensus {
input {
File called_bam
File called_csi
File reference
File calls_vcf
File calls_tbi

String sample_name
String prefix = "out"
String locus

Int num_cpus = 2
Int num_cpus = 8
}

Int disk_size_gb = 1 + 2*ceil(size([reference, called_bam, called_csi], "GB"))
Int disk_size_gb = 1 + 2*ceil(size([reference, calls_vcf, calls_tbi], "GB"))
Int memory_gb = 2*num_cpus

command <<<
set -x

samtools view -h ~{called_bam} | grep -e '^@' -e '^h1' | samtools view -bS --write-index -o h1.bam
samtools view -h ~{called_bam} | grep -e '^@' -e '^h2' | samtools view -bS --write-index -o h2.bam

bcftools mpileup -f ~{reference} -g 0 -Ou -a DP -m 1 --indel-size 30000 h1.bam | bcftools call -m --ploidy 1 -g 0 -Oz -W -o h1.variants.gvcf.bgz
bcftools mpileup -f ~{reference} -g 0 -Ou -a DP -m 1 --indel-size 30000 h2.bam | bcftools call -m --ploidy 1 -g 0 -Oz -W -o h2.variants.gvcf.bgz

bcftools merge -g ~{reference} -0 -Ov h1.variants.gvcf.bgz h2.variants.gvcf.bgz | \
awk -F'\t' '
$0 ~ /^##/ {print; next}
$0 ~ /^#CHROM/ {
for(i=1; i<=9; i++) printf "%s\t", $i
print "~{sample_name}"
next
}
{
h1 = ($10 ~ /^\./) ? "0" : substr($10,1,1)
h2 = ($11 ~ /^\./) ? "0" : substr($11,1,1)
for(i=1; i<=8; i++) printf "%s\t", $i
printf "GT\t%s|%s\n", h1, h2
}' > ~{prefix}.phased.gvcf

bgzip -c ~{prefix}.phased.gvcf > ~{prefix}.phased.gvcf.bgz
tabix -p vcf ~{prefix}.phased.gvcf.bgz
samtools faidx ~{reference} ~{locus} | bcftools consensus -H 1pIu ~{calls_vcf} | minimap2 -ayYL -x map-hifi ~{reference} - | samtools sort --write-index -O BAM -o h1.bam
samtools faidx ~{reference} ~{locus} | bcftools consensus -H 2pIu ~{calls_vcf} | minimap2 -ayYL -x map-hifi ~{reference} - | samtools sort --write-index -O BAM -o h2.bam
>>>

output {
File phased_gvcf = "~{prefix}.phased.gvcf.bgz"
File phased_gvcf_tbi = "~{prefix}.phased.gvcf.bgz.tbi"
File h1_bam = "h1.bam"
File h1_csi = "h1.bam.csi"
File h2_bam = "h2.bam"
File h2_csi = "h2.bam.csi"
}

runtime {
Expand Down

0 comments on commit e40daff

Please sign in to comment.