Skip to content

Commit

Permalink
Fixed B37 VCF output renderer to keep B37 contig names in the VCF out…
Browse files Browse the repository at this point in the history
…put file
  • Loading branch information
jamesemery committed Oct 11, 2023
1 parent d40a485 commit 47d0086
Show file tree
Hide file tree
Showing 8 changed files with 1,910 additions and 1,880 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,6 @@ public void apply(final VariantContext variant, final ReadsContext readsContext,
// Get the correct reference for B37/HG19 compliance:
// This is necessary because of the variant transformation that gets applied in VariantWalkerBase::apply.
final ReferenceContext correctReferenceContext = funcotatorEngine.getCorrectReferenceContext(variant, referenceContext);

// Place the variant on our queue to be funcotated:
enqueueAndHandleVariant(variant, correctReferenceContext, featureContext);
}
Expand Down Expand Up @@ -924,7 +923,10 @@ protected void enqueueAndHandleVariant(final VariantContext variant, final Refer

final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForVariant(variant, referenceContext, featureContext);

// This is necessary because we want to revert the variant contig name change, if it was applied in the VariantWalkerBase::apply method, before output.
final VariantContext variantContextForOutput = funcotatorEngine.getCorrectVariantContextForOutput(variant);

// At this point there is only one transcript ID in the funcotation map if canonical or best effect are selected
outputRenderer.write(variant, funcotationMap);
outputRenderer.write(variantContextForOutput, funcotationMap);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ public final class FuncotatorEngine implements AutoCloseable {
*/
private final boolean mustConvertInputContigsToHg19;

/**
* Whether the output variant contigs must be converted back to B37 from hg19 before being returned.
* (NOTE: this means that the output contigs will continue to use B37 contig names even if internally we converted them to hg19)
*/
private boolean mustRevertVariantContigsFromHg19ToB37 = false;

/**
* Whether this {@link FuncotatorEngine} has only produced annotations on variants that have been labeled by the
* {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationFactory} as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#IGR}.
Expand Down Expand Up @@ -327,6 +333,22 @@ private VariantContext getCorrectVariantContextForReference(final VariantContext
}
}

/**
 * Prepare a {@link VariantContext} for output by undoing the HG19 contig renaming when this engine is flagged to do so.
 *
 * When {@code mustRevertVariantContigsFromHg19ToB37} is set, a new variant is built from the given one with its
 * contig replaced by the result of {@code FuncotatorUtils.convertHG19ContigToB37Contig} applied to the current contig.
 * When the flag is not set, no copy is made.
 *
 * @param variant A {@link VariantContext} object containing the variant to prepare for output.
 * @return A {@link VariantContext} whose contig has been converted from its HG19 name back to its B37 name if this
 *         engine must revert contigs for output. Otherwise, the very same variant instance that was passed in.
 */
VariantContext getCorrectVariantContextForOutput(final VariantContext variant) {
    // Nothing to undo: hand back the caller's instance untouched.
    if ( !mustRevertVariantContigsFromHg19ToB37 ) {
        return variant;
    }

    // Copy the variant, swapping only the contig name for its B37 equivalent.
    return new VariantContextBuilder(variant)
            .chr(FuncotatorUtils.convertHG19ContigToB37Contig(variant.getContig()))
            .make();
}

/**
* @return The default {@link VariantTransformer} which will automatically convert from the B37 reference standard to the HG19 reference standard for contig names.
*/
Expand Down Expand Up @@ -483,7 +505,7 @@ private boolean determineReferenceAndDatasourceCompatibility() {
}
else if ( funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollection.FuncotatorReferenceVersionHg19) &&
FuncotatorUtils.isSequenceDictionaryUsingB37Reference(sequenceDictionaryForDrivingVariants) ) {
logger.info("VCF sequence dictionary detected as B37 in HG19 annotation mode. Performing conversion.");
logger.info("VCF sequence dictionary detected as B37 in HG19 annotation mode. Performing conversion. (NOTE: the output VCF will still be B37)");
mustConvertInputContigsToHg19 = true;
}
else {
Expand All @@ -505,6 +527,11 @@ else if ( funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollectio
"There MAY be some errors (e.g. in the Y chromosome, but possibly in other places as well) due to changes between the two references.");
}

// Record whether we need to revert the contigs back to B37 after annotation:
if (FuncotatorUtils.isSequenceDictionaryUsingB37Reference(sequenceDictionaryForDrivingVariants) && mustConvertInputContigsToHg19) {
this.mustRevertVariantContigsFromHg19ToB37 = true;
}

return mustConvertInputContigsToHg19;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ public void write(final VariantContext variant, final FuncotationMap txToFuncota
variantContextOutputBuilder.genotypes( variant.getGenotypes() );

// Render and add our VCF line:
vcfWriter.add( variantContextOutputBuilder.make() );
VariantContext out = variantContextOutputBuilder.make();
vcfWriter.add( out );
}

private Funcotation createManualAnnotationFuncotation(final Allele altAllele) {
Expand Down
Git LFS file not shown
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,4 @@
##reference=/cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta
##source=Funcotator
#CHROM POS ID REF ALT QUAL FILTER INFO
chr2 70120909 rs3214822 GA G 722.12 PASS FUNCOTATION=[SNRNP27|hg19|chr2|70120910|70120910|FIVE_PRIME_UTR||DEL|A|A|-|g.chr2:70120910delA|ENST00000244227.3|+|1|||||0.43640897755610975|GGGAAAAATGAAAGCTGTGTT|SNRNP27_ENST00000409116.1_FIVE_PRIME_FLANK/SNRNP27_ENST00000488986.1_FIVE_PRIME_FLANK|||||||||||||||||||||||||91|biliary_tract(2)_%7C_breast(12)_%7C_central_nervous_system(44)_%7C_large_intestine(11)_%7C_pancreas(22)|||||||X76302|NM_006857.2|NP_006848.1|HGNC:30240|small_%20_nuclear_%20_ribonucleoprotein_%20_U4/U6.U5_%20_subunit_%20_27|Approved|gene_%20_with_%20_protein_%20_product|protein-coding_%20_gene||"small_%20_nuclear_%20_ribonucleoprotein_%20_27kDa_%20_(U4/U6.U5)"_%2C__%20_"small_%20_nuclear_%20_ribonucleoprotein_%2C__%20_U4/U6.U5_%20_27kDa_%20_subunit"|RY1_%2C__%20_U4/U6.U5-27K|"nucleic_%20_acid_%20_binding_%20_protein_%20_RY_%20_1"_%2C__%20_"U4/U6.U5_%20_small_%20_nuclear_%20_ribonucleoprotein_%20_27_%20_kDa_%20_protein"|2p13.3|2016-10-05||2016-03-11|X76302||11017|ENSG00000124380|7931148_%2C__%20_9085842|NM_006857|||CCDS33219|OTTHUMG00000152689|11017||NM_006857|Q8WVK2|ENSG00000124380|uc002sfw.4|SNR27_HUMAN||Q15410|Q8WVK2|mRNA_%20_processing_%20_(GO:0006397)_%7C_RNA_%20_splicing_%20_(GO:0008380)|nucleus_%20_(GO:0005634)|nucleic_%20_acid_%20_binding_%20_(GO:0003676)|_%7C_|_%7C_|_%7C_|true_%7C_true|false_%7C_false|0.6222_%2C_0.3778_%7C_0.6222_%2C_0.3778|false_%7C_false|false_%7C_false|1_%7C_1|_%7C_|false_%7C_false|true_%7C_false|true_%7C_false|SNRNP27:11017_%7C_SNRNP27:11017|true_%7C_false|false_%7C_false|false_%7C_false|true_%7C_false|true_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|true_%7C_true|false_%7C_false|3214822_%7C_397747233|70120910_%7C_70120912|false_%7C_false|false_%7C_false|0_%7C_0|true_%7C_true|0_%7C_0|false_%7C_false|0.661217_%2C_0.338783_%7C_|false_%7C_false|false_%7C_fals
e|false_%7C_false|DIV_%7C_DIV|true_%7C_false|0x05010002000517013e000200_%7C_0x050100020005000002000200|1_%7C_1|false_%7C_false|134_%7C_138|rs3214822_%7C_rs397747233|_%7C_]
2 70120909 rs3214822 GA G 722.12 PASS FUNCOTATION=[SNRNP27|hg19|chr2|70120910|70120910|FIVE_PRIME_UTR||DEL|A|A|-|g.chr2:70120910delA|ENST00000244227.3|+|1|||||0.43640897755610975|GGGAAAAATGAAAGCTGTGTT|SNRNP27_ENST00000409116.1_FIVE_PRIME_FLANK/SNRNP27_ENST00000488986.1_FIVE_PRIME_FLANK|||||||||||||||||||||||||91|biliary_tract(2)_%7C_breast(12)_%7C_central_nervous_system(44)_%7C_large_intestine(11)_%7C_pancreas(22)|||||||X76302|NM_006857.2|NP_006848.1|HGNC:30240|small_%20_nuclear_%20_ribonucleoprotein_%20_U4/U6.U5_%20_subunit_%20_27|Approved|gene_%20_with_%20_protein_%20_product|protein-coding_%20_gene||"small_%20_nuclear_%20_ribonucleoprotein_%20_27kDa_%20_(U4/U6.U5)"_%2C__%20_"small_%20_nuclear_%20_ribonucleoprotein_%2C__%20_U4/U6.U5_%20_27kDa_%20_subunit"|RY1_%2C__%20_U4/U6.U5-27K|"nucleic_%20_acid_%20_binding_%20_protein_%20_RY_%20_1"_%2C__%20_"U4/U6.U5_%20_small_%20_nuclear_%20_ribonucleoprotein_%20_27_%20_kDa_%20_protein"|2p13.3|2016-10-05||2016-03-11|X76302||11017|ENSG00000124380|7931148_%2C__%20_9085842|NM_006857|||CCDS33219|OTTHUMG00000152689|11017||NM_006857|Q8WVK2|ENSG00000124380|uc002sfw.4|SNR27_HUMAN||Q15410|Q8WVK2|mRNA_%20_processing_%20_(GO:0006397)_%7C_RNA_%20_splicing_%20_(GO:0008380)|nucleus_%20_(GO:0005634)|nucleic_%20_acid_%20_binding_%20_(GO:0003676)|_%7C_|_%7C_|_%7C_|true_%7C_true|false_%7C_false|0.6222_%2C_0.3778_%7C_0.6222_%2C_0.3778|false_%7C_false|false_%7C_false|1_%7C_1|_%7C_|false_%7C_false|true_%7C_false|true_%7C_false|SNRNP27:11017_%7C_SNRNP27:11017|true_%7C_false|false_%7C_false|false_%7C_false|true_%7C_false|true_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|true_%7C_true|false_%7C_false|3214822_%7C_397747233|70120910_%7C_70120912|false_%7C_false|false_%7C_false|0_%7C_0|true_%7C_true|0_%7C_0|false_%7C_false|0.661217_%2C_0.338783_%7C_|false_%7C_false|false_%7C_false|f
alse_%7C_false|DIV_%7C_DIV|true_%7C_false|0x05010002000517013e000200_%7C_0x050100020005000002000200|1_%7C_1|false_%7C_false|134_%7C_138|rs3214822_%7C_rs397747233|_%7C_]
Loading

0 comments on commit 47d0086

Please sign in to comment.