Skip to content

Commit

Permalink
Rewrite complex SV functional annotation in SVAnnotate (#8516)
Browse files Browse the repository at this point in the history
epiercehoffman authored Jan 23, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent b68fadc commit e796d20
Showing 7 changed files with 698 additions and 84 deletions.
Original file line number Diff line number Diff line change
@@ -89,7 +89,9 @@ public enum ComplexVariantSubtype {
piDUP_RF,
dDUP,
dDUP_iDEL,
INS_iDEL
INS_iDEL,
CTX_PP_QQ,
CTX_PQ_QP
}

// not defined in output vcf header but used in internal id that is currently output in the ID column
@@ -163,6 +165,7 @@ public enum ComplexVariantSubtype {
// INFO key: class(es) of noncoding elements disrupted by an SV breakpoint.
public static final String NONCODING_BREAKPOINT = "PREDICTED_NONCODING_BREAKPOINT";
// INFO key: nearest transcription start site to an intergenic variant.
public static final String NEAREST_TSS = "PREDICTED_NEAREST_TSS";
// NOTE(review): presumably the INFO key for genes whose transcription start site is duplicated — confirm against the SVAnnotate documentation.
public static final String TSS_DUP = "PREDICTED_TSS_DUP";
// INFO key: gene(s) partially overlapped by the duplicated interval of a dispersed duplication in a complex SV.
public static final String PARTIAL_DISPERSED_DUP = "PREDICTED_PARTIAL_DISPERSED_DUP";

// SVTYPE classes
public enum StructuralVariantAnnotationType {
Original file line number Diff line number Diff line change
@@ -123,6 +123,11 @@
* duplicated. The partial duplication occurs when a duplication has one breakpoint within the transcript and one
* breakpoint after the end of the transcript. When the duplication is in tandem, the result is that there is one
* intact copy of the full endogenous gene.</p></li>
* <li><p><i>PREDICTED_PARTIAL_DISPERSED_DUP</i><br />
* Gene(s) which are partially overlapped by the duplicated segment involved in an SV's dispersed duplication.
* This annotation is applied to a dispersed (non-tandem) duplication segment that is part of a complex SV if the
* duplicated segment overlaps part of a transcript but not the entire transcript (which would be a
* PREDICTED_COPY_GAIN event).</p></li>
* <li><p><i>PREDICTED_INV_SPAN</i><br />
* Gene(s) which are entirely spanned by an SV's inversion. A whole-gene inversion occurs when an inversion spans
* the entire transcript, from the first base of the 5' UTR to the last base of the 3' UTR. </p></li>
@@ -354,6 +359,7 @@ private void addAnnotationInfoKeysToHeader(final VCFHeader header) {
header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.NONCODING_SPAN, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Class(es) of noncoding elements spanned by SV."));
header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.NONCODING_BREAKPOINT, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Class(es) of noncoding elements disrupted by SV breakpoint."));
header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.NEAREST_TSS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Nearest transcription start site to an intergenic variant."));
header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.PARTIAL_DISPERSED_DUP, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Gene(s) overlapped partially by the duplicated interval involved in a dispersed duplication event in a complex SV."));

}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.broadinstitute.hellbender.utils;


import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.Locatable;
@@ -275,6 +276,25 @@ public SimpleInterval intersect( final Locatable that ) {
Math.min( getEnd(), that.getEnd()) );
}

/**
 * Get the section of the starting interval (this) that lies outside the other interval (that).
 *
 * <p>Exactly one interval is returned:</p>
 * <ul>
 *   <li>If this interval starts before {@code that}, the result runs from {@code this.getStart()} to
 *       {@code that.getStart()}. This is also what is returned when {@code that} lies strictly inside this
 *       interval; the portion of this interval downstream of {@code that} is not reported in that case.</li>
 *   <li>Otherwise the result runs from {@code that.getEnd()} to {@code this.getEnd()}.</li>
 * </ul>
 *
 * <p>Note that the boundary position shared with {@code that} ({@code that.getStart()} or {@code that.getEnd()})
 * is included in the returned interval, e.g. chr1:10-30 minus chr1:20-40 yields chr1:10-20.</p>
 *
 * @param that - interval to subtract from starting interval. Must overlap the starting interval but must not
 *             contain it.
 * @return - SimpleInterval representing the portion of starting interval (this) on one side of other interval (that)
 * @throws IllegalArgumentException if the two intervals do not overlap, or if {@code that} contains this interval
 */
@VisibleForTesting
public SimpleInterval subtract(final Locatable that) {
    Utils.validateArg(this.overlaps(that), () ->
            "SimpleInterval::subtract(): The two intervals need to overlap: " + this + ", " + that);
    Utils.validateArg(!that.contains(this), () ->
            "SimpleInterval::subtract(): Interval to subtract " + that + " cannot contain starting interval " + this);

    if (this.getStart() < that.getStart()) {
        // this begins upstream of that: keep the leading piece, up to and including that's first base
        return new SimpleInterval(this.getContig(), this.getStart(), that.getStart());
    }
    // this does not begin upstream of that: keep the trailing piece, starting at that's last base
    return new SimpleInterval(this.getContig(), that.getEnd(), this.getEnd());
}

/**
* Returns a new SimpleInterval that represents the entire span of this and that. Requires that
* this and that SimpleInterval are contiguous.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -28,7 +28,8 @@ public class SVAnnotateIntegrationTest extends CommandLineProgramTest {
GATKSVVCFConstants.INV_SPAN, GATKSVVCFConstants.PROMOTER, GATKSVVCFConstants.COPY_GAIN,
GATKSVVCFConstants.INTERGENIC, GATKSVVCFConstants.NEAREST_TSS, GATKSVVCFConstants.INT_EXON_DUP,
GATKSVVCFConstants.PARTIAL_EXON_DUP, GATKSVVCFConstants.MSV_EXON_OVERLAP, GATKSVVCFConstants.UTR,
GATKSVVCFConstants.INTRONIC, GATKSVVCFConstants.TSS_DUP, GATKSVVCFConstants.BREAKEND_EXON);
GATKSVVCFConstants.INTRONIC, GATKSVVCFConstants.TSS_DUP, GATKSVVCFConstants.BREAKEND_EXON,
GATKSVVCFConstants.PARTIAL_DISPERSED_DUP);

private void assertVariantAnnotatedAsExpected(final List<VariantContext> vcf, final String variantID,
Map<String, Object> expectedAnnotations) {
Original file line number Diff line number Diff line change
@@ -260,6 +260,116 @@ public void testContains( final SimpleInterval firstInterval, final SimpleInterv
"contains() returned incorrect result for intervals " + firstInterval + " and " + secondInterval);
}

/**
 * Rows of { starting interval, interval to subtract, expected result } for the subtract() tests.
 * In every expected result the boundary position shared with the subtracted interval is retained.
 */
@DataProvider(name = "subtractIntervalData")
private Object[][] subtractIntervalData() {
    // subtracted interval overlaps the downstream end of the starting interval
    final Object[] overlapsDownstreamEnd = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 10, 20) };

    // subtracted interval overlaps the upstream end of the starting interval
    final Object[] overlapsUpstreamEnd = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 5, 15),
            new SimpleInterval("chr1", 15, 30) };

    // subtracted interval shares its start with the starting interval and ends inside it
    final Object[] sharesStart = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 10, 20),
            new SimpleInterval("chr1", 20, 30) };

    // subtracted interval starts inside the starting interval and shares its end
    final Object[] sharesEnd = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 20, 30),
            new SimpleInterval("chr1", 10, 20) };

    return new Object[][] { overlapsDownstreamEnd, overlapsUpstreamEnd, sharesStart, sharesEnd };
}

/**
 * Checks that subtracting secondInterval from firstInterval yields expectedResult.
 */
@Test(dataProvider = "subtractIntervalData")
public void testSubtractInterval(
        final SimpleInterval firstInterval,
        final SimpleInterval secondInterval,
        final SimpleInterval expectedResult) {
    final SimpleInterval actualResult = firstInterval.subtract(secondInterval);
    Assert.assertEquals(actualResult, expectedResult);
}

/**
 * Rows of { starting interval, interval to subtract } for which subtract() is expected to throw.
 */
@DataProvider(name = "subtractIntervalDataExpectingException")
private Object[][] subtractIntervalDataExpectingException() {
    // different contigs
    final Object[] differentContigs = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr2", 20, 40) };

    // non-overlapping intervals on same contig
    final Object[] disjointSameContig = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 50, 150) };

    // second interval contains first
    final Object[] secondContainsFirst = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 10, 40) };

    return new Object[][] { differentContigs, disjointSameContig, secondContainsFirst };
}
/**
 * Checks that subtract() rejects each invalid interval pair with an IllegalArgumentException.
 */
@Test(
        dataProvider = "subtractIntervalDataExpectingException",
        expectedExceptions = IllegalArgumentException.class)
public void testSubtractIntervalExpectingException(
        final SimpleInterval firstInterval,
        final SimpleInterval secondInterval) {
    // the call itself must throw; there is no result to inspect
    firstInterval.subtract(secondInterval);
}

/**
 * Rows of { first interval, second interval, expected merged span } for the mergeWithContiguous() tests.
 */
@DataProvider(name = "mergeWithContiguousData")
private Object[][] mergeWithContiguousData() {
    // first is upstream, overlapping
    final Object[] upstreamOverlapping = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 10, 40) };

    // first is downstream, overlapping
    final Object[] downstreamOverlapping = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 5, 15),
            new SimpleInterval("chr1", 5, 30) };

    // first contains second
    final Object[] firstContainsSecond = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 15, 20),
            new SimpleInterval("chr1", 10, 30) };

    // second contains first
    final Object[] secondContainsFirst = {
            new SimpleInterval("chr1", 20, 30),
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 10, 30) };

    // first is upstream, overlapping by 1
    final Object[] upstreamOverlappingByOne = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 30, 50),
            new SimpleInterval("chr1", 10, 50) };

    // first is upstream, adjacent
    final Object[] upstreamAdjacent = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 31, 50),
            new SimpleInterval("chr1", 10, 50) };

    // first is downstream, overlapping by 1
    final Object[] downstreamOverlappingByOne = {
            new SimpleInterval("chr1", 40, 60),
            new SimpleInterval("chr1", 30, 40),
            new SimpleInterval("chr1", 30, 60) };

    // first is downstream, adjacent
    final Object[] downstreamAdjacent = {
            new SimpleInterval("chr1", 40, 60),
            new SimpleInterval("chr1", 30, 39),
            new SimpleInterval("chr1", 30, 60) };

    return new Object[][] {
            upstreamOverlapping,
            downstreamOverlapping,
            firstContainsSecond,
            secondContainsFirst,
            upstreamOverlappingByOne,
            upstreamAdjacent,
            downstreamOverlappingByOne,
            downstreamAdjacent
    };
}

/**
 * Checks that merging firstInterval with the contiguous secondInterval yields expectedResult.
 */
@Test(dataProvider = "mergeWithContiguousData")
public void testMergeWithContiguous(
        final SimpleInterval firstInterval,
        final SimpleInterval secondInterval,
        final SimpleInterval expectedResult) {
    final SimpleInterval mergedInterval = firstInterval.mergeWithContiguous(secondInterval);
    Assert.assertEquals(mergedInterval, expectedResult);
}

/**
 * Rows of { first interval, second interval } for which mergeWithContiguous() is expected to throw.
 */
@DataProvider(name = "mergeWithContiguousDataExpectingException")
private Object[][] mergeWithContiguousDataExpectingException() {
    // different contigs
    final Object[] differentContigs = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr2", 20, 40) };

    // non-contiguous intervals on same contig, first is upstream
    final Object[] gapFirstUpstream = {
            new SimpleInterval("chr1", 10, 30),
            new SimpleInterval("chr1", 50, 150) };

    // non-contiguous intervals on same contig, first is downstream
    final Object[] gapFirstDownstream = {
            new SimpleInterval("chr1", 20, 30),
            new SimpleInterval("chr1", 5, 15) };

    return new Object[][] { differentContigs, gapFirstUpstream, gapFirstDownstream };
}
/**
 * Checks that mergeWithContiguous() rejects each invalid interval pair with a GATKException.
 */
@Test(
        dataProvider = "mergeWithContiguousDataExpectingException",
        expectedExceptions = GATKException.class)
public void testMergeWithContiguousExpectingException(
        final SimpleInterval firstInterval,
        final SimpleInterval secondInterval) {
    // the call itself must throw; there is no result to inspect
    firstInterval.mergeWithContiguous(secondInterval);
}

@Test(expectedExceptions = IllegalArgumentException.class)
public void testNoNullInConstruction() throws Exception {
new SimpleInterval((String)null);

0 comments on commit e796d20

Please sign in to comment.