diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java
index de91be5e5b6..5224fe5f7d6 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java
@@ -1,7 +1,12 @@
 package org.broadinstitute.hellbender.tools.spark.sv.utils;
 
+import com.google.common.collect.HashBiMap;
 import htsjdk.variant.variantcontext.Allele;
 
+import java.util.Map;
+
+import static java.util.Map.entry;
+
 public final class GATKSVVCFConstants {
 
     // todo: add these and the other standard SV info fields from the VCF spec to htsjdk VCFStandardHeaderLines
@@ -76,6 +81,7 @@ public final class GATKSVVCFConstants {
     public static final String CPX_INTERVALS = "CPX_INTERVALS";
     public static final String CPX_TYPE = "CPX_TYPE";
 
+    // keep in sync with map below
     public enum ComplexVariantSubtype {
         delINV,
         INVdel,
@@ -95,6 +101,31 @@ public enum ComplexVariantSubtype {
         CTX_INV
     }
 
+    /**
+     * Bidirectional lookup between CPX_TYPE strings as written in the VCF and {@link ComplexVariantSubtype} values.
+     * "CTX_PP/QQ" and "CTX_PQ/QP" contain a slash, which an enum constant name cannot, so they map to
+     * {@link ComplexVariantSubtype#CTX_PP_QQ} and {@link ComplexVariantSubtype#CTX_PQ_QP}; every other key
+     * equals its constant's name. Keep in sync with the enum above.
+     */
+    public static final HashBiMap<String, ComplexVariantSubtype> COMPLEX_VARIANT_SUBTYPE_MAP = HashBiMap.create(Map.ofEntries(
+            entry("delINV", ComplexVariantSubtype.delINV),
+            entry("INVdel", ComplexVariantSubtype.INVdel),
+            entry("dupINV", ComplexVariantSubtype.dupINV),
+            entry("INVdup", ComplexVariantSubtype.INVdup),
+            entry("delINVdel", ComplexVariantSubtype.delINVdel),
+            entry("dupINVdup", ComplexVariantSubtype.dupINVdup),
+            entry("delINVdup", ComplexVariantSubtype.delINVdup),
+            entry("dupINVdel", ComplexVariantSubtype.dupINVdel),
+            entry("piDUP_FR", ComplexVariantSubtype.piDUP_FR),
+            entry("piDUP_RF", ComplexVariantSubtype.piDUP_RF),
+            entry("dDUP", ComplexVariantSubtype.dDUP),
+            entry("dDUP_iDEL", ComplexVariantSubtype.dDUP_iDEL),
+            entry("INS_iDEL", ComplexVariantSubtype.INS_iDEL),
+            entry("CTX_PP/QQ", ComplexVariantSubtype.CTX_PP_QQ),
+            entry("CTX_PQ/QP", ComplexVariantSubtype.CTX_PQ_QP),
+            entry("CTX_INV", ComplexVariantSubtype.CTX_INV)
+    ));
+
     // not defined in output vcf header but used in internal id that is currently output in the ID column
     public static final String INTERVAL_VARIANT_ID_FIELD_SEPARATOR = "_";
     public static final String DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING = "DEL-DUPLICATION-TANDEM-CONTRACTION";
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java
index 0f95878b389..c19ed8e3e9c 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java
@@ -384,7 +384,4 @@ public Double getLog10PError() {
         return log10PError;
     }
 
-    public GATKSVVCFConstants.ComplexVariantSubtype getCpxSubtype() {
-        return cpxSubtype;
-    }
 }
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java
index 92c7f22835a..2f246a3ea5a 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java
@@ -91,7 +91,7 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record)
             builder.attribute(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, chr2);
         }
         if (cpxType != null) {
-            builder.attribute(GATKSVVCFConstants.CPX_TYPE, record.getComplexSubtype().toString());
+            builder.attribute(GATKSVVCFConstants.CPX_TYPE, getComplexSubtypeString(cpxType));
         }
 
         builder.attribute(GATKSVVCFConstants.SVLEN, record.getLength());
@@ -329,8 +329,7 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari
         final int positionA = variant.getStart();
 
         final GATKSVVCFConstants.StructuralVariantAnnotationType type = inferStructuralVariantType(variant);
-        final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype =
-                type == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX ? getComplexSubtype(variant) : null;
+        final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype = getComplexSubtype(variant);
         final List algorithms = getAlgorithms(variant);
 
         final String strands;
@@ -426,15 +425,34 @@ public static List getAlgorithms(final VariantContext variant) {
 
+    /**
+     * Reads the {@link GATKSVVCFConstants#CPX_TYPE} attribute of a variant and converts it to a complex subtype.
+     *
+     * @param variant variant to inspect; passed through {@code Utils.nonNull}
+     * @return the subtype mapped to the attribute value, or {@code null} when the attribute lookup yields null
+     * @throws IllegalArgumentException if the value is not a key of
+     *         {@link GATKSVVCFConstants#COMPLEX_VARIANT_SUBTYPE_MAP}
+     */
     public static GATKSVVCFConstants.ComplexVariantSubtype getComplexSubtype(final VariantContext variant) {
         Utils.nonNull(variant);
         final String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
         if (subtypeString == null) {
             return null;
         }
-        if (!VALID_CPX_SUBTYPES.contains(subtypeString)) {
+        if (!GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.containsKey(subtypeString)) {
             throw new IllegalArgumentException("Invalid CPX subtype: " + subtypeString + ", valid values are: " +
-                    String.join(", ", VALID_CPX_SUBTYPES));
+                    String.join(", ", GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.keySet()));
         }
-        return GATKSVVCFConstants.ComplexVariantSubtype.valueOf(subtypeString);
+        return GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.get(subtypeString);
+    }
+
+    /**
+     * Converts a complex subtype to the CPX_TYPE string used in the VCF, via the inverse view of
+     * {@link GATKSVVCFConstants#COMPLEX_VARIANT_SUBTYPE_MAP}.
+     *
+     * @param subtype subtype to convert
+     * @return the string mapped to {@code subtype}, or {@code null} if the map has no entry for it
+     */
+    public static String getComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype) {
+        return GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.inverse().get(subtype);
     }
 
     private static String getStrands(final VariantContext variant, final GATKSVVCFConstants.StructuralVariantAnnotationType type) {
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java
index 0e840e4652d..56b25ff8a5e 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVAnnotateEngine.java
@@ -9,6 +9,7 @@
 import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
 import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
+import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils;
 import org.broadinstitute.hellbender.utils.SVInterval;
 import org.broadinstitute.hellbender.utils.SVIntervalTree;
 import org.broadinstitute.hellbender.utils.SimpleInterval;
@@ -862,12 +863,7 @@ protected static boolean includesDispersedDuplication(final GATKSVVCFConstants.C
     protected Map annotateStructuralVariant(final VariantContext variant) {
         final Map> variantConsequenceDict = new HashMap<>();
         final GATKSVVCFConstants.StructuralVariantAnnotationType overallSVType = getSVType(variant);
-        final String complexTypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
-        GATKSVVCFConstants.ComplexVariantSubtype complexType = null;
-        if (complexTypeString != null) {
-            // replace / in CTX_PP/QQ and CTX_PQ/QP with _ to match ComplexVariantSubtype constants which cannot contain slashes
-            complexType = GATKSVVCFConstants.ComplexVariantSubtype.valueOf(complexTypeString.replace("/", "_"));
-        }
+        final GATKSVVCFConstants.ComplexVariantSubtype complexType = SVCallRecordUtils.getComplexSubtype(variant);
         final boolean includesDispersedDuplication = includesDispersedDuplication(complexType, COMPLEX_SUBTYPES_WITH_DISPERSED_DUP);
         final List svSegmentsForGeneOverlaps = getSVSegments(variant, overallSVType, maxBreakendLen, complexType);
 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java
index d53c3a22b0b..2272a84fc11 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java
@@ -199,7 +199,7 @@ protected SVCallRecord minimizeTruthFootprint(final SVCallRecord item) {
         final List genotypes = item.getGenotypes().stream().map(SVConcordance::stripTruthGenotype).collect(Collectors.toList());
         return new SVCallRecord(item.getId(), item.getContigA(), item.getPositionA(), item.getStrandA(),
                 item.getContigB(), item.getPositionB(), item.getStrandB(), item.getType(),
-                item.getCpxSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
+                item.getComplexSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
                 item.getAttributes(), item.getFilters(), item.getLog10PError(), dictionary);
     }
 
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKSVVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKSVVariantContextUtils.java
index c21f9450d29..fec5894b966 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKSVVariantContextUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKSVVariantContextUtils.java
@@ -12,6 +12,7 @@ public class GATKSVVariantContextUtils {
 
     public static final Allele BND_ALLELE = Allele.create("");
     public static final Allele CPX_ALLELE = Allele.create("");
+    public static final Allele CTX_ALLELE = Allele.create("");
 
     /**
      * Build the list of called alleles based on reference and called copy numbers
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java
index 3c1d291866e..183d72eec27 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java
@@ -23,12 +23,17 @@ public class SVCallRecordUtilsUnitTest {
     private static final List ALLELES_INS = Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS);
     private static final List ALLELES_BND = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE);
     private static final List ALLELES_CPX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE);
+    private static final List ALLELES_CTX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE);
 
     private static final Map TEST_ATTRIBUTES = Collections.singletonMap("TEST_KEY", "TEST_VAL");
     private static final Map TEST_ATTRIBUTES_CPX = Lists.newArrayList(
             new AbstractMap.SimpleImmutableEntry("TEST_KEY", "TEST_VAL"),
             new AbstractMap.SimpleImmutableEntry(GATKSVVCFConstants.CPX_TYPE, GATKSVVCFConstants.ComplexVariantSubtype.dDUP.toString())
     ).stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+    private static final Map TEST_ATTRIBUTES_CTX = Map.of(
+            "TEST_KEY", "TEST_VAL",
+            GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
+    );
 
     private static final Genotype GENOTYPE_DEL_1 = new GenotypeBuilder("sample1")
             .alleles(Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL))
@@ -54,6 +59,8 @@ public class SVCallRecordUtilsUnitTest {
             .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE)).make();
     private static final Genotype GENOTYPE_CPX_1 = new GenotypeBuilder("sample1")
             .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE)).make();
+    private static final Genotype GENOTYPE_CTX_1 = new GenotypeBuilder("sample1")
+            .alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE)).make();
 
     private static final Comparator RECORD_COMPARATOR = SVCallRecordUtils.getCallComparator(SVTestUtils.hg38Dict);
 
@@ -570,6 +577,15 @@ public Object[][] testCreateData() {
                                 SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1),
                                 TEST_ATTRIBUTES, Collections.emptySet(), null)
                 },
+                {
+                        SVTestUtils.newVariantContext("var11", "chr1", 1000, 1000,
+                                ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), -1, null,
+                                GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST,
+                                "chrX", 2000, TEST_ATTRIBUTES_CTX, null),
+                        new SVCallRecord("var11", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, null,
+                                SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1),
+                                TEST_ATTRIBUTES, Collections.emptySet(), null)
+                },
         };
     }
 
@@ -583,4 +599,72 @@ public void testCreate(final VariantContext variant, final SVCallRecord expected
         final SVCallRecord resultKeepAttr = SVCallRecordUtils.create(variant, true);
         SVTestUtils.assertEqualsExceptExcludedAttributes(resultKeepAttr, expected, Collections.emptyList());
     }
+
+    @DataProvider(name = "testGetComplexSubtypeData")
+    public Object[][] testGetComplexSubtypeData() {
+        return new Object[][]{
+                {new VariantContextBuilder()
+                        .source("source")
+                        .id("id")
+                        .chr("chr1")
+                        .start(2000)
+                        .stop(3000)
+                        .alleles(Arrays.asList(Allele.REF_N, Allele.create("", false)))
+                        .attributes(Map.of(
+                                GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
+                                GATKSVVCFConstants.CPX_TYPE, "dupINVdup"
+                        ))
+                        .make(),
+                        GATKSVVCFConstants.ComplexVariantSubtype.dupINVdup
+                },
+                {new VariantContextBuilder()
+                        .source("source")
+                        .id("id")
+                        .chr("chr1")
+                        .start(2000)
+                        .stop(3000)
+                        .alleles(Arrays.asList(Allele.REF_N, Allele.create("", false)))
+                        .attributes(Map.of(
+                                GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
+                                GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
+                        ))
+                        .make(),
+                        GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ
+                },
+                {new VariantContextBuilder()
+                        .source("source")
+                        .id("id")
+                        .chr("chr1")
+                        .start(2000)
+                        .stop(3000)
+                        .alleles(Arrays.asList(Allele.REF_N, Allele.create("", false)))
+                        .attributes(Map.of(
+                                GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL
+                        ))
+                        .make(),
+                        null
+                }
+        };
+    }
+
+    @Test(dataProvider= "testGetComplexSubtypeData")
+    public void testGetComplexSubtype(final VariantContext variant, final GATKSVVCFConstants.ComplexVariantSubtype expected) {
+        final GATKSVVCFConstants.ComplexVariantSubtype actual = SVCallRecordUtils.getComplexSubtype(variant);
+        Assert.assertEquals(actual, expected);
+    }
+
+    @DataProvider(name = "testGetComplexSubtypeStringData")
+    public Object[][] testGetComplexSubtypeStringData() {
+        return new Object[][]{
+                {GATKSVVCFConstants.ComplexVariantSubtype.CTX_PQ_QP, "CTX_PQ/QP"},
+                {GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, "CTX_PP/QQ"},
+                {GATKSVVCFConstants.ComplexVariantSubtype.INS_iDEL, "INS_iDEL"}
+        };
+    }
+
+    @Test(dataProvider= "testGetComplexSubtypeStringData")
+    public void testGetComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype, final String expected) {
+        final String actual = SVCallRecordUtils.getComplexSubtypeString(subtype);
+        Assert.assertEquals(actual, expected);
+    }
 }
\ No newline at end of file