Skip to content

Commit

Permalink
Handle CTX_PP/QQ and CTX_PQ/QP CPX_TYPE values in SVConcordance (#8885)
Browse files Browse the repository at this point in the history
  • Loading branch information
epiercehoffman authored Jul 1, 2024
1 parent 64348bc commit 4af2b49
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package org.broadinstitute.hellbender.tools.spark.sv.utils;

import com.google.common.collect.HashBiMap;
import htsjdk.variant.variantcontext.Allele;

import java.util.Map;

import static java.util.Map.entry;

public final class GATKSVVCFConstants {

// todo: add these and the other standard SV info fields from the VCF spec to htsjdk VCFStandardHeaderLines
Expand Down Expand Up @@ -76,6 +81,7 @@ public final class GATKSVVCFConstants {
public static final String CPX_INTERVALS = "CPX_INTERVALS";
public static final String CPX_TYPE = "CPX_TYPE";

// keep in sync with map below
public enum ComplexVariantSubtype {
delINV,
INVdel,
Expand All @@ -95,6 +101,26 @@ public enum ComplexVariantSubtype {
CTX_INV
}

/**
 * Two-way lookup between subtype strings and {@link ComplexVariantSubtype} constants.
 * Keys are the string forms and values are the enum constants; the inverse view gives
 * the string for a constant. Most keys are identical to the constant's name, but
 * "CTX_PP/QQ" and "CTX_PQ/QP" contain a '/' where the constant name has '_', so the
 * enum name alone cannot be used to convert in either direction.
 *
 * Keep in sync with the ComplexVariantSubtype enum above.
 *
 * NOTE(review): HashBiMap is a mutable collection and this field is public, so any caller
 * could add or remove entries at runtime - consider exposing an immutable BiMap instead.
 */
public static final HashBiMap<String, ComplexVariantSubtype> COMPLEX_VARIANT_SUBTYPE_MAP = HashBiMap.create(Map.ofEntries(
entry("delINV", ComplexVariantSubtype.delINV),
entry("INVdel", ComplexVariantSubtype.INVdel),
entry("dupINV", ComplexVariantSubtype.dupINV),
entry("INVdup", ComplexVariantSubtype.INVdup),
entry("delINVdel", ComplexVariantSubtype.delINVdel),
entry("dupINVdup", ComplexVariantSubtype.dupINVdup),
entry("delINVdup", ComplexVariantSubtype.delINVdup),
entry("dupINVdel", ComplexVariantSubtype.dupINVdel),
entry("piDUP_FR", ComplexVariantSubtype.piDUP_FR),
entry("piDUP_RF", ComplexVariantSubtype.piDUP_RF),
entry("dDUP", ComplexVariantSubtype.dDUP),
entry("dDUP_iDEL", ComplexVariantSubtype.dDUP_iDEL),
entry("INS_iDEL", ComplexVariantSubtype.INS_iDEL),
// the two keys below differ from their enum constant names ('/' vs '_')
entry("CTX_PP/QQ", ComplexVariantSubtype.CTX_PP_QQ),
entry("CTX_PQ/QP", ComplexVariantSubtype.CTX_PQ_QP),
entry("CTX_INV", ComplexVariantSubtype.CTX_INV)
));

// not defined in output vcf header but used in internal id that is currently output in the ID column
public static final String INTERVAL_VARIANT_ID_FIELD_SEPARATOR = "_";
public static final String DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING = "DEL-DUPLICATION-TANDEM-CONTRACTION";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,4 @@ public Double getLog10PError() {
return log10PError;
}

/**
 * Returns the value held in this object's {@code cpxSubtype} field.
 *
 * @return the stored complex variant subtype (presumably null when none was set - confirm)
 */
public GATKSVVCFConstants.ComplexVariantSubtype getCpxSubtype() {
return cpxSubtype;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record)
builder.attribute(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, chr2);
}
if (cpxType != null) {
builder.attribute(GATKSVVCFConstants.CPX_TYPE, record.getComplexSubtype().toString());
builder.attribute(GATKSVVCFConstants.CPX_TYPE, getComplexSubtypeString(cpxType));
}

builder.attribute(GATKSVVCFConstants.SVLEN, record.getLength());
Expand Down Expand Up @@ -329,8 +329,7 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari
final int positionA = variant.getStart();

final GATKSVVCFConstants.StructuralVariantAnnotationType type = inferStructuralVariantType(variant);
final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype =
type == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX ? getComplexSubtype(variant) : null;
final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype = getComplexSubtype(variant);
final List<String> algorithms = getAlgorithms(variant);

final String strands;
Expand Down Expand Up @@ -426,15 +425,19 @@ public static List<String> getAlgorithms(final VariantContext variant) {

/**
 * Looks up the complex-variant subtype recorded in a variant's {@code CPX_TYPE} attribute.
 *
 * @param variant variant to inspect; checked with {@code Utils.nonNull}
 * @return the subtype corresponding to the attribute value, or {@code null} when the
 *         attribute is not present on the variant
 * @throws IllegalArgumentException if the attribute value is not a recognized subtype string
 */
public static GATKSVVCFConstants.ComplexVariantSubtype getComplexSubtype(final VariantContext variant) {
Utils.nonNull(variant);
final String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
if (subtypeString == null) {
// attribute absent: no subtype to report
return null;
}
// NOTE(review): validity is decided by the keys of COMPLEX_VARIANT_SUBTYPE_MAP, yet the
// error message below still joins VALID_CPX_SUBTYPES. If that collection holds enum
// constant names (TODO confirm), the message would advertise e.g. CTX_PP_QQ instead of the
// accepted string CTX_PP/QQ - consider joining COMPLEX_VARIANT_SUBTYPE_MAP.keySet() here.
if (!VALID_CPX_SUBTYPES.contains(subtypeString)) {
if (!GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.containsKey(subtypeString)) {
throw new IllegalArgumentException("Invalid CPX subtype: " + subtypeString + ", valid values are: " +
String.join(", ", VALID_CPX_SUBTYPES));
}
return GATKSVVCFConstants.ComplexVariantSubtype.valueOf(subtypeString);
return GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.get(subtypeString);
}

/**
 * Translates a complex-variant subtype constant into its string form by consulting the
 * inverse view of {@code GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP}.
 *
 * @param subtype the subtype constant to translate
 * @return the string mapped to {@code subtype}, or {@code null} if the map has no entry for it
 */
public static String getComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype) {
    final String subtypeString = GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.inverse().get(subtype);
    return subtypeString;
}

private static String getStrands(final VariantContext variant, final GATKSVVCFConstants.StructuralVariantAnnotationType type) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils;
import org.broadinstitute.hellbender.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SVIntervalTree;
import org.broadinstitute.hellbender.utils.SimpleInterval;
Expand Down Expand Up @@ -862,12 +863,7 @@ protected static boolean includesDispersedDuplication(final GATKSVVCFConstants.C
protected Map<String, Object> annotateStructuralVariant(final VariantContext variant) {
final Map<String, Set<String>> variantConsequenceDict = new HashMap<>();
final GATKSVVCFConstants.StructuralVariantAnnotationType overallSVType = getSVType(variant);
final String complexTypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
GATKSVVCFConstants.ComplexVariantSubtype complexType = null;
if (complexTypeString != null) {
// replace / in CTX_PP/QQ and CTX_PQ/QP with _ to match ComplexVariantSubtype constants which cannot contain slashes
complexType = GATKSVVCFConstants.ComplexVariantSubtype.valueOf(complexTypeString.replace("/", "_"));
}
final GATKSVVCFConstants.ComplexVariantSubtype complexType = SVCallRecordUtils.getComplexSubtype(variant);
final boolean includesDispersedDuplication = includesDispersedDuplication(complexType, COMPLEX_SUBTYPES_WITH_DISPERSED_DUP);
final List<SVSegment> svSegmentsForGeneOverlaps = getSVSegments(variant, overallSVType, maxBreakendLen, complexType);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ protected SVCallRecord minimizeTruthFootprint(final SVCallRecord item) {
final List<Genotype> genotypes = item.getGenotypes().stream().map(SVConcordance::stripTruthGenotype).collect(Collectors.toList());
return new SVCallRecord(item.getId(), item.getContigA(), item.getPositionA(),
item.getStrandA(), item.getContigB(), item.getPositionB(), item.getStrandB(), item.getType(),
item.getCpxSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
item.getComplexSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
item.getAttributes(), item.getFilters(), item.getLog10PError(), dictionary);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public class GATKSVVariantContextUtils {

public static final Allele BND_ALLELE = Allele.create("<BND>");
public static final Allele CPX_ALLELE = Allele.create("<CPX>");
public static final Allele CTX_ALLELE = Allele.create("<CTX>");

/**
* Build the list of called alleles based on reference and called copy numbers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@ public class SVCallRecordUtilsUnitTest {
private static final List<Allele> ALLELES_INS = Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS);
private static final List<Allele> ALLELES_BND = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE);
private static final List<Allele> ALLELES_CPX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE);
private static final List<Allele> ALLELES_CTX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE);

private static final Map<String, Object> TEST_ATTRIBUTES = Collections.singletonMap("TEST_KEY", "TEST_VAL");
private static final Map<String, Object> TEST_ATTRIBUTES_CPX = Lists.newArrayList(
new AbstractMap.SimpleImmutableEntry<String, Object>("TEST_KEY", "TEST_VAL"),
new AbstractMap.SimpleImmutableEntry<String, Object>(GATKSVVCFConstants.CPX_TYPE, GATKSVVCFConstants.ComplexVariantSubtype.dDUP.toString())
).stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
private static final Map<String, Object> TEST_ATTRIBUTES_CTX = Map.of(
"TEST_KEY", "TEST_VAL",
GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
);

private static final Genotype GENOTYPE_DEL_1 = new GenotypeBuilder("sample1")
.alleles(Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL))
Expand All @@ -54,6 +59,8 @@ public class SVCallRecordUtilsUnitTest {
.alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE)).make();
private static final Genotype GENOTYPE_CPX_1 = new GenotypeBuilder("sample1")
.alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE)).make();
private static final Genotype GENOTYPE_CTX_1 = new GenotypeBuilder("sample1")
.alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE)).make();

private static final Comparator<SVCallRecord> RECORD_COMPARATOR = SVCallRecordUtils.getCallComparator(SVTestUtils.hg38Dict);

Expand Down Expand Up @@ -570,6 +577,15 @@ public Object[][] testCreateData() {
SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1),
TEST_ATTRIBUTES, Collections.emptySet(), null)
},
{
SVTestUtils.newVariantContext("var11", "chr1", 1000, 1000,
ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), -1, null,
GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST,
"chrX", 2000, TEST_ATTRIBUTES_CTX, null),
new SVCallRecord("var11", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, null,
SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1),
TEST_ATTRIBUTES, Collections.emptySet(), null)
},
};
}

Expand All @@ -583,4 +599,72 @@ public void testCreate(final VariantContext variant, final SVCallRecord expected
final SVCallRecord resultKeepAttr = SVCallRecordUtils.create(variant, true);
SVTestUtils.assertEqualsExceptExcludedAttributes(resultKeepAttr, expected, Collections.emptyList());
}

/**
 * Supplies (variant, expected subtype) pairs for {@code testGetComplexSubtype}: a CPX variant
 * with a plain subtype string, a CPX variant with a slash-containing subtype string, and a
 * DEL variant that carries no CPX_TYPE attribute (expected result {@code null}).
 */
@DataProvider(name = "testGetComplexSubtypeData")
public Object[][] testGetComplexSubtypeData() {
    final VariantContext dupInvDupVariant = buildSubtypeTestVariant("<CPX>", Map.of(
            GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
            GATKSVVCFConstants.CPX_TYPE, "dupINVdup"
    ));
    final VariantContext ctxVariant = buildSubtypeTestVariant("<CPX>", Map.of(
            GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
            GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
    ));
    final VariantContext delVariant = buildSubtypeTestVariant("<DEL>", Map.of(
            GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL
    ));

    return new Object[][]{
            {dupInvDupVariant, GATKSVVCFConstants.ComplexVariantSubtype.dupINVdup},
            {ctxVariant, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ},
            {delVariant, null}
    };
}

// Builds the chr1:2000-3000 variant shared by every case above; only the alt allele and attributes vary.
private static VariantContext buildSubtypeTestVariant(final String altAllele, final Map<String, Object> attributes) {
    return new VariantContextBuilder()
            .source("source")
            .id("id")
            .chr("chr1")
            .start(2000)
            .stop(3000)
            .alleles(Arrays.asList(Allele.REF_N, Allele.create(altAllele, false)))
            .attributes(attributes)
            .make();
}

/**
 * Checks that {@code SVCallRecordUtils.getComplexSubtype} yields the expected subtype
 * (or {@code null}) for each variant from the data provider.
 */
@Test(dataProvider = "testGetComplexSubtypeData")
public void testGetComplexSubtype(final VariantContext variant, final GATKSVVCFConstants.ComplexVariantSubtype expected) {
    Assert.assertEquals(SVCallRecordUtils.getComplexSubtype(variant), expected);
}

/**
 * Supplies (subtype constant, expected string) pairs for {@code testGetComplexSubtypeString}.
 * The first two rows cover constants whose string form contains a '/' where the constant
 * name has '_'; the last row covers a constant whose string form equals its name.
 */
@DataProvider(name = "testGetComplexSubtypeStringData")
public Object[][] testGetComplexSubtypeStringData() {
return new Object[][]{
{GATKSVVCFConstants.ComplexVariantSubtype.CTX_PQ_QP, "CTX_PQ/QP"},
{GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, "CTX_PP/QQ"},
{GATKSVVCFConstants.ComplexVariantSubtype.INS_iDEL, "INS_iDEL"}
};
}

/**
 * Checks that {@code SVCallRecordUtils.getComplexSubtypeString} produces the expected string
 * for each subtype constant from the data provider.
 */
@Test(dataProvider = "testGetComplexSubtypeStringData")
public void testGetComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype, final String expected) {
    Assert.assertEquals(SVCallRecordUtils.getComplexSubtypeString(subtype), expected);
}
}

0 comments on commit 4af2b49

Please sign in to comment.