Be explicit about when VariantContexts are biallelic #8332

Merged: 14 commits, merged Jun 12, 2023
Changes from 1 commit
Using a light ref/alt/pos Event class for assembled variation, pileup events, and force-calling alleles
davidbenjamin committed Jun 2, 2023

commit 33ac71e23d0794780f23cc37226364705419a689
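For orientation while reading the diff: the new Event type is described as a light ref/alt/pos container, and the changed code below relies only on getStart(), refAllele(), altAllele(), and value-based equals(). A minimal sketch of such a class, inferred from those accessors plus an assumed contig field, follows; it is an illustration, not the actual Event source in this PR.

import htsjdk.variant.variantcontext.Allele;
import java.util.Objects;

// Hypothetical sketch only; the real org.broadinstitute.hellbender.utils.haplotype.Event may differ.
public final class Event {
    private final String contig;   // assumed field; the diff itself only shows start/ref/alt accessors
    private final int start;
    private final Allele refAllele;
    private final Allele altAllele;

    public Event(final String contig, final int start, final Allele refAllele, final Allele altAllele) {
        this.contig = contig;
        this.start = start;
        this.refAllele = refAllele;
        this.altAllele = altAllele;
    }

    public int getStart() { return start; }
    public Allele refAllele() { return refAllele; }
    public Allele altAllele() { return altAllele; }

    // value semantics assumed, since the diff compares events with equals()
    @Override
    public boolean equals(final Object o) {
        return o instanceof Event && ((Event) o).start == start && ((Event) o).contig.equals(contig)
                && ((Event) o).refAllele.equals(refAllele) && ((Event) o).altAllele.equals(altAllele);
    }

    @Override
    public int hashCode() { return Objects.hash(contig, start, refAllele, altAllele); }
}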
Changed file 1:
@@ -12,6 +12,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
+import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.haplotype.EventMap;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
@@ -64,9 +65,9 @@ public static Triple<int[], int[], double[]> annotate(final VariantContext vc,
// encode each haplotype as a string of variant starts and alt allele strings, excluding the locus of vc (to avoid reference/germline bias)
// note that VariantContexts in an EventMap are always biallelic, so var.getAlternateAllele(0) is valid
final Map<String, List<Haplotype>> haplotypeGroups = haplotypeLikelihoods.alleles().stream()
-.collect(Collectors.groupingBy(hap -> hap.getEventMap().getVariantContexts().stream()
-.filter(var -> var.getStart() != vc.getStart())
-.map(var -> var.getStart() + var.getAlternateAllele(0).getBaseString())
+.collect(Collectors.groupingBy(hap -> hap.getEventMap().getEvents().stream()
+.filter(event -> event.getStart() != vc.getStart())
+.map(event -> event.getStart() + event.altAllele().getBaseString())
.collect(Collectors.joining())));

// sum the read support counts for all haplotypes within each group
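As a worked example of the grouping key built above (hypothetical events, reusing the Event sketch near the top of this page rather than PR code): a haplotype carrying an A->C SNV at start 100 and a T->TG insertion at start 250, with vc at some other locus, gets the group key "100C250TG".

import htsjdk.variant.variantcontext.Allele;
import java.util.List;
import java.util.stream.Collectors;

public class GroupingKeyExample {
    public static void main(final String[] args) {
        // hypothetical events away from the vc locus
        final List<Event> events = List.of(
                new Event("chr1", 100, Allele.create("A", true), Allele.create("C", false)),
                new Event("chr1", 250, Allele.create("T", true), Allele.create("TG", false)));
        final String key = events.stream()
                .map(e -> e.getStart() + e.altAllele().getBaseString())
                .collect(Collectors.joining());
        System.out.println(key);   // prints 100C250TG
    }
}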
@@ -143,16 +144,16 @@ public List<String> getKeyNames() {
// k bases longer we simply check that the event map alt allele matches the variant context alt allele excluding the
// latter's last k bases
private static boolean containsAltAllele(final EventMap eventMap, final VariantContext vc, final int altAlleleIndex) {
-final List<VariantContext> overlapping = eventMap.getOverlappingEvents(vc.getStart());
+final List<Event> overlapping = eventMap.getOverlappingEvents(vc.getStart());
if (overlapping.isEmpty()) {
return false;
} else if (overlapping.get(0).getStart() != vc.getStart()) {
return false;
} else {
-final VariantContext eventMapVC = overlapping.get(0);
-final int excessBases = vc.getReference().length() - eventMapVC.getReference().length();
+final Event overlappingEvent = overlapping.get(0);
+final int excessBases = vc.getReference().length() - overlappingEvent.refAllele().length();

-return equalBasesExcludingSuffix(eventMapVC.getAlternateAllele(0).getBases(),
+return equalBasesExcludingSuffix(overlappingEvent.altAllele().getBases(),
vc.getAlternateAllele(altAlleleIndex).getBases(), excessBases);
}
}
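The comment above describes comparing the event-map alt allele against the variant-context alt allele with the latter's last k bases excluded. A sketch of that idea follows; it is an assumption about the shape of the check, not the actual equalBasesExcludingSuffix in this file, whose real signature appears in the next hunk header.

// sketch: true if eventMapBases equals vcBases with the last excessBases bases of vcBases dropped
private static boolean equalBasesExcludingSuffix(final byte[] eventMapBases, final byte[] vcBases, final int excessBases) {
    if (excessBases < 0 || vcBases.length - excessBases != eventMapBases.length) {
        return false;
    }
    for (int i = 0; i < eventMapBases.length; i++) {
        if (eventMapBases[i] != vcBases[i]) {
            return false;
        }
    }
    return true;
}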
@@ -177,12 +178,11 @@ private static boolean equalBasesExcludingSuffix(final byte[] eventMapBases, fin
}

// count variants in one haplotype but not the other
-// note that we use the fact that EventMap VariantContexts are biallelic
private static int uniqueVariants(final Haplotype hap1, final Haplotype hap2, final int excludedPosition) {
final EventMap eventMap2 = hap2.getEventMap();
-return (int) hap1.getEventMap().getVariantContexts().stream()
-.filter(vc -> vc.getStart() != excludedPosition)
-.filter(vc -> !containsAltAllele(eventMap2, vc, 0))
+return (int) hap1.getEventMap().getEvents().stream()
+.filter(event -> event.getStart() != excludedPosition)
+.filter(event -> !event.equals(eventMap2.get(event.getStart())))
Collaborator: are these comparisons equivalent?

Contributor Author (davidbenjamin): It's equivalent, and in any case this is an M3 annotation so I would only be harming myself.

.count();
}
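Regarding the "are these comparisons equivalent?" exchange above: assuming an EventMap holds at most one Event per start position and that Event.equals compares position and both alleles, the new filter is the biallelic special case of the old containsAltAllele check. A hypothetical spelled-out version of the same predicate:

// hypothetical helper; assumes EventMap.get(start) returns the single Event at that start, or null
private static boolean sameEventInOtherHaplotype(final Event event, final EventMap otherEventMap) {
    final Event other = otherEventMap.get(event.getStart());
    return other != null
            && other.refAllele().equals(event.refAllele())
            && other.altAllele().equals(event.altAllele());
}
// under those assumptions the stream filter above is equivalent to
// .filter(event -> !sameEventInOtherHaplotype(event, eventMap2))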

Changed file 2:
@@ -7,6 +7,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
+import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
@@ -48,6 +49,12 @@ public Map<String, Object> annotate(final ReferenceContext ref,
return Collections.unmodifiableMap(map);
}

+public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final Event event) {
Collaborator: gross....

Collaborator: unavoidable i guess

Contributor Author (davidbenjamin, Jun 11, 2023): Yeah, I could refactor the tandem repeat code but it would be hard to justify that as a contribution to DRAGEN GATK.

+final byte[] refBases = ref.getBases();
+final int startIndex = event.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles
+return GATKVariantContextUtils.getNumTandemRepeatUnits(event.refAllele(), Collections.singletonList(event.altAllele()), Arrays.copyOfRange(refBases, startIndex, refBases.length));
+}

public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final VariantContext vc) {
final byte[] refBases = ref.getBases();
final int startIndex = vc.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles
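A hedged usage sketch for the new Event overload. The interpretation of the returned Pair (per-allele repeat-unit counts on the left, repeat-unit bases on the right), the getLeft()/getRight() accessors of a commons-lang style Pair, and the null check are all assumptions based on how the existing VariantContext overload is typically used, not guarantees from this diff.

// hypothetical caller of the overload added above
final Pair<List<Integer>, byte[]> repeats = getNumTandemRepeatUnits(ref, event);
if (repeats != null) {                                  // assumed possible for non-repeat contexts
    final List<Integer> unitCounts = repeats.getLeft(); // assumed: counts for the ref allele, then each alt
    final byte[] repeatUnit = repeats.getRight();       // assumed: bases of the repeat unit, e.g. "T"
    // annotate repeat-length style fields from these values
}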
Changed file 3:
@@ -8,6 +8,7 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.util.ArrayList;
@@ -162,7 +163,7 @@ private static String getRsID(final List<VariantContext> rsIDSourceVCs, final Va
Utils.nonNull(vcToAnnotate, "vcToAnnotate cannot be null");
final List<String> rsids = new ArrayList<>();

-final List<VariantContext> vcAnnotateList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcToAnnotate, true,
+final List<Event> vcAnnotateList = GATKVariantContextUtils.splitVariantContextToEvents(vcToAnnotate, true,
GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);

for ( final VariantContext vcCompSource : rsIDSourceVCs ) {
@@ -174,12 +175,12 @@ private static String getRsID(final List<VariantContext> rsIDSourceVCs, final Va
throw new IllegalArgumentException("source rsID VariantContext " + vcCompSource + " is not on same chromosome as vcToAnnotate " + vcToAnnotate);
}

-final List<VariantContext> vcCompList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcCompSource, true,
+final List<Event> vcCompList = GATKVariantContextUtils.splitVariantContextToEvents(vcCompSource, true,
GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);
boolean addThisID = false;
-for (final VariantContext vcComp : vcCompList) {
-for (final VariantContext vcToAnnotateBi : vcAnnotateList) {
-if (vcComp.getStart() == vcToAnnotateBi.getStart() && vcToAnnotateBi.getReference().equals(vcComp.getReference()) && vcComp.getAlternateAlleles().equals(vcToAnnotateBi.getAlternateAlleles())) {
+for (final Event vcComp : vcCompList) {
+for (final Event vcToAnnotateBi : vcAnnotateList) {
+if (vcComp.equals(vcToAnnotateBi)) {
addThisID = true;
break;
}
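To make the renamed splitting helper concrete, here is a hypothetical illustration; the exact trimming behavior is an assumption, not taken from this diff.

// for a multiallelic record at start 100 with ref A and alts {C, AT},
// splitVariantContextToEvents would presumably yield one biallelic event per alt:
// an A->C event and an A->AT event, both at start 100, so the match test above
// collapses to a single Event.equals() per pair
final List<Event> split = GATKVariantContextUtils.splitVariantContextToEvents(vcToAnnotate, true,
        GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);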
Changed file 4:
@@ -13,6 +13,7 @@
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.genotyper.SampleList;
+import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
@@ -124,7 +125,7 @@ public Config getConfiguration() {
* @param vc Input variant context to complete.
* @return VC with assigned genotypes
*/
-public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List<VariantContext> givenAlleles) {
+public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List<Event> givenAlleles) {
jamesemery marked this conversation as resolved.
// if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call
if (cannotBeGenotyped(vc) || vc.getNSamples() == 0) {
return null;
@@ -150,7 +151,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
}

final AFCalculationResult AFresult = alleleFrequencyCalculator.calculate(reducedVC, defaultPloidy);
-final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, vc);
+final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.allelesConsistentWithGivenAlleles(givenAlleles, vc);
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc, forcedAlleles);

// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
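The -0.0 comment above is ordinary IEEE 754 / Java double behavior; a tiny standalone check (not PR code) illustrating why Math.abs is applied:

public class NegativeZeroDemo {
    public static void main(final String[] args) {
        final double phred = -10 * 0.0;            // multiplying a negative by +0.0 gives -0.0
        System.out.println(phred);                 // prints -0.0
        System.out.println(phred == 0.0);          // true: -0.0 compares equal to 0.0
        System.out.println(Math.abs(phred));       // prints 0.0: abs strips the sign for nicer output
    }
}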
@@ -289,7 +290,7 @@ public List<Integer> alternativeAlleleMLECounts() {
* Provided the exact mode computations it returns the appropriate subset of alleles that progress to genotyping.
* @param afCalculationResult the allele fraction calculation result.
* @param vc the variant context
-* @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#getAllelesConsistentWithGivenAlleles}
+* @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#allelesConsistentWithGivenAlleles}
* @return information about the alternative allele subsetting {@code null}.
*/
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afCalculationResult, final VariantContext vc, final Set<Allele> forcedAlleles) {
Changed file 5:
@@ -1,6 +1,9 @@
package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc;

-import htsjdk.variant.variantcontext.*;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.VariantContext;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import org.apache.commons.math3.special.Gamma;
@@ -10,11 +13,9 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingLikelihoods;
-import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleAndContext;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.dragstr.DragstrParams;

-import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -154,22 +155,13 @@ public AFCalculationResult fastCalculateDiploidBasedOnGLs(final GenotypingLikeli
final int numAlleles = gls.numberOfAlleles();
final List<Allele> alleles = gls.asListOfAlleles();

-final List<Integer> alleleLengths = new ArrayList<>();
-for (Allele al : gls.asListOfAlleles()) {
-if (al instanceof AlleleAndContext) {
-alleleLengths.add(((AlleleAndContext) al).maxAlleleLength());
-} else {
-alleleLengths.add(al.length());
-}
-}
-final int alleleLength = alleleLengths.stream().max(Integer::compare).get();
+final int alleleLength = gls.asListOfAlleles().stream().map(Allele::length).max(Integer::compare).get();

final List<String> samples = gls.asListOfSamples();
final List<Genotype> genotypes = IntStream.range(0, samples.size()).mapToObj(idx -> new GenotypeBuilder(samples.get(idx)).alleles(alleles).PL(gls.sampleLikelihoods(idx).getAsPLs()).make()).collect(Collectors.toList());
return calculate(numAlleles, alleles, genotypes, defaultPloidy, alleleLength);
}


/**
* Private function that actually calculates allele frequencies etc.
*

This file was deleted.
