Skip to content

Commit

Permalink
Modified VariantRecalibrator to only use a deduped annotations list. …
Browse files Browse the repository at this point in the history
…This resolves issues that can result from differences in the number of annotations in the original and deduped lists. Also fixed the logger warning that prints the duplicate annotations. (#8227)
KevinCLydon authored Feb 28, 2023

Verified

This commit was signed with the committer’s verified signature.
rockerBOO Dave Lage
1 parent 6b2440d commit 227bbca
Showing 3 changed files with 21 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ public VariantDataManager( final List<String> annotationKeys, final VariantRecal
this.data = Collections.emptyList();
final List<String> uniqueAnnotations = annotationKeys.stream().distinct().collect(Collectors.toList());
if (annotationKeys.size() != uniqueAnnotations.size()) {
logger.warn("Ignoring duplicate annotations for recalibration %s.", Utils.getDuplicatedItems(annotationKeys));
logger.warn("Ignoring duplicate annotations for recalibration {}", Utils.getDuplicatedItems(annotationKeys));
}
this.annotationKeys = new ArrayList<>( uniqueAnnotations );
this.VRAC = VRAC;
Original file line number | Diff line number | Diff line change
@@ -655,7 +655,7 @@ public Object onTraversalSuccess() {
engine.evaluateData(dataManager.getData(), goodModel, false);
if (goodModel.failedToConverge) {
if (outputModel != null) {
final GATKReport report = writeModelReport(goodModel, null, USE_ANNOTATIONS);
final GATKReport report = writeModelReport(goodModel, null, dataManager.getAnnotationKeys());
saveModelReport(report, outputModel);
}
throw new UserException.VQSRPositiveModelFailure("Positive training model failed to converge. One or more annotations " +
@@ -678,7 +678,7 @@ public Object onTraversalSuccess() {
engine.evaluateData(dataManager.getData(), badModel, true);

if (outputModel != null) {
final GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
final GATKReport report = writeModelReport(goodModel, badModel, dataManager.getAnnotationKeys());
saveModelReport(report, outputModel);
}

@@ -711,7 +711,7 @@ public Object onTraversalSuccess() {
goodModel,
badModel,
0.0,
dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]));
dataManager.getAnnotationKeys().toArray(new String[dataManager.getAnnotationKeys().size()]));
}

if (VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,22 @@ public class VariantRecalibratorIntegrationTest extends CommandLineProgramTest {
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false"
};

/**
 * Command-line argument template for a VariantRecalibrator run whose {@code -an}
 * list names the {@code QD} annotation twice.
 *
 * The array holds a single element: every fragment below is joined with {@code +}
 * into one string. The two {@code %s} placeholders (after {@code --output} and
 * {@code -tranches-file}) are left unformatted here; presumably the consuming test
 * spec substitutes the output paths -- confirm against the caller.
 */
final private String[] variantRecalibratorSamplingParamsWithDupes = {
" --variant " + getLargeVQSRTestDataDir() + "phase1.projectConsensus.chr20.1M-10M.raw.snps.vcf" +
" -L 20:1,000,000-10,000,000" +
" --resource:known,known=true,prior=10.0 " + getLargeVQSRTestDataDir() + "dbsnp_132_b37.leftAligned.20.1M-10M.vcf" +
" --resource:truth_training1,truth=true,training=true,prior=15.0 " + getLargeVQSRTestDataDir() + "sites_r27_nr.b37_fwd.20.1M-10M.vcf" +
" --resource:truth_training2,training=true,truth=true,prior=12.0 " + getLargeVQSRTestDataDir() + "Omni25_sites_1525_samples.b37.20.1M-10M.vcf" +
// QD is listed twice here: this repeated annotation is the "dupes" in the field name
" -an QD -an HaplotypeScore -an HRun -an QD" +
" --trust-all-polymorphic" + // for speed
" --output %s" +
" -tranches-file %s" +
" --output-model " + modelReportFilename +
" -mode SNP --max-gaussians 3" + //reduce max gaussians so we have negative training data with the sampled input
" -sample-every 2" +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false"
};

@Override
public String getToolTestDataDir(){
return toolsTestDir + "walkers/VQSR/";
@@ -404,7 +420,7 @@ public void testVariantRecalibratorSampling() throws IOException {
public void testVariantRecalibratorRScriptOutput() throws IOException {
final String inputFile = getLargeVQSRTestDataDir() + "phase1.projectConsensus.chr20.1M-10M.raw.snps.vcf";
final File unrunRscript = createTempFile("rscriptOutput", ".R");
final String args = StringUtils.join(variantRecalibratorSamplingParams, " ");
final String args = StringUtils.join(variantRecalibratorSamplingParamsWithDupes, " ");

final IntegrationTestSpec spec = new IntegrationTestSpec(
args +

0 comments on commit 227bbca

Please sign in to comment.