Use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
From the class ReferenceConfidenceModelUnitTest, method testRefConfidenceBasic.
@Test(dataProvider = "RefConfidenceData")
public void testRefConfidenceBasic(final int nReads, final int extension) {
    final RefConfData data = new RefConfData("ACGTAACCGGTT", extension);
    final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
    final List<VariantContext> calls = Collections.emptyList();
    for (int i = 0; i < nReads; i++) {
        data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
    }
    final ReadLikelihoods<Haplotype> likelihoods = createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
    final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples, 2);
    final IndependentSampleGenotypesModel genotypingModel = new IndependentSampleGenotypesModel();
    final List<Integer> expectedDPs = Collections.nCopies(data.getActiveRegion().getSpan().size(), nReads);
    final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, calls);
    checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
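The test is driven by the TestNG data provider named "RefConfidenceData", which supplies the (nReads, extension) pairs but is not shown on this page. A minimal sketch of such a provider is below; the method name and the specific values are illustrative assumptions, not the actual GATK test data, and it assumes the usual org.testng.annotations.DataProvider and java.util.ArrayList imports.

@DataProvider(name = "RefConfidenceData")
public Object[][] makeRefConfidenceData() {
    // Illustrative (nReads, extension) combinations only; the real provider's values may differ.
    final List<Object[]> tests = new ArrayList<>();
    for (final int nReads : new int[] { 0, 1, 10 }) {
        for (final int extension : new int[] { 0, 10 }) {
            tests.add(new Object[] { nReads, extension });
        }
    }
    return tests.toArray(new Object[][] {});
}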
Use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
From the class GermlineProbabilityCalculatorUnitTest, method testGetGermlineAltAlleleFrequencies.
@Test
public void testGetGermlineAltAlleleFrequencies() {
    final double defaultAF = 0.001;
    final double nonDefaultAF1 = 0.1;
    final double nonDefaultAF2 = 0.01;
    final Allele Aref = Allele.create("A", true);
    final Allele C = Allele.create("C");
    final Allele G = Allele.create("G");
    final Allele T = Allele.create("T");
    final String source = "SOURCE";
    final int start = 1;
    final int stop = 1;
    // biallelic, vc has the same alt allele
    final List<Allele> altAlleles1 = Arrays.asList(C);
    final VariantContext vc1 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, C))
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1 }).make();
    final double[] af1 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles1, Optional.of(vc1), defaultAF);
    Assert.assertEquals(af1.length, altAlleles1.size());
    Assert.assertEquals(af1[0], nonDefaultAF1, 0.00001);
    // biallelic, vc has different alt allele
    final List<Allele> altAlleles2 = Arrays.asList(C);
    final VariantContext vc2 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, G))
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1 }).make();
    final double[] af2 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles2, Optional.of(vc2), defaultAF);
    Assert.assertEquals(af2.length, altAlleles2.size());
    Assert.assertEquals(af2[0], defaultAF, 0.00001);
    // triallelic, same alt alleles
    final List<Allele> altAlleles3 = Arrays.asList(C, G);
    final VariantContext vc3 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, C, G))
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1, nonDefaultAF2 }).make();
    final double[] af3 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles3, Optional.of(vc3), defaultAF);
    Assert.assertEquals(af3.length, altAlleles3.size());
    Assert.assertEquals(af3[0], nonDefaultAF1, 0.00001);
    Assert.assertEquals(af3[1], nonDefaultAF2, 0.00001);
    // triallelic, same alt alleles in different order
    final List<Allele> altAlleles4 = Arrays.asList(C, G);
    final VariantContext vc4 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, G, C))
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1, nonDefaultAF2 }).make();
    final double[] af4 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles4, Optional.of(vc4), defaultAF);
    Assert.assertEquals(af4.length, altAlleles4.size());
    Assert.assertEquals(af4[0], nonDefaultAF2, 0.00001);
    Assert.assertEquals(af4[1], nonDefaultAF1, 0.00001);
    // triallelic, only one allele in common
    final List<Allele> altAlleles5 = Arrays.asList(C, G);
    final VariantContext vc5 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, C, T))
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1, nonDefaultAF2 }).make();
    final double[] af5 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles5, Optional.of(vc5), defaultAF);
    Assert.assertEquals(af5.length, altAlleles5.size());
    Assert.assertEquals(af5[0], nonDefaultAF1, 0.00001);
    Assert.assertEquals(af5[1], defaultAF, 0.00001);
}
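The assertions above characterize the lookup: each requested alt allele that also appears in the supplied germline VariantContext gets the AF value at that allele's position in the AF attribute, and every other allele falls back to defaultAF. The real GermlineProbabilityCalculator implementation is not shown on this page; the following is only a minimal sketch consistent with the asserted behaviour, assuming (as in this test) that the AF attribute holds a double[] parallel to the germline VC's alt alleles. The method name is hypothetical.

// Hypothetical sketch consistent with the behaviour asserted above; not the actual GATK implementation.
private static double[] germlineAltAlleleFrequenciesSketch(final List<Allele> altAlleles,
                                                           final Optional<VariantContext> germlineVC,
                                                           final double defaultAF) {
    return altAlleles.stream().mapToDouble(allele -> {
        if (!germlineVC.isPresent()) {
            return defaultAF;
        }
        final int index = germlineVC.get().getAlternateAlleles().indexOf(allele);
        if (index < 0) {
            return defaultAF;  // allele absent from the germline resource
        }
        // Assumes the AF attribute was stored as a double[] in the same order as the germline alt alleles.
        final double[] germlineAFs = (double[]) germlineVC.get().getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
        return index < germlineAFs.length ? germlineAFs[index] : defaultAF;
    }).toArray();
}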
Use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
From the class EvaluateCopyNumberTriStateCalls, method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets,
                                                          final Genotype truthGenotype, final int truthCopyNumber,
                                                          final CopyNumberTriStateAllele truthAllele,
                                                          final List<Pair<VariantContext, Genotype>> calls) {
    final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream()
            .map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0)))
            .collect(Collectors.toSet());
    final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
    final GenotypeBuilder builder = new GenotypeBuilder(sample);
    // Set the call allele.
    builder.alleles(Collections.singletonList(calledAllele));
    // Set the truth allele.
    builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
    // Annotate the genotype with the number of calls.
    builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
    // When there is more than one qualified type of event, indicate how many of each.
    builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY,
            CopyNumberTriStateAllele.ALL_ALLELES.stream()
                    .mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count())
                    .toArray());
    // Calculate the length in targets of the call as the sum across all calls.
    builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY,
            calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
    // Calculate the call quality; if there is more than one overlapping call, take the maximum quality.
    builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY,
            calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
    // Set the truth copy fraction.
    builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
    // Calculate the truth call quality.
    final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
    builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
    // Set genotype filters.
    final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
    builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
    // Calculate the evaluation class (TP, FN, etc.), but only if either the truth or a call is non-ref.
    if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP)
            || truthAllele != CopyNumberTriStateAllele.REF) {
        final EvaluationClass evaluationClass;
        if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
            evaluationClass = EvaluationClass.FALSE_NEGATIVE;
        } else if (calledAlleles.size() == 1) {
            evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE
                    : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE
                    : EvaluationClass.DISCORDANT_POSITIVE;
        } else {
            evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
        }
        builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
    }
    return builder.make();
}
Use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
From the class EvaluateCopyNumberTriStateCalls, method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth,
                                                          final List<VariantContext> calls, final TargetCollection<Target> targets) {
    final Genotype truthGenotype = truth.getGenotype(sample);
    // If there is no truth genotype for that sample, we output the "empty" genotype.
    if (truthGenotype == null) {
        return GenotypeBuilder.create(sample, Collections.emptyList());
    }
    final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
    final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);
    final List<Pair<VariantContext, Genotype>> allCalls = calls.stream()
            .map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample)))
            .filter(pair -> pair.getRight() != null)
            .filter(pair -> GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(), XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE).equals(XHMMSegmentGenotyper.DISCOVERY_TRUE))
            .collect(Collectors.toList());
    final List<Pair<VariantContext, Genotype>> qualifiedCalls = composeQualifyingCallsList(targets, allCalls);
    return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber, truthAllele, qualifiedCalls);
}
Use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
From the class HaplotypeCallerSpark, method writeVariants.
/**
 * Writes the variants. This is currently going to be horribly slow and explosive on a full-size file,
 * since it performs a collect.
 *
 * This will be replaced by a parallel writer similar to what is done with
 * {@link org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink}.
 */
private void writeVariants(JavaRDD<VariantContext> variants) {
    final List<VariantContext> collectedVariants = variants.collect();
    final SAMSequenceDictionary referenceDictionary = getReferenceSequenceDictionary();
    final List<VariantContext> sortedVariants = collectedVariants.stream()
            .sorted((o1, o2) -> IntervalUtils.compareLocatables(o1, o2, referenceDictionary))
            .collect(Collectors.toList());
    final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, getHeaderForReads(), new ReferenceMultiSourceAdapter(getReference(), getAuthHolder()));
    try (final VariantContextWriter writer = hcEngine.makeVCFWriter(output, getBestAvailableSequenceDictionary())) {
        hcEngine.writeHeader(writer, getHeaderForReads().getSequenceDictionary(), Collections.emptySet());
        sortedVariants.forEach(writer::add);
    }
}
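As the Javadoc above warns, collecting every VariantContext to the driver and sorting there does not scale. Until a parallel writer exists, one possible mitigation is to sort on the executors with JavaRDD.sortBy before collecting. The sketch below is an illustration only, not the ReadsSparkSink-style writer the comment refers to; it assumes the dictionary from getReferenceSequenceDictionary() is available on the driver and that per-contig positions stay well below the 1e9 offset used to build the sort key.

// Hypothetical sketch: order variants by (contig index, start) on the executors rather than on the driver.
final Map<String, Integer> contigToIndex = referenceDictionary.getSequences().stream()
        .collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceIndex));
final JavaRDD<VariantContext> sortedByPosition = variants.sortBy(
        vc -> contigToIndex.get(vc.getContig()) * 1_000_000_000L + vc.getStart(),
        true,
        variants.getNumPartitions());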