use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext call : callsVariants) {
final List<Target> overlappingTargets = targets.targets(call);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
if (!overlappingTruth.isEmpty()) {
continue;
}
@SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
final VariantContext matchingOutput = matchingOutputOptional.get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype callGenotype = call.getGenotype(sample);
final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
Assert.assertEquals(expectedCall, actualCall);
final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
Assert.assertEquals(actualDiscovered, expectedDiscovered);
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
if (expectedCall.isCalled() && actualDiscovered) {
expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
}
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
if (expectedCall.isCalled()) {
sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
}
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class XHMMModelUnitTest method testLogEmissionProbability.
@Test(dataProvider = "testData", dependsOnMethods = "testInstantiation")
public void testLogEmissionProbability(final double eventStartProbability, final double meanEventSize, final double deletionMean, final double duplicationMean) {
final XHMMModel model = new XHMMModel(eventStartProbability, meanEventSize, deletionMean, duplicationMean);
final Target target = new Target("NAME");
final double logDenominator = Math.log(Math.sqrt(2 * Math.PI));
final Function<Double, Double> neutralEmission = x -> -.5 * (x * x) - logDenominator;
final Function<Double, Double> deletionEmission = x -> -.5 * Math.pow(x - deletionMean, 2) - logDenominator;
final Function<Double, Double> duplicationEmission = x -> -.5 * Math.pow(x - duplicationMean, 2) - logDenominator;
for (final double coverage : TEST_COVERAGE_VALUES) {
final double neutralObserved = model.logEmissionProbability(new XHMMEmissionData(coverage), CopyNumberTriState.NEUTRAL, target);
final double deletionObserved = model.logEmissionProbability(new XHMMEmissionData(coverage), CopyNumberTriState.DELETION, target);
final double duplicationObserved = model.logEmissionProbability(new XHMMEmissionData(coverage), CopyNumberTriState.DUPLICATION, target);
final double neutralExpected = neutralEmission.apply(coverage);
final double deletionExpected = deletionEmission.apply(coverage);
final double duplicationExpected = duplicationEmission.apply(coverage);
Assert.assertEquals(neutralObserved, neutralExpected, "neutral emission for " + coverage);
Assert.assertEquals(deletionObserved, deletionExpected);
Assert.assertEquals(duplicationObserved, duplicationExpected);
}
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class XHMMModelUnitTest method testLogTransitionProbabilityInDifferentChromosomes.
// different chromosomes are independent Markov chains; hence the transition probabilities should equal the prior
// probabilities
@Test(dataProvider = "testData", dependsOnMethods = { "testInstantiation", "testLogPrior" })
public void testLogTransitionProbabilityInDifferentChromosomes(final double eventStartProbability, final double meanEventSize, final double deletionMean, final double duplicationMean) {
final XHMMModel model = new XHMMModel(eventStartProbability, meanEventSize, deletionMean, duplicationMean);
final Target fromTarget = new Target("target1", new SimpleInterval("1", 1, 100));
final Target toTarget = new Target("target2", new SimpleInterval("2", 1, 100));
assertTransitionProbabilities(eventStartProbability, meanEventSize, model, fromTarget, toTarget);
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class XHMMModelUnitTest method testDistance.
@Test(dependsOnMethods = "testInstantiation")
public void testDistance() {
final XHMMModel model = new XHMMModel(0.5, 10, -1, 1);
// different chromosomes
Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 1, 100)), new Target("target2", new SimpleInterval("2", 1, 100))), Double.POSITIVE_INFINITY);
// commutativity
Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 1, 100)), new Target("target2", new SimpleInterval("1", 200, 300))), XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 200, 300)), new Target("target2", new SimpleInterval("1", 1, 100))));
Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 100, 200)), new Target("target2", new SimpleInterval("1", 200, 300))), 100, EPSILON);
Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 100, 200)), new Target("target2", new SimpleInterval("1", 100, 110))), 45, EPSILON);
Assert.assertEquals(XHMMModel.calculateDistance(new Target("target1", new SimpleInterval("1", 100, 200)), new Target("target2", new SimpleInterval("1", 150, 250))), 50, EPSILON);
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class TargetCoverageSexGenotypeCalculator method processReadCountsAndTargets.
/**
* Processes raw read counts and targets:
* <dl>
* <dt> If more than one sample is present in the collection, filters out fully uncovered targets
* from read counts and removes the uncovered targets from the target list</dt>
*
* <dt> Otherwise, does nothing and warns the user
* </dt>
* </dl>
*
* @param rawReadCounts raw read count collection
* @param targetList user provided target list
* @return pair of processed read counts and targets
*/
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
final ReadCountCollection finalReadCounts;
final List<Target> finalTargetList;
/* remove totally uncovered targets */
if (rawReadCounts.columnNames().size() > 1) {
finalReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
final Set<Target> targetSetFromProcessedReadCounts = new HashSet<>(finalReadCounts.targets());
finalTargetList = targetList.stream().filter(targetSetFromProcessedReadCounts::contains).collect(Collectors.toList());
} else {
final long numUncoveredTargets = rawReadCounts.records().stream().filter(rec -> (int) rec.getDouble(0) == 0).count();
final long numAllTargets = rawReadCounts.targets().size();
logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically" + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
finalReadCounts = rawReadCounts;
finalTargetList = targetList;
}
return ImmutablePair.of(finalReadCounts, finalTargetList);
}
Aggregations