use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext call : callsVariants) {
final List<Target> overlappingTargets = targets.targets(call);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
if (!overlappingTruth.isEmpty()) {
continue;
}
@SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
final VariantContext matchingOutput = matchingOutputOptional.get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype callGenotype = call.getGenotype(sample);
final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
Assert.assertEquals(expectedCall, actualCall);
final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
Assert.assertEquals(actualDiscovered, expectedDiscovered);
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
if (expectedCall.isCalled() && actualDiscovered) {
expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
}
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
if (expectedCall.isCalled()) {
sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
}
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class ConvertGSVariantsToSegmentsIntegrationTest method composeExpectedSegments.
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
final VCFFileReader reader = new VCFFileReader(vcf, false);
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
reader.iterator().forEachRemaining(vc -> {
final int targetCount = targets.indexRange(vc).size();
for (final Genotype genotype : vc.getGenotypes()) {
final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(",")).mapToDouble(Double::parseDouble).toArray();
final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
final CopyNumberTriState call = expectedCall(cn);
final double exactLog10Prob = expectedExactLog10(call, cnp);
final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()), 0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN, -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum));
result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
}
});
return result;
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class XHMMModelUnitTest method testLogTransitionProbabilityOverlappingTargets.
@Test(dataProvider = "testData", dependsOnMethods = "testInstantiation")
public void testLogTransitionProbabilityOverlappingTargets(final double eventRate, final double meanEventSize, final double deletionMean, final double duplicationMean) {
final XHMMModel model = new XHMMModel(eventRate, meanEventSize, deletionMean, duplicationMean);
final Target fromTarget = new Target("target1", new SimpleInterval("1", 1, 100));
final Target toTarget = new Target("target2", new SimpleInterval("1", 50, 150));
assertTransitionProbabilities(eventRate, meanEventSize, model, fromTarget, toTarget);
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class XHMMSegmentCallerIntegrationTest method assertOutputIsInOrder.
private void assertOutputIsInOrder(final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> outputRecords, final TargetCollection<Target> targets) {
for (int i = 1; i < outputRecords.size(); i++) {
final HiddenStateSegmentRecord<CopyNumberTriState, Target> nextRecord = outputRecords.get(i);
final HiddenStateSegmentRecord<CopyNumberTriState, Target> previousRecord = outputRecords.get(i - 1);
final IndexRange nextRange = targets.indexRange(nextRecord.getSegment());
final IndexRange previousRange = targets.indexRange(previousRecord.getSegment());
Assert.assertTrue(nextRange.from >= previousRange.from);
}
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class XHMMSegmentCallerIntegrationTest method testRunCommandLine.
//TODO: this test used to contain a test of concordance with XHMM. It no longer does that because our model has
//TODO: diverged from XHMM's. Eventually the right thing to do is use the simulateChain() method to generate
//TODO: simulated data for some artificial set of CNV segments and to test concordance with those segments.
//TODO: however we still use XHMM's emission model, which is both not generative and quite silly. Once we
//TODO: have a generative model of coverage we can modify simulateChain() accordingly and then write a concordance
//TODO: test here. Until then, we do not have an integration test but we do have our ongoing evaluations, which
//TODO: show the superiority of our modifications versus the original XHMM model.
@Test(dataProvider = "simulatedChainData")
public File testRunCommandLine(final XHMMData chain) {
final File inputFile = writeChainInTempFile(chain);
final File outputFile = createTempFile("output", ".tab");
runCommandLine(chain, inputFile, outputFile);
Assert.assertTrue(outputFile.exists());
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(REALISTIC_TARGETS_FILE);
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> outputRecords = readOutputRecords(outputFile);
assertOutputIsInOrder(outputRecords, targets);
assertOutputHasConsistentNumberOfTargets(outputRecords, targets);
final Map<String, List<HiddenStateSegmentRecord<CopyNumberTriState, Target>>> outputRecordsBySample = splitOutputRecordBySample(outputRecords);
assertSampleNames(outputRecordsBySample.keySet(), chain);
for (final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> sampleRecords : outputRecordsBySample.values()) {
assertSampleSegmentsCoverAllTargets(sampleRecords, targets);
assertSampleSegmentsCoordinates(sampleRecords, targets);
}
return outputFile;
}
Aggregations