use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class SparkGenomeReadCountsIntegrationTest method testSparkGenomeReadCountsTwoIntervals.
@Test
public void testSparkGenomeReadCountsTwoIntervals() {
final File outputFile = createTempFile(BAM_FILE.getName(), ".cov");
final String[] arguments = { "--disableSequenceDictionaryValidation", "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(), "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(), "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, outputFile.getAbsolutePath(), "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "10000", "-L", "1", "-L", "2" };
runCommandLine(arguments);
final ReadCountCollection proportionalCoverage = loadReadCountCollection(outputFile);
Assert.assertTrue(proportionalCoverage.records().stream().anyMatch(t -> t.getContig().equals("1")));
Assert.assertTrue(proportionalCoverage.records().stream().anyMatch(t -> t.getContig().equals("2")));
Assert.assertTrue(proportionalCoverage.records().stream().noneMatch(t -> t.getContig().equals("3") || t.getContig().equals("4")));
// raw coverage
final ReadCountCollection rawCoverage = loadReadCountCollection(new File(outputFile.getAbsolutePath() + SparkGenomeReadCounts.RAW_COV_OUTPUT_EXTENSION));
Assert.assertTrue(rawCoverage.records().stream().anyMatch(t -> t.getContig().equals("1")));
Assert.assertTrue(rawCoverage.records().stream().anyMatch(t -> t.getContig().equals("2")));
Assert.assertTrue(rawCoverage.records().stream().noneMatch(t -> t.getContig().equals("3") || t.getContig().equals("4")));
final File targetsFile = new File(outputFile.getAbsolutePath() + ".targets.tsv");
final List<Target> targets = TargetTableReader.readTargetFile(targetsFile);
Assert.assertTrue(targets.stream().allMatch(t -> (t.getContig().equals("1")) || (t.getContig().equals("2"))));
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class SparkGenomeReadCountsIntegrationTest method testSparkGenomeReadCountsSubContig.
@Test
public void testSparkGenomeReadCountsSubContig() {
final File outputFile = createTempFile(BAM_FILE.getName(), ".cov");
final String[] arguments = { "--disableSequenceDictionaryValidation", "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(), "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(), "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, outputFile.getAbsolutePath(), "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "100", "-L", "1:1-500" };
runCommandLine(arguments);
final File targetsFile = new File(outputFile.getAbsolutePath() + ".targets.tsv");
final List<Target> targets = TargetTableReader.readTargetFile(targetsFile);
Assert.assertTrue(targets.stream().allMatch(t -> t.getContig().equals("1")));
Assert.assertEquals(targets.size(), 5);
for (int i = 0; i < targets.size(); i++) {
Assert.assertEquals(targets.get(i).getStart(), i * 100 + 1);
Assert.assertEquals(targets.get(i).getEnd(), (i + 1) * 100);
}
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.
the class GermlinePloidyAnnotatedTargetCollectionUnitTest method testTargetPloidy.
@Test
public void testTargetPloidy() {
/* on an 1 contig */
final Target testAutosomalTarget = new Target("UNNAMED_TARGET_0");
Assert.assertEquals(ploidyAnnotsCollection.getTargetGermlinePloidyByGenotype(testAutosomalTarget, "SEX_XX"), 2);
Assert.assertEquals(ploidyAnnotsCollection.getTargetGermlinePloidyByGenotype(testAutosomalTarget, "SEX_XX"), 2);
/* on X contig */
final Target testXTarget = new Target("UNNAMED_TARGET_10");
Assert.assertEquals(ploidyAnnotsCollection.getTargetGermlinePloidyByGenotype(testXTarget, "SEX_XX"), 2);
Assert.assertEquals(ploidyAnnotsCollection.getTargetGermlinePloidyByGenotype(testXTarget, "SEX_XY"), 1);
/* on Y contig */
final Target testYTarget = new Target("UNNAMED_TARGET_19");
Assert.assertEquals(ploidyAnnotsCollection.getTargetGermlinePloidyByGenotype(testYTarget, "SEX_XX"), 0);
Assert.assertEquals(ploidyAnnotsCollection.getTargetGermlinePloidyByGenotype(testYTarget, "SEX_XY"), 1);
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method processInterval.
/**
* Processes a cluster of truth and called variants that may overlap over a genome region.
* <p>
* It returns the list of {@link VariantEvaluationContext} instances to be output for the given interval. These
* are already sorted by coordinates.
* </p>
* @param truthReader reader to the truth variants.
* @param callsReader reader to the called variants.
* @param interval the interval to analyze.
* @return never {@code null}.
*/
private List<VariantEvaluationContext> processInterval(final VCFFileReader truthReader, final VCFFileReader callsReader, final SimpleInterval interval, final TargetCollection<Target> targets) {
final List<VariantContext> truthVariants = variantQueryToList(truthReader, interval);
final List<VariantContext> callsVariants = variantQueryToList(callsReader, interval);
final List<VariantEvaluationContext> evaluatedVariants = new ArrayList<>(truthVariants.size() + callsVariants.size());
for (final VariantContext truth : truthVariants) {
// skip truth that does not overlap a single target.
if (targets.targetCount(truth) == 0) {
continue;
}
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> IntervalUtils.overlaps(truth, vc)).collect(Collectors.toList());
evaluatedVariants.add(composeTruthOverlappingVariantContext(truth, overlappingCalls, targets));
}
for (final VariantContext call : callsVariants) {
// skip call that does not overlap a single target (the user might want to call on a smaller set of targets)
if (targets.targetCount(call) == 0) {
continue;
}
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> IntervalUtils.overlaps(call, vc)).collect(Collectors.toList());
if (overlappingTruth.isEmpty()) {
evaluatedVariants.add(composeNonTruthOverlappingVariantContext(call, targets));
}
}
return evaluatedVariants.stream().sorted(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR).map(this::applyVariantContextFilters).collect(Collectors.toList());
}
use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
Aggregations