Search in sources :

Example 36 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class XHMMSegmentCallerIntegrationTest method assertSampleSegmentsCoordinates.

private void assertSampleSegmentsCoordinates(List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> sampleRecords, TargetCollection<Target> targets) {
    for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> record : sampleRecords) {
        final IndexRange range = targets.indexRange(record.getSegment());
        Assert.assertTrue(range.size() > 0);
        Assert.assertEquals(record.getSegment().getContig(), targets.location(range.from).getContig());
        Assert.assertEquals(record.getSegment().getStart(), targets.location(range.from).getStart());
        Assert.assertEquals(record.getSegment().getEnd(), targets.location(range.to - 1).getEnd());
    }
}
Also used : IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)

Example 37 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk-protected by broadinstitute.

the class XHMMSegmentCallerIntegrationTest method assertOutputHasConsistentNumberOfTargets.

private void assertOutputHasConsistentNumberOfTargets(final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> outputRecords, final TargetCollection<Target> targets) {
    for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> nextRecord : outputRecords) {
        final IndexRange indexRange = targets.indexRange(nextRecord.getSegment());
        Assert.assertEquals(indexRange.to - indexRange.from, nextRecord.getSegment().getTargetCount());
    }
}
Also used : IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)

Example 38 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class TargetCoverageSexGenotyper method doWork.

@Override
protected Object doWork() {
    /* check args */
    IOUtils.canReadFile(inputRawReadCountsFile);
    IOUtils.canReadFile(inputContigAnnotsFile);
    Utils.validateArg(baselineMappingErrorProbability > 0 && baselineMappingErrorProbability < 1, "Must have 0 < baseline mapping error probability < 1.");
    /* read input read counts */
    final ReadCountCollection rawReadCounts;
    try {
        logger.info("Parsing raw read count collection file...");
        rawReadCounts = ReadCountCollectionUtils.parse(inputRawReadCountsFile);
    } catch (final IOException ex) {
        throw new UserException.CouldNotReadInputFile("Could not read raw read count collection file");
    }
    /* parse contig genotype ploidy annotations */
    final List<ContigGermlinePloidyAnnotation> contigGermlinePloidyAnnotationList;
    logger.info("Parsing contig genotype ploidy annotations file...");
    contigGermlinePloidyAnnotationList = ContigGermlinePloidyAnnotationTableReader.readContigGermlinePloidyAnnotationsFromFile(inputContigAnnotsFile);
    /* parse target list */
    final List<Target> inputTargetList;
    if (inputTargetListFile != null) {
        inputTargetList = TargetTableReader.readTargetFile(inputTargetListFile);
    } else {
        logger.info("Target list was not provided -- getting target list from the read count collection.");
        inputTargetList = rawReadCounts.targets();
    }
    /* perform genotyping */
    final TargetCoverageSexGenotypeCalculator genotyper = new TargetCoverageSexGenotypeCalculator(rawReadCounts, inputTargetList, contigGermlinePloidyAnnotationList, baselineMappingErrorProbability);
    final SexGenotypeDataCollection sampleSexGenotypeCollection = genotyper.inferSexGenotypes();
    /* save results */
    try {
        sampleSexGenotypeCollection.write(new FileWriter(outputSampleGenotypesFile));
    } catch (final IOException ex) {
        throw new UserException.CouldNotCreateOutputFile("Could not write inferred sample genotypes to file", ex);
    }
    return "SUCCESS";
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) FileWriter(java.io.FileWriter) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 39 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class SparkGenomeReadCounts method collectReads.

private void collectReads() {
    if (readArguments.getReadFilesNames().size() != 1) {
        throw new UserException("This tool only accepts a single bam/sam/cram as input");
    }
    final SampleCollection sampleCollection = new SampleCollection(getHeaderForReads());
    if (sampleCollection.sampleCount() > 1) {
        throw new UserException.BadInput("We do not support bams with more than one sample.");
    }
    final String sampleName = sampleCollection.sampleIds().get(0);
    final String[] commentsForRawCoverage = { "##fileFormat  = tsv", "##commandLine = " + getCommandLine(), String.format("##title = Coverage counts in %d base bins for WGS", binsize) };
    final ReadFilter filter = makeGenomeReadFilter();
    final SAMSequenceDictionary sequenceDictionary = getReferenceSequenceDictionary();
    logger.info("Starting Spark coverage collection...");
    final long coverageCollectionStartTime = System.currentTimeMillis();
    final JavaRDD<GATKRead> rawReads = getReads();
    final JavaRDD<GATKRead> reads = rawReads.filter(read -> filter.test(read));
    //Note: using a field inside a closure will pull in the whole enclosing object to serialization
    // (which leads to bad performance and can blow up if some objects in the fields are not
    // Serializable - closures always use java Serializable and not Kryo)
    //Solution here is to use a temp variable for binsize because it's just an int.
    final int binsize_tmp = binsize;
    final JavaRDD<SimpleInterval> readIntervals = reads.filter(read -> sequenceDictionary.getSequence(read.getContig()) != null).map(read -> SparkGenomeReadCounts.createKey(read, sequenceDictionary, binsize_tmp));
    final Map<SimpleInterval, Long> byKey = readIntervals.countByValue();
    final Set<SimpleInterval> readIntervalKeySet = byKey.keySet();
    final long totalReads = byKey.values().stream().mapToLong(v -> v).sum();
    final long coverageCollectionEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished the spark coverage collection with %d targets and %d reads. Elapse of %d seconds", readIntervalKeySet.size(), totalReads, (coverageCollectionEndTime - coverageCollectionStartTime) / 1000));
    final String[] commentsForProportionalCoverage = { commentsForRawCoverage[0], commentsForRawCoverage[1], String.format("##title = Proportional coverage counts in %d base bins for WGS (total reads: %d)", binsize, totalReads) };
    logger.info("Creating full genome bins...");
    final long createGenomeBinsStartTime = System.currentTimeMillis();
    final List<SimpleInterval> fullGenomeBins = createFullGenomeBins(binsize);
    List<Target> fullGenomeTargetCollection = createTargetListFromSimpleInterval(fullGenomeBins);
    TargetWriter.writeTargetsToFile(new File(outputFile.getAbsolutePath() + ".targets.tsv"), fullGenomeTargetCollection);
    final long createGenomeBinsEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished creating genome bins. Elapse of %d seconds", (createGenomeBinsEndTime - createGenomeBinsStartTime) / 1000));
    logger.info("Creating missing genome bins...");
    final long createMissingGenomeBinsStartTime = System.currentTimeMillis();
    logger.info("Creating missing genome bins: Creating a mutable mapping...");
    final Map<SimpleInterval, Long> byKeyMutable = new HashMap<>();
    byKeyMutable.putAll(byKey);
    logger.info("Creating missing genome bins: Populating mutable mapping with zero counts for empty regions...");
    fullGenomeBins.stream().forEach(b -> byKeyMutable.putIfAbsent(b, 0l));
    final long createMissingGenomeBinsEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished creating missing genome bins. Elapse of %d seconds", (createMissingGenomeBinsEndTime - createMissingGenomeBinsStartTime) / 1000));
    logger.info("Creating final map...");
    final long createFinalMapStartTime = System.currentTimeMillis();
    final SortedMap<SimpleInterval, Long> byKeySorted = new TreeMap<>(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR);
    byKeySorted.putAll(byKeyMutable);
    final long createFinalMapEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished creating final map. Elapse of %d seconds", (createFinalMapEndTime - createFinalMapStartTime) / 1000));
    logger.info("Creating proportional coverage... ");
    final long pCovFileStartTime = System.currentTimeMillis();
    final SortedMap<SimpleInterval, Double> byKeyProportionalSorted = new TreeMap<>(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR);
    byKeySorted.entrySet().stream().forEach(e -> byKeyProportionalSorted.put(e.getKey(), (double) e.getValue() / totalReads));
    final long pCovFileEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished creating proportional coverage map. Elapse of %d seconds", (pCovFileEndTime - pCovFileStartTime) / 1000));
    logger.info("Writing raw coverage file ...");
    final long writingCovFileStartTime = System.currentTimeMillis();
    ReadCountCollectionUtils.writeReadCountsFromSimpleInterval(new File(outputFile.getAbsolutePath() + RAW_COV_OUTPUT_EXTENSION), sampleName, byKeySorted, commentsForRawCoverage);
    final long writingCovFileEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished writing coverage file. Elapse of %d seconds", (writingCovFileEndTime - writingCovFileStartTime) / 1000));
    logger.info("Writing proportional coverage file ...");
    final long writingPCovFileStartTime = System.currentTimeMillis();
    ReadCountCollectionUtils.writeReadCountsFromSimpleInterval(outputFile, sampleName, byKeyProportionalSorted, commentsForProportionalCoverage);
    final long writingPCovFileEndTime = System.currentTimeMillis();
    logger.info(String.format("Finished writing proportional coverage file. Elapse of %d seconds", (writingPCovFileEndTime - writingPCovFileStartTime) / 1000));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) GATKSparkTool(org.broadinstitute.hellbender.engine.spark.GATKSparkTool) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) Logger(org.apache.logging.log4j.Logger) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) UserException(org.broadinstitute.hellbender.exceptions.UserException) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) TargetWriter(org.broadinstitute.hellbender.tools.exome.TargetWriter) LogManager(org.apache.logging.log4j.LogManager) SampleCollection(org.broadinstitute.hellbender.tools.exome.SampleCollection) JavaRDD(org.apache.spark.api.java.JavaRDD) ReadFilterLibrary(org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Target(org.broadinstitute.hellbender.tools.exome.Target) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) SampleCollection(org.broadinstitute.hellbender.tools.exome.SampleCollection) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) WellformedReadFilter(org.broadinstitute.hellbender.engine.filters.WellformedReadFilter) File(java.io.File)

Example 40 with Target

use of org.broadinstitute.hellbender.tools.exome.Target in project gatk by broadinstitute.

the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.

private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext truth : truthVariants) {
        final List<Target> overlappingTargets = targets.targets(truth);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
        final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
        if (overlappingTargets.isEmpty()) {
            Assert.assertTrue(overlappingOutput.isEmpty());
            continue;
        }
        @SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            sampleCalls.forEach(p -> {
                expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
            });
            final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
            Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
            final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
            final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
            Assert.assertEquals(actualTotalCount, expectedTotalCount);
            final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
            final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
            Assert.assertEquals(observedTargetCount, expectedTargetCount);
            final Allele truthCallAllele = outputTruthAllele(outputGenotype);
            final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            if (sampleCalls.size() > 0 && !isMixed) {
                final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
                final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
                final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
                Assert.assertEquals(actualCall, expectedCall);
                sampleCallsCount[expectedCall.index()]++;
                if (!truthCallAllele.isReference()) {
                    if (truthCallAllele.equals(actualCall)) {
                        Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
                    } else if (!truthCallAllele.isNoCall()) {
                        Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
                    } else {
                        Assert.assertNull(evalClass);
                    }
                } else if (truthCallAllele.isReference()) {
                    Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
                }
            } else {
                Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
                if (sampleCalls.isEmpty()) {
                    Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
                    Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
                } else {
                    Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
                    final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
                    Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
                }
            }
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) java.util(java.util) DataProvider(org.testng.annotations.DataProvider) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) Pair(org.apache.commons.lang3.tuple.Pair) Assert(org.testng.Assert) GATKProtectedVariantContextUtils(org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) VCFConstants(htsjdk.variant.vcf.VCFConstants) IOException(java.io.IOException) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Field(java.lang.reflect.Field) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) XHMMSegmentGenotyper(org.broadinstitute.hellbender.tools.exome.germlinehmm.xhmm.XHMMSegmentGenotyper) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Target(org.broadinstitute.hellbender.tools.exome.Target) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) Target(org.broadinstitute.hellbender.tools.exome.Target) Allele(htsjdk.variant.variantcontext.Allele) CopyNumberTriStateAllele(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriStateAllele) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Aggregations

Target (org.broadinstitute.hellbender.tools.exome.Target)110 Test (org.testng.annotations.Test)56 File (java.io.File)52 Collectors (java.util.stream.Collectors)42 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)42 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)38 IOException (java.io.IOException)32 java.util (java.util)32 IntStream (java.util.stream.IntStream)32 Assert (org.testng.Assert)32 Pair (org.apache.commons.lang3.tuple.Pair)26 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)26 UserException (org.broadinstitute.hellbender.exceptions.UserException)26 Genotype (htsjdk.variant.variantcontext.Genotype)22 List (java.util.List)22 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)22 CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)22 DataProvider (org.testng.annotations.DataProvider)22 VariantContext (htsjdk.variant.variantcontext.VariantContext)20 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)20