Search in sources :

Example 26 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class SegmentUtils method mergeSpuriousStartsAndEnds.

//remerge spurious target-only segments introduced at starts and ends of the
//original target-coverage segments by union of breakpoints
private static List<SimpleInterval> mergeSpuriousStartsAndEnds(final List<SimpleInterval> segments, final List<SimpleInterval> targetSegments, final TargetCollection<AllelicCount> snps) {
    //get original target-segment starts and ends
    final Set<SimpleInterval> targetSegmentStarts = targetSegments.stream().map(s -> new SimpleInterval(s.getContig(), s.getStart(), s.getStart())).collect(Collectors.toSet());
    final Set<SimpleInterval> targetSegmentEnds = targetSegments.stream().map(s -> new SimpleInterval(s.getContig(), s.getEnd(), s.getEnd())).collect(Collectors.toSet());
    final List<SimpleInterval> mergedSegments = new ArrayList<>();
    final ListIterator<SimpleInterval> segmentsIter = segments.listIterator();
    while (segmentsIter.hasNext()) {
        final SimpleInterval segment = segmentsIter.next();
        //do not remerge non-target-only segments (i.e., those containing SNPs)
        if (snps.targetCount(segment) > 0) {
            mergedSegments.add(segment);
            continue;
        }
        final SimpleInterval segmentStart = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
        final SimpleInterval segmentEnd = new SimpleInterval(segment.getContig(), segment.getEnd(), segment.getEnd());
        if (targetSegmentStarts.contains(segmentStart) && !targetSegmentEnds.contains(segmentEnd)) {
            //remerge segments introduced at starts to the right
            final SimpleInterval nextSegment = segmentsIter.next();
            mergedSegments.add(SegmentMergeUtils.mergeSegments(segment, nextSegment));
        } else if (!targetSegmentStarts.contains(segmentStart) && targetSegmentEnds.contains(segmentEnd)) {
            //remerge segments introduced at ends to the left
            final int previousIndex = mergedSegments.size() - 1;
            final SimpleInterval previousSegment = mergedSegments.get(previousIndex);
            mergedSegments.set(previousIndex, SegmentMergeUtils.mergeSegments(previousSegment, segment));
        } else {
            //do not merge otherwise; although some spurious segments remain, they will be merged in a later step
            mergedSegments.add(segment);
        }
    }
    return mergedSegments;
}
Also used : Locatable(htsjdk.samtools.util.Locatable) Decile(org.broadinstitute.hellbender.utils.mcmc.Decile) java.util(java.util) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) ACNV_DOUBLE_FORMAT(org.broadinstitute.hellbender.tools.exome.ACNVModeller.ACNV_DOUBLE_FORMAT) StringUtils(org.apache.commons.lang3.StringUtils) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) AllelicCalls(org.broadinstitute.hellbender.tools.exome.conversion.allelicbalancecaller.AllelicCalls) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) Interval(htsjdk.samtools.util.Interval) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) ListUtils(org.apache.commons.collections4.ListUtils) org.broadinstitute.hellbender.utils.tsv(org.broadinstitute.hellbender.utils.tsv) BiConsumer(java.util.function.BiConsumer) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) DecileCollection(org.broadinstitute.hellbender.utils.mcmc.DecileCollection) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 27 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class AllelicPanelOfNormals method initializeSiteToHyperparameterPairMap.

//transforms ref/alt counts at each site to hyperparameters, see docs/CNVs/CNV-methods.pdf for details
private void initializeSiteToHyperparameterPairMap(final AllelicCountCollection counts) {
    logger.info("Initializing allelic panel of normals...");
    for (final AllelicCount count : counts.getCounts()) {
        final SimpleInterval site = count.getInterval();
        final HyperparameterValues hyperparameterValues = new HyperparameterValues(globalHyperparameterValues.alpha, globalHyperparameterValues.beta, count.getAltReadCount(), count.getRefReadCount());
        if (siteToHyperparameterValuesMap.containsKey(site)) {
            throw new UserException.BadInput("Input AllelicCountCollection for allelic panel of normals contains duplicate sites.");
        } else {
            siteToHyperparameterValuesMap.put(site, hyperparameterValues);
        }
    }
    logger.info("Allelic panel of normals initialized.");
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Example 28 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class AllelicPanelOfNormalsCreator method create.

/**
     * Creates an {@link AllelicPanelOfNormals} given a site-frequency threshold;
     * sites appearing in strictly less than this fraction of samples will not be included in the panel of normals.
     * @param siteFrequencyThreshold    site-frequency threshold
     * @return                          an {@link AllelicPanelOfNormals} containing sites
     *                                  above the site-frequency threshold
     */
public AllelicPanelOfNormals create(final double siteFrequencyThreshold) {
    logger.info("Creating allelic panel of normals...");
    //used to filter on frequency
    final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
    //store only the total counts (smaller memory footprint)
    final Map<SimpleInterval, AllelicCount> totalCountsMap = new HashMap<>();
    int pulldownFileCounter = 1;
    final int totalNumberOfSamples = pulldownFiles.size();
    for (final File pulldownFile : pulldownFiles) {
        logger.info("Processing pulldown file " + pulldownFileCounter++ + "/" + totalNumberOfSamples + " (" + pulldownFile + ")...");
        final AllelicCountCollection pulldownCounts = new AllelicCountCollection(pulldownFile);
        for (final AllelicCount count : pulldownCounts.getCounts()) {
            //update the sum of ref and alt counts at each site
            final SimpleInterval site = count.getInterval();
            final AllelicCount currentCountAtSite = totalCountsMap.getOrDefault(site, new AllelicCount(site, 0, 0));
            final AllelicCount updatedCountAtSite = new AllelicCount(site, currentCountAtSite.getRefReadCount() + count.getRefReadCount(), currentCountAtSite.getAltReadCount() + count.getAltReadCount());
            totalCountsMap.put(site, updatedCountAtSite);
            //update the number of samples seen possessing each site
            final MutableInt numberOfSamplesAtSite = numberOfSamplesMap.get(site);
            if (numberOfSamplesAtSite == null) {
                numberOfSamplesMap.put(site, new MutableInt(1));
            } else {
                numberOfSamplesAtSite.increment();
            }
        }
    }
    logger.info("Total number of unique sites present in samples: " + totalCountsMap.size());
    //filter out sites that appear at a frequency strictly less than the provided threshold
    final AllelicCountCollection totalCounts = new AllelicCountCollection();
    numberOfSamplesMap.entrySet().stream().filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold).map(e -> totalCountsMap.get(e.getKey())).forEach(totalCounts::add);
    logger.info(String.format("Number of unique sites present in samples above site frequency = %4.3f: %d", siteFrequencyThreshold, totalCounts.getCounts().size()));
    return new AllelicPanelOfNormals(totalCounts);
}
Also used : MutableInt(org.apache.commons.lang3.mutable.MutableInt) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) HashMap(java.util.HashMap) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) ArrayList(java.util.ArrayList) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Map(java.util.Map) Utils(org.broadinstitute.hellbender.utils.Utils) LogManager(org.apache.logging.log4j.LogManager) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) HashMap(java.util.HashMap) MutableInt(org.apache.commons.lang3.mutable.MutableInt) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Example 29 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class AlleleFractionSegmenterUnitTest method generateAllelicCount.

protected static AllelicCount generateAllelicCount(final double minorFraction, final SimpleInterval position, final RandomGenerator rng, final GammaDistribution biasGenerator, final double outlierProbability) {
    final int numReads = 100;
    final double bias = biasGenerator.sample();
    //flip a coin to decide alt minor (alt fraction = minor fraction) or ref minor (alt fraction = 1 - minor fraction)
    final double altFraction = rng.nextDouble() < 0.5 ? minorFraction : 1 - minorFraction;
    //the probability of an alt read is the alt fraction modified by the bias or, in the case of an outlier, random
    final double pAlt = rng.nextDouble() < outlierProbability ? rng.nextDouble() : altFraction / (altFraction + (1 - altFraction) * bias);
    final int numAltReads = new BinomialDistribution(rng, numReads, pAlt).sample();
    final int numRefReads = numReads - numAltReads;
    return new AllelicCount(position, numAltReads, numRefReads);
}
Also used : BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Example 30 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class HetPulldownCalculatorUnitTest method inputGetTumorHetPulldown.

@DataProvider(name = "inputGetTumorHetPulldown")
public Object[][] inputGetTumorHetPulldown() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 11522, 11522));
    normalHetIntervals.add(new Interval("1", 12098, 12098));
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));
    normalHetIntervals.add(new Interval("2", 14982, 14982));
    return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Interval(htsjdk.samtools.util.Interval) DataProvider(org.testng.annotations.DataProvider)

Aggregations

AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)62 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)38 Test (org.testng.annotations.Test)32 File (java.io.File)22 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 IOException (java.io.IOException)12 ArrayList (java.util.ArrayList)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 IntervalList (htsjdk.samtools.util.IntervalList)8 Pulldown (org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown)8 Utils (org.broadinstitute.hellbender.utils.Utils)8 Interval (htsjdk.samtools.util.Interval)6 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)5 IOUtils (org.broadinstitute.hellbender.utils.io.IOUtils)5 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4