Search in sources :

Example 41 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class AlleleFractionHMMUnitTest method equalMinorFractionsTest.

// if all states have the same minor fraction, then regardless of data the hidden state probabilities are
// proportional to the weights
@Test
public void equalMinorFractionsTest() {
    // only the second state
    final List<Double> weights = Arrays.asList(0.2, 0.3, 0.5);
    final List<Double> minorAlleleFractions = Arrays.asList(0.3, 0.3, 0.3);
    final double memoryLength = 1e3;
    final AlleleFractionGlobalParameters params = new AlleleFractionGlobalParameters(0.1, 0.01, 0.03);
    final AlleleFractionHMM model = new AlleleFractionHMM(minorAlleleFractions, weights, memoryLength, AllelicPanelOfNormals.EMPTY_PON, params);
    final Random random = new Random(13);
    final int chainLength = 10000;
    final List<SimpleInterval> positions = new ArrayList<>();
    final List<AllelicCount> data = new ArrayList<>();
    int position = 1;
    for (int n = 0; n < chainLength; n++) {
        position += random.nextInt((int) (2 * memoryLength));
        final SimpleInterval interval = new SimpleInterval("chr1", position, position);
        positions.add(interval);
        data.add(new AllelicCount(interval, random.nextInt(30) + 1, random.nextInt(30) + 1));
    }
    final ForwardBackwardAlgorithm.Result<AllelicCount, SimpleInterval, Integer> fbResult = ForwardBackwardAlgorithm.apply(data, positions, model);
    for (int pos = 0; pos < chainLength; pos++) {
        for (int state = 0; state < weights.size(); state++) {
            Assert.assertEquals(fbResult.logProbability(pos, state), Math.log(weights.get(state)), 1e-5);
        }
    }
}
Also used : AlleleFractionGlobalParameters(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) ForwardBackwardAlgorithm(org.broadinstitute.hellbender.utils.hmm.ForwardBackwardAlgorithm) ArrayList(java.util.ArrayList) Random(java.util.Random) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test)

Example 42 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class AlleleFractionHMMUnitTest method obviousCallsTest.

// if we ignore ref bias, choose  well-separated minor fractions e.g. 0.1 and 0.5, and give really obvious
// allele counts like 10/90 (obviously 0.1) and 50/50 (obviously 0.5), the Viterbi algorithm should make the right calls
// this essentially tests whether we correctly defined the likelihood in terms of methods in the Allele Fraction model
@Test
public void obviousCallsTest() {
    // only the second state
    final List<Double> weights = Arrays.asList(0.5, 0.5);
    final List<Double> minorAlleleFractions = Arrays.asList(0.1, 0.5);
    final double memoryLength = 1e3;
    final AlleleFractionHMM model = new AlleleFractionHMM(minorAlleleFractions, weights, memoryLength, AllelicPanelOfNormals.EMPTY_PON, NO_BIAS_OR_OUTLIERS_PARAMS);
    final Random random = new Random(13);
    final int chainLength = 10000;
    final List<SimpleInterval> positions = new ArrayList<>();
    final List<AllelicCount> data = new ArrayList<>();
    int position = 1;
    for (int n = 0; n < chainLength; n++) {
        position += random.nextInt((int) (2 * memoryLength)) + (int) memoryLength;
        final SimpleInterval interval = new SimpleInterval("chr1", position, position);
        positions.add(interval);
        if (n < 2500) {
            // minor fraction = 0.1
            data.add(new AllelicCount(interval, 10, 90));
        } else if (n < 5000) {
            // minor fraction = 0.5
            data.add(new AllelicCount(interval, 50, 50));
        } else if (n < 7500) {
            // minor fraction = 0.1
            data.add(new AllelicCount(interval, 90, 10));
        } else {
            // minor fraction = 0.5
            data.add(new AllelicCount(interval, 40, 60));
        }
    }
    final List<Integer> states = ViterbiAlgorithm.apply(data, positions, model);
    for (int pos = 0; pos < chainLength; pos++) {
        if (pos < 2500) {
            Assert.assertEquals((int) states.get(pos), 0);
        } else if (pos < 5000) {
            Assert.assertEquals((int) states.get(pos), 1);
        } else if (pos < 7500) {
            Assert.assertEquals((int) states.get(pos), 0);
        } else {
            Assert.assertEquals((int) states.get(pos), 1);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Random(java.util.Random) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test)

Example 43 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class AlleleFractionSegmenterUnitTest method generateAllelicCount.

protected static AllelicCount generateAllelicCount(final double minorFraction, final SimpleInterval position, final RandomGenerator rng, final GammaDistribution biasGenerator, final double outlierProbability) {
    final int numReads = 100;
    final double bias = biasGenerator.sample();
    //flip a coin to decide alt minor (alt fraction = minor fraction) or ref minor (alt fraction = 1 - minor fraction)
    final double altFraction = rng.nextDouble() < 0.5 ? minorFraction : 1 - minorFraction;
    //the probability of an alt read is the alt fraction modified by the bias or, in the case of an outlier, random
    final double pAlt = rng.nextDouble() < outlierProbability ? rng.nextDouble() : altFraction / (altFraction + (1 - altFraction) * bias);
    final int numAltReads = new BinomialDistribution(rng, numReads, pAlt).sample();
    final int numRefReads = numReads - numAltReads;
    return new AllelicCount(position, numAltReads, numRefReads);
}
Also used : BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Example 44 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class ACNVModellerUnitTest method testMergeSimilarSegmentsCopyRatio.

/**
     * Test of similar-segment merging using only copy-ratio data (simulated coverages and segments).
     * Spurious breakpoints have been introduced into the list of true segments; similar-segment merging should
     * remerge segments broken by these breakpoints and reproduce the original list of true segments.
     */
@Test
public void testMergeSimilarSegmentsCopyRatio() throws IOException {
    final boolean doRefit = true;
    final String tempDir = publicTestDir + "similar-segment-copy-ratio-test";
    final File tempDirFile = createTempDir(tempDir);
    //load data (coverages and segments)
    final ReadCountCollection coverage = ReadCountCollectionUtils.parse(COVERAGES_FILE);
    final List<AllelicCount> snpCountsDummy = Collections.singletonList(new AllelicCount(new SimpleInterval("1", 1, 1), 0, 1));
    final Genome genome = new Genome(coverage, snpCountsDummy);
    final SegmentedGenome segmentedGenome = new SegmentedGenome(SEGMENT_FILE, genome);
    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, NUM_SAMPLES, NUM_BURN_IN, 10, 0, ctx);
    //check that model is completely fit at construction
    Assert.assertTrue(modeller.isModelFit());
    //perform iterations of similar-segment merging until all similar segments are merged
    int prevNumSegments;
    for (int numIterations = 1; numIterations <= MAX_SIMILAR_SEGMENT_MERGE_ITERATIONS; numIterations++) {
        prevNumSegments = modeller.getACNVModeledSegments().size();
        modeller.performSimilarSegmentMergingIteration(INTERVAL_THRESHOLD, Double.POSITIVE_INFINITY, doRefit);
        if (modeller.getACNVModeledSegments().size() == prevNumSegments) {
            break;
        }
    }
    //write final segments to file
    final File finalModeledSegmentsFile = new File(tempDirFile, "test-" + FINAL_FIT_FILE_TAG + SEGMENT_FILE_SUFFIX);
    modeller.writeACNVModeledSegmentFile(finalModeledSegmentsFile);
    //check equality of segments
    final List<SimpleInterval> segmentsResult = SegmentUtils.readIntervalsFromSegmentFile(finalModeledSegmentsFile);
    Assert.assertEquals(segmentsResult, SEGMENTS_TRUTH);
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 45 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class BayesianHetPulldownCalculator method getTumorHetPulldownFromNormalPulldown.

/**
     * Calculates the Het pulldown from a tumor file, given the tumor BAM file and the pulldown from a matched
     * normal BAM file.
     *
     * Note: this method does not perform any statistical inference. The Het SNP sites are directly carried over
     * from the matched normal pulldown. Here, we only collect statistics (ref count, alt count, read depth) and
     * save to a pulldown. The verbosity level of the pulldown is INTERMEDIATE (see {@link AllelicCountTableColumn}).
     *
     * @param tumorBamFile the tumor BAM file
     * @param normalHetPulldown the matched normal Het pulldown
     * @return tumor Het pulldown
     */
public Pulldown getTumorHetPulldownFromNormalPulldown(final File tumorBamFile, final Pulldown normalHetPulldown) {
    try (final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(refFile).open(tumorBamFile)) {
        if (bamReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException.BadInput("BAM file " + tumorBamFile.toString() + " must be coordinate sorted.");
        }
        final Pulldown tumorHetPulldown = new Pulldown(bamReader.getFileHeader());
        final SamLocusIterator locusIterator = getSamLocusIteratorWithDefaultFilters(bamReader);
        /* get a map of SimpleIntervals in the pulldown to their index */
        final Map<SimpleInterval, Integer> normalPulldownIndexMap = normalHetPulldown.getSimpleIntervalToIndexMap();
        final int totalNumberOfSNPs = snpIntervals.size();
        logger.info("Examining " + totalNumberOfSNPs + " sites in total...");
        int locusCount = 0;
        for (final SamLocusIterator.LocusInfo locus : locusIterator) {
            if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
                logger.info("Examined " + locusCount + " covered sites.");
            }
            locusCount++;
            final int totalReadCount = locus.getRecordAndOffsets().size();
            if (totalReadCount <= readDepthThreshold) {
                continue;
            }
            /* find the AllelicCount from the normal pulldown */
            int indexInNormalPulldown;
            try {
                indexInNormalPulldown = normalPulldownIndexMap.get(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()));
            } catch (NullPointerException e) {
                throw new GATKException.ShouldNeverReachHereException("Can not find the required AllelicCount " + "object in the normal pulldown. Stopping.");
            }
            /* just count the alt and ref nucleotide and add to the tumor pulldown */
            final Nucleotide.Counter baseCounts = getPileupBaseCounts(locus);
            tumorHetPulldown.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), (int) baseCounts.get(normalHetPulldown.getCounts().get(indexInNormalPulldown).getRefNucleotide()), (int) baseCounts.get(normalHetPulldown.getCounts().get(indexInNormalPulldown).getAltNucleotide()), normalHetPulldown.getCounts().get(indexInNormalPulldown).getRefNucleotide(), normalHetPulldown.getCounts().get(indexInNormalPulldown).getAltNucleotide(), totalReadCount));
        }
        logger.info(locusCount + " covered sites out of " + totalNumberOfSNPs + " total sites were examined.");
        return tumorHetPulldown;
    } catch (final IOException | SAMFormatException e) {
        throw new UserException(e.getMessage());
    }
}
Also used : IOException(java.io.IOException) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) UserException(org.broadinstitute.hellbender.exceptions.UserException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Aggregations

AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)62 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)38 Test (org.testng.annotations.Test)32 File (java.io.File)22 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 IOException (java.io.IOException)12 ArrayList (java.util.ArrayList)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 IntervalList (htsjdk.samtools.util.IntervalList)8 Pulldown (org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown)8 Utils (org.broadinstitute.hellbender.utils.Utils)8 Interval (htsjdk.samtools.util.Interval)6 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)5 IOUtils (org.broadinstitute.hellbender.utils.io.IOUtils)5 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4