Search in sources :

Example 1 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class AlleleFractionInitializer method initialMinorFractions.

/**
     *  Initialize minor fractions assuming no allelic bias <p></p>
     *
     * We integrate over f to get posterior probabilities (responsibilities) of alt / ref minor
     * that is, responsibility of alt minor is int_{0 to 1/2} f^a (1-f)^r df
     *          responsibility of ref minor is int_{0 to 1/2} f^r (1-f)^a df
     * these are proportional to I(1/2, a + 1, r + 1) and I(1/2, r + 1, a + 1),
     * respectively, where I is the (incomplete) regularized Beta function.
     * By definition these likelihoods sum to 1, ie they are already normalized. <p></p>
     *
     * Finally, we set each minor fraction to the responsibility-weighted total count of
     * reads in minor allele divided by total reads, ignoring outliers.
     */
private AlleleFractionState.MinorFractions initialMinorFractions(final AlleleFractionData data) {
    final int numSegments = data.getNumSegments();
    final AlleleFractionState.MinorFractions result = new AlleleFractionState.MinorFractions(numSegments);
    for (int segment = 0; segment < numSegments; segment++) {
        double responsibilityWeightedMinorAlleleReadCount = 0.0;
        double responsibilityWeightedTotalReadCount = 0.0;
        for (final AllelicCount count : data.getCountsInSegment(segment)) {
            final int a = count.getAltReadCount();
            final int r = count.getRefReadCount();
            double altMinorResponsibility;
            try {
                altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
            } catch (final MaxCountExceededException e) {
                //if the special function can't be computed, give an all-or-nothing responsibility
                altMinorResponsibility = a < r ? 1.0 : 0.0;
            }
            responsibilityWeightedMinorAlleleReadCount += altMinorResponsibility * a + (1 - altMinorResponsibility) * r;
            responsibilityWeightedTotalReadCount += a + r;
        }
        // we achieve a flat prior via a single pseudocount for minor and non-minor reads, hence the  +1 and +2
        result.add((responsibilityWeightedMinorAlleleReadCount + 1) / (responsibilityWeightedTotalReadCount + 2));
    }
    return result;
}
Also used : AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) MaxCountExceededException(org.apache.commons.math3.exception.MaxCountExceededException)

Example 2 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class SegmentUtilsUnitTest method testUnionSegments.

/**
     * Test for {@link SegmentUtils#unionSegments}.  Expected behavior:
     * <p>
     * On chr1 {@link SegmentUtils#collectBreakpointsByContig} gives:
     * </p>
     *      <p>
     *      1, 5, 10, 20, 40, 40, 42, 90, 91, 115, 125, 140.
     *      </p>
     * <p>
     * Then {@link SegmentUtils#constructUntrimmedSegments} finds the segments:
     * </p>
     *      <p>
     *      [1, 4], [5, 10], [11, 19], [20, 40], [41, 41], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
     *      </p>
     * <p>
     * and returns the non-empty segments:
     * </p>
     *      <p>
     *      [1, 4], [5, 10], [20, 40], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
     *      </p>
     * <p>
     * Then {@link SegmentUtils#mergeSpuriousStartsAndEnds} merges the last segment left to form [115, 140],
     * and {@link SegmentMergeUtils#mergeSpuriousMiddles} randomly merges segment [92, 114] left or right.
     * </p>
     * <p>
     * Finally, {@link SegmentUtils#trimInterval} gives:
     * </p>
     *      <p>
     *      [1, 10], [20, 40], [42, 42], [90, 114], [115, 140] (if [92, 114] merged left) or
     *      </p>
     *      <p>
     *      [1, 10], [20, 40], [42, 42], [90, 91], [92, 140] (if [92, 114] merged right)
     *      </p>
     * <p>
     * The remaining empty segment on chr2 is retained.
     */
@Test
public void testUnionSegments() {
    final String sampleName = "placeholder_sample_name";
    final List<Target> targets = new ArrayList<Target>();
    targets.add(new Target("t1", new SimpleInterval("chr1", 1, 10)));
    targets.add(new Target("t2", new SimpleInterval("chr1", 20, 30)));
    targets.add(new Target("t3", new SimpleInterval("chr1", 31, 40)));
    targets.add(new Target("t4", new SimpleInterval("chr1", 90, 100)));
    targets.add(new Target("t5", new SimpleInterval("chr1", 110, 120)));
    targets.add(new Target("t6", new SimpleInterval("chr1", 130, 140)));
    final RealMatrix zeroCoverageMatrix = new Array2DRowRealMatrix(targets.size(), 1);
    final ReadCountCollection counts = new ReadCountCollection(targets, Collections.singletonList(sampleName), zeroCoverageMatrix);
    final AllelicCount snp1 = new AllelicCount(new SimpleInterval("chr1", 5, 5), 0, 1);
    final AllelicCount snp2 = new AllelicCount(new SimpleInterval("chr1", 40, 40), 0, 1);
    final AllelicCount snp3 = new AllelicCount(new SimpleInterval("chr1", 42, 42), 0, 1);
    final AllelicCount snp4 = new AllelicCount(new SimpleInterval("chr1", 91, 91), 0, 1);
    final AllelicCount snp5 = new AllelicCount(new SimpleInterval("chr1", 115, 115), 0, 1);
    final AllelicCount snp6 = new AllelicCount(new SimpleInterval("chr1", 125, 125), 0, 1);
    final AllelicCount snp7 = new AllelicCount(new SimpleInterval("chr2", 10, 10), 0, 1);
    final List<AllelicCount> snps = Arrays.asList(snp1, snp2, snp3, snp4, snp5, snp6, snp7);
    final List<SimpleInterval> targetSegments = Arrays.asList(new SimpleInterval("chr1", 1, 10), new SimpleInterval("chr1", 20, 40), new SimpleInterval("chr1", 90, 140));
    final List<SimpleInterval> snpSegments = Arrays.asList(new SimpleInterval("chr1", 5, 40), new SimpleInterval("chr1", 42, 91), new SimpleInterval("chr1", 115, 125), new SimpleInterval("chr2", 10, 10));
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, new Genome(counts, snps));
    final List<SimpleInterval> expectedLeft = Arrays.asList(new SimpleInterval("chr1", 1, 10), new SimpleInterval("chr1", 20, 40), new SimpleInterval("chr1", 42, 42), new SimpleInterval("chr1", 90, 114), new SimpleInterval("chr1", 115, 140), new SimpleInterval("chr2", 10, 10));
    final List<SimpleInterval> expectedRight = Arrays.asList(new SimpleInterval("chr1", 1, 10), new SimpleInterval("chr1", 20, 40), new SimpleInterval("chr1", 42, 42), new SimpleInterval("chr1", 90, 91), new SimpleInterval("chr1", 92, 140), new SimpleInterval("chr2", 10, 10));
    Assert.assertTrue(unionedSegments.equals(expectedLeft) || unionedSegments.equals(expectedRight));
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class SNPSegmenterUnitTest method testAllelicFractionBasedSegmentationNoSNPs.

@Test(expectedExceptions = IllegalArgumentException.class)
public void testAllelicFractionBasedSegmentationNoSNPs() {
    final String sampleName = "test";
    final List<AllelicCount> snpCounts = Collections.emptyList();
    final TargetCollection<AllelicCount> snps = new HashedListTargetCollection<>(snpCounts);
    final File resultFile = createTempFile("snp-segmenter-test-result", ".seg");
    SNPSegmenter.writeSegmentFile(snps, sampleName, resultFile);
}
Also used : File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 4 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class GetBayesianHetCoverageIntegrationTest method testMatchedNormalTumorJob.

@Test
public void testMatchedNormalTumorJob() {
    final File normalOutputFile = createTempFile("normal-test", ".tsv");
    final File tumorOutputFile = createTempFile("tumor-test", ".tsv");
    Pulldown tumorHetPulldownExpected, tumorHetPulldownResult;
    Pulldown normalHetPulldownExpected, normalHetPulldownResult;
    final String[] arguments = { "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, normalOutputFile.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME, TUMOR_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME, tumorOutputFile.getAbsolutePath(), "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10), "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(10.0) };
    runCommandLine(arguments);
    normalHetPulldownResult = new Pulldown(normalOutputFile, normalHeader);
    tumorHetPulldownResult = new Pulldown(tumorOutputFile, tumorHeader);
    normalHetPulldownExpected = new Pulldown(normalHeader);
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 29.29));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.98));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.60));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 24.99));
    tumorHetPulldownExpected = new Pulldown(tumorHeader);
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11));
    Assert.assertEquals(normalHetPulldownExpected, normalHetPulldownResult);
    Assert.assertEquals(tumorHetPulldownExpected, tumorHetPulldownResult);
}
Also used : Pulldown(org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 5 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class GetBayesianHetCoverageIntegrationTest method testTumorJob.

@Test
public void testTumorJob() {
    final File tumorOutputFile = createTempFile("tumor-test", ".tsv");
    Pulldown tumorHetPulldownExpected, tumorHetPulldownResult;
    /* test 1: tight calling stringency */
    final String[] args_1 = { "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME, TUMOR_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME, tumorOutputFile.getAbsolutePath(), "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10), "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(10.0) };
    runCommandLine(args_1);
    tumorHetPulldownExpected = new Pulldown(tumorHeader);
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 24.85));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 34.03));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 24.23));
    tumorHetPulldownResult = new Pulldown(tumorOutputFile, tumorHeader);
    Assert.assertEquals(tumorHetPulldownExpected, tumorHetPulldownResult);
    /* test 2: tight calling stringency */
    final String[] args_2 = { "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME, TUMOR_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME, tumorOutputFile.getAbsolutePath(), "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10), "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(5.0) };
    runCommandLine(args_2);
    tumorHetPulldownExpected = new Pulldown(tumorHeader);
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4, Nucleotide.G, Nucleotide.A, 11, 15.72));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 24.85));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 34.03));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 24.23));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 21.23));
    tumorHetPulldownResult = new Pulldown(tumorOutputFile, tumorHeader);
    Assert.assertEquals(tumorHetPulldownExpected, tumorHetPulldownResult);
}
Also used : Pulldown(org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)62 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)38 Test (org.testng.annotations.Test)32 File (java.io.File)22 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 IOException (java.io.IOException)12 ArrayList (java.util.ArrayList)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 IntervalList (htsjdk.samtools.util.IntervalList)8 Pulldown (org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown)8 Utils (org.broadinstitute.hellbender.utils.Utils)8 Interval (htsjdk.samtools.util.Interval)6 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)5 IOUtils (org.broadinstitute.hellbender.utils.io.IOUtils)5 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4