Search in sources :

Example 46 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class SNPSegmenter method writeSegmentFile.

/**
     * Write segment file based on maximum-likelihood estimates of the minor allele fraction at SNP sites,
     * assuming the specified allelic bias.  These estimates are converted to target coverages,
     * which are written to a temporary file and then passed to {@link RCBSSegmenter}.
     * @param snps                  TargetCollection of allelic counts at SNP sites
     * @param sampleName            sample name
     * @param outputFile            segment file to write to and return
     * @param allelicBias           allelic bias to use in estimate of minor allele fraction
     */
public static void writeSegmentFile(final TargetCollection<AllelicCount> snps, final String sampleName, final File outputFile, final double allelicBias) {
    Utils.validateArg(snps.totalSize() > 0, "Must have a positive number of SNPs to perform SNP segmentation.");
    try {
        final File targetsFromSNPCountsFile = File.createTempFile("targets-from-snps", ".tsv");
        final List<Target> targets = snps.targets().stream().map(ac -> new Target(name(ac), ac.getInterval())).collect(Collectors.toList());
        final RealMatrix minorAlleleFractions = new Array2DRowRealMatrix(snps.targetCount(), 1);
        minorAlleleFractions.setColumn(0, snps.targets().stream().mapToDouble(ac -> ac.estimateMinorAlleleFraction(allelicBias)).toArray());
        ReadCountCollectionUtils.write(targetsFromSNPCountsFile, new ReadCountCollection(targets, Collections.singletonList(sampleName), minorAlleleFractions));
        //segment SNPs based on observed log_2 minor allele fraction (log_2 is applied in CBS.R)
        RCBSSegmenter.writeSegmentFile(sampleName, targetsFromSNPCountsFile.getAbsolutePath(), outputFile.getAbsolutePath(), false);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile("Could not create temporary output file during " + "SNP segmentation.", e);
    }
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) RCBSSegmenter(org.broadinstitute.hellbender.utils.segmenter.RCBSSegmenter) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) IOException(java.io.IOException) Collections(java.util.Collections) Collectors(java.util.stream.Collectors) File(java.io.File) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 47 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.

the class CalculatePulldownPhasePosteriors method calculatePhasePosteriors.

@VisibleForTesting
protected static AllelicCountWithPhasePosteriorsCollection calculatePhasePosteriors(final AllelicCountCollection counts, final List<SimpleInterval> segments, final AlleleFractionState state, final AllelicPanelOfNormals allelicPoN) {
    final TargetCollection<SimpleInterval> segmentTargetCollection = new HashedListTargetCollection<>(segments);
    final AllelicCountWithPhasePosteriorsCollection countsWithPhasePosteriors = new AllelicCountWithPhasePosteriorsCollection();
    for (final AllelicCount count : counts.getCounts()) {
        final int segmentIndex = segmentTargetCollection.index(count.getInterval());
        if (segmentIndex < 0) {
            throw new UserException.EmptyIntersection(String.format("The AllelicCount at %s is not located within one of the input segments.", count.getInterval()));
        }
        final AlleleFractionGlobalParameters parameters = state.globalParameters();
        final double minorFraction = state.segmentMinorFraction(segmentIndex);
        final double refMinorLogProb = AlleleFractionLikelihoods.hetLogLikelihood(parameters, minorFraction, count, AlleleFractionIndicator.REF_MINOR, allelicPoN);
        final double altMinorLogProb = AlleleFractionLikelihoods.hetLogLikelihood(parameters, minorFraction, count, AlleleFractionIndicator.ALT_MINOR, allelicPoN);
        final double outlierLogProb = AlleleFractionLikelihoods.hetLogLikelihood(parameters, minorFraction, count, AlleleFractionIndicator.OUTLIER, allelicPoN);
        final AllelicCountWithPhasePosteriors countWithPhasePosteriors = new AllelicCountWithPhasePosteriors(count, refMinorLogProb, altMinorLogProb, outlierLogProb);
        countsWithPhasePosteriors.add(countWithPhasePosteriors);
    }
    return countsWithPhasePosteriors;
}
Also used : AllelicCountWithPhasePosteriorsCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountWithPhasePosteriorsCollection) AllelicCountWithPhasePosteriors(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountWithPhasePosteriors) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 48 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class ACNVModellerUnitTest method testMergeSimilarSegmentsCopyRatio.

/**
     * Test of similar-segment merging using only copy-ratio data (simulated coverages and segments).
     * Spurious breakpoints have been introduced into the list of true segments; similar-segment merging should
     * remerge segments broken by these breakpoints and reproduce the original list of true segments.
     */
@Test
public void testMergeSimilarSegmentsCopyRatio() throws IOException {
    final boolean doRefit = true;
    final String tempDir = publicTestDir + "similar-segment-copy-ratio-test";
    final File tempDirFile = createTempDir(tempDir);
    //load data (coverages and segments)
    final ReadCountCollection coverage = ReadCountCollectionUtils.parse(COVERAGES_FILE);
    final List<AllelicCount> snpCountsDummy = Collections.singletonList(new AllelicCount(new SimpleInterval("1", 1, 1), 0, 1));
    final Genome genome = new Genome(coverage, snpCountsDummy);
    final SegmentedGenome segmentedGenome = new SegmentedGenome(SEGMENT_FILE, genome);
    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, NUM_SAMPLES, NUM_BURN_IN, 10, 0, ctx);
    //check that model is completely fit at construction
    Assert.assertTrue(modeller.isModelFit());
    //perform iterations of similar-segment merging until all similar segments are merged
    int prevNumSegments;
    for (int numIterations = 1; numIterations <= MAX_SIMILAR_SEGMENT_MERGE_ITERATIONS; numIterations++) {
        prevNumSegments = modeller.getACNVModeledSegments().size();
        modeller.performSimilarSegmentMergingIteration(INTERVAL_THRESHOLD, Double.POSITIVE_INFINITY, doRefit);
        if (modeller.getACNVModeledSegments().size() == prevNumSegments) {
            break;
        }
    }
    //write final segments to file
    final File finalModeledSegmentsFile = new File(tempDirFile, "test-" + FINAL_FIT_FILE_TAG + SEGMENT_FILE_SUFFIX);
    modeller.writeACNVModeledSegmentFile(finalModeledSegmentsFile);
    //check equality of segments
    final List<SimpleInterval> segmentsResult = SegmentUtils.readIntervalsFromSegmentFile(finalModeledSegmentsFile);
    Assert.assertEquals(segmentsResult, SEGMENTS_TRUTH);
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 49 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class GetBayesianHetCoverageIntegrationTest method testMatchedNormalTumorJob.

@Test
public void testMatchedNormalTumorJob() {
    final File normalOutputFile = createTempFile("normal-test", ".tsv");
    final File tumorOutputFile = createTempFile("tumor-test", ".tsv");
    Pulldown tumorHetPulldownExpected, tumorHetPulldownResult;
    Pulldown normalHetPulldownExpected, normalHetPulldownResult;
    final String[] arguments = { "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NORMAL_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, normalOutputFile.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME, TUMOR_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME, tumorOutputFile.getAbsolutePath(), "-" + GetBayesianHetCoverage.READ_DEPTH_THRESHOLD_SHORT_NAME, Integer.toString(10), "-" + GetBayesianHetCoverage.HET_CALLING_STRINGENCY_SHORT_NAME, Double.toString(10.0) };
    runCommandLine(arguments);
    normalHetPulldownResult = new Pulldown(normalOutputFile, normalHeader);
    tumorHetPulldownResult = new Pulldown(tumorOutputFile, tumorHeader);
    normalHetPulldownExpected = new Pulldown(normalHeader);
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14, 29.29));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17, 39.98));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15, 28.60));
    normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11, 24.99));
    tumorHetPulldownExpected = new Pulldown(tumorHeader);
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6, Nucleotide.G, Nucleotide.T, 14));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8, Nucleotide.T, Nucleotide.G, 17));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9, Nucleotide.T, Nucleotide.G, 15));
    tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5, Nucleotide.G, Nucleotide.C, 11));
    Assert.assertEquals(normalHetPulldownExpected, normalHetPulldownResult);
    Assert.assertEquals(tumorHetPulldownExpected, tumorHetPulldownResult);
}
Also used : Pulldown(org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 50 with AllelicCount

use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.

the class AlleleFractionLikelihoodsUnitTest method testHetLogLikelihoodTightDistribution.

//if variance is tiny we can approximate lambda ~ mu in the tricky part of the integral to get an analytic result
@Test
public void testHetLogLikelihoodTightDistribution() {
    //pi is just a prefactor so we don't need to test it thoroughly here
    final double pi = 0.01;
    for (final double f : Arrays.asList(0.1, 0.2, 0.3)) {
        for (final double mean : Arrays.asList(0.9, 1.0, 1.1)) {
            for (final double variance : Arrays.asList(1e-6, 1e-7, 1e-8)) {
                final AlleleFractionGlobalParameters parameters = new AlleleFractionGlobalParameters(mean, variance, pi);
                for (final int a : Arrays.asList(1, 10, 20)) {
                    //alt count
                    for (final int r : Arrays.asList(1, 10, 20)) {
                        //ref count
                        final AllelicCount count = new AllelicCount(DUMMY, r, a);
                        final double actual = AlleleFractionLikelihoods.hetLogLikelihood(parameters, f, count, AlleleFractionIndicator.ALT_MINOR);
                        final double expected = log((1 - pi) / 2) + a * log(f) + r * log(1 - f) + r * log(mean) - (a + r) * log(f + (1 - f) * mean);
                        Assert.assertEquals(actual, expected, 1e-3);
                    }
                }
            }
        }
    }
}
Also used : AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test)

Aggregations

AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)62 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)38 Test (org.testng.annotations.Test)32 File (java.io.File)22 UserException (org.broadinstitute.hellbender.exceptions.UserException)14 IOException (java.io.IOException)12 ArrayList (java.util.ArrayList)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 IntervalList (htsjdk.samtools.util.IntervalList)8 Pulldown (org.broadinstitute.hellbender.tools.exome.pulldown.Pulldown)8 Utils (org.broadinstitute.hellbender.utils.Utils)8 Interval (htsjdk.samtools.util.Interval)6 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)6 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)6 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)5 IOUtils (org.broadinstitute.hellbender.utils.io.IOUtils)5 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4