Search in sources :

Example 86 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class ACNVModeller method fitModel.

/**
     * Performs Markov-Chain Monte Carlo model fitting using the
     * number of total samples and number of burn-in samples pecified at construction.
     */
public void fitModel() {
    //perform MCMC to generate posterior samples
    logger.info("Fitting copy-ratio model...");
    copyRatioModeller = new CopyRatioModeller(segmentedGenome);
    copyRatioModeller.fitMCMC(numSamplesCopyRatio, numBurnInCopyRatio);
    logger.info("Fitting allele-fraction model...");
    alleleFractionModeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
    alleleFractionModeller.fitMCMC(numSamplesAlleleFraction, numBurnInAlleleFraction);
    //update list of ACNVModeledSegment with new PosteriorSummaries
    segments.clear();
    final List<SimpleInterval> unmodeledSegments = segmentedGenome.getSegments();
    final List<PosteriorSummary> segmentMeansPosteriorSummaries = copyRatioModeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<PosteriorSummary> minorAlleleFractionsPosteriorSummaries = alleleFractionModeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    for (int segment = 0; segment < unmodeledSegments.size(); segment++) {
        segments.add(new ACNVModeledSegment(unmodeledSegments.get(segment), segmentMeansPosteriorSummaries.get(segment), minorAlleleFractionsPosteriorSummaries.get(segment)));
    }
    isModelFit = true;
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) CopyRatioModeller(org.broadinstitute.hellbender.tools.exome.copyratio.CopyRatioModeller) AlleleFractionModeller(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionModeller) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 87 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class AllelicCNV method performSNPSegmentationStep.

//segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
//(final segment file is output as a side effect)
private List<SimpleInterval> performSNPSegmentationStep(final String sampleName, final Genome genome) {
    //initial segmentation of SNPs on per-SNP MLE minor allele fraction, assuming no allelic bias
    //(segments are written to temporary file)
    logger.info("Performing initial SNP segmentation (assuming no allelic bias)...");
    final File snpSegmentFile;
    try {
        snpSegmentFile = File.createTempFile(outputPrefix + "-" + SNP_MAF_SEG_FILE_TAG + "-tmp", SEGMENT_FILE_SUFFIX);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile("Could not create temporary SNP segmentation file.", e);
    }
    List<SimpleInterval> snpSegments = calculateAndWriteSNPSegmentation(sampleName, genome, snpSegmentFile, INITIAL_ALLELIC_BIAS_GUESS);
    logger.info("Initial number of SNP segments: " + snpSegments.size());
    final Set<List<SimpleInterval>> snpSegmentationsFound = new HashSet<>();
    //perform iterations of SNP segmentation until convergence
    for (int numIterations = 1; numIterations <= maxNumSNPSegmentationIterations; numIterations++) {
        logger.info("SNP-segmentation iteration: " + numIterations);
        snpSegmentationsFound.add(snpSegments);
        //use AlleleFractionInitializer to determine mean of global allelic-bias distribution given current SNP segmentation
        final double allelicBias = calculateAllelicBias(genome, snpSegmentFile);
        logger.info(String.format("Mean allelic bias: %.3f", allelicBias));
        //resegment SNPs on per-SNP MLE minor allele fraction assuming mean allelic bias found by AlleleFractionInitializer
        //(segments are written to temporary file)
        snpSegments = calculateAndWriteSNPSegmentation(sampleName, genome, snpSegmentFile, allelicBias);
        logger.info("Number of SNP segments: " + snpSegments.size());
        //stop if a previously found SNP segmentation is found again
        if (snpSegmentationsFound.contains(snpSegments)) {
            //take final SNP segmentation to be the same as the last one found
            logger.info("Final number of SNP segments: " + snpSegments.size());
            final File finalSNPSegmentFile = new File(outputPrefix + "-" + SNP_MAF_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
            snpSegments = calculateAndWriteSNPSegmentation(sampleName, genome, finalSNPSegmentFile, allelicBias);
            logSegmentFileWrittenMessage(finalSNPSegmentFile);
            break;
        }
    }
    return snpSegments;
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) List(java.util.List) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File) HashSet(java.util.HashSet)

Example 88 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class AllelicCNV method runPipeline.

@Override
protected void runPipeline(final JavaSparkContext ctx) {
    validateArguments();
    if (!new HDF5Library().load(null)) {
        //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    final String originalLogLevel = (ctx.getLocalProperty("logLevel") != null) ? ctx.getLocalProperty("logLevel") : "INFO";
    ctx.setLogLevel("WARN");
    //the string after the final slash in the output prefix (which may be an absolute file path) will be used as the sample name
    final String sampleName = outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1);
    logger.info("Starting workflow for " + sampleName + "...");
    //make Genome from input target coverages and SNP counts
    logger.info("Loading input files...");
    final Genome genome = new Genome(tangentNormalizedCoverageFile, snpCountsFile);
    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    //load target-coverage segments from input file
    final List<ModeledSegment> targetSegmentsWithCalls = SegmentUtils.readModeledSegmentsFromSegmentFile(targetSegmentsFile);
    logger.info("Number of input target-coverage segments: " + targetSegmentsWithCalls.size());
    //merge copy-neutral and uncalled segments (unless disabled) and fix up target-segment start breakpoints
    logger.info("Preparing target-coverage segments...");
    final List<SimpleInterval> targetSegments = prepareTargetSegments(genome, targetSegmentsWithCalls);
    //segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
    //(final segment file is output as a side effect)
    logger.info("Performing SNP segmentation...");
    final List<SimpleInterval> snpSegments = performSNPSegmentationStep(sampleName, genome);
    //combine SNP and target-coverage segments
    logger.info("Combining SNP and target-coverage segments...");
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, genome);
    logger.info("Number of segments after segment union: " + unionedSegments.size());
    final File unionedSegmentsFile = new File(outputPrefix + "-" + UNION_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
    SegmentUtils.writeSegmentFileWithNumTargetsAndNumSNPs(unionedSegmentsFile, unionedSegments, genome);
    logSegmentFileWrittenMessage(unionedSegmentsFile);
    //small-segment merging (note that X and Y are always small segments and dropped, since GATK CNV drops them)
    logger.info("Merging small segments...");
    final SegmentedGenome segmentedGenomeWithSmallSegments = new SegmentedGenome(unionedSegments, genome);
    final SegmentedGenome segmentedGenome = segmentedGenomeWithSmallSegments.mergeSmallSegments(smallSegmentTargetNumberThreshold);
    logger.info("Number of segments after small-segment merging: " + segmentedGenome.getSegments().size());
    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, allelicPoN, numSamplesCopyRatio, numBurnInCopyRatio, numSamplesAlleleFraction, numBurnInAlleleFraction, ctx);
    //write initial segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, INITIAL_FIT_FILE_TAG);
    //similar-segment merging (segment files are output for each merge iteration)
    logger.info("Merging similar segments...");
    performSimilarSegmentMergingStep(modeller);
    //write final segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, FINAL_FIT_FILE_TAG);
    ctx.setLogLevel(originalLogLevel);
    logger.info("SUCCESS: Allelic CNV run complete for sample " + sampleName + ".");
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) HDF5Library(org.broadinstitute.hdf5.HDF5Library) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File)

Example 89 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class AlleleFractionSegmenterUnitTest method testSegmentation.

@Test
public void testSegmentation() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
    final List<Double> trueMinorAlleleFractions = Arrays.asList(0.12, 0.32, 0.5);
    final double trueMemoryLength = 1e5;
    final AlleleFractionGlobalParameters trueParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
    final AlleleFractionHMM trueModel = new AlleleFractionHMM(trueMinorAlleleFractions, trueWeights, trueMemoryLength, AllelicPanelOfNormals.EMPTY_PON, trueParams);
    // randomly set positions
    final int chainLength = 10000;
    final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    final List<Integer> trueStates = trueModel.generateHiddenStateChain(positions);
    final List<Double> truthMinorFractions = trueStates.stream().map(trueModel::getMinorAlleleFraction).collect(Collectors.toList());
    final AllelicCountCollection counts = generateCounts(truthMinorFractions, positions, rng, trueParams);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    final double[] segmentMinorFractions = segments.stream().flatMap(s -> Collections.nCopies((int) s.getTargetCount(), s.getSegmentMean()).stream()).mapToDouble(x -> x).toArray();
    final double averageMinorFractionError = IntStream.range(0, truthMinorFractions.size()).mapToDouble(n -> Math.abs(segmentMinorFractions[n] - truthMinorFractions.get(n))).average().getAsDouble();
    Assert.assertEquals(averageMinorFractionError, 0, 0.01);
}
Also used : AlleleFractionGlobalParameters(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) Test(org.testng.annotations.Test) Random(java.util.Random) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) GammaDistribution(org.apache.commons.math3.distribution.GammaDistribution) List(java.util.List) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AlleleFractionGlobalParameters(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) Collections(java.util.Collections) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Random(java.util.Random) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 90 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class CopyRatioSegmenterUnitTest method testChromosomesOnDifferentSegments.

@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueLog2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;
    // randomly set positions
    final int chainLength = 100;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    positions.addAll(randomPositions("chr2", chainLength, rng, trueMemoryLength / 4));
    positions.addAll(randomPositions("chr3", chainLength, rng, trueMemoryLength / 4));
    //fix everything to the same state 2
    final int trueState = 2;
    final List<Double> data = new ArrayList<>();
    for (int n = 0; n < positions.size(); n++) {
        final double copyRatio = trueLog2CopyRatios[trueState];
        final double observed = generateData(trueStandardDeviation, copyRatio, rng);
        data.add(observed);
    }
    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(data.stream().mapToDouble(x -> x).toArray()));
    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(10, rcc);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    //check that each chromosome has at least one segment
    final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals(numDifferentContigsInSegments, 3);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) java.util(java.util) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) Test(org.testng.annotations.Test) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Target(org.broadinstitute.hellbender.tools.exome.Target) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Aggregations

SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)545 Test (org.testng.annotations.Test)287 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)202 File (java.io.File)102 ArrayList (java.util.ArrayList)66 DataProvider (org.testng.annotations.DataProvider)64 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)60 Collectors (java.util.stream.Collectors)53 java.util (java.util)41 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)40 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)40 UserException (org.broadinstitute.hellbender.exceptions.UserException)39 VariantContext (htsjdk.variant.variantcontext.VariantContext)36 IntStream (java.util.stream.IntStream)34 Target (org.broadinstitute.hellbender.tools.exome.Target)34 IOException (java.io.IOException)32 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)28 Assert (org.testng.Assert)27 Locatable (htsjdk.samtools.util.Locatable)26 List (java.util.List)26