Search in sources :

Example 11 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.

the class CreateAllelicPanelOfNormalsIntegrationTest method testHDF5AndTSVOutput.

@Test(dataProvider = "dataSiteFrequency", groups = "createsTempFiles")
public void testHDF5AndTSVOutput(final String[] testArguments, final File expectedHDF5File, final File expectedTSVFile) {
    final File allelicPoNHDF5File = createTempFile("create-allelic-pon-test", ".pon");
    allelicPoNHDF5File.delete();
    final File allelicPoNTSVFile = createTempFile("create-allelic-pon-test", ".tsv");
    allelicPoNTSVFile.delete();
    final String[] commonArguments = ArrayUtils.addAll(PULLDOWN_FILE_ARGUMENTS, "--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME, allelicPoNHDF5File.getAbsolutePath(), "--" + TSV_OUTPUT_FILE_LONG_NAME, allelicPoNTSVFile.getAbsolutePath());
    final String[] arguments = ArrayUtils.addAll(commonArguments, testArguments);
    runCommandLine(arguments);
    final AllelicPanelOfNormals resultHDF5 = AllelicPanelOfNormals.read(allelicPoNHDF5File);
    final AllelicPanelOfNormals expectedHDF5 = AllelicPanelOfNormals.read(expectedHDF5File);
    AllelicPoNTestUtils.assertAllelicPoNsEqual(resultHDF5, expectedHDF5);
    final AllelicPanelOfNormals resultTSV = AllelicPanelOfNormals.read(allelicPoNTSVFile);
    final AllelicPanelOfNormals expectedTSV = AllelicPanelOfNormals.read(expectedTSVFile);
    AllelicPoNTestUtils.assertAllelicPoNsEqual(resultTSV, expectedTSV);
    //check overwrite
    runCommandLine(arguments);
    final AllelicPanelOfNormals resultHDF5Overwrite = AllelicPanelOfNormals.read(allelicPoNHDF5File);
    AllelicPoNTestUtils.assertAllelicPoNsEqual(resultHDF5Overwrite, expectedHDF5);
    final AllelicPanelOfNormals resultTSVOverwrite = AllelicPanelOfNormals.read(allelicPoNTSVFile);
    AllelicPoNTestUtils.assertAllelicPoNsEqual(resultTSVOverwrite, expectedTSV);
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 12 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.

the class AllelicCNV method runPipeline.

@Override
protected void runPipeline(final JavaSparkContext ctx) {
    validateArguments();
    if (!new HDF5Library().load(null)) {
        //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    final String originalLogLevel = (ctx.getLocalProperty("logLevel") != null) ? ctx.getLocalProperty("logLevel") : "INFO";
    ctx.setLogLevel("WARN");
    //the string after the final slash in the output prefix (which may be an absolute file path) will be used as the sample name
    final String sampleName = outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1);
    logger.info("Starting workflow for " + sampleName + "...");
    //make Genome from input target coverages and SNP counts
    logger.info("Loading input files...");
    final Genome genome = new Genome(tangentNormalizedCoverageFile, snpCountsFile);
    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    //load target-coverage segments from input file
    final List<ModeledSegment> targetSegmentsWithCalls = SegmentUtils.readModeledSegmentsFromSegmentFile(targetSegmentsFile);
    logger.info("Number of input target-coverage segments: " + targetSegmentsWithCalls.size());
    //merge copy-neutral and uncalled segments (unless disabled) and fix up target-segment start breakpoints
    logger.info("Preparing target-coverage segments...");
    final List<SimpleInterval> targetSegments = prepareTargetSegments(genome, targetSegmentsWithCalls);
    //segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
    //(final segment file is output as a side effect)
    logger.info("Performing SNP segmentation...");
    final List<SimpleInterval> snpSegments = performSNPSegmentationStep(sampleName, genome);
    //combine SNP and target-coverage segments
    logger.info("Combining SNP and target-coverage segments...");
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, genome);
    logger.info("Number of segments after segment union: " + unionedSegments.size());
    final File unionedSegmentsFile = new File(outputPrefix + "-" + UNION_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
    SegmentUtils.writeSegmentFileWithNumTargetsAndNumSNPs(unionedSegmentsFile, unionedSegments, genome);
    logSegmentFileWrittenMessage(unionedSegmentsFile);
    //small-segment merging (note that X and Y are always small segments and dropped, since GATK CNV drops them)
    logger.info("Merging small segments...");
    final SegmentedGenome segmentedGenomeWithSmallSegments = new SegmentedGenome(unionedSegments, genome);
    final SegmentedGenome segmentedGenome = segmentedGenomeWithSmallSegments.mergeSmallSegments(smallSegmentTargetNumberThreshold);
    logger.info("Number of segments after small-segment merging: " + segmentedGenome.getSegments().size());
    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, allelicPoN, numSamplesCopyRatio, numBurnInCopyRatio, numSamplesAlleleFraction, numBurnInAlleleFraction, ctx);
    //write initial segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, INITIAL_FIT_FILE_TAG);
    //similar-segment merging (segment files are output for each merge iteration)
    logger.info("Merging similar segments...");
    performSimilarSegmentMergingStep(modeller);
    //write final segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, FINAL_FIT_FILE_TAG);
    ctx.setLogLevel(originalLogLevel);
    logger.info("SUCCESS: Allelic CNV run complete for sample " + sampleName + ".");
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) HDF5Library(org.broadinstitute.hdf5.HDF5Library) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File)

Example 13 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.

the class CalculatePulldownPhasePosteriors method doWork.

@Override
public Object doWork() {
    if (!new HDF5Library().load(null)) {
        //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    //read counts, segments, and parameters from files
    final AllelicCountCollection counts = new AllelicCountCollection(snpCountsFile);
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final AlleleFractionState state = reconstructState(segments, parametersFile);
    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    //calculate phase posteriors
    final List<SimpleInterval> unmodeledSegments = segments.stream().map(ACNVModeledSegment::getInterval).collect(Collectors.toList());
    final AllelicCountWithPhasePosteriorsCollection countsWithPhasePosteriors = calculatePhasePosteriors(counts, unmodeledSegments, state, allelicPoN);
    //write phase posteriors to file with same verbosity as input file
    countsWithPhasePosteriors.write(outputFile, counts.getVerbosity());
    return "SUCCESS";
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountWithPhasePosteriorsCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountWithPhasePosteriorsCollection) HDF5Library(org.broadinstitute.hdf5.HDF5Library) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 14 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.

the class AlleleFractionModellerUnitTest method testMCMCWithAllelicPoN.

/**
     * Test MCMC inference on simulated data using an allelic PoN.  Note that these MCMC tests were written to use
     * simulated hets before the allelic PoN was introduced.  Rather than generate a simulated PoN on the fly,
     * we simply use a fixed simulated PoN loaded from a file and check that its MLE hyperparameters are "sampled"
     * correctly by simply taking the MLE PoN values---i.e., the PoN does not actually cover the simulated sites and
     * hence is not used to correct reference bias in the simulated data in any way.
     * This latter functionality is tested on fixed data loaded from files in
     * {@link AlleleFractionModellerUnitTest#testBiasCorrection} instead.
     */
@Test
public void testMCMCWithAllelicPoN() {
    final double meanBiasSimulated = 1.2;
    final double biasVarianceSimulated = 0.04;
    // PoN generated with alpha = 65
    final double meanBiasOfPoN = 1.083;
    // PoN generated with beta = 60
    final double biasVarianceOfPoN = 0.0181;
    final AllelicPanelOfNormals allelicPoN = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE));
    testMCMC(meanBiasSimulated, biasVarianceSimulated, meanBiasOfPoN, biasVarianceOfPoN, allelicPoN);
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 15 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.

the class PerformAlleleFractionSegmentation method doWork.

@Override
public Object doWork() {
    final String sampleName = FilenameUtils.getBaseName(snpCountsFile.getAbsolutePath());
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    final AllelicCountCollection acc = new AllelicCountCollection(snpCountsFile);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(initialNumStates, acc, allelicPoN);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, true);
    return "SUCCESS";
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment)

Aggregations

AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals)20 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)12 File (java.io.File)10 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)8 Test (org.testng.annotations.Test)8 HDF5Library (org.broadinstitute.hdf5.HDF5Library)6 Arrays (java.util.Arrays)4 List (java.util.List)4 Collectors (java.util.stream.Collectors)4 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)4 PosteriorSummary (org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary)4 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)4 DataProvider (org.testng.annotations.DataProvider)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Log (htsjdk.samtools.util.Log)2 IOException (java.io.IOException)2 Map (java.util.Map)2 Pair (org.apache.commons.lang3.tuple.Pair)2 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)2 Argument (org.broadinstitute.barclay.argparser.Argument)2