Search in sources :

Example 31 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class GCCorrectorUnitTest method testGCCorrection.

@Test
public void testGCCorrection() {
    final int numSamples = 5;
    final int numTargets = 10000;
    final Pair<ReadCountCollection, double[]> data = GCBiasSimulatedData.simulatedData(numTargets, numSamples);
    final ReadCountCollection rcc1 = data.getLeft();
    final double[] gcContentByTarget = data.getRight();
    final ReadCountCollection correctedCounts1 = GCCorrector.correctCoverage(rcc1, gcContentByTarget);
    final double[] correctedNoiseBySample = GATKProtectedMathUtils.columnStdDevs(correctedCounts1.counts());
    Arrays.stream(correctedNoiseBySample).forEach(x -> Assert.assertTrue(x < 0.02));
    //check that GC correction is approximately idempotent -- if you correct again, very little should happen
    final ReadCountCollection correctedCounts2 = GCCorrector.correctCoverage(correctedCounts1, gcContentByTarget);
    final double change1 = correctedCounts1.counts().subtract(rcc1.counts()).getFrobeniusNorm();
    final double change2 = correctedCounts2.counts().subtract(correctedCounts1.counts()).getFrobeniusNorm();
    Assert.assertTrue(change2 < change1 / 10);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Test(org.testng.annotations.Test)

Example 32 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class TargetCoverageSexGenotyper method doWork.

@Override
protected Object doWork() {
    /* check args */
    IOUtils.canReadFile(inputRawReadCountsFile);
    IOUtils.canReadFile(inputContigAnnotsFile);
    Utils.validateArg(baselineMappingErrorProbability > 0 && baselineMappingErrorProbability < 1, "Must have 0 < baseline mapping error probability < 1.");
    /* read input read counts */
    final ReadCountCollection rawReadCounts;
    try {
        logger.info("Parsing raw read count collection file...");
        rawReadCounts = ReadCountCollectionUtils.parse(inputRawReadCountsFile);
    } catch (final IOException ex) {
        throw new UserException.CouldNotReadInputFile("Could not read raw read count collection file");
    }
    /* parse contig genotype ploidy annotations */
    final List<ContigGermlinePloidyAnnotation> contigGermlinePloidyAnnotationList;
    logger.info("Parsing contig genotype ploidy annotations file...");
    contigGermlinePloidyAnnotationList = ContigGermlinePloidyAnnotationTableReader.readContigGermlinePloidyAnnotationsFromFile(inputContigAnnotsFile);
    /* parse target list */
    final List<Target> inputTargetList;
    if (inputTargetListFile != null) {
        inputTargetList = TargetTableReader.readTargetFile(inputTargetListFile);
    } else {
        logger.info("Target list was not provided -- getting target list from the read count collection.");
        inputTargetList = rawReadCounts.targets();
    }
    /* perform genotyping */
    final TargetCoverageSexGenotypeCalculator genotyper = new TargetCoverageSexGenotypeCalculator(rawReadCounts, inputTargetList, contigGermlinePloidyAnnotationList, baselineMappingErrorProbability);
    final SexGenotypeDataCollection sampleSexGenotypeCollection = genotyper.inferSexGenotypes();
    /* save results */
    try {
        sampleSexGenotypeCollection.write(new FileWriter(outputSampleGenotypesFile));
    } catch (final IOException ex) {
        throw new UserException.CouldNotCreateOutputFile("Could not write inferred sample genotypes to file", ex);
    }
    return "SUCCESS";
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) FileWriter(java.io.FileWriter) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 33 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class PerformCopyRatioSegmentation method doWork.

@Override
public Object doWork() {
    final String sampleName = ReadCountCollectionUtils.getSampleNameForCLIsFromReadCountsFile(new File(coverageFile));
    final ReadCountCollection rcc;
    try {
        rcc = ReadCountCollectionUtils.parse(new File(coverageFile));
    } catch (final IOException ex) {
        throw new UserException.BadInput("could not read input file");
    }
    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(initialNumStates, rcc);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, false);
    return "SUCCESS";
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) IOException(java.io.IOException) UserException(org.broadinstitute.hellbender.exceptions.UserException) File(java.io.File)

Example 34 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.

the class GCCorrectorUnitTest method testGCCorrection.

@Test
public void testGCCorrection() {
    final int numSamples = 5;
    final int numTargets = 10000;
    final Pair<ReadCountCollection, double[]> data = GCBiasSimulatedData.simulatedData(numTargets, numSamples);
    final ReadCountCollection rcc1 = data.getLeft();
    final double[] gcContentByTarget = data.getRight();
    final ReadCountCollection correctedCounts1 = GCCorrector.correctCoverage(rcc1, gcContentByTarget);
    final double[] correctedNoiseBySample = GATKProtectedMathUtils.columnStdDevs(correctedCounts1.counts());
    Arrays.stream(correctedNoiseBySample).forEach(x -> Assert.assertTrue(x < 0.02));
    //check that GC correction is approximately idempotent -- if you correct again, very little should happen
    final ReadCountCollection correctedCounts2 = GCCorrector.correctCoverage(correctedCounts1, gcContentByTarget);
    final double change1 = correctedCounts1.counts().subtract(rcc1.counts()).getFrobeniusNorm();
    final double change2 = correctedCounts2.counts().subtract(correctedCounts1.counts()).getFrobeniusNorm();
    Assert.assertTrue(change2 < change1 / 10);
}
Also used : ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Test(org.testng.annotations.Test)

Example 35 with ReadCountCollection

use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.

the class TargetCoverageSexGenotypeCalculator method processReadCountsAndTargets.

/**
     * Processes raw read counts and targets:
     * <dl>
     *     <dt> If more than one sample is present in the collection, filters out fully uncovered targets
     *     from read counts and removes the uncovered targets from the target list</dt>
     *
     *     <dt> Otherwise, does nothing and warns the user
     *     </dt>
     * </dl>
     *
     * @param rawReadCounts raw read count collection
     * @param targetList user provided target list
     * @return pair of processed read counts and targets
     */
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
    final ReadCountCollection finalReadCounts;
    final List<Target> finalTargetList;
    /* remove totally uncovered targets */
    if (rawReadCounts.columnNames().size() > 1) {
        finalReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
        final Set<Target> targetSetFromProcessedReadCounts = new HashSet<>(finalReadCounts.targets());
        finalTargetList = targetList.stream().filter(targetSetFromProcessedReadCounts::contains).collect(Collectors.toList());
    } else {
        final long numUncoveredTargets = rawReadCounts.records().stream().filter(rec -> (int) rec.getDouble(0) == 0).count();
        final long numAllTargets = rawReadCounts.targets().size();
        logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically" + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
        finalReadCounts = rawReadCounts;
        finalTargetList = targetList;
    }
    return ImmutablePair.of(finalReadCounts, finalTargetList);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Median(org.apache.commons.math3.stat.descriptive.rank.Median) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) LogManager(org.apache.logging.log4j.LogManager) Nonnull(javax.annotation.Nonnull) Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Aggregations

ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)74 Test (org.testng.annotations.Test)48 Target (org.broadinstitute.hellbender.tools.exome.Target)40 File (java.io.File)30 IOException (java.io.IOException)30 Collectors (java.util.stream.Collectors)30 List (java.util.List)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 IntStream (java.util.stream.IntStream)26 Assert (org.testng.Assert)26 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)24 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 Median (org.apache.commons.math3.stat.descriptive.rank.Median)22 ArrayList (java.util.ArrayList)20 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)20 Logger (org.apache.logging.log4j.Logger)20 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)20 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)18 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)18 DoubleStream (java.util.stream.DoubleStream)16