Search in sources :

Example 66 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class GermlineCNVCallerIntegrationTest method reportCopyNumberSummaryStatistics.

/**
 * Compares called copy numbers against truth copy numbers and logs per-category summary statistics.
 *
 * <p>The called copy numbers are read from the {@link CoverageModelGlobalConstants#COPY_RATIO_VITERBI_FILENAME}
 * file located under {@code posteriorsOutputPath}. Per-sample results are produced by
 * {@code calculateSampleCopyNumberConcordance} and then aggregated across samples with a {@link Mean}.</p>
 *
 * <p>Shame on me for using {@link ReadCountCollection} to store copy numbers!</p>
 *
 * @param posteriorsOutputPath directory that contains the called copy number matrix file
 * @param truthCopyNumberFile file holding the truth copy number table
 * @param targets targets used both to load the truth table and to label the called matrix
 * @param sexGenotypeDataCollection source of the sex genotype of every sample
 */
private void reportCopyNumberSummaryStatistics(@Nonnull final File posteriorsOutputPath, @Nonnull final File truthCopyNumberFile, @Nonnull final List<Target> targets, @Nonnull final SexGenotypeDataCollection sexGenotypeDataCollection) {
    /* load truth and calls; the called matrix borrows the sample (column) names of the truth table */
    final ReadCountCollection truth = loadTruthCopyNumberTable(truthCopyNumberFile, targets);
    final File calledCopyNumberFile = new File(posteriorsOutputPath,
            CoverageModelGlobalConstants.COPY_RATIO_VITERBI_FILENAME);
    final RealMatrix calledMatrix = Nd4jApacheAdapterUtils.convertINDArrayToApacheMatrix(
            Nd4jIOUtils.readNDArrayMatrixFromTextFile(calledCopyNumberFile));
    final ReadCountCollection called = new ReadCountCollection(targets, truth.columnNames(), calledMatrix);
    final int sampleCount = called.columnNames().size();

    /* sex genotype of each sample, in column order */
    final List<String> sexGenotypes = truth.columnNames().stream()
            .map(name -> sexGenotypeDataCollection.getSampleSexGenotypeData(name).getSexGenotype())
            .collect(Collectors.toList());

    /* per-sample concordance between truth and calls */
    final List<SampleCopyNumberSummaryStatistics> perSampleStatistics = IntStream.range(0, sampleCount)
            .mapToObj(sampleIndex -> calculateSampleCopyNumberConcordance(truth, called, sampleIndex,
                    sexGenotypes.get(sampleIndex)))
            .collect(Collectors.toList());

    /* aggregate the per-sample results across samples; the aggregator is the arithmetic mean */
    final AbstractUnivariateStatistic aggregator = new Mean();
    final ConfusionRates homDelRates = ConfusionMatrix.getConfusionRates(
            perSampleStatistics.stream()
                    .map(stats -> stats.homozygousDeletionConfusionMatrix)
                    .collect(Collectors.toList()),
            aggregator);
    final ConfusionRates hetDelRates = ConfusionMatrix.getConfusionRates(
            perSampleStatistics.stream()
                    .map(stats -> stats.heterozygousDeletionConfusionMatrix)
                    .collect(Collectors.toList()),
            aggregator);
    final ConfusionRates dupRates = ConfusionMatrix.getConfusionRates(
            perSampleStatistics.stream()
                    .map(stats -> stats.duplicationConfusionMatrix)
                    .collect(Collectors.toList()),
            aggregator);
    final double concordance = Concordance.getCollectionConcordance(
            perSampleStatistics.stream()
                    .map(stats -> stats.absoluteCopyNumberConcordance)
                    .collect(Collectors.toList()),
            aggregator);

    /* log */
    logger.info("Homozygous deletion statistics: " + homDelRates.toString());
    logger.info("Heterozygous deletion statistics: " + hetDelRates.toString());
    logger.info("Duplication statistics: " + dupRates.toString());
    logger.info(String.format("Absolute copy number calling concordance: %f", concordance));
}
Also used : BeforeSuite(org.testng.annotations.BeforeSuite) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) GermlinePloidyAnnotatedTargetCollection(org.broadinstitute.hellbender.tools.exome.sexgenotyper.GermlinePloidyAnnotatedTargetCollection) ArrayUtils(org.apache.commons.lang3.ArrayUtils) Test(org.testng.annotations.Test) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ContigGermlinePloidyAnnotationTableReader(org.broadinstitute.hellbender.tools.exome.sexgenotyper.ContigGermlinePloidyAnnotationTableReader) AbstractUnivariateStatistic(org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) Nonnull(javax.annotation.Nonnull) LinkedHashSet(java.util.LinkedHashSet) MathIllegalArgumentException(org.apache.commons.math3.exception.MathIllegalArgumentException) Nd4jApacheAdapterUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jApacheAdapterUtils) SexGenotypeDataCollection(org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection) Collection(java.util.Collection) Nd4jIOUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils) IOException(java.io.IOException) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) Collectors(java.util.stream.Collectors) File(java.io.File) CoverageModelArgumentCollection(org.broadinstitute.hellbender.tools.coveragemodel.CoverageModelArgumentCollection) List(java.util.List) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) 
SparkToggleCommandLineProgram(org.broadinstitute.hellbender.utils.SparkToggleCommandLineProgram) CoverageModelGlobalConstants(org.broadinstitute.hellbender.tools.coveragemodel.CoverageModelGlobalConstants) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) AbstractUnivariateStatistic(org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic) RealMatrix(org.apache.commons.math3.linear.RealMatrix) File(java.io.File)

Example 67 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class GCBiasSimulatedData method simulatedData.

/**
 * Builds a deterministic simulated read-count collection whose counts are dominated by GC bias.
 *
 * <p>Visible for the integration test.</p>
 *
 * @param numTargets number of phony targets (matrix rows) to simulate
 * @param numSamples number of phony samples (matrix columns) to simulate
 * @return a pair holding the simulated read counts and the GC content assigned to each target
 */
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> targets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> samples = SimulatedSamples.phonySamples(numSamples);
    // fixed seed, so repeated calls with the same arguments draw the same random sequence
    final Random rng = new Random(13);

    // GC content per target: 0.5 plus Gaussian noise scaled by 0.2, clamped to [0.05, 0.95]
    final double[] gcContent = IntStream.range(0, numTargets)
            .mapToDouble(t -> Math.max(Math.min(0.5 + 0.2 * rng.nextGaussian(), 0.95), 0.05))
            .toArray();
    final double[] gcBias = Arrays.stream(gcContent)
            .map(gc -> QUADRATIC_GC_BIAS_CURVE.apply(gc))
            .toArray();

    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final RealMatrix simulatedCounts = new Array2DRowRealMatrix(numTargets, numSamples);
    simulatedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int col, final double ignoredValue) {
            // the current entry is discarded; every entry of a row is that target's bias times a factor in [1.0, 1.01)
            final double nonGcNoise = 1.0 + 0.01 * rng.nextDouble();
            return gcBias[row] * nonGcNoise;
        }
    });

    return new ImmutablePair<>(new ReadCountCollection(targets, samples, simulatedCounts), gcContent);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Arrays(java.util.Arrays) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Random(java.util.Random) Function(java.util.function.Function) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Collections(java.util.Collections) Random(java.util.Random) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 68 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method performWriting.

/**
 * Writes the header comments followed by one record per target to the given table writer.
 *
 * @param collection the read counts to write; row {@code i} of its count matrix is written with target {@code i}
 * @param tableWriter destination writer
 * @param headerComments comment lines emitted, in order, before any record
 * @throws IOException declared for failures raised by the underlying writer
 */
private static void performWriting(ReadCountCollection collection, TableWriter<ReadCountRecord> tableWriter, String[] headerComments) throws IOException {
    // print the header comments
    for (int c = 0; c < headerComments.length; c++) {
        tableWriter.writeComment(headerComments[c]);
    }

    final List<Target> targetList = collection.targets();
    final RealMatrix countMatrix = collection.counts();
    // walk the targets in order, pairing each one with the matrix row of the same index
    int row = 0;
    for (final Target target : targetList) {
        final double[] targetCounts = countMatrix.getRow(row);
        tableWriter.writeRecord(new ReadCountRecord(target, targetCounts));
        row++;
    }
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 69 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method truncateExtremeCounts.

/**
 * Clamps the extreme count values of the input read-count collection.
 *
 * <p>Two thresholds are computed over all counts of the matrix: the {@code percentile}-th percentile and the
 * complementary {@code (100 - percentile)}-th percentile. Every count below the lower threshold is replaced
 * by the lower threshold, and every count above the upper threshold is replaced by the upper threshold.</p>
 *
 * <p>The truncation is done in-place, thus the input matrix is modified as a result of this call.</p>
 *
 * @param readCounts the input and output read-count matrix.
 * @param percentile the lower percentile, expected to lie in the range [0 .. 50.0]; no range check is performed
 *                   by this method itself.
 *                   NOTE(review): commons-math's {@code Percentile} documents its quantile as
 *                   {@code 0 < p <= 100}, so the endpoint 0 is presumably rejected by its constructor - confirm.
 * @param logger receives a single info-level message summarizing how many counts were truncated.
 */
public static void truncateExtremeCounts(final ReadCountCollection readCounts, final double percentile, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final int rowCount = counts.getRowDimension();
    final int colCount = counts.getColumnDimension();

    // Flatten the matrix into a single row-major array so both thresholds are evaluated over every count.
    final double[] flattened = Doubles.concat(counts.getData());
    final double lowerThreshold = new Percentile(percentile).evaluate(flattened);
    final double upperThreshold = new Percentile(100.0 - percentile).evaluate(flattened);

    long clampedToLower = 0;
    long clampedToUpper = 0;
    for (int row = 0; row < rowCount; row++) {
        for (int col = 0; col < colCount; col++) {
            final double current = counts.getEntry(row, col);
            if (current < lowerThreshold) {
                counts.setEntry(row, col, lowerThreshold);
                clampedToLower++;
            } else if (current > upperThreshold) {
                counts.setEntry(row, col, upperThreshold);
                clampedToUpper++;
            }
        }
    }

    // every entry of the matrix is visited exactly once above
    final long visited = (long) rowCount * colCount;
    final long clamped = clampedToUpper + clampedToLower;
    if (clamped == 0) {
        logger.info(String.format("None of the %d counts were truncated as they all fall in the non-extreme range [%.2f, %.2f]",
                visited, lowerThreshold, upperThreshold));
        return;
    }
    final double clampedPercentage = ((double) clamped / visited) * 100;
    logger.info(String.format("Some counts (%d out of %d, %.2f%%) were truncated as they fall out of the non-extreme range [%.2f, %.2f]",
            clamped, visited, clampedPercentage, lowerThreshold, upperThreshold));
}
Also used : Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 70 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method removeTargetsWithTooManyZeros.

/**
 * Drops the targets whose number of zero counts exceeds a maximum.
 * <p>
 *     A target is kept when {@code countZeroes} reports at most {@code maximumTargetZeros} zeros for its row.
 *     When at least one (but not every) target is dropped, the result is the subset of the input restricted
 *     to the kept targets; when nothing is dropped, the input itself is returned.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumTargetZeros maximum number of counts equal to 0 per target tolerated.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably controls whether counts are
 *                       rounded before being compared with 0 - verify against that helper.
 * @param logger receives an info-level message describing how many targets were dropped.
 * @return never {@code null}. It is a reference to the input read-counts if there is
 *   no target to be dropped.
 * @throws UserException.BadInput if every target would be dropped.
 */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    // LinkedHashSet keeps the retained targets in their original row order
    final Set<Target> retainedTargets = IntStream.range(0, counts.getRowDimension())
            .filter(row -> countZeroes(counts.getRow(row), roundToInteger) <= maximumTargetZeros)
            .mapToObj(row -> readCounts.targets().get(row))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final int totalTargets = readCounts.targets().size();
    final int droppedTargets = totalTargets - retainedTargets.size();

    if (droppedTargets == 0) {
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop",
                maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    }
    if (droppedTargets == totalTargets) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting in all targets being dropped");
    }

    final double droppedPercentage = ((double) droppedTargets / totalTargets) * 100;
    logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).",
            droppedTargets, totalTargets, droppedPercentage, maximumTargetZeros, readCounts.columnNames().size()));
    return readCounts.subsetTargets(retainedTargets);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)259 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)158 Test (org.testng.annotations.Test)86 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)60 IntStream (java.util.stream.IntStream)50 Collectors (java.util.stream.Collectors)48 Median (org.apache.commons.math3.stat.descriptive.rank.Median)42 HDF5File (org.broadinstitute.hdf5.HDF5File)42 File (java.io.File)40 List (java.util.List)37 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)36 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)36 ArrayList (java.util.ArrayList)32 Assert (org.testng.Assert)32 IOException (java.io.IOException)30 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)30 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)30 DoubleStream (java.util.stream.DoubleStream)28 Logger (org.apache.logging.log4j.Logger)27 Utils (org.broadinstitute.hellbender.utils.Utils)27