Search in sources :

Example 16 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method performWriting.

private static void performWriting(ReadCountCollection collection, TableWriter<ReadCountRecord> tableWriter, String[] headerComments) throws IOException {
    // print the header comments
    for (final String comment : headerComments) {
        tableWriter.writeComment(comment);
    }
    final List<Target> targets = collection.targets();
    final RealMatrix counts = collection.counts();
    for (int i = 0; i < targets.size(); i++) {
        tableWriter.writeRecord(new ReadCountRecord(targets.get(i), counts.getRow(i)));
    }
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 17 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method truncateExtremeCounts.

/**
     * Truncates the extreme count values in the input read-count collection.
     * Values are forced to be bound by the percentile indicated with the input {@code percentile} which must be
     * in the range [0 .. 50.0]. Values under that percentile and the complementary (1 - percentile) are set to the
     * corresponding threshold value.
     *
     * <p>The imputation is done in-place, thus the input matrix is modified as a result of this call.</p>
     *
     * @param readCounts the input and output read-count matrix.
     */
public static void truncateExtremeCounts(final ReadCountCollection readCounts, final double percentile, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final int targetCount = counts.getRowDimension();
    final int columnCount = counts.getColumnDimension();
    // Create a row major array of the counts.
    final double[] values = Doubles.concat(counts.getData());
    final Percentile bottomPercentileEvaluator = new Percentile(percentile);
    final Percentile topPercentileEvaluator = new Percentile(100.0 - percentile);
    final double bottomPercentileThreshold = bottomPercentileEvaluator.evaluate(values);
    final double topPercentileThreshold = topPercentileEvaluator.evaluate(values);
    long totalCounts = 0;
    long bottomTruncatedCounts = 0;
    long topTruncatedCounts = 0;
    for (int i = 0; i < targetCount; i++) {
        final double[] rowCounts = counts.getRow(i);
        for (int j = 0; j < columnCount; j++) {
            final double count = rowCounts[j];
            totalCounts++;
            if (count < bottomPercentileThreshold) {
                counts.setEntry(i, j, bottomPercentileThreshold);
                bottomTruncatedCounts++;
            } else if (count > topPercentileThreshold) {
                counts.setEntry(i, j, topPercentileThreshold);
                topTruncatedCounts++;
            }
        }
    }
    if (topTruncatedCounts == 0 && bottomTruncatedCounts == 0) {
        logger.info(String.format("None of the %d counts were truncated as they all fall in the non-extreme range " + "[%.2f, %.2f]", totalCounts, bottomPercentileThreshold, topPercentileThreshold));
    } else {
        final double truncatedPercentage = ((double) (topTruncatedCounts + bottomTruncatedCounts) / totalCounts) * 100;
        logger.info(String.format("Some counts (%d out of %d, %.2f%%) were truncated as they fall out of the " + "non-extreme range [%.2f, %.2f]", topTruncatedCounts + bottomTruncatedCounts, totalCounts, truncatedPercentage, bottomPercentileThreshold, topPercentileThreshold));
    }
}
Also used : Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 18 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method removeTargetsWithTooManyZeros.

/**
     * Remove targets that have too many counts equal to 0.
     * <p>
     *     It will return a copy of the input read-count collection with such targets dropped.
     * </p>
     *
     * @param readCounts the input read counts.
     * @param maximumTargetZeros maximum number of counts equal to 0 per target tolerated.
     * @return never {@code null}. It might be a reference to the input read-counts if there is
     *   is no target to be dropped.
     */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Set<Target> targetsToKeep = IntStream.range(0, counts.getRowDimension()).boxed().filter(i -> countZeroes(counts.getRow(i), roundToInteger) <= maximumTargetZeros).map(i -> readCounts.targets().get(i)).collect(Collectors.toCollection(LinkedHashSet::new));
    final int targetsToDropCount = readCounts.targets().size() - targetsToKeep.size();
    if (targetsToDropCount == 0) {
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop", maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    } else if (targetsToDropCount == readCounts.targets().size()) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting " + "in all targets being dropped");
    } else {
        final double droppedPercentage = ((double) (targetsToDropCount) / readCounts.targets().size()) * 100;
        logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).", targetsToDropCount, readCounts.targets().size(), droppedPercentage, maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts.subsetTargets(targetsToKeep);
    }
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 19 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method removeColumnsWithTooManyZeros.

/**
     * Remove columns that have too many counts equal to 0.
     * <p>
     *     It will return a copy of the input read-count collection with such columns dropped.
     * </p>
     *
     * @param readCounts the input read counts.
     * @param maximumColumnZeros maximum number of counts equal to 0 per column tolerated.
     * @return never {@code null}. It might be a reference to the input read-counts if there is
     *   is no column to be dropped.
     */
@VisibleForTesting
public static ReadCountCollection removeColumnsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumColumnZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Set<String> columnsToKeep = IntStream.range(0, counts.getColumnDimension()).boxed().filter(i -> countZeroes(counts.getColumn(i), roundToInteger) <= maximumColumnZeros).map(i -> readCounts.columnNames().get(i)).collect(Collectors.toCollection(LinkedHashSet::new));
    final int columnsToDropCount = readCounts.columnNames().size() - columnsToKeep.size();
    if (columnsToDropCount == 0) {
        logger.info(String.format("There were no columns with a large number of targets with zero counts " + "(<= %d of %d) to drop", maximumColumnZeros, readCounts.targets().size()));
        return readCounts;
    } else if (columnsToDropCount == readCounts.columnNames().size()) {
        throw new UserException.BadInput("The number of zeros per count column is too large resulting in all count " + "columns to be dropped");
    } else {
        final double droppedPercentage = ((double) (columnsToDropCount) / readCounts.columnNames().size()) * 100;
        logger.info(String.format("Some counts columns dropped (%d out of %d, %.2f%%) as they had too many targets with zeros (> %d of %d)", columnsToDropCount, readCounts.columnNames().size(), droppedPercentage, maximumColumnZeros, readCounts.targets().size()));
        return readCounts.subsetColumns(columnsToKeep);
    }
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 20 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class XHMMSegmentCallerBase method standardizeByTarget.

/**
     * Standardize read counts (per-target).
     * Note: modification is done in-place.
     *
     * @param counts original read counts
     */
private void standardizeByTarget(final RealMatrix counts) {
    final double[] rowMeans = GATKProtectedMathUtils.rowMeans(counts);
    final double[] rowStdDev = GATKProtectedMathUtils.rowStdDevs(counts);
    counts.walkInColumnOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return (value - rowMeans[row]) / rowStdDev[row];
        }
    });
}
Also used : DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)218 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)148 Test (org.testng.annotations.Test)86 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)60 IntStream (java.util.stream.IntStream)50 Collectors (java.util.stream.Collectors)48 Median (org.apache.commons.math3.stat.descriptive.rank.Median)42 HDF5File (org.broadinstitute.hdf5.HDF5File)42 File (java.io.File)40 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)36 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)36 List (java.util.List)34 Assert (org.testng.Assert)32 IOException (java.io.IOException)30 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)30 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)30 DoubleStream (java.util.stream.DoubleStream)28 Logger (org.apache.logging.log4j.Logger)27 Utils (org.broadinstitute.hellbender.utils.Utils)27 ArrayList (java.util.ArrayList)26