Search in sources :

Example 46 with Median

use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.

the class TargetCoverageSexGenotypeCalculator method getSampleReadDepthFromAutosomalTargets.

/**
 * Computes the median, taken over the autosomal targets, of the read counts of one sample after each
 * count has been divided by the ploidy recorded for its target.
 *
 * @param sampleIndex integer index of the sample in the read count collection
 * @return read depth per target per homolog
 */
private double getSampleReadDepthFromAutosomalTargets(final int sampleIndex) {
    final double[] autosomalCounts = processedReadCounts.getColumnOnSpecifiedTargets(sampleIndex, autosomalTargetList, false);
    final double[] countsPerHomolog = new double[autosomalCounts.length];
    // Normalize every autosomal read count by the ploidy of the target it belongs to.
    for (int targetIndex = 0; targetIndex < autosomalCounts.length; targetIndex++) {
        countsPerHomolog[targetIndex] = autosomalCounts[targetIndex] / (double) autosomalTargetPloidies[targetIndex];
    }
    final Median medianEvaluator = new Median();
    return medianEvaluator.evaluate(countsPerHomolog);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Median(org.apache.commons.math3.stat.descriptive.rank.Median) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) LogManager(org.apache.logging.log4j.LogManager) Nonnull(javax.annotation.Nonnull) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 47 with Median

use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtils method hasSuspiciousContigs.

/**
 * Given a single sample tangent normalization (or other coverage profile), determine whether any contig looks
 * like it has an arm level event (defined as 25% (or more) of the contig amplified/deleted).
 *
 * <p>For every contig the first column of counts is cut into four consecutive partitions of
 * {@code dimension / 4} entries. Each value is transformed as {@code 2^value}, the median of the partition is
 * taken, and the contig's entry in {@code contigToMedian} minus 1.0 is subtracted from it. The contig is
 * reported as suspicious when the shifted median is above {@code AMP_THRESHOLD} or below
 * {@code DEL_THRESHOLD}.</p>
 *
 * @param singleSampleTangentNormalized Tangent normalized data for a single sample; only column 0 is read.
 * @param contigToMedian per-contig median used to shift the partition median; contigs missing from the map
 *                       are treated as having a median of 1.0 (i.e. no shift)
 * @return {@code true} as soon as one partition is outside the thresholds, {@code false} otherwise;
 *         never {@code null}
 */
private static Boolean hasSuspiciousContigs(final ReadCountCollection singleSampleTangentNormalized, final Map<String, Double> contigToMedian) {
    for (final String contigName : retrieveAllContigsPresent(singleSampleTangentNormalized)) {
        // Restrict the collection to the targets that lie on the current contig.
        final ReadCountCollection contigReadCounts = singleSampleTangentNormalized.subsetTargets(
                singleSampleTangentNormalized.targets().stream()
                        .filter(target -> target.getContig().equals(contigName))
                        .collect(Collectors.toSet()));
        final RealVector contigColumn = contigReadCounts.counts().getColumnVector(0);
        final int dimension = contigColumn.getDimension();
        final int partitionLength = dimension / 4;

        for (int quarter = 0; quarter < 4; quarter++) {
            final int partitionStart = quarter * dimension / 4;
            final double[] log2Values = contigColumn.getSubVector(partitionStart, partitionLength).toArray();
            final double[] copyRatios = DoubleStream.of(log2Values)
                    .map(log2Value -> Math.pow(2, log2Value))
                    .sorted()
                    .toArray();

            final double rawMedian = new Median().evaluate(copyRatios);
            final double shiftInCRSpace = contigToMedian.getOrDefault(contigName, 1.0) - 1.0;
            final double shiftedMedian = rawMedian - shiftInCRSpace;

            // Written as two positive comparisons so that a NaN median is never flagged.
            final boolean looksAmplified = shiftedMedian > AMP_THRESHOLD;
            final boolean looksDeleted = shiftedMedian < DEL_THRESHOLD;
            if (looksAmplified || looksDeleted) {
                logger.info("Suspicious contig: " + singleSampleTangentNormalized.columnNames().get(0) + " " + contigName + " (" + shiftedMedian + " -- " + quarter + ")");
                return true;
            }
        }
    }
    return false;
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 48 with Median

use of org.apache.commons.math3.stat.descriptive.rank.Median in project knime-core by knime.

the class MedianAbsoluteDeviationOperator method getResultInternal.

/**
 * {@inheritDoc}
 *
 * <p>Returns the median of the absolute deviations of the collected values from their median, or a
 * missing cell when no values were collected.</p>
 */
@Override
protected DataCell getResultInternal() {
    final double[] values = super.getCells().getElements();
    if (values.length == 0) {
        return DataType.getMissingCell();
    }
    final Median medianEvaluator = new Median();
    final double center = medianEvaluator.evaluate(values);
    // Overwrite every value in place with its absolute distance from the median.
    int index = 0;
    while (index < values.length) {
        values[index] = Math.abs(center - values[index]);
        index++;
    }
    final double medianAbsoluteDeviation = medianEvaluator.evaluate(values);
    return new DoubleCell(medianAbsoluteDeviation);
}
Also used : DoubleCell(org.knime.core.data.def.DoubleCell) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 49 with Median

use of org.apache.commons.math3.stat.descriptive.rank.Median in project ASCIIGenome by dariober.

the class Utils method winsor2.

/**
 * Winsorise vector x. Adapted from https://www.r-bloggers.com/winsorization/.
 *
 * <p>Values are centred on the median of {@code x}, clamped to the interval {@code [-sc, sc]} where
 * {@code sc = mad(centred values, center = 0) * multiple}, and then shifted back by the median.
 * The reference R implementation is:</p>
 *
 * <pre>
 * winsor2 &lt;- function (x, multiple=3)
 * {
 *    med &lt;- median(x)
 *    y &lt;- x - med
 *    sc &lt;- mad(y, center=0) * multiple
 *    y[ y &gt; sc ] &lt;- sc
 *    y[ y &lt; -sc ] &lt;- -sc
 *    y + med
 * }
 * </pre>
 *
 * @param x values to winsorise; the list itself is not modified
 * @param multiple number of (scaled) median absolute deviations beyond which values are clamped;
 *                 must be greater than zero
 * @return a new list, the same length as {@code x}, holding the winsorised values
 * @throws ArithmeticException if {@code multiple} is zero or negative
 */
public static List<Float> winsor2(List<Float> x, double multiple) {
    if (multiple <= 0) {
        // Same exception type as before; the message now reports the offending value.
        throw new ArithmeticException("Winsorisation multiple must be greater than zero, got " + multiple);
    }
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (float z : x) {
        stats.addValue(z);
    }
    float median = (float) stats.getPercentile(50);

    // Centre the data on the median. Iterating (rather than x.get(i)) keeps this linear for any List.
    List<Float> y = new ArrayList<Float>(x.size());
    for (float z : x) {
        y.add(z - median);
    }

    float sc = (float) (Utils.medianAbsoluteDeviation(y, 0) * multiple);

    // Clamp each centred value to [-sc, sc], then undo the centring.
    for (int i = 0; i < y.size(); i++) {
        float centred = y.get(i);
        if (centred > sc) {
            centred = sc;
        } else if (centred < -sc) {
            centred = -sc;
        }
        y.set(i, centred + median);
    }
    return y;
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) ArrayList(java.util.ArrayList)

Example 50 with Median

use of org.apache.commons.math3.stat.descriptive.rank.Median in project ASCIIGenome by dariober.

the class Utils method medianAbsoluteDeviation.

/**
 * Median absolute deviation of {@code x} about {@code center}, multiplied by the constant 1.4826.
 * Translated from R function mad(x, center, constant= 1.4826, na.rm= FALSE, low= FALSE, high= FALSE).
 *
 * @param x values whose deviations from {@code center} are measured
 * @param center point the absolute deviations are taken from
 * @return 1.4826 times the 50th percentile of {@code |x[i] - center|}
 */
private static float medianAbsoluteDeviation(List<Float> x, float center) {
    final DescriptiveStatistics absoluteDeviations = new DescriptiveStatistics();
    for (final float value : x) {
        absoluteDeviations.addValue(Math.abs(value - center));
    }
    final float scale = (float) 1.4826;
    final float medianDeviation = (float) absoluteDeviations.getPercentile(50);
    return scale * medianDeviation;
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)

Aggregations

Median (org.apache.commons.math3.stat.descriptive.rank.Median): 35; RealMatrix (org.apache.commons.math3.linear.RealMatrix): 29; IntStream (java.util.stream.IntStream): 28; Collectors (java.util.stream.Collectors): 24; Logger (org.apache.logging.log4j.Logger): 24; Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile): 22; DoubleStream (java.util.stream.DoubleStream): 20; File (java.io.File): 18; Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix): 17; ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils): 16; List (java.util.List): 15; ArrayList (java.util.ArrayList): 14; JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 14; UserException (org.broadinstitute.hellbender.exceptions.UserException): 14; ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection): 14; SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 14; VisibleForTesting (com.google.common.annotations.VisibleForTesting): 13; java.util (java.util): 13; DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor): 12; LogManager (org.apache.logging.log4j.LogManager): 12