Search in sources :

Example 56 with Sum

use of org.apache.commons.math3.stat.descriptive.summary.Sum in project gatk by broadinstitute.

the class SomaticLikelihoodsEngineUnitTest method testEvidence.

@Test
public void testEvidence() {
    // one exact limit for the evidence is when the likelihoods of each read are so peaked (i.e. the most likely allele
    // of each read is much likelier than all other alleles) that the sum over latent read-to-allele assignments
    // (that is, over the indicator z in the notes) is dominated by the max-likelihood allele configuration
    // and thus the evidence reduces to exactly integrating out the Dirichlet allele fractions
    final double[] prior = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods = new Array2DRowRealMatrix(2, 4);
    log10Likelihoods.setRow(0, new double[] { 0.1, 4.0, 3.0, -10 });
    log10Likelihoods.setRow(1, new double[] { -12, -9, -5.0, 0.5 });
    final double calculatedLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods, prior);
    final double[] maxLikelihoodCounts = new double[] { 3, 1 };
    final double expectedLog10Evidence = SomaticLikelihoodsEngine.log10DirichletNormalization(prior) - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior, maxLikelihoodCounts)) + new IndexRange(0, log10Likelihoods.getColumnDimension()).sum(read -> log10Likelihoods.getColumnVector(read).getMaxValue());
    Assert.assertEquals(calculatedLog10Evidence, expectedLog10Evidence, 1e-5);
    // when there's just one read we can calculate the likelihood exactly
    final double[] prior2 = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods2 = new Array2DRowRealMatrix(2, 1);
    log10Likelihoods2.setRow(0, new double[] { 0.1 });
    log10Likelihoods2.setRow(1, new double[] { 0.5 });
    final double calculatedLog10Evidence2 = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods2, prior2);
    final double[] delta0 = new double[] { 1, 0 };
    final double[] delta1 = new double[] { 0, 1 };
    final double expectedLog10Evidence2 = MathUtils.log10SumLog10(log10Likelihoods2.getEntry(0, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2) - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta0)), +log10Likelihoods2.getEntry(1, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2) - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta1)));
    Assert.assertEquals(calculatedLog10Evidence2, expectedLog10Evidence2, 0.05);
}
Also used : BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Assert(org.testng.Assert) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) MathArrays(org.apache.commons.math3.util.MathArrays) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Test(org.testng.annotations.Test) Assert(org.junit.Assert) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 57 with Sum

use of org.apache.commons.math3.stat.descriptive.summary.Sum in project gatk-protected by broadinstitute.

the class ReadCountCollectionUtils method imputeZeroCountsAsTargetMedians.

/**
     * Impute zero counts to the median of non-zero values in the enclosing target row.
     *
     * <p>The imputation is done in-place, thus the input matrix is well be modified as a result of this call.</p>
     *
     * @param readCounts the input and output read-count matrix.
     */
public static void imputeZeroCountsAsTargetMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final int targetCount = counts.getRowDimension();
    final Median medianCalculator = new Median();
    int totalCounts = counts.getColumnDimension() * counts.getRowDimension();
    // Get the number of zeroes contained in the counts.
    final long totalZeroCounts = IntStream.range(0, targetCount).mapToLong(t -> DoubleStream.of(counts.getRow(t)).filter(c -> c == 0.0).count()).sum();
    // Get the median of each row, not including any entries that are zero.
    final double[] medians = IntStream.range(0, targetCount).mapToDouble(t -> medianCalculator.evaluate(DoubleStream.of(counts.getRow(t)).filter(c -> c != 0.0).toArray())).toArray();
    // Change any zeros in the counts to the median for the row.
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int row, final int column, final double value) {
            return value != 0 ? value : medians[row];
        }
    });
    if (totalZeroCounts > 0) {
        final double totalZeroCountsPercentage = 100.0 * (totalZeroCounts / totalCounts);
        logger.info(String.format("Some 0.0 counts (%d out of %d, %.2f%%) were imputed to their enclosing target " + "non-zero median", totalZeroCounts, totalZeroCounts, totalZeroCountsPercentage));
    } else {
        logger.info("No count is 0.0 thus no count needed to be imputed.");
    }
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 58 with Sum

use of org.apache.commons.math3.stat.descriptive.summary.Sum in project gatk by broadinstitute.

the class TestHMM method fromPhredProbabilities.

static TestHMM fromPhredProbabilities(final double... phred) {
    final double[] priors = Arrays.copyOfRange(phred, 0, State.values().length);
    final double[][] transition = reshape(Arrays.copyOfRange(phred, State.values().length, State.values().length * (State.values().length + 1)), State.values().length);
    final double[][] emission = reshape(Arrays.copyOfRange(phred, (State.values().length + 1) * State.values().length, phred.length), State.values().length);
    final DoubleUnaryOperator phredToLog = d -> d * -.1 * LN_OF_10;
    final double[] logPriorsRaw = DoubleStream.of(priors).map(phredToLog).toArray();
    final double logPriorsSum = GATKProtectedMathUtils.logSumExp(logPriorsRaw);
    final double[] logPriors = DoubleStream.of(logPriorsRaw).map(d -> d - logPriorsSum).toArray();
    final double[][] logEmissionProbs = Stream.of(emission).map(x -> {
        final double[] result = DoubleStream.of(x).map(phredToLog).toArray();
        final double sum = GATKProtectedMathUtils.logSumExp(result);
        return DoubleStream.of(result).map(d -> d - sum).toArray();
    }).toArray(double[][]::new);
    final double[][] logTransitionProbs = Stream.of(transition).map(x -> {
        final double[] result = DoubleStream.of(x).map(phredToLog).toArray();
        final double sum = GATKProtectedMathUtils.logSumExp(result);
        return DoubleStream.of(result).map(d -> d - sum).toArray();
    }).toArray(double[][]::new);
    return new TestHMM(logPriors, logEmissionProbs, logTransitionProbs);
}
Also used : java.util(java.util) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) Stream(java.util.stream.Stream) Pair(org.apache.commons.math3.util.Pair) Utils(org.broadinstitute.hellbender.utils.Utils) ArrayUtils(org.apache.commons.lang3.ArrayUtils) Collectors(java.util.stream.Collectors) DoubleUnaryOperator(java.util.function.DoubleUnaryOperator) DoubleStream(java.util.stream.DoubleStream) DoubleUnaryOperator(java.util.function.DoubleUnaryOperator)

Example 59 with Sum

use of org.apache.commons.math3.stat.descriptive.summary.Sum in project metron by apache.

the class StellarStatisticsFunctionsTest method run.

/**
 * Runs a Stellar expression.
 * @param expr The expression to run.
 * @param variables The variables available to the expression.
 */
private static Object run(String expr, Map<String, Object> variables) {
    StellarProcessor processor = new StellarProcessor();
    Object ret = processor.parse(expr, new DefaultVariableResolver(x -> variables.get(x), x -> variables.containsKey(x)), StellarFunctions.FUNCTION_RESOLVER(), Context.EMPTY_CONTEXT());
    byte[] raw = SerDeUtils.toBytes(ret);
    Object actual = SerDeUtils.fromBytes(raw, Object.class);
    if (ret instanceof StatisticsProvider) {
        StatisticsProvider left = (StatisticsProvider) ret;
        StatisticsProvider right = (StatisticsProvider) actual;
        // N
        tolerantAssertEquals(prov -> prov.getCount(), left, right);
        // sum
        tolerantAssertEquals(prov -> prov.getSum(), left, right, 1e-3);
        // sum of squares
        tolerantAssertEquals(prov -> prov.getSumSquares(), left, right, 1e-3);
        // sum of squares
        tolerantAssertEquals(prov -> prov.getSumLogs(), left, right, 1e-3);
        // Mean
        tolerantAssertEquals(prov -> prov.getMean(), left, right, 1e-3);
        // Quadratic Mean
        tolerantAssertEquals(prov -> prov.getQuadraticMean(), left, right, 1e-3);
        // SD
        tolerantAssertEquals(prov -> prov.getStandardDeviation(), left, right, 1e-3);
        // Variance
        tolerantAssertEquals(prov -> prov.getVariance(), left, right, 1e-3);
        // Min
        tolerantAssertEquals(prov -> prov.getMin(), left, right, 1e-3);
        // Max
        tolerantAssertEquals(prov -> prov.getMax(), left, right, 1e-3);
        // Kurtosis
        tolerantAssertEquals(prov -> prov.getKurtosis(), left, right, 1e-3);
        // Skewness
        tolerantAssertEquals(prov -> prov.getSkewness(), left, right, 1e-3);
        for (double d = 10.0; d < 100.0; d += 10) {
            final double pctile = d;
            // This is a sketch, so we're a bit more forgiving here in our choice of \epsilon.
            tolerantAssertEquals(prov -> prov.getPercentile(pctile), left, right, 1e-2);
        }
    }
    return ret;
}
Also used : StellarProcessor(org.apache.metron.stellar.common.StellarProcessor) java.util(java.util) Assert.assertNotNull(org.junit.Assert.assertNotNull) SerDeUtils(org.apache.metron.common.utils.SerDeUtils) StellarProcessor(org.apache.metron.stellar.common.StellarProcessor) RunWith(org.junit.runner.RunWith) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) GaussianRandomGenerator(org.apache.commons.math3.random.GaussianRandomGenerator) DefaultVariableResolver(org.apache.metron.stellar.dsl.DefaultVariableResolver) Function(java.util.function.Function) String.format(java.lang.String.format) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) ImmutableList(com.google.common.collect.ImmutableList) MersenneTwister(org.apache.commons.math3.random.MersenneTwister) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) StellarFunctions(org.apache.metron.stellar.dsl.StellarFunctions) Assert(org.junit.Assert) Parameterized(org.junit.runners.Parameterized) Assert.assertEquals(org.junit.Assert.assertEquals) Joiner(com.google.common.base.Joiner) Context(org.apache.metron.stellar.dsl.Context) Before(org.junit.Before) DefaultVariableResolver(org.apache.metron.stellar.dsl.DefaultVariableResolver)

Example 60 with Sum

use of org.apache.commons.math3.stat.descriptive.summary.Sum in project imagingbook-common by imagingbook.

the class ProcrustesFit method getEuclideanError.

/**
 * Calculates the total error for the estimated fit as
 * the sum of the squared Euclidean distances between the
 * transformed point set X and the reference set Y.
 * This method is provided for testing as an alternative to
 * the quicker {@link getError} method.
 * @param X Sequence of n-dimensional points.
 * @param Y Sequence of n-dimensional points (reference).
 * @return The total error for the estimated fit.
 */
public double getEuclideanError(List<double[]> X, List<double[]> Y) {
    RealMatrix cR = R.scalarMultiply(c);
    double ee = 0;
    for (int i = 0; i < X.size(); i++) {
        RealVector ai = new ArrayRealVector(X.get(i));
        RealVector bi = new ArrayRealVector(Y.get(i));
        RealVector aiT = cR.operate(ai).add(t);
        double ei = aiT.subtract(bi).getNorm();
        ee = ee + sqr(ei);
    }
    return ee;
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 Collectors (java.util.stream.Collectors)15 java.util (java.util)12 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)12 List (java.util.List)11 IntStream (java.util.stream.IntStream)11 Logger (org.apache.logging.log4j.Logger)11 ArrayList (java.util.ArrayList)9 LogManager (org.apache.logging.log4j.LogManager)9 IOException (java.io.IOException)8 Map (java.util.Map)8 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)8 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)8 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)8 Test (org.testng.annotations.Test)8 File (java.io.File)7 VisibleForTesting (com.google.common.annotations.VisibleForTesting)6 Arrays (java.util.Arrays)6 Nonnull (javax.annotation.Nonnull)6