Search in sources :

Example 66 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk-protected by broadinstitute.

the class OptimizationUtils method argmax.

public static double argmax(final Function<Double, Double> function, final double min, final double max, final double guess, final double relativeTolerance, final double absoluteTolerance, final int maxEvaluations) {
    final BrentOptimizer optimizer = new BrentOptimizer(relativeTolerance, absoluteTolerance);
    final SearchInterval interval = new SearchInterval(min, max, guess);
    return optimizer.optimize(new UnivariateObjectiveFunction(function::apply), GoalType.MAXIMIZE, interval, new MaxEval(maxEvaluations)).getPoint();
}
Also used : SearchInterval(org.apache.commons.math3.optim.univariate.SearchInterval) MaxEval(org.apache.commons.math3.optim.MaxEval) UnivariateObjectiveFunction(org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction) BrentOptimizer(org.apache.commons.math3.optim.univariate.BrentOptimizer)

Example 67 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk by broadinstitute.

the class HetPulldownCalculator method isPileupHetCompatible.

/**
     * Returns true if the distribution of major and other base-pair counts from a pileup at a locus is compatible with
     * allele fraction of 0.5.
     *
     * <p>
     *     Compatibility is defined by a p-value threshold.  That is, compute the two-sided p-value of observing
     *     a number of major read counts out of a total number of reads, assuming the given heterozygous
     *     allele fraction.  If the p-value is less than the given threshold, then reject the null hypothesis
     *     that the heterozygous allele fraction is 0.5 (i.e., SNP is likely to be homozygous) and return false,
     *     otherwise return true.
     * </p>
     * @param baseCounts        base-pair counts
     * @param totalBaseCount    total base-pair counts (excluding N, etc.)
     * @param pvalThreshold     p-value threshold for two-sided binomial test (should be in [0, 1], but no check is performed)
     * @return                  boolean compatibility with heterozygous allele fraction
     */
@VisibleForTesting
protected static boolean isPileupHetCompatible(final Nucleotide.Counter baseCounts, final int totalBaseCount, final double pvalThreshold) {
    final int majorReadCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).max().getAsInt();
    if (majorReadCount == 0 || totalBaseCount - majorReadCount == 0) {
        return false;
    }
    final double pval = new BinomialTest().binomialTest(totalBaseCount, majorReadCount, HET_ALLELE_FRACTION, AlternativeHypothesis.TWO_SIDED);
    return pval >= pvalThreshold;
}
Also used : BinomialTest(org.apache.commons.math3.stat.inference.BinomialTest) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 68 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk by broadinstitute.

the class PosteriorSummaryUtils method calculatePosteriorMode.

/**
     * Given a list of posterior samples, returns an estimate of the posterior mode (using
     * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
     * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation),
     * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain
     * {@link Double#NaN}, {@link Double#NaN} will be returned.
     * @param samples   posterior samples, cannot be {@code null} and number of samples must be greater than 0
     * @param ctx       {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
     */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");
    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));
    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }
    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth = SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);
    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);
    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer = new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective = new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] { f })[0]);
    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) SearchInterval(org.apache.commons.math3.optim.univariate.SearchInterval) UnivariateObjectiveFunction(org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction) BrentOptimizer(org.apache.commons.math3.optim.univariate.BrentOptimizer) KernelDensity(org.apache.spark.mllib.stat.KernelDensity) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)

Example 69 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk by broadinstitute.

the class SomaticLikelihoodsEngineUnitTest method testEvidence.

@Test
public void testEvidence() {
    // one exact limit for the evidence is when the likelihoods of each read are so peaked (i.e. the most likely allele
    // of each read is much likelier than all other alleles) that the sum over latent read-to-allele assignments
    // (that is, over the indicator z in the notes) is dominated by the max-likelihood allele configuration
    // and thus the evidence reduces to exactly integrating out the Dirichlet allele fractions
    final double[] prior = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods = new Array2DRowRealMatrix(2, 4);
    log10Likelihoods.setRow(0, new double[] { 0.1, 4.0, 3.0, -10 });
    log10Likelihoods.setRow(1, new double[] { -12, -9, -5.0, 0.5 });
    final double calculatedLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods, prior);
    final double[] maxLikelihoodCounts = new double[] { 3, 1 };
    final double expectedLog10Evidence = SomaticLikelihoodsEngine.log10DirichletNormalization(prior) - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior, maxLikelihoodCounts)) + new IndexRange(0, log10Likelihoods.getColumnDimension()).sum(read -> log10Likelihoods.getColumnVector(read).getMaxValue());
    Assert.assertEquals(calculatedLog10Evidence, expectedLog10Evidence, 1e-5);
    // when there's just one read we can calculate the likelihood exactly
    final double[] prior2 = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods2 = new Array2DRowRealMatrix(2, 1);
    log10Likelihoods2.setRow(0, new double[] { 0.1 });
    log10Likelihoods2.setRow(1, new double[] { 0.5 });
    final double calculatedLog10Evidence2 = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods2, prior2);
    final double[] delta0 = new double[] { 1, 0 };
    final double[] delta1 = new double[] { 0, 1 };
    final double expectedLog10Evidence2 = MathUtils.log10SumLog10(log10Likelihoods2.getEntry(0, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2) - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta0)), +log10Likelihoods2.getEntry(1, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2) - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta1)));
    Assert.assertEquals(calculatedLog10Evidence2, expectedLog10Evidence2, 0.05);
}
Also used : BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Assert(org.testng.Assert) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) MathArrays(org.apache.commons.math3.util.MathArrays) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Test(org.testng.annotations.Test) Assert(org.junit.Assert) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 70 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project dhis2-core by dhis2.

the class DefaultChartService method getJFreeChartHistory.

@Override
public JFreeChart getJFreeChartHistory(DataElement dataElement, DataElementCategoryOptionCombo categoryOptionCombo, DataElementCategoryOptionCombo attributeOptionCombo, Period lastPeriod, OrganisationUnit organisationUnit, int historyLength, I18nFormat format) {
    lastPeriod = periodService.reloadPeriod(lastPeriod);
    List<Period> periods = periodService.getPeriods(lastPeriod, historyLength);
    MinMaxDataElement minMax = minMaxDataElementService.getMinMaxDataElement(organisationUnit, dataElement, categoryOptionCombo);
    UnivariateInterpolator interpolator = new SplineInterpolator();
    Integer periodCount = 0;
    List<Double> x = new ArrayList<>();
    List<Double> y = new ArrayList<>();
    // ---------------------------------------------------------------------
    // DataValue, MinValue and MaxValue DataSets
    // ---------------------------------------------------------------------
    DefaultCategoryDataset dataValueDataSet = new DefaultCategoryDataset();
    DefaultCategoryDataset metaDataSet = new DefaultCategoryDataset();
    for (Period period : periods) {
        ++periodCount;
        period.setName(format.formatPeriod(period));
        DataValue dataValue = dataValueService.getDataValue(dataElement, period, organisationUnit, categoryOptionCombo, attributeOptionCombo);
        double value = 0;
        if (dataValue != null && dataValue.getValue() != null && MathUtils.isNumeric(dataValue.getValue())) {
            value = Double.parseDouble(dataValue.getValue());
            x.add(periodCount.doubleValue());
            y.add(value);
        }
        dataValueDataSet.addValue(value, dataElement.getShortName(), period.getName());
        if (minMax != null) {
            metaDataSet.addValue(minMax.getMin(), "Min value", period.getName());
            metaDataSet.addValue(minMax.getMax(), "Max value", period.getName());
        }
    }
    if (// minimum 3 points required for interpolation
    x.size() >= 3) {
        periodCount = 0;
        double[] xa = getArray(x);
        int min = MathUtils.getMin(xa).intValue();
        int max = MathUtils.getMax(xa).intValue();
        try {
            UnivariateFunction function = interpolator.interpolate(xa, getArray(y));
            for (Period period : periods) {
                if (++periodCount >= min && periodCount <= max) {
                    metaDataSet.addValue(function.value(periodCount), "Regression value", period.getName());
                }
            }
        } catch (MathRuntimeException ex) {
            throw new RuntimeException("Failed to interpolate", ex);
        }
    }
    // ---------------------------------------------------------------------
    // Plots
    // ---------------------------------------------------------------------
    CategoryPlot plot = getCategoryPlot(dataValueDataSet, getBarRenderer(), PlotOrientation.VERTICAL, CategoryLabelPositions.UP_45);
    plot.setDataset(1, metaDataSet);
    plot.setRenderer(1, getLineRenderer());
    JFreeChart jFreeChart = getBasicJFreeChart(plot);
    return jFreeChart;
}
Also used : MathRuntimeException(org.apache.commons.math3.exception.MathRuntimeException) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) DataValue(org.hisp.dhis.datavalue.DataValue) Period(org.hisp.dhis.period.Period) JFreeChart(org.jfree.chart.JFreeChart) MathRuntimeException(org.apache.commons.math3.exception.MathRuntimeException) MinMaxDataElement(org.hisp.dhis.minmax.MinMaxDataElement) SplineInterpolator(org.apache.commons.math3.analysis.interpolation.SplineInterpolator) UnivariateInterpolator(org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator) DefaultCategoryDataset(org.jfree.data.category.DefaultCategoryDataset)

Aggregations

ArrayList (java.util.ArrayList)26 List (java.util.List)19 Collectors (java.util.stream.Collectors)13 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)13 Arrays (java.util.Arrays)11 Map (java.util.Map)11 IntStream (java.util.stream.IntStream)10 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)10 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)9 RealMatrix (org.apache.commons.math3.linear.RealMatrix)9 Plot2 (ij.gui.Plot2)8 File (java.io.File)8 IOException (java.io.IOException)8 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)7 Test (org.testng.annotations.Test)7 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)6 Collections (java.util.Collections)6 HashMap (java.util.HashMap)6 Random (java.util.Random)6 UnivariateFunction (org.apache.commons.math3.analysis.UnivariateFunction)6