use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk-protected by broadinstitute.
the class OptimizationUtils method argmax.
public static double argmax(final Function<Double, Double> function, final double min, final double max, final double guess,
                            final double relativeTolerance, final double absoluteTolerance, final int maxEvaluations) {
    final BrentOptimizer optimizer = new BrentOptimizer(relativeTolerance, absoluteTolerance);
    final SearchInterval interval = new SearchInterval(min, max, guess);
    return optimizer.optimize(new UnivariateObjectiveFunction(function::apply), GoalType.MAXIMIZE, interval, new MaxEval(maxEvaluations)).getPoint();
}
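A minimal usage sketch (the quadratic objective, search bounds, tolerances, and evaluation budget below are illustrative values chosen for this example, not taken from the GATK source):
// Maximize a downward-opening parabola on [0, 10], starting the search at 5.
final double mode = OptimizationUtils.argmax(x -> -(x - 3.0) * (x - 3.0), 0.0, 10.0, 5.0, 1e-6, 1e-8, 100);
// mode comes back very close to 3.0, the vertex of the parabola.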
use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk by broadinstitute.
the class HetPulldownCalculator method isPileupHetCompatible.
/**
* Returns true if the distribution of major and other base-pair counts from a pileup at a locus is compatible with
* an allele fraction of 0.5.
*
* <p>
* Compatibility is defined by a p-value threshold. That is, compute the two-sided p-value of observing
* a number of major read counts out of a total number of reads, assuming the given heterozygous
* allele fraction. If the p-value is less than the given threshold, then reject the null hypothesis
* that the heterozygous allele fraction is 0.5 (i.e., SNP is likely to be homozygous) and return false,
* otherwise return true.
* </p>
* @param baseCounts base-pair counts
* @param totalBaseCount total base-pair counts (excluding N, etc.)
* @param pvalThreshold p-value threshold for two-sided binomial test (should be in [0, 1], but no check is performed)
* @return boolean compatibility with heterozygous allele fraction
*/
@VisibleForTesting
protected static boolean isPileupHetCompatible(final Nucleotide.Counter baseCounts, final int totalBaseCount, final double pvalThreshold) {
    final int majorReadCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).max().getAsInt();
    if (majorReadCount == 0 || totalBaseCount - majorReadCount == 0) {
        return false;
    }
    final double pval = new BinomialTest().binomialTest(totalBaseCount, majorReadCount, HET_ALLELE_FRACTION, AlternativeHypothesis.TWO_SIDED);
    return pval >= pvalThreshold;
}
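The two-sided test described in the Javadoc can be exercised directly against commons-math3; the read counts, allele fraction, and threshold below are illustrative, not GATK constants:
// Two-sided binomial test: 60 major reads out of 100 total, expected het allele fraction 0.5.
final double pval = new BinomialTest().binomialTest(100, 60, 0.5, AlternativeHypothesis.TWO_SIDED);
// pval is roughly 0.057, so with a threshold of 0.05 the pileup would still be considered het-compatible.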
use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk by broadinstitute.
the class PosteriorSummaryUtils method calculatePosteriorMode.
/**
* Given a list of posterior samples, returns an estimate of the posterior mode (using
* mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
* Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation),
* or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain
* {@link Double#NaN}, {@link Double#NaN} will be returned.
* @param samples posterior samples, cannot be {@code null} and number of samples must be greater than 0
* @param ctx {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
*/
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");
    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));
    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }
    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth = SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);
    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);
    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer = new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective = new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] { f })[0]);
    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}
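A hedged calling sketch (the local Spark context and the sample values are assumptions made for illustration, not part of the GATK source):
// Spin up a local Spark context and estimate the mode of a few posterior samples.
final JavaSparkContext ctx = new JavaSparkContext("local[1]", "posterior-mode-sketch");
final List<Double> samples = Arrays.asList(0.90, 0.95, 1.00, 1.00, 1.05, 1.10);
final double mode = PosteriorSummaryUtils.calculatePosteriorMode(samples, ctx);
// The estimate lands near 1.0 for this roughly symmetric sample.
ctx.stop();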
use of org.apache.commons.math3.stat.descriptive.rank.Max in project gatk by broadinstitute.
the class SomaticLikelihoodsEngineUnitTest method testEvidence.
@Test
public void testEvidence() {
    // one exact limit for the evidence is when the likelihoods of each read are so peaked (i.e. the most likely allele
    // of each read is much likelier than all other alleles) that the sum over latent read-to-allele assignments
    // (that is, over the indicator z in the notes) is dominated by the max-likelihood allele configuration
    // and thus the evidence reduces to exactly integrating out the Dirichlet allele fractions
    final double[] prior = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods = new Array2DRowRealMatrix(2, 4);
    log10Likelihoods.setRow(0, new double[] { 0.1, 4.0, 3.0, -10 });
    log10Likelihoods.setRow(1, new double[] { -12, -9, -5.0, 0.5 });
    final double calculatedLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods, prior);
    final double[] maxLikelihoodCounts = new double[] { 3, 1 };
    final double expectedLog10Evidence = SomaticLikelihoodsEngine.log10DirichletNormalization(prior)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior, maxLikelihoodCounts))
            + new IndexRange(0, log10Likelihoods.getColumnDimension()).sum(read -> log10Likelihoods.getColumnVector(read).getMaxValue());
    Assert.assertEquals(calculatedLog10Evidence, expectedLog10Evidence, 1e-5);
    // when there's just one read we can calculate the likelihood exactly
    final double[] prior2 = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods2 = new Array2DRowRealMatrix(2, 1);
    log10Likelihoods2.setRow(0, new double[] { 0.1 });
    log10Likelihoods2.setRow(1, new double[] { 0.5 });
    final double calculatedLog10Evidence2 = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods2, prior2);
    final double[] delta0 = new double[] { 1, 0 };
    final double[] delta1 = new double[] { 0, 1 };
    final double expectedLog10Evidence2 = MathUtils.log10SumLog10(
            log10Likelihoods2.getEntry(0, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2)
                    - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta0)),
            log10Likelihoods2.getEntry(1, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2)
                    - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta1)));
    Assert.assertEquals(calculatedLog10Evidence2, expectedLog10Evidence2, 0.05);
}
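For reference, the log10DirichletNormalization terms above amount to log-gamma arithmetic, since log(1/B(alpha)) = logGamma(sum of alpha_i) - sum of logGamma(alpha_i). A minimal sketch of such a computation using org.apache.commons.math3.special.Gamma (an illustration of the formula, not the GATK implementation) is:
// Illustrative helper: log10 of the Dirichlet normalization constant 1/B(alpha).
static double log10DirichletNormalizationSketch(final double[] alpha) {
    final double alphaSum = Arrays.stream(alpha).sum();
    final double logNormalization = Gamma.logGamma(alphaSum) - Arrays.stream(alpha).map(Gamma::logGamma).sum();
    return logNormalization / Math.log(10); // convert natural log to log10
}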
use of org.apache.commons.math3.stat.descriptive.rank.Max in project dhis2-core by dhis2.
the class DefaultChartService method getJFreeChartHistory.
@Override
public JFreeChart getJFreeChartHistory(DataElement dataElement, DataElementCategoryOptionCombo categoryOptionCombo, DataElementCategoryOptionCombo attributeOptionCombo,
        Period lastPeriod, OrganisationUnit organisationUnit, int historyLength, I18nFormat format) {
    lastPeriod = periodService.reloadPeriod(lastPeriod);
    List<Period> periods = periodService.getPeriods(lastPeriod, historyLength);
    MinMaxDataElement minMax = minMaxDataElementService.getMinMaxDataElement(organisationUnit, dataElement, categoryOptionCombo);
    UnivariateInterpolator interpolator = new SplineInterpolator();
    Integer periodCount = 0;
    List<Double> x = new ArrayList<>();
    List<Double> y = new ArrayList<>();
    // ---------------------------------------------------------------------
    // DataValue, MinValue and MaxValue DataSets
    // ---------------------------------------------------------------------
    DefaultCategoryDataset dataValueDataSet = new DefaultCategoryDataset();
    DefaultCategoryDataset metaDataSet = new DefaultCategoryDataset();
    for (Period period : periods) {
        ++periodCount;
        period.setName(format.formatPeriod(period));
        DataValue dataValue = dataValueService.getDataValue(dataElement, period, organisationUnit, categoryOptionCombo, attributeOptionCombo);
        double value = 0;
        if (dataValue != null && dataValue.getValue() != null && MathUtils.isNumeric(dataValue.getValue())) {
            value = Double.parseDouble(dataValue.getValue());
            x.add(periodCount.doubleValue());
            y.add(value);
        }
        dataValueDataSet.addValue(value, dataElement.getShortName(), period.getName());
        if (minMax != null) {
            metaDataSet.addValue(minMax.getMin(), "Min value", period.getName());
            metaDataSet.addValue(minMax.getMax(), "Max value", period.getName());
        }
    }
    // minimum 3 points required for interpolation
    if (x.size() >= 3) {
        periodCount = 0;
        double[] xa = getArray(x);
        int min = MathUtils.getMin(xa).intValue();
        int max = MathUtils.getMax(xa).intValue();
        try {
            UnivariateFunction function = interpolator.interpolate(xa, getArray(y));
            for (Period period : periods) {
                if (++periodCount >= min && periodCount <= max) {
                    metaDataSet.addValue(function.value(periodCount), "Regression value", period.getName());
                }
            }
        } catch (MathRuntimeException ex) {
            throw new RuntimeException("Failed to interpolate", ex);
        }
    }
    // ---------------------------------------------------------------------
    // Plots
    // ---------------------------------------------------------------------
    CategoryPlot plot = getCategoryPlot(dataValueDataSet, getBarRenderer(), PlotOrientation.VERTICAL, CategoryLabelPositions.UP_45);
    plot.setDataset(1, metaDataSet);
    plot.setRenderer(1, getLineRenderer());
    JFreeChart jFreeChart = getBasicJFreeChart(plot);
    return jFreeChart;
}
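A hedged sketch of the interpolation step in isolation (the data points are made up for illustration, not DHIS2 values): SplineInterpolator fits a cubic spline through the observed (period, value) pairs, which can then be evaluated at any period index inside the observed range.
// Fit a spline through four (period, value) points and evaluate it between observations.
UnivariateInterpolator interpolator = new SplineInterpolator();
UnivariateFunction spline = interpolator.interpolate(new double[] { 1, 2, 3, 4 }, new double[] { 10.0, 14.0, 13.0, 17.0 });
double smoothedValue = spline.value(2.5); // regression value between the second and third periods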