Use of org.apache.commons.math3.stat.descriptive.summary.Sum in project gatk by broadinstitute:
class SomaticLikelihoodsEngineUnitTest, method testEvidence.
@Test
public void testEvidence() {
    // An exact limit for the evidence arises when per-read likelihoods are sharply peaked
    // (each read's best allele is far likelier than all the others): the sum over latent
    // read-to-allele assignments (the indicator z in the notes) is then dominated by the
    // max-likelihood configuration, and the evidence reduces to exactly integrating out
    // the Dirichlet allele fractions.
    final double[] peakedPrior = new double[] { 1, 2 };
    final RealMatrix peakedLog10Likelihoods = new Array2DRowRealMatrix(2, 4);
    peakedLog10Likelihoods.setRow(0, new double[] { 0.1, 4.0, 3.0, -10 });
    peakedLog10Likelihoods.setRow(1, new double[] { -12, -9, -5.0, 0.5 });
    // Counts of reads assigned to each allele in the max-likelihood configuration.
    final double[] maxLikelihoodCounts = new double[] { 3, 1 };
    // Sum over reads of each read's best (column-max) log10 likelihood.
    final double bestAssignmentLog10Sum = new IndexRange(0, peakedLog10Likelihoods.getColumnDimension())
            .sum(read -> peakedLog10Likelihoods.getColumnVector(read).getMaxValue());
    final double expectedPeakedLog10Evidence = SomaticLikelihoodsEngine.log10DirichletNormalization(peakedPrior)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(peakedPrior, maxLikelihoodCounts))
            + bestAssignmentLog10Sum;
    final double actualPeakedLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(peakedLog10Likelihoods, peakedPrior);
    Assert.assertEquals(actualPeakedLog10Evidence, expectedPeakedLog10Evidence, 1e-5);
    // With a single read the likelihood can be computed exactly: a two-term log-sum,
    // one term per possible allele assignment of that read.
    final double[] singleReadPrior = new double[] { 1, 2 };
    final RealMatrix singleReadLog10Likelihoods = new Array2DRowRealMatrix(2, 1);
    singleReadLog10Likelihoods.setRow(0, new double[] { 0.1 });
    singleReadLog10Likelihoods.setRow(1, new double[] { 0.5 });
    // Unit count vectors for assigning the lone read to allele 0 or allele 1.
    final double[] delta0 = new double[] { 1, 0 };
    final double[] delta1 = new double[] { 0, 1 };
    final double assignToAllele0Term = singleReadLog10Likelihoods.getEntry(0, 0)
            + SomaticLikelihoodsEngine.log10DirichletNormalization(singleReadPrior)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(singleReadPrior, delta0));
    final double assignToAllele1Term = singleReadLog10Likelihoods.getEntry(1, 0)
            + SomaticLikelihoodsEngine.log10DirichletNormalization(singleReadPrior)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(singleReadPrior, delta1));
    final double expectedSingleReadLog10Evidence = MathUtils.log10SumLog10(assignToAllele0Term, assignToAllele1Term);
    final double actualSingleReadLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(singleReadLog10Likelihoods, singleReadPrior);
    Assert.assertEquals(actualSingleReadLog10Evidence, expectedSingleReadLog10Evidence, 0.05);
}
Use of org.apache.commons.math3.stat.descriptive.summary.Sum in project gatk-protected by broadinstitute:
class ReadCountCollectionUtils, method imputeZeroCountsAsTargetMedians.
/**
 * Impute zero counts to the median of non-zero values in the enclosing target row.
 *
 * <p>The imputation is done in-place, thus the input matrix will be modified as a result of this call.</p>
 *
 * @param readCounts the input and output read-count matrix.
 * @param logger used to report how many counts (if any) were imputed.
 */
public static void imputeZeroCountsAsTargetMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final int targetCount = counts.getRowDimension();
    final Median medianCalculator = new Median();
    // Long arithmetic so the product cannot overflow for very large matrices.
    final long totalCounts = (long) counts.getColumnDimension() * counts.getRowDimension();
    // Get the number of zeroes contained in the counts.
    final long totalZeroCounts = IntStream.range(0, targetCount)
            .mapToLong(t -> DoubleStream.of(counts.getRow(t)).filter(c -> c == 0.0).count())
            .sum();
    // Get the median of each row, not including any entries that are zero.
    final double[] medians = IntStream.range(0, targetCount)
            .mapToDouble(t -> medianCalculator.evaluate(DoubleStream.of(counts.getRow(t)).filter(c -> c != 0.0).toArray()))
            .toArray();
    // Change any zeros in the counts to the median for the row.
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value != 0 ? value : medians[row];
        }
    });
    if (totalZeroCounts > 0) {
        // Floating-point division: the previous integer division always truncated the percentage to 0.
        final double totalZeroCountsPercentage = 100.0 * totalZeroCounts / totalCounts;
        // Fixed format arguments: the second %d previously repeated totalZeroCounts instead of totalCounts.
        logger.info(String.format("Some 0.0 counts (%d out of %d, %.2f%%) were imputed to their enclosing target "
                + "non-zero median", totalZeroCounts, totalCounts, totalZeroCountsPercentage));
    } else {
        logger.info("No count is 0.0 thus no count needed to be imputed.");
    }
}
Use of org.apache.commons.math3.stat.descriptive.summary.Sum in project gatk by broadinstitute:
class TestHMM, method fromPhredProbabilities.
/**
 * Builds a {@code TestHMM} from a flat array of Phred-scaled probabilities laid out as:
 * priors (n values), then the transition matrix (n*n values, row-major), then the emission
 * matrix (the remainder), where n is the number of states. Each group is converted to
 * natural-log space and normalized so its (row) probabilities sum to one.
 */
static TestHMM fromPhredProbabilities(final double... phred) {
    final int n = State.values().length;
    // Slice the flat input: [0, n) priors, [n, n + n*n) transition, remainder emission.
    final double[] phredPriors = Arrays.copyOfRange(phred, 0, n);
    final double[][] phredTransition = reshape(Arrays.copyOfRange(phred, n, n * (n + 1)), n);
    final double[][] phredEmission = reshape(Arrays.copyOfRange(phred, (n + 1) * n, phred.length), n);
    // Phred is -10*log10(p), hence log(p) = phred * -0.1 * ln(10).
    final DoubleUnaryOperator toLog = q -> q * -.1 * LN_OF_10;
    // Normalize the priors in log space.
    final double[] rawLogPriors = DoubleStream.of(phredPriors).map(toLog).toArray();
    final double priorNormalization = GATKProtectedMathUtils.logSumExp(rawLogPriors);
    final double[] logPriors = DoubleStream.of(rawLogPriors).map(v -> v - priorNormalization).toArray();
    // Normalize each emission row in log space.
    final double[][] logEmissionProbs = Stream.of(phredEmission).map(row -> {
        final double[] logRow = DoubleStream.of(row).map(toLog).toArray();
        final double rowNormalization = GATKProtectedMathUtils.logSumExp(logRow);
        return DoubleStream.of(logRow).map(v -> v - rowNormalization).toArray();
    }).toArray(double[][]::new);
    // Normalize each transition row in log space.
    final double[][] logTransitionProbs = Stream.of(phredTransition).map(row -> {
        final double[] logRow = DoubleStream.of(row).map(toLog).toArray();
        final double rowNormalization = GATKProtectedMathUtils.logSumExp(logRow);
        return DoubleStream.of(logRow).map(v -> v - rowNormalization).toArray();
    }).toArray(double[][]::new);
    return new TestHMM(logPriors, logEmissionProbs, logTransitionProbs);
}
Use of org.apache.commons.math3.stat.descriptive.summary.Sum in project metron by apache:
class StellarStatisticsFunctionsTest, method run.
/**
 * Runs a Stellar expression.
 *
 * <p>The result is also round-tripped through serialization via {@code SerDeUtils}; when the
 * result is a {@code StatisticsProvider}, the deserialized copy is compared to the original
 * statistic-by-statistic within a small tolerance, to verify serialization preserves state.</p>
 *
 * @param expr The expression to run.
 * @param variables The variables available to the expression.
 * @return The result of evaluating the expression (the pre-serialization value).
 */
private static Object run(String expr, Map<String, Object> variables) {
    StellarProcessor processor = new StellarProcessor();
    Object ret = processor.parse(expr, new DefaultVariableResolver(x -> variables.get(x), x -> variables.containsKey(x)), StellarFunctions.FUNCTION_RESOLVER(), Context.EMPTY_CONTEXT());
    // Serialize and deserialize the result to check round-trip fidelity below.
    byte[] raw = SerDeUtils.toBytes(ret);
    Object actual = SerDeUtils.fromBytes(raw, Object.class);
    if (ret instanceof StatisticsProvider) {
        StatisticsProvider left = (StatisticsProvider) ret;
        StatisticsProvider right = (StatisticsProvider) actual;
        // N (count of values)
        tolerantAssertEquals(prov -> prov.getCount(), left, right);
        // sum
        tolerantAssertEquals(prov -> prov.getSum(), left, right, 1e-3);
        // sum of squares
        tolerantAssertEquals(prov -> prov.getSumSquares(), left, right, 1e-3);
        // sum of logs (comment previously duplicated "sum of squares")
        tolerantAssertEquals(prov -> prov.getSumLogs(), left, right, 1e-3);
        // Mean
        tolerantAssertEquals(prov -> prov.getMean(), left, right, 1e-3);
        // Quadratic Mean
        tolerantAssertEquals(prov -> prov.getQuadraticMean(), left, right, 1e-3);
        // SD
        tolerantAssertEquals(prov -> prov.getStandardDeviation(), left, right, 1e-3);
        // Variance
        tolerantAssertEquals(prov -> prov.getVariance(), left, right, 1e-3);
        // Min
        tolerantAssertEquals(prov -> prov.getMin(), left, right, 1e-3);
        // Max
        tolerantAssertEquals(prov -> prov.getMax(), left, right, 1e-3);
        // Kurtosis
        tolerantAssertEquals(prov -> prov.getKurtosis(), left, right, 1e-3);
        // Skewness
        tolerantAssertEquals(prov -> prov.getSkewness(), left, right, 1e-3);
        // Percentiles at 10, 20, ..., 90.
        for (double d = 10.0; d < 100.0; d += 10) {
            final double pctile = d;
            // This is a sketch, so we're a bit more forgiving here in our choice of \epsilon.
            tolerantAssertEquals(prov -> prov.getPercentile(pctile), left, right, 1e-2);
        }
    }
    return ret;
}
Use of org.apache.commons.math3.stat.descriptive.summary.Sum in project imagingbook-common by imagingbook:
class ProcrustesFit, method getEuclideanError.
/**
 * Calculates the total error for the estimated fit as
 * the sum of the squared Euclidean distances between the
 * transformed point set X and the reference set Y.
 * This method is provided for testing as an alternative to
 * the quicker {@link #getError()} method.
 * @param X Sequence of n-dimensional points.
 * @param Y Sequence of n-dimensional points (reference). Assumed to have the same size as X — TODO confirm callers guarantee this.
 * @return The total error for the estimated fit.
 */
public double getEuclideanError(List<double[]> X, List<double[]> Y) {
    // Pre-combine the estimated scale c and rotation R into a single matrix c*R.
    RealMatrix cR = R.scalarMultiply(c);
    double ee = 0;
    for (int i = 0; i < X.size(); i++) {
        RealVector ai = new ArrayRealVector(X.get(i));
        RealVector bi = new ArrayRealVector(Y.get(i));
        // Apply the estimated similarity transform: a' = c*R*a + t.
        RealVector aiT = cR.operate(ai).add(t);
        // Euclidean distance between the transformed point and its reference.
        double ei = aiT.subtract(bi).getNorm();
        // Accumulate the squared distance.
        ee = ee + sqr(ei);
    }
    return ee;
}
Aggregations