use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class MatrixSummaryUtils method getRowVariances.
/**
 * Return an array containing the variance for each row in the given matrix.
 * @param m Not {@code null}. Size MxN. If any entry in a row is NaN, that row will have a
 *          variance of NaN.
 * @return array of size M. Never {@code null}. If the matrix has only one column (i.e. each row
 *         has a single entry), every variance will be 0.
 */
public static double[] getRowVariances(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate variances on a null matrix.");
    final StandardDeviation std = new StandardDeviation();
    return IntStream.range(0, m.getRowDimension()).boxed()
            .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2))
            .toArray();
}
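A minimal sketch of how this utility would be called, assuming MatrixSummaryUtils is on the classpath; the matrix values are made up for illustration, and Array2DRowRealMatrix is the standard Commons Math implementation of RealMatrix:

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;

// 2x3 matrix: getRowVariances returns one (bias-corrected) variance per row
final RealMatrix m = new Array2DRowRealMatrix(new double[][] {
        {1.0, 2.0, 3.0},   // variance 1.0
        {4.0, 4.0, 4.0}    // variance 0.0
});
final double[] rowVariances = MatrixSummaryUtils.getRowVariances(m);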
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk by broadinstitute.
the class PosteriorSummaryUtils method calculatePosteriorMode.
/**
 * Given a list of posterior samples, returns an estimate of the posterior mode (using
 * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
 * Note that the estimate may be poor if the number of samples is small (resulting in poor kernel density estimation),
 * or if the posterior is not unimodal (or is otherwise sufficiently pathological). If the samples contain
 * {@link Double#NaN}, {@link Double#NaN} will be returned.
 * @param samples posterior samples; cannot be {@code null} and the number of samples must be greater than 0
 * @param ctx     {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
 * @return        estimate of the posterior mode
 */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");

    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));

    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }

    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth =
            SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);

    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);

    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer =
            new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective =
            new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] { f })[0]);

    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}
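The SILVERMANS_RULE_CONSTANT and SILVERMANS_RULE_EXPONENT fields are defined elsewhere in PosteriorSummaryUtils and are not part of this excerpt. In the textbook form of Silverman's rule of thumb the bandwidth is h = 1.06 * sigma * n^(-1/5), so a minimal sketch of the bandwidth step, assuming those conventional values (the actual constants in the class may differ), looks like this:

// Assumed values matching the textbook form of Silverman's rule of thumb;
// the real constants live elsewhere in PosteriorSummaryUtils and are not shown here.
private static final double ASSUMED_SILVERMANS_RULE_CONSTANT = 1.06;
private static final double ASSUMED_SILVERMANS_RULE_EXPONENT = -0.2;  // i.e., n^(-1/5)

// Hypothetical helper illustrating the bandwidth computation used above.
static double silvermanBandwidth(final double sampleStandardDeviation, final int numSamples) {
    return ASSUMED_SILVERMANS_RULE_CONSTANT * sampleStandardDeviation
            * Math.pow(numSamples, ASSUMED_SILVERMANS_RULE_EXPONENT);
}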
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project gatk-protected by broadinstitute.
the class ReCapSegCaller method calculateT.
private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) {
    //Get the segments that are likely copy neutral.
    // Math.abs removed to mimic python...
    final List<ModeledSegment> copyNeutralSegments = segments.stream()
            .filter(s -> s.getSegmentMean() < COPY_NEUTRAL_CUTOFF)
            .collect(Collectors.toList());

    // Get the targets that correspond to the copyNeutralSegments... note that individual targets, due to noise,
    // can be far away from copy neutral
    final TargetCollection<ReadCountRecord.SingleSampleRecord> targetsWithCoverage =
            new HashedListTargetCollection<>(tangentNormalizedCoverage.records().stream()
                    .map(ReadCountRecord::asSingleSampleRecord)
                    .collect(Collectors.toList()));
    final double[] copyNeutralTargetsCopyRatio = copyNeutralSegments.stream()
            .flatMap(s -> targetsWithCoverage.targets(s).stream())
            .mapToDouble(ReadCountRecord.SingleSampleRecord::getCount)
            .toArray();

    final double meanCopyNeutralTargets = new Mean().evaluate(copyNeutralTargetsCopyRatio);
    final double sigmaCopyNeutralTargets = new StandardDeviation().evaluate(copyNeutralTargetsCopyRatio);

    // Now we filter outliers by only including those within 2 standard deviations.
    final double[] filteredCopyNeutralTargetsCopyRatio = Arrays.stream(copyNeutralTargetsCopyRatio)
            .filter(c -> Math.abs(c - meanCopyNeutralTargets) < sigmaCopyNeutralTargets * Z_THRESHOLD)
            .toArray();
    return new StandardDeviation().evaluate(filteredCopyNeutralTargetsCopyRatio);
}
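The core idea is a trimmed standard deviation: estimate the mean and sigma of the copy-neutral targets, drop values more than Z_THRESHOLD sigmas from the mean, then recompute the standard deviation on the survivors. A standalone sketch of just that step, with the threshold passed in as a parameter (Z_THRESHOLD itself is defined elsewhere in ReCapSegCaller):

import java.util.Arrays;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;

// Hypothetical helper isolating the outlier-trimmed standard deviation used above.
static double trimmedStandardDeviation(final double[] values, final double zThreshold) {
    final double mean = new Mean().evaluate(values);
    final double sigma = new StandardDeviation().evaluate(values);
    final double[] kept = Arrays.stream(values)
            .filter(v -> Math.abs(v - mean) < sigma * zThreshold)
            .toArray();
    return new StandardDeviation().evaluate(kept);
}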
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project jstructure by JonStargaryen.
the class StructuralInformationParserService method parseContactStructuralInformationFile.
public List<ContactStructuralInformation> parseContactStructuralInformationFile(InputStream inputStream,
                                                                                Chain chain,
                                                                                List<AminoAcid> earlyFoldingResidues) {
    Map<Pair<Integer, Integer>, List<String>> parsingMap = new HashMap<>();
    try (Stream<String> stream = new BufferedReader(new InputStreamReader(inputStream)).lines()) {
        stream.forEach(line -> {
            String[] split = line.split("\t");
            String[] idSplit = split[0].split(",");
            Pair<Integer, Integer> idPair = new Pair<>(Integer.valueOf(idSplit[0].split("\\(")[1].trim()),
                    Integer.valueOf(idSplit[1].split("\\)")[0].trim()));
            if (!parsingMap.containsKey(idPair)) {
                parsingMap.put(idPair, new ArrayList<>());
            }
            parsingMap.get(idPair).add(line);
        });
    }

    Map<Pair<Integer, Integer>, List<ReconstructionStructuralInformation>> reconstructionMap = new HashMap<>();
    parsingMap.entrySet().stream().flatMap(entry -> {
        String aa1 = chain.select().residueNumber(entry.getKey().getLeft()).asAminoAcid().getOneLetterCode();
        String aa2 = chain.select().residueNumber(entry.getKey().getRight()).asAminoAcid().getOneLetterCode();
        return entry.getValue().stream()
                .map(line -> line.split("\t"))
                .map(split -> new ReconstructionStructuralInformation(entry.getKey().getLeft(), aa1,
                        entry.getKey().getRight(), aa2,
                        ContactDistanceBin.resolve(new Pair<>(IdentifierFactory.createResidueIdentifier(entry.getKey().getLeft()),
                                IdentifierFactory.createResidueIdentifier(entry.getKey().getRight()))).orElse(null),
                        split[1].equals("true"),
                        Double.valueOf(split[2]), Double.valueOf(split[3]), Double.valueOf(split[4]),
                        Double.valueOf(split[5]), Double.valueOf(split[6]), Double.valueOf(split[7]),
                        Double.valueOf(split[8]), Double.valueOf(split[9]), Double.valueOf(split[10])));
    }).forEach(rsi -> {
        Pair<Integer, Integer> idPair = new Pair<>(rsi.getResidueIdentifier1(), rsi.getResidueIdentifier2());
        if (!reconstructionMap.containsKey(idPair)) {
            reconstructionMap.put(idPair, new ArrayList<>());
        }
        reconstructionMap.get(idPair).add(rsi);
    });

    List<ReconstructionStructuralInformation> reconstructionStructuralInformation = reconstructionMap.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.toList());
    int numberOfReconstructions = reconstructionStructuralInformation.size();
    double averageRmsd = reconstructionStructuralInformation.stream()
            .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
            .average()
            .orElse(0.0);
    double standardDeviationRmsd = new StandardDeviation().evaluate(reconstructionStructuralInformation.stream()
            .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
            .toArray());
    double averageMaximumRmsd = reconstructionMap.entrySet().stream()
            .mapToDouble(entry -> entry.getValue().stream()
                    .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
                    .max().orElse(0.0))
            .average()
            .orElse(0.0);
    double standardDeviationMaximumRmsd = new StandardDeviation().evaluate(reconstructionMap.entrySet().stream()
            .mapToDouble(entry -> entry.getValue().stream()
                    .mapToDouble(ReconstructionStructuralInformation::getRmsdIncrease)
                    .max().orElse(0.0))
            .toArray());
    List<ReconstructionStructuralInformation> topScoringReconstructions = reconstructionMap.values().stream()
            .flatMap(Collection::stream)
            .sorted(Comparator.comparingDouble(ReconstructionStructuralInformation::getRmsdIncrease).reversed())
            .limit((int) (0.1 * numberOfReconstructions))
            .collect(Collectors.toList());

    return reconstructionMap.entrySet().stream().map(entry -> {
        List<ReconstructionStructuralInformation> values = entry.getValue();
        ReconstructionStructuralInformation reference = values.get(0);
        return new ContactStructuralInformation(reference.getResidueIdentifier1(), reference.getAa1(),
                reference.getResidueIdentifier2(), reference.getAa2(), reference.getContactDistanceBin(),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineRmsd),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineTmScore),
                computeAverage(values, ReconstructionStructuralInformation::getBaselineQ),
                computeAverage(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeAverage(values, ReconstructionStructuralInformation::getqIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getRmsdIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getTmScoreIncrease),
                computeMaximum(values, ReconstructionStructuralInformation::getqIncrease),
                residueIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                contactIsInCollection(earlyFoldingResidues, entry.getKey().getLeft(), entry.getKey().getRight()),
                averageRmsd, standardDeviationRmsd, averageMaximumRmsd, standardDeviationMaximumRmsd,
                reconstructionStructuralInformation, topScoringReconstructions,
                values.stream().map(ReconstructionStructuralInformation::getRmsdIncrease).collect(Collectors.toList()));
    }).collect(Collectors.toList());
}
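The computeAverage and computeMaximum helpers referenced above are defined elsewhere in StructuralInformationParserService and are not part of this excerpt. A plausible sketch, assuming they simply reduce one numeric property of ReconstructionStructuralInformation over the list (requires java.util.function.ToDoubleFunction):

// Hypothetical shape of the helpers used above; the real implementations may differ.
private double computeAverage(List<ReconstructionStructuralInformation> values,
                              ToDoubleFunction<ReconstructionStructuralInformation> extractor) {
    return values.stream().mapToDouble(extractor).average().orElse(0.0);
}

private double computeMaximum(List<ReconstructionStructuralInformation> values,
                              ToDoubleFunction<ReconstructionStructuralInformation> extractor) {
    return values.stream().mapToDouble(extractor).max().orElse(0.0);
}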
use of org.apache.commons.math3.stat.descriptive.moment.StandardDeviation in project presto by prestodb.
the class TestDoubleStdDevPopAggregation method getExpectedValue.
@Override
public Number getExpectedValue(int start, int length) {
    if (length == 0) {
        return null;
    }
    double[] values = new double[length];
    for (int i = 0; i < length; i++) {
        values[i] = start + i;
    }
    StandardDeviation stdDev = new StandardDeviation(false);
    return stdDev.evaluate(values);
}
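Here new StandardDeviation(false) turns off bias correction, so the estimator divides by n rather than n - 1; that is the population standard deviation the stddev_pop aggregation is expected to produce. A short example contrasting the two constructor flags (values chosen arbitrarily):

import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;

double[] values = {1.0, 2.0, 3.0, 4.0};
// bias-corrected (sample) standard deviation: divides by n - 1
double sample = new StandardDeviation(true).evaluate(values);      // ~1.2910
// uncorrected (population) standard deviation: divides by n
double population = new StandardDeviation(false).evaluate(values); // ~1.1180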