use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class TargetCoverageSexGenotypeCalculator method getSampleReadDepthFromAutosomalTargets.
/**
 * Estimates the read depth per target per homolog of one sample from its autosomal targets.
 *
 * <p>Each autosomal read count is divided by the ploidy recorded for that target, and the
 * median of the resulting ploidy-normalized counts is returned.
 *
 * @param sampleIndex integer index of the sample in the read count collection
 * @return median of the ploidy-normalized autosomal read counts for the sample
 */
private double getSampleReadDepthFromAutosomalTargets(final int sampleIndex) {
    final double[] autosomalCounts =
            processedReadCounts.getColumnOnSpecifiedTargets(sampleIndex, autosomalTargetList, false);

    // Normalize every count by the ploidy of its own target (same index in both arrays).
    final double[] countsPerHomolog = new double[autosomalCounts.length];
    for (int targetIndex = 0; targetIndex < autosomalCounts.length; targetIndex++) {
        countsPerHomolog[targetIndex] =
                autosomalCounts[targetIndex] / (double) autosomalTargetPloidies[targetIndex];
    }

    final Median medianEvaluator = new Median();
    return medianEvaluator.evaluate(countsPerHomolog);
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project gatk-protected by broadinstitute.
the class CoveragePoNQCUtils method hasSuspiciousContigs.
/**
 * Given a single sample tangent normalization (or other coverage profile), determines whether any
 * contig looks like it has an arm level event (defined as 25% (or more) of the contig
 * amplified/deleted).
 *
 * <p>For every contig present, the first column of counts is cut into four consecutive slices of
 * {@code dimension / 4} targets each. Each value is mapped through {@code 2^value}, the median of
 * the slice is taken, and the contig's median shift ({@code contigToMedian} value minus 1, or 0
 * when the contig has no entry) is subtracted. The first slice whose shifted median lies above
 * {@code AMP_THRESHOLD} or below {@code DEL_THRESHOLD} is logged and ends the search.
 *
 * <p>NOTE(review): slice starts and lengths use integer division, so when the number of targets
 * on a contig is not a multiple of four some targets fall in no slice.
 *
 * @param singleSampleTangentNormalized Tangent normalized data for a single sample.
 * @param contigToMedian per-contig median; presumably expressed in copy-ratio space, where 1.0
 *                       means "no shift" -- confirm against the caller
 * @return never {@code null}; {@code true} as soon as one suspicious slice is found
 */
private static Boolean hasSuspiciousContigs(final ReadCountCollection singleSampleTangentNormalized, final Map<String, Double> contigToMedian) {
    for (final String contig : retrieveAllContigsPresent(singleSampleTangentNormalized)) {
        // Restrict the collection to the targets that lie on the current contig.
        final ReadCountCollection contigOnly = singleSampleTangentNormalized.subsetTargets(
                singleSampleTangentNormalized.targets().stream()
                        .filter(t -> t.getContig().equals(contig))
                        .collect(Collectors.toSet()));
        final RealVector contigCounts = contigOnly.counts().getColumnVector(0);

        for (int quarter = 0; quarter < 4; quarter++) {
            final int sliceStart = quarter * contigCounts.getDimension() / 4;
            final int sliceLength = contigCounts.getDimension() / 4;
            final RealVector slice = contigCounts.getSubVector(sliceStart, sliceLength);

            final double[] poweredValues = DoubleStream.of(slice.toArray())
                    .map(d -> Math.pow(2, d))
                    .sorted()
                    .toArray();

            // Median of the slice, corrected by how far the contig median sits from 1.0.
            final double rawMedian = new Median().evaluate(poweredValues);
            final double medianShiftInCRSpace = contigToMedian.getOrDefault(contig, 1.0) - 1.0;
            final double shiftedMedian = rawMedian - medianShiftInCRSpace;

            final boolean looksAmplified = shiftedMedian > AMP_THRESHOLD;
            final boolean looksDeleted = shiftedMedian < DEL_THRESHOLD;
            if (looksAmplified || looksDeleted) {
                logger.info("Suspicious contig: " + singleSampleTangentNormalized.columnNames().get(0) + " " + contig + " (" + shiftedMedian + " -- " + quarter + ")");
                return true;
            }
        }
    }
    return false;
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project knime-core by knime.
the class MedianAbsoluteDeviationOperator method getResultInternal.
/**
 * {@inheritDoc}
 *
 * <p>Returns the median absolute deviation of the collected cells: the median of the absolute
 * differences between each value and the median of all values. A missing cell is returned when
 * no values were collected.
 */
@Override
protected DataCell getResultInternal() {
    final double[] values = super.getCells().getElements();
    if (values.length == 0) {
        return DataType.getMissingCell();
    }

    final Median medianEvaluator = new Median();
    final double center = medianEvaluator.evaluate(values);

    // The array is overwritten in place with each value's absolute distance from the median.
    int index = 0;
    while (index < values.length) {
        values[index] = Math.abs(center - values[index]);
        index++;
    }

    final double medianDeviation = medianEvaluator.evaluate(values);
    return new DoubleCell(medianDeviation);
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project ASCIIGenome by dariober.
the class Utils method winsor2.
/**
 * Winsorises vector x. Adapted from https://www.r-bloggers.com/winsorization/, i.e. the R function
 *
 * <pre>
 * winsor2 &lt;- function (x, multiple=3)
 * {
 *    med &lt;- median(x)
 *    y &lt;- x - med
 *    sc &lt;- mad(y, center=0) * multiple
 *    y[ y &gt; sc ] &lt;- sc
 *    y[ y &lt; -sc ] &lt;- -sc
 *    y + med
 * }
 * </pre>
 *
 * <p>Values are centred on the median of {@code x}, clamped to
 * {@code [-sc, sc]} where {@code sc} is the median absolute deviation of the centred values times
 * {@code multiple}, and then shifted back by the median. The input list is not modified.
 *
 * @param x values to winsorise
 * @param multiple number of median absolute deviations beyond which values are clamped
 * @return a new list holding the winsorised values, in the same order as {@code x}
 * @throws ArithmeticException if {@code multiple} is zero or negative
 */
public static List<Float> winsor2(List<Float> x, double multiple) {
    if (multiple <= 0) {
        throw new ArithmeticException();
    }

    // Median of the raw values, taken as the 50th percentile.
    final DescriptiveStatistics rawStats = new DescriptiveStatistics();
    for (final Float value : x) {
        rawStats.addValue(value.doubleValue());
    }
    final float median = (float) rawStats.getPercentile(50);

    // Centre every value on the median.
    final List<Float> result = new ArrayList<Float>(x.size());
    for (int i = 0; i < x.size(); i++) {
        result.add(x.get(i) - median);
    }

    // Clamp the centred values to +/- limit, then undo the centring.
    final float limit = (float) (Utils.medianAbsoluteDeviation(result, 0) * multiple);
    for (int i = 0; i < result.size(); i++) {
        float centred = result.get(i);
        if (centred > limit) {
            centred = limit;
        } else if (centred < -limit) {
            centred = -limit;
        }
        result.set(i, centred + median);
    }
    return result;
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project ASCIIGenome by dariober.
the class Utils method medianAbsoluteDeviation.
/**
 * Median absolute deviation of {@code x} around {@code center}, scaled by the constant 1.4826.
 *
 * <p>Translated from R function
 * mad(x, center, constant= 1.4826, na.rm= FALSE, low= FALSE, high= FALSE); only the {@code x} and
 * {@code center} arguments are exposed here and the constant is fixed.
 *
 * @param x values whose deviation is measured
 * @param center point from which absolute deviations are taken
 * @return 1.4826 times the median of {@code |x[i] - center|}
 */
private static float medianAbsoluteDeviation(List<Float> x, float center) {
    final DescriptiveStatistics absoluteDeviations = new DescriptiveStatistics();
    for (final Float value : x) {
        absoluteDeviations.addValue(Math.abs(value - center));
    }

    final float medianDeviation = (float) absoluteDeviations.getPercentile(50);
    final float scale = (float) 1.4826;
    return scale * medianDeviation;
}
Aggregations