use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class CNLOHCaller method sumOverSegments.
/**
 * Adds up the per-segment vectors held in an RDD, column by column.
 *
 * @param eZskBySeg RDD with one {@code double[]} per element; every array becomes one matrix row
 * @return the result of {@link GATKProtectedMathUtils#columnSums} on the matrix built from those rows
 */
private double[] sumOverSegments(final JavaRDD<double[]> eZskBySeg) {
    // Gather every row locally; the resulting 2-D array is laid out S x K.
    final double[][] rows = eZskBySeg.collect().toArray(new double[0][]);
    return GATKProtectedMathUtils.columnSums(MatrixUtils.createRealMatrix(rows));
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class XHMMSegmentCallerBase method standardizeBySample.
/**
 * Standardizes read counts per sample: each entry has its column's mean subtracted and is
 * then divided by its column's standard deviation.
 * <p>
 * The matrix passed in is overwritten; no copy is made.
 * </p>
 *
 * @param counts original read counts; modified in-place
 */
private void standardizeBySample(final RealMatrix counts) {
    // Both statistics are taken from the untouched matrix before any entry is rewritten.
    final double[] means = GATKProtectedMathUtils.columnMeans(counts);
    final double[] stdDevs = GATKProtectedMathUtils.columnStdDevs(counts);

    final DefaultRealMatrixChangingVisitor standardizer = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
            final double centered = value - means[column];
            return centered / stdDevs[column];
        }
    };
    counts.walkInColumnOrder(standardizer);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method calculateTargetVariances.
/**
 * Determine the variance for each target in the PoN (panel targets).
 * <p>
 * The normals are tangent-normalized against the reduced panel first, and the row variances
 * of the resulting count matrix are returned.
 * </p>
 *
 * @param normalizedCounts normalized read counts of the normals; must not be {@code null}
 * @param panelTargetNames names of the panel targets; must not be {@code null}
 * @param reduction supplies the reduced counts and reduced pseudo-inverse; must not be {@code null}
 * @param ctx Spark context handed through to the tangent normalization
 * @return array of doubles where each double corresponds to a target in the PoN (panel targets)
 */
private static double[] calculateTargetVariances(final ReadCountCollection normalizedCounts, final List<String> panelTargetNames, final ReductionResult reduction, final JavaSparkContext ctx) {
    Utils.nonNull(panelTargetNames);
    Utils.nonNull(normalizedCounts);
    Utils.nonNull(reduction);

    return MatrixSummaryUtils.getRowVariances(
            PCATangentNormalizationUtils.tangentNormalizeNormalsInPoN(
                            normalizedCounts,
                            panelTargetNames,
                            reduction.getReducedCounts(),
                            reduction.getReducedPseudoInverse(),
                            ctx)
                    .getTangentNormalized()
                    .counts());
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method normalizeAndLogReadCounts.
/**
 * Last normalization step before the panel is built: every count is divided by the median
 * of its column and the result is converted to log base 2.
 * <p>
 * Ratios are floored at {@code EPSILON} before the logarithm is taken.
 * The count matrix of {@code readCounts} is rewritten in-place.
 * </p>
 *
 * @param readCounts the input counts to normalize.
 * @param logger receives an info message once the normalization is done.
 */
@VisibleForTesting
static void normalizeAndLogReadCounts(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final Median medianCalculator = new Median();

    // All column medians are computed up front, before any entry is overwritten.
    final int columnCount = counts.getColumnDimension();
    final double[] columnMedians = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnMedians[col] = medianCalculator.evaluate(counts.getColumn(col));
    }

    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            final double ratio = value / columnMedians[column];
            // Natural log scaled by INV_LN_2 (presumably 1/ln(2)) to obtain log2.
            return Math.log(Math.max(EPSILON, ratio)) * INV_LN_2;
        }
    });

    logger.info("Counts normalized by the column median and log2'd.");
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method subtractMedianOfMedians.
/**
 * Centers the counts by computing the median of each column, taking the median of those
 * values, and subtracting that single number from every entry.
 * <p>
 * The count matrix of {@code readCounts} is rewritten in-place.
 * </p>
 *
 * @param readCounts the input counts to center.
 * @param logger receives an info message reporting the subtracted value.
 * @return the median of medians that has been subtracted from all counts.
 */
@VisibleForTesting
static double subtractMedianOfMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final double medianOfMedians = new Median().evaluate(MatrixSummaryUtils.getColumnMedians(counts));

    final DefaultRealMatrixChangingVisitor centerer = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value - medianOfMedians;
        }
    };
    counts.walkInOptimizedOrder(centerer);

    logger.info(String.format("Counts centered around the median of medians %.2f", medianOfMedians));
    return medianOfMedians;
}
Aggregations