use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.
the class PCATangentNormalizationUtils method calculateBetaHats.
/**
 * Computes the beta-hats that best fit the case read counts given the panel of normals.
 * <p>
 * The input is copied, every entry of the copy that is not strictly greater than
 * {@code log2(epsilon) + 1} is set to zero, and the pseudoinverse is multiplied by that masked copy.
 * The {@code input} matrix itself is left untouched.
 * </p>
 *
 * @param normalsPseudoinverse the log-normalized or reduced-panel pseudoinverse from a panel of normals; must not be {@code null}.
 * @param input a {@code TxS} matrix where {@code T} is the number of targets and {@code S} the number of count groups
 *              (e.g. case samples); must not be {@code null}.
 * @param epsilon must be strictly positive; determines the masking threshold {@code log2(epsilon) + 1}.
 * @return never {@code null}, an {@code NxS} matrix, where {@code N} is the number of samples in
 *         the panel and {@code S} the number of count groups.
 */
@VisibleForTesting
public static RealMatrix calculateBetaHats(final RealMatrix normalsPseudoinverse, final RealMatrix input, final double epsilon) {
    Utils.nonNull(normalsPseudoinverse, "Normals inverse matrix cannot be null.");
    Utils.nonNull(input, "Input counts cannot be null.");
    Utils.validateArg(epsilon > 0, String.format("Invalid epsilon value, must be > 0: %f", epsilon));

    // Entries at or below this log2-scale cutoff are treated as having too little coverage.
    final double maskCutoff = (Math.log(epsilon) / Math.log(2)) + 1;

    final DefaultRealMatrixChangingVisitor lowCoverageMasker = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            if (value > maskCutoff) {
                return value;
            }
            return 0;
        }
    };

    // Work on a copy so that the caller's matrix is never modified by the masking.
    final RealMatrix caseCounts = input.copy();
    caseCounts.walkInOptimizedOrder(lowCoverageMasker);
    return normalsPseudoinverse.multiply(caseCounts);
}
use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method normalizeAndLogReadCounts.
/**
 * Final pre-panel normalization: each count is divided by the median of its column and then
 * log-transformed (natural log scaled by {@code INV_LN_2}, i.e. base 2).
 * <p>
 * Ratios smaller than {@code EPSILON} are raised to {@code EPSILON} before the logarithm is taken.
 * The normalization occurs in-place on the matrix returned by {@code readCounts.counts()}.
 * </p>
 *
 * @param readCounts the input counts to normalize.
 * @param logger receives an informational message once the normalization is done.
 */
@VisibleForTesting
static void normalizeAndLogReadCounts(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix countMatrix = readCounts.counts();

    // One median per column, computed before any value is rewritten.
    final Median median = new Median();
    final int columnCount = countMatrix.getColumnDimension();
    final double[] columnMedians = new double[columnCount];
    for (int j = 0; j < columnCount; j++) {
        columnMedians[j] = median.evaluate(countMatrix.getColumn(j));
    }

    final DefaultRealMatrixChangingVisitor normalizer = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            final double ratio = value / columnMedians[column];
            return Math.log(Math.max(EPSILON, ratio)) * INV_LN_2;
        }
    };
    countMatrix.walkInOptimizedOrder(normalizer);

    logger.info("Counts normalized by the column median and log2'd.");
}
use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk-protected by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method subtractMedianOfMedians.
/**
 * Centers the counts in place around the median of their column medians.
 * <p>
 * The column medians are obtained from {@code MatrixSummaryUtils.getColumnMedians}, their median is
 * computed, and that single value is subtracted from every entry of the count matrix.
 * </p>
 *
 * @param readCounts the input counts to center.
 * @param logger receives an informational message reporting the subtracted value.
 * @return the median of medians that has been subtracted from all counts.
 */
@VisibleForTesting
static double subtractMedianOfMedians(final ReadCountCollection readCounts, final Logger logger) {
    final RealMatrix countMatrix = readCounts.counts();
    final double center = new Median().evaluate(MatrixSummaryUtils.getColumnMedians(countMatrix));

    final DefaultRealMatrixChangingVisitor centerer = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value - center;
        }
    };
    countMatrix.walkInOptimizedOrder(centerer);

    logger.info(String.format("Counts centered around the median of medians %.2f", center));
    return center;
}
use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.
the class PCATangentNormalizationUtils method composeTangentNormalizationInputMatrix.
/**
 * Prepares the data to perform tangent normalization.
 * <p>
 * The work is done on a copy of the input, per count group (column):
 * <ol>
 * <li>each count is divided by the mean of its column,</li>
 * <li>the quotient is passed through {@code truncatedLog2},</li>
 * <li>and finally the median of each transformed column is subtracted from its values.</li>
 * </ol>
 * </p>
 *
 * @param matrix input matrix; it is not modified.
 * @return never {@code null}.
 */
private static RealMatrix composeTangentNormalizationInputMatrix(final RealMatrix matrix) {
    final RealMatrix normalized = matrix.copy();

    // Step 1: scale by the column means of the original matrix and log_2 transform.
    final double[] means = GATKProtectedMathUtils.columnMeans(matrix);
    final DefaultRealMatrixChangingVisitor meanScaler = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            final double scaled = value / means[column];
            return truncatedLog2(scaled);
        }
    };
    normalized.walkInOptimizedOrder(meanScaler);

    // Step 2: medians are taken from the already transformed columns, then subtracted.
    final int columnCount = matrix.getColumnDimension();
    final double[] medians = new double[columnCount];
    for (int c = 0; c < columnCount; c++) {
        medians[c] = new Median().evaluate(normalized.getColumn(c));
    }
    final DefaultRealMatrixChangingVisitor medianCenterer = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return value - medians[column];
        }
    };
    normalized.walkInOptimizedOrder(medianCenterer);

    return normalized;
}
use of org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor in project gatk by broadinstitute.
the class PCATangentNormalizationUtils method calculateBetaHats.
/**
 * Computes the beta-hats that best fit the case read counts given the panel of normals.
 * <p>
 * A copy of the input is masked (entries not strictly greater than {@code log2(epsilon) + 1}
 * become zero) and the pseudoinverse is then multiplied by the masked copy. The {@code input}
 * matrix itself is not changed.
 * </p>
 *
 * @param normalsPseudoinverse the log-normalized or reduced-panel pseudoinverse from a panel of normals; must not be {@code null}.
 * @param input a {@code TxS} matrix where {@code T} is the number of targets and {@code S} the number of count groups
 *              (e.g. case samples); must not be {@code null}.
 * @param epsilon must be strictly positive; determines the masking threshold {@code log2(epsilon) + 1}.
 * @return never {@code null}, an {@code NxS} matrix, where {@code N} is the number of samples in
 *         the panel and {@code S} the number of count groups.
 */
@VisibleForTesting
public static RealMatrix calculateBetaHats(final RealMatrix normalsPseudoinverse, final RealMatrix input, final double epsilon) {
    Utils.nonNull(normalsPseudoinverse, "Normals inverse matrix cannot be null.");
    Utils.nonNull(input, "Input counts cannot be null.");
    Utils.validateArg(epsilon > 0, String.format("Invalid epsilon value, must be > 0: %f", epsilon));

    // Log2-scale cutoff: values at or below it are zeroed out before the multiplication.
    final double cutoff = (Math.log(epsilon) / Math.log(2)) + 1;

    final DefaultRealMatrixChangingVisitor masker = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            if (value > cutoff) {
                return value;
            }
            return 0;
        }
    };

    // Mask a private copy so the caller's matrix stays intact.
    final RealMatrix masked = input.copy();
    masked.walkInOptimizedOrder(masker);
    return normalsPseudoinverse.multiply(masked);
}
Aggregations