use of org.apache.commons.math3.linear.RealMatrix in project incubator-systemml by apache.
the class LibCommonsMath method computeQR.
/**
 * Runs a QR decomposition on the supplied matrix and returns both factors as matrix blocks.
 *
 * <p>The input is converted to a Commons Math {@code Array2DRowRealMatrix}, decomposed with
 * {@code QRDecomposition}, and the results of {@code getH()} and {@code getR()} are converted
 * back into {@code MatrixBlock}s.</p>
 *
 * @param in matrix object to decompose
 * @return a two-element array: index 0 holds the H matrix, index 1 holds the R matrix
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private static MatrixBlock[] computeQR(MatrixObject in) throws DMLRuntimeException {
    // Decompose the input once; both factors are read from the same decomposition.
    final QRDecomposition decomposition =
            new QRDecomposition(DataConverter.convertToArray2DRowRealMatrix(in));

    // Translate each factor back into the native block representation.
    final MatrixBlock blockH = DataConverter.convertToMatrixBlock(decomposition.getH().getData());
    final MatrixBlock blockR = DataConverter.convertToMatrixBlock(decomposition.getR().getData());

    return new MatrixBlock[] { blockH, blockR };
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class ReadCountCollection method subsetColumns.
/**
 * Builds a new read-count collection restricted to the requested count columns.
 *
 * <p>
 * The result is a brand-new read-count collection backed by its own count matrix, so
 * changes in the new instance won't affect this one and vice-versa. Kept columns retain
 * the relative order they have in this collection.
 * </p>
 *
 * <p>
 * If any requested name is not a column of this collection, the exception produced by
 * {@code unknownColumnToKeepNames} is thrown.
 * </p>
 *
 * @param columnsToKeep column names to keep in the result read-count collection; must be
 *                      neither {@code null} nor empty.
 * @return never {@code null}.
 */
public ReadCountCollection subsetColumns(final Set<String> columnsToKeep) {
    Utils.nonNull(columnsToKeep, "the set of input columns to keep cannot be null.");
    Utils.nonEmpty(columnsToKeep, "the number of columns to keep must be greater than 0");

    final Set<String> knownColumns = new HashSet<>(columnNames);
    if (!knownColumns.containsAll(columnsToKeep)) {
        throw unknownColumnToKeepNames(columnsToKeep);
    }

    // Every column was requested: a plain copy of the counts is enough.
    if (columnsToKeep.size() == columnNames.size()) {
        return new ReadCountCollection(targets, columnNames, counts.copy(), false);
    }

    // Positions (in this collection) of the columns that survive, in their original order.
    final int[] keptIndices = IntStream.range(0, columnNames.size())
            .filter(index -> columnsToKeep.contains(columnNames.get(index)))
            .toArray();
    final List<String> keptNames = IntStream.of(keptIndices)
            .mapToObj(columnNames::get)
            .collect(Collectors.toList());

    // Copy the surviving columns one by one into a freshly allocated matrix.
    final RealMatrix keptCounts = new Array2DRowRealMatrix(counts.getRowDimension(), keptIndices.length);
    int destinationColumn = 0;
    for (final int sourceColumn : keptIndices) {
        keptCounts.setColumn(destinationColumn, counts.getColumn(sourceColumn));
        destinationColumn++;
    }

    return new ReadCountCollection(targets, Collections.unmodifiableList(keptNames), keptCounts, false);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class ReadCountCollection method arrangeTargets.
/**
 * Rearrange the targets so that they are in a particular order.
 *
 * <p>
 * The result holds one row per element of {@code targetsInOrder}, copied from the row that
 * the same target occupies in this collection. The column names are carried over unchanged.
 * </p>
 *
 * @param targetsInOrder the targets to include in the result, in the desired order.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 * <li>{@code targetsInOrder} is {@code null},</li>
 * <li>is empty,</li>
 * <li>it contains {@code null},</li>
 * <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());

    // Index every target of this collection by its row position.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }

    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: the "not present" message below dereferences the
        // target, so a null would otherwise surface as a NullPointerException rather than
        // the documented IllegalArgumentException.
        if (target == null) {
            throw new IllegalArgumentException("the input targets list contains a null at position " + i);
        }
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }

    // Defensive copy of the target list so later changes to the argument do not leak in.
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class ReadCountCollectionUtils method removeTargetsWithTooManyZeros.
/**
 * Drops the targets whose number of zero counts exceeds a given maximum.
 * <p>
 * When at least one target is dropped, the result is produced by
 * {@code readCounts.subsetTargets(...)} over the surviving targets, kept in their original order.
 * An informational message describing the outcome is written to {@code logger}.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumTargetZeros maximum number of counts equal to 0 per target tolerated.
 * @param roundToInteger forwarded to {@code countZeroes} when counting the zeros of each target
 *                       (presumably whether counts are rounded before being compared with 0 —
 *                       confirm against {@code countZeroes}).
 * @param logger where the summary of the operation is reported.
 * @return never {@code null}. It might be a reference to the input read-counts if there
 * is no target to be dropped.
 * @throws UserException.BadInput if every target would be dropped.
 */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix countMatrix = readCounts.counts();

    // Keep, in row order, every target whose zero count stays within the tolerated maximum.
    final Set<Target> survivors = IntStream.range(0, countMatrix.getRowDimension())
            .filter(row -> countZeroes(countMatrix.getRow(row), roundToInteger) <= maximumTargetZeros)
            .mapToObj(row -> readCounts.targets().get(row))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final int totalTargets = readCounts.targets().size();
    final int droppedTargets = totalTargets - survivors.size();

    if (droppedTargets == 0) {
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop", maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    }
    if (droppedTargets == totalTargets) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting in all targets being dropped");
    }

    final double droppedPercentage = ((double) droppedTargets / totalTargets) * 100;
    logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).", droppedTargets, totalTargets, droppedPercentage, maximumTargetZeros, readCounts.columnNames().size()));
    return readCounts.subsetTargets(survivors);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class ReadCountCollectionUtils method truncateExtremeCounts.
/**
 * Truncates the extreme count values in the input read-count collection.
 * Values are forced to be bound by the percentile indicated with the input {@code percentile} which must be
 * in the range [0 .. 50.0]. Values under that percentile are raised to the bottom threshold, and values over
 * the complementary (100 - percentile) percentile are lowered to the top threshold.
 *
 * <p>Both thresholds are computed once over all the counts in the collection, before any value is changed.
 * A {@code percentile} of 0 uses the minimum count as the bottom threshold, so no value is truncated from
 * below.</p>
 *
 * <p>The imputation is done in-place, thus the input matrix is modified as a result of this call.</p>
 *
 * @param readCounts the input and output read-count matrix.
 * @param percentile the percentile, expressed in the 0 to 100 scale, that delimits the extreme values.
 * @param logger where the summary of the truncation is reported.
 */
public static void truncateExtremeCounts(final ReadCountCollection readCounts, final double percentile, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    final int targetCount = counts.getRowDimension();
    final int columnCount = counts.getColumnDimension();
    // Create a row major array of the counts.
    final double[] values = Doubles.concat(counts.getData());

    // Percentile does not accept a quantile of 0, yet 0 is a documented valid input here.
    // The 0th percentile is served by the minimum value, which truncates nothing from below.
    final double bottomPercentileThreshold;
    if (percentile == 0.0) {
        bottomPercentileThreshold = values.length == 0 ? Double.NaN : Doubles.min(values);
    } else {
        bottomPercentileThreshold = new Percentile(percentile).evaluate(values);
    }
    final double topPercentileThreshold = new Percentile(100.0 - percentile).evaluate(values);

    // Every cell is visited exactly once below.
    final long totalCounts = (long) targetCount * columnCount;
    long bottomTruncatedCounts = 0;
    long topTruncatedCounts = 0;
    for (int i = 0; i < targetCount; i++) {
        final double[] rowCounts = counts.getRow(i);
        for (int j = 0; j < columnCount; j++) {
            final double count = rowCounts[j];
            if (count < bottomPercentileThreshold) {
                counts.setEntry(i, j, bottomPercentileThreshold);
                bottomTruncatedCounts++;
            } else if (count > topPercentileThreshold) {
                counts.setEntry(i, j, topPercentileThreshold);
                topTruncatedCounts++;
            }
        }
    }

    final long truncatedCounts = topTruncatedCounts + bottomTruncatedCounts;
    if (truncatedCounts == 0) {
        logger.info(String.format("None of the %d counts were truncated as they all fall in the non-extreme range " + "[%.2f, %.2f]", totalCounts, bottomPercentileThreshold, topPercentileThreshold));
    } else {
        final double truncatedPercentage = ((double) truncatedCounts / totalCounts) * 100;
        logger.info(String.format("Some counts (%d out of %d, %.2f%%) were truncated as they fall out of the " + "non-extreme range [%.2f, %.2f]", truncatedCounts, totalCounts, truncatedPercentage, bottomPercentileThreshold, topPercentileThreshold));
    }
}
Aggregations