use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtilsUnitTest method testSubtractMedianOfMedians.
@Test(dataProvider = "readCountOnlyData")
public void testSubtractMedianOfMedians(final ReadCountCollection readCounts) {
final RealMatrix counts = readCounts.counts();
final Median median = new Median();
final double[] columnMedians = IntStream.range(0, counts.getColumnDimension()).mapToDouble(i -> median.evaluate(counts.getColumn(i))).toArray();
final double center = median.evaluate(columnMedians);
final double[][] expected = new double[counts.getRowDimension()][];
for (int i = 0; i < expected.length; i++) {
expected[i] = counts.getRow(i).clone();
for (int j = 0; j < expected[i].length; j++) {
expected[i][j] -= center;
}
}
HDF5PCACoveragePoNCreationUtils.subtractMedianOfMedians(readCounts, NULL_LOGGER);
final RealMatrix newCounts = readCounts.counts();
Assert.assertEquals(newCounts.getColumnDimension(), expected[0].length);
Assert.assertEquals(newCounts.getRowDimension(), expected.length);
for (int i = 0; i < expected.length; i++) {
for (int j = 0; j < expected[i].length; j++) {
Assert.assertEquals(newCounts.getEntry(i, j), expected[i][j], 0.000001);
}
}
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNUnitTest method testReducedPoNMatrixReading.
@Test(dependsOnMethods = { "testTargetNameReading", "testLogNormalizedSampleNameReading" })
public void testReducedPoNMatrixReading() throws IOException {
final HDF5File reader = new HDF5File(TEST_PON);
final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
final List<String> targets = pon.getTargetNames();
final List<String> samples = pon.getPanelSampleNames();
final RealMatrix actual = pon.getReducedPanelCounts();
Assert.assertNotNull(actual);
Assert.assertEquals(actual.getRowDimension(), targets.size());
Assert.assertTrue(actual.getColumnDimension() <= samples.size());
final RealMatrix expected = readDoubleMatrix(TEST_PON_REDUCED_PON);
MathObjectAsserts.assertRealMatrixEquals(actual, expected);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class PoNTestUtils method readTsvIntoMatrix.
/**
* Reads a very basic tsv (numbers separated by tabs) into a RealMatrix.
* <p>Very little error checking happens in this method</p>
*
* @param inputFile readable file. Not {@code null}
* @return never {@code null}
*/
public static RealMatrix readTsvIntoMatrix(final File inputFile) {
IOUtils.canReadFile(inputFile);
final List<double[]> allData = new ArrayList<>();
int ctr = 0;
try {
final CSVReader reader = new CSVReader(new FileReader(inputFile), '\t', CSVWriter.NO_QUOTE_CHARACTER);
String[] nextLine;
while ((nextLine = reader.readNext()) != null) {
ctr++;
allData.add(Arrays.stream(nextLine).filter(s -> StringUtils.trim(s).length() > 0).map(s -> Double.parseDouble(StringUtils.trim(s))).mapToDouble(d -> d).toArray());
}
} catch (final IOException ioe) {
Assert.fail("Could not open test file: " + inputFile, ioe);
}
final RealMatrix result = new Array2DRowRealMatrix(allData.size(), allData.get(0).length);
for (int i = 0; i < result.getRowDimension(); i++) {
result.setRow(i, allData.get(i));
}
return result;
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtilsUnitTest method testCalculateReducedPanelAndPInversesUsingJollifesRule.
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesUsingJollifesRule(final ReadCountCollection readCounts) {
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final ReductionResult result = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(readCounts, OptionalInt.empty(), NULL_LOGGER, ctx);
final RealMatrix counts = readCounts.counts();
Assert.assertNotNull(result);
Assert.assertNotNull(result.getPseudoInverse());
Assert.assertNotNull(result.getReducedCounts());
Assert.assertNotNull(result.getReducedPseudoInverse());
Assert.assertNotNull(result.getAllSingularValues());
Assert.assertEquals(counts.getColumnDimension(), result.getAllSingularValues().length);
Assert.assertEquals(result.getReducedCounts().getRowDimension(), counts.getRowDimension());
final int eigensamples = result.getReducedCounts().getColumnDimension();
final Mean mean = new Mean();
final double meanSingularValue = mean.evaluate(result.getAllSingularValues());
final double threshold = HDF5PCACoveragePoNCreationUtils.JOLLIFES_RULE_MEAN_FACTOR * meanSingularValue;
final int expectedEigensamples = (int) DoubleStream.of(result.getAllSingularValues()).filter(d -> d >= threshold).count();
Assert.assertTrue(eigensamples <= counts.getColumnDimension());
Assert.assertEquals(eigensamples, expectedEigensamples);
assertPseudoInverse(counts, result.getPseudoInverse());
assertPseudoInverse(result.getReducedCounts(), result.getReducedPseudoInverse());
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtilsUnitTest method testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns.
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns(final ReadCountCollection readCounts) {
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final ReductionResult result = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(readCounts, OptionalInt.of(readCounts.columnNames().size() / 2), NULL_LOGGER, ctx);
final RealMatrix counts = readCounts.counts();
Assert.assertNotNull(result);
Assert.assertNotNull(result.getPseudoInverse());
Assert.assertNotNull(result.getReducedCounts());
Assert.assertNotNull(result.getReducedPseudoInverse());
Assert.assertNotNull(result.getAllSingularValues());
Assert.assertEquals(counts.getColumnDimension(), result.getAllSingularValues().length);
Assert.assertEquals(result.getReducedCounts().getRowDimension(), counts.getRowDimension());
Assert.assertEquals(result.getReducedCounts().getColumnDimension(), readCounts.columnNames().size() / 2);
final int eigensamples = result.getReducedCounts().getColumnDimension();
Assert.assertEquals(eigensamples, readCounts.columnNames().size() / 2);
assertPseudoInverse(counts, result.getPseudoInverse());
assertPseudoInverse(result.getReducedCounts(), result.getReducedPseudoInverse());
}
Aggregations