Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
The class PoNTestUtils, method readTsvIntoMatrix.
/**
 * Reads a very basic tsv (numbers separated by tabs) into a RealMatrix.
 * <p>Very little error checking happens in this method: fields that are empty after trimming are
 * skipped, every remaining field is parsed with {@link Double#parseDouble(String)}, and the number of
 * columns of the result is taken from the first line of the file.</p>
 * <p>The test is failed (via {@code Assert.fail}) if the file cannot be read or contains no lines.</p>
 *
 * @param inputFile readable file. Not {@code null}
 * @return never {@code null}; one matrix row per line of the file.
 */
public static RealMatrix readTsvIntoMatrix(final File inputFile) {
    IOUtils.canReadFile(inputFile);
    final List<double[]> allData = new ArrayList<>();
    // try-with-resources: the reader (and the FileReader it wraps) is closed even if parsing throws.
    try (final CSVReader reader = new CSVReader(new FileReader(inputFile), '\t', CSVWriter.NO_QUOTE_CHARACTER)) {
        String[] nextLine;
        while ((nextLine = reader.readNext()) != null) {
            allData.add(Arrays.stream(nextLine)
                    .map(s -> StringUtils.trim(s))
                    .filter(s -> s.length() > 0)
                    .mapToDouble(s -> Double.parseDouble(s))
                    .toArray());
        }
    } catch (final IOException ioe) {
        Assert.fail("Could not open test file: " + inputFile, ioe);
    }
    // Without this guard an empty file surfaces as an opaque IndexOutOfBoundsException from get(0).
    if (allData.isEmpty()) {
        Assert.fail("No data found in test file: " + inputFile);
    }
    final RealMatrix result = new Array2DRowRealMatrix(allData.size(), allData.get(0).length);
    for (int i = 0; i < result.getRowDimension(); i++) {
        result.setRow(i, allData.get(i));
    }
    return result;
}
Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
The class HDF5PCACoveragePoNCreationUtilsUnitTest, method testCalculateReducedPanelAndPInversesUsingJollifesRule.
/**
 * Runs the panel reduction without an explicit eigensample count ({@code OptionalInt.empty()}) and
 * checks that the number of columns kept equals the number of singular values that are at least
 * {@code JOLLIFES_RULE_MEAN_FACTOR} times the mean singular value.
 *
 * @param readCounts input read counts supplied by the data provider.
 */
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesUsingJollifesRule(final ReadCountCollection readCounts) {
    final JavaSparkContext sparkContext = SparkContextFactory.getTestSparkContext();
    final ReductionResult reduction = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(
            readCounts, OptionalInt.empty(), NULL_LOGGER, sparkContext);
    final RealMatrix inputCounts = readCounts.counts();

    // Every component of the result must be present.
    Assert.assertNotNull(reduction);
    Assert.assertNotNull(reduction.getPseudoInverse());
    Assert.assertNotNull(reduction.getReducedCounts());
    Assert.assertNotNull(reduction.getReducedPseudoInverse());
    Assert.assertNotNull(reduction.getAllSingularValues());

    // One singular value per input column; the reduction keeps the row count.
    Assert.assertEquals(inputCounts.getColumnDimension(), reduction.getAllSingularValues().length);
    Assert.assertEquals(reduction.getReducedCounts().getRowDimension(), inputCounts.getRowDimension());

    final int keptEigensamples = reduction.getReducedCounts().getColumnDimension();

    // Recompute the expected count independently: singular values at or above factor * mean.
    final double cutoff = HDF5PCACoveragePoNCreationUtils.JOLLIFES_RULE_MEAN_FACTOR
            * new Mean().evaluate(reduction.getAllSingularValues());
    int eigensamplesAboveCutoff = 0;
    for (final double singularValue : reduction.getAllSingularValues()) {
        if (singularValue >= cutoff) {
            eigensamplesAboveCutoff++;
        }
    }

    Assert.assertTrue(keptEigensamples <= inputCounts.getColumnDimension());
    Assert.assertEquals(keptEigensamples, eigensamplesAboveCutoff);
    assertPseudoInverse(inputCounts, reduction.getPseudoInverse());
    assertPseudoInverse(reduction.getReducedCounts(), reduction.getReducedPseudoInverse());
}
Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
The class HDF5PCACoveragePoNCreationUtilsUnitTest, method testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns.
/**
 * Runs the panel reduction requesting exactly half (integer division) of the input columns as
 * eigensamples and checks that the reduced counts have that many columns.
 *
 * @param readCounts input read counts supplied by the data provider.
 */
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns(final ReadCountCollection readCounts) {
    final JavaSparkContext sparkContext = SparkContextFactory.getTestSparkContext();
    final int requestedEigensamples = readCounts.columnNames().size() / 2;
    final ReductionResult reduction = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(
            readCounts, OptionalInt.of(requestedEigensamples), NULL_LOGGER, sparkContext);
    final RealMatrix inputCounts = readCounts.counts();

    // Every component of the result must be present.
    Assert.assertNotNull(reduction);
    Assert.assertNotNull(reduction.getPseudoInverse());
    Assert.assertNotNull(reduction.getReducedCounts());
    Assert.assertNotNull(reduction.getReducedPseudoInverse());
    Assert.assertNotNull(reduction.getAllSingularValues());

    // One singular value per input column; the reduction keeps the row count.
    Assert.assertEquals(inputCounts.getColumnDimension(), reduction.getAllSingularValues().length);
    Assert.assertEquals(reduction.getReducedCounts().getRowDimension(), inputCounts.getRowDimension());

    // The reduced panel must have exactly the requested number of columns.
    Assert.assertEquals(reduction.getReducedCounts().getColumnDimension(), requestedEigensamples);
    final int keptEigensamples = reduction.getReducedCounts().getColumnDimension();
    Assert.assertEquals(keptEigensamples, requestedEigensamples);

    assertPseudoInverse(inputCounts, reduction.getPseudoInverse());
    assertPseudoInverse(reduction.getReducedCounts(), reduction.getReducedPseudoInverse());
}
Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
The class HDF5PCACoveragePoNUnitTest, method testReducedPoNPInvMatrixReading.
/**
 * Checks that the reduced panel pseudo-inverse read from {@code TEST_PON} has at most one row per
 * panel sample and one column per target, and that it equals the matrix read from
 * {@code TEST_PON_REDUCED_PON_PINV}.
 *
 * @throws IOException declared by the test signature; propagated from the reading helpers.
 */
@Test(dependsOnMethods = { "testTargetNameReading", "testLogNormalizedSampleNameReading" })
public void testReducedPoNPInvMatrixReading() throws IOException {
    // Close the HDF5 file once the checks are done instead of leaving it open after the test.
    try (final HDF5File reader = new HDF5File(TEST_PON)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
        final List<String> targets = pon.getTargetNames();
        final List<String> samples = pon.getPanelSampleNames();
        final RealMatrix actual = pon.getReducedPanelPInverseCounts();
        Assert.assertNotNull(actual);
        Assert.assertTrue(actual.getRowDimension() <= samples.size());
        Assert.assertEquals(actual.getColumnDimension(), targets.size());
        final RealMatrix expected = readDoubleMatrix(TEST_PON_REDUCED_PON_PINV);
        MathObjectAsserts.assertRealMatrixEquals(actual, expected);
    }
}
Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
The class HDF5PCACoveragePoNUnitTest, method testNormalizedPcovReading.
/**
 * Checks that the normalized counts read from {@code TEST_PON} have one row per target and one
 * column per sample, and that they equal the matrix read from {@code TEST_PON_NORMALIZED_PCOV}.
 *
 * @throws IOException declared by the test signature; propagated from the reading helpers.
 */
@Test(dependsOnMethods = { "testTargetNameReading", "testSampleNameReading" })
public void testNormalizedPcovReading() throws IOException {
    // Close the HDF5 file once the checks are done instead of leaving it open after the test.
    try (final HDF5File reader = new HDF5File(TEST_PON)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
        final List<String> targets = pon.getTargetNames();
        final List<String> samples = pon.getSampleNames();
        final RealMatrix actual = pon.getNormalizedCounts();
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.getRowDimension(), targets.size());
        Assert.assertEquals(actual.getColumnDimension(), samples.size());
        final RealMatrix expected = readDoubleMatrix(TEST_PON_NORMALIZED_PCOV);
        MathObjectAsserts.assertRealMatrixEquals(actual, expected);
    }
}
Aggregations