Search in sources:

Example 96 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtilsUnitTest method testSubtractMedianOfMedians.

/**
 * Verifies that {@code subtractMedianOfMedians} shifts every entry of the read-count matrix
 * by the median of the per-column medians.
 * <p>The expected values are computed from the counts before the method under test is invoked,
 * and the counts are re-read from {@code readCounts} afterwards for the comparison.</p>
 *
 * @param readCounts the read counts supplied by the {@code readCountOnlyData} data provider.
 */
@Test(dataProvider = "readCountOnlyData")
public void testSubtractMedianOfMedians(final ReadCountCollection readCounts) {
    final RealMatrix originalCounts = readCounts.counts();
    final Median medianCalculator = new Median();

    // Median of each column, then the median of those medians.
    final double[] perColumnMedians = new double[originalCounts.getColumnDimension()];
    for (int col = 0; col < perColumnMedians.length; col++) {
        perColumnMedians[col] = medianCalculator.evaluate(originalCounts.getColumn(col));
    }
    final double medianOfMedians = medianCalculator.evaluate(perColumnMedians);

    // Expected result: a private copy of every row with the overall median subtracted.
    final double[][] expected = IntStream.range(0, originalCounts.getRowDimension())
            .mapToObj(row -> {
                final double[] shifted = originalCounts.getRow(row).clone();
                for (int col = 0; col < shifted.length; col++) {
                    shifted[col] -= medianOfMedians;
                }
                return shifted;
            })
            .toArray(double[][]::new);

    HDF5PCACoveragePoNCreationUtils.subtractMedianOfMedians(readCounts, NULL_LOGGER);

    final RealMatrix shiftedCounts = readCounts.counts();
    Assert.assertEquals(shiftedCounts.getColumnDimension(), expected[0].length);
    Assert.assertEquals(shiftedCounts.getRowDimension(), expected.length);
    int row = 0;
    for (final double[] expectedRow : expected) {
        for (int col = 0; col < expectedRow.length; col++) {
            Assert.assertEquals(shiftedCounts.getEntry(row, col), expectedRow[col], 0.000001);
        }
        row++;
    }
}
Also used : IntStream(java.util.stream.IntStream) SVD(org.broadinstitute.hellbender.utils.svd.SVD) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Level(org.apache.logging.log4j.Level) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) Test(org.testng.annotations.Test) Random(java.util.Random) OptionalInt(java.util.OptionalInt) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ArrayList(java.util.ArrayList) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Pair(org.apache.commons.lang3.tuple.Pair) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Stream(java.util.stream.Stream) Target(org.broadinstitute.hellbender.tools.exome.Target) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) 
Median(org.apache.commons.math3.stat.descriptive.rank.Median) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 97 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNUnitTest method testReducedPoNMatrixReading.

/**
 * Checks that the reduced panel counts read from {@code TEST_PON} have one row per target,
 * no more columns than there are panel samples, and equal the matrix read from
 * {@code TEST_PON_REDUCED_PON}.
 *
 * @throws IOException presumably raised while reading the expected matrix; confirm against
 *                     {@code readDoubleMatrix}.
 */
@Test(dependsOnMethods = { "testTargetNameReading", "testLogNormalizedSampleNameReading" })
public void testReducedPoNMatrixReading() throws IOException {
    // try-with-resources releases the HDF5 file handle even when an assertion below fails.
    try (final HDF5File reader = new HDF5File(TEST_PON)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
        final List<String> targets = pon.getTargetNames();
        final List<String> samples = pon.getPanelSampleNames();
        final RealMatrix actual = pon.getReducedPanelCounts();
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.getRowDimension(), targets.size());
        Assert.assertTrue(actual.getColumnDimension() <= samples.size());
        final RealMatrix expected = readDoubleMatrix(TEST_PON_REDUCED_PON);
        MathObjectAsserts.assertRealMatrixEquals(actual, expected);
    }
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) HDF5File(org.broadinstitute.hdf5.HDF5File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 98 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class PoNTestUtils method readTsvIntoMatrix.

/**
 * Reads a very basic tsv (numbers separated by tabs) into a RealMatrix.
 * <p>Very little error checking happens in this method: fields that are blank after trimming
 * are skipped, every remaining field is parsed with {@link Double#parseDouble(String)}, and the
 * number of columns of the result is taken from the first row.</p>
 * <p>The test fails (through {@code Assert}) when the file cannot be read or contains no rows.</p>
 *
 * @param inputFile readable file.  Not {@code null}
 * @return never {@code null}
 */
public static RealMatrix readTsvIntoMatrix(final File inputFile) {
    IOUtils.canReadFile(inputFile);
    final List<double[]> allData = new ArrayList<>();
    // try-with-resources closes both readers on every exit path, including parse failures.
    try (final FileReader fileReader = new FileReader(inputFile);
         final CSVReader reader = new CSVReader(fileReader, '\t', CSVWriter.NO_QUOTE_CHARACTER)) {
        String[] nextLine;
        while ((nextLine = reader.readNext()) != null) {
            // Trim once, drop empty fields, and parse straight to primitive doubles (no boxing).
            allData.add(Arrays.stream(nextLine)
                    .map(StringUtils::trim)
                    .filter(field -> !field.isEmpty())
                    .mapToDouble(Double::parseDouble)
                    .toArray());
        }
    } catch (final IOException ioe) {
        Assert.fail("Could not open test file: " + inputFile, ioe);
    }
    // Fail with a clear message instead of an IndexOutOfBoundsException from allData.get(0).
    Assert.assertFalse(allData.isEmpty(), "No data rows found in test file: " + inputFile);
    final RealMatrix result = new Array2DRowRealMatrix(allData.size(), allData.get(0).length);
    for (int i = 0; i < result.getRowDimension(); i++) {
        result.setRow(i, allData.get(i));
    }
    return result;
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Arrays(java.util.Arrays) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CSVWriter(au.com.bytecode.opencsv.CSVWriter) CSVReader(com.opencsv.CSVReader) HDF5PCACoveragePoNCreationUtils(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoNCreationUtils) IOException(java.io.IOException) TargetArgumentCollection(org.broadinstitute.hellbender.tools.exome.TargetArgumentCollection) StringUtils(org.apache.commons.lang3.StringUtils) CreatePanelOfNormals(org.broadinstitute.hellbender.tools.exome.CreatePanelOfNormals) OptionalInt(java.util.OptionalInt) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) ArrayList(java.util.ArrayList) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) List(java.util.List) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) Assert(org.testng.Assert) Target(org.broadinstitute.hellbender.tools.exome.Target) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) HDF5File(org.broadinstitute.hdf5.HDF5File) FileReader(java.io.FileReader) TargetCollection(org.broadinstitute.hellbender.tools.exome.TargetCollection) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) CSVReader(com.opencsv.CSVReader) ArrayList(java.util.ArrayList) FileReader(java.io.FileReader) IOException(java.io.IOException)

Example 99 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtilsUnitTest method testCalculateReducedPanelAndPInversesUsingJollifesRule.

/**
 * Exercises {@code calculateReducedPanelAndPInverses} without a requested eigensample count
 * ({@code OptionalInt.empty()}).
 * <p>The number of columns of the reduced counts is expected to equal the number of singular
 * values that are at least {@code JOLLIFES_RULE_MEAN_FACTOR} times the mean singular value.
 * Both pseudo-inverses are checked with {@code assertPseudoInverse}.</p>
 *
 * @param readCounts the read counts supplied by the {@code readCountOnlyWithDiverseShapeData}
 *                   data provider.
 */
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesUsingJollifesRule(final ReadCountCollection readCounts) {
    final JavaSparkContext sparkContext = SparkContextFactory.getTestSparkContext();
    final ReductionResult reduction = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(
            readCounts, OptionalInt.empty(), NULL_LOGGER, sparkContext);
    final RealMatrix inputCounts = readCounts.counts();

    // Every component of the result must be present.
    Assert.assertNotNull(reduction);
    Assert.assertNotNull(reduction.getPseudoInverse());
    Assert.assertNotNull(reduction.getReducedCounts());
    Assert.assertNotNull(reduction.getReducedPseudoInverse());
    Assert.assertNotNull(reduction.getAllSingularValues());

    // One singular value per input column; the reduced counts keep the input row count.
    final double[] singularValues = reduction.getAllSingularValues();
    Assert.assertEquals(inputCounts.getColumnDimension(), singularValues.length);
    Assert.assertEquals(reduction.getReducedCounts().getRowDimension(), inputCounts.getRowDimension());

    final int eigensamples = reduction.getReducedCounts().getColumnDimension();

    // Count the singular values that reach the threshold derived from their mean.
    final double threshold = HDF5PCACoveragePoNCreationUtils.JOLLIFES_RULE_MEAN_FACTOR * new Mean().evaluate(singularValues);
    int expectedEigensamples = 0;
    for (final double singularValue : singularValues) {
        if (singularValue >= threshold) {
            expectedEigensamples++;
        }
    }

    Assert.assertTrue(eigensamples <= inputCounts.getColumnDimension());
    Assert.assertEquals(eigensamples, expectedEigensamples);
    assertPseudoInverse(inputCounts, reduction.getPseudoInverse());
    assertPseudoInverse(reduction.getReducedCounts(), reduction.getReducedPseudoInverse());
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 100 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtilsUnitTest method testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns.

/**
 * Exercises {@code calculateReducedPanelAndPInverses} when the requested number of eigensamples
 * is half the number of column names (integer division).
 * <p>The reduced counts are expected to have exactly that many columns and as many rows as the
 * input counts. Both pseudo-inverses are checked with {@code assertPseudoInverse}.</p>
 *
 * @param readCounts the read counts supplied by the {@code readCountOnlyWithDiverseShapeData}
 *                   data provider.
 */
@Test(dataProvider = "readCountOnlyWithDiverseShapeData")
public void testCalculateReducedPanelAndPInversesKeepingHalfOfAllColumns(final ReadCountCollection readCounts) {
    final JavaSparkContext sparkContext = SparkContextFactory.getTestSparkContext();
    final int requestedEigensamples = readCounts.columnNames().size() / 2;
    final ReductionResult reduction = HDF5PCACoveragePoNCreationUtils.calculateReducedPanelAndPInverses(
            readCounts, OptionalInt.of(requestedEigensamples), NULL_LOGGER, sparkContext);
    final RealMatrix inputCounts = readCounts.counts();

    // Every component of the result must be present.
    Assert.assertNotNull(reduction);
    Assert.assertNotNull(reduction.getPseudoInverse());
    Assert.assertNotNull(reduction.getReducedCounts());
    Assert.assertNotNull(reduction.getReducedPseudoInverse());
    Assert.assertNotNull(reduction.getAllSingularValues());

    // One singular value per input column; the reduced counts keep the input row count.
    Assert.assertEquals(inputCounts.getColumnDimension(), reduction.getAllSingularValues().length);
    Assert.assertEquals(reduction.getReducedCounts().getRowDimension(), inputCounts.getRowDimension());

    // The reduced counts must have exactly the requested number of columns.
    Assert.assertEquals(reduction.getReducedCounts().getColumnDimension(), requestedEigensamples);
    final int eigensamples = reduction.getReducedCounts().getColumnDimension();
    Assert.assertEquals(eigensamples, requestedEigensamples);

    assertPseudoInverse(inputCounts, reduction.getPseudoInverse());
    assertPseudoInverse(reduction.getReducedCounts(), reduction.getReducedPseudoInverse());
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)218 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)148 Test (org.testng.annotations.Test)86 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)60 IntStream (java.util.stream.IntStream)50 Collectors (java.util.stream.Collectors)48 Median (org.apache.commons.math3.stat.descriptive.rank.Median)42 HDF5File (org.broadinstitute.hdf5.HDF5File)42 File (java.io.File)40 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)36 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)36 List (java.util.List)34 Assert (org.testng.Assert)32 IOException (java.io.IOException)30 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)30 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)30 DoubleStream (java.util.stream.DoubleStream)28 Logger (org.apache.logging.log4j.Logger)27 Utils (org.broadinstitute.hellbender.utils.Utils)27 ArrayList (java.util.ArrayList)26