Search in sources :

Example 36 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class HDF5PCACoveragePoNUnitTest method testNormalizedPcovReading.

@Test(dependsOnMethods = { "testTargetNameReading", "testSampleNameReading" })
public void testNormalizedPcovReading() throws IOException {
    final HDF5File reader = new HDF5File(TEST_PON);
    final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
    final List<String> targets = pon.getTargetNames();
    final List<String> samples = pon.getSampleNames();
    final RealMatrix actual = pon.getNormalizedCounts();
    Assert.assertNotNull(actual);
    Assert.assertEquals(actual.getRowDimension(), targets.size());
    Assert.assertEquals(actual.getColumnDimension(), samples.size());
    final RealMatrix expected = readDoubleMatrix(TEST_PON_NORMALIZED_PCOV);
    MathObjectAsserts.assertRealMatrixEquals(actual, expected);
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) HDF5File(org.broadinstitute.hdf5.HDF5File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 37 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

the class HDF5PCACoveragePoNUnitTest method testLogNormalizedMatrixReading.

@Test(dependsOnMethods = { "testTargetNameReading", "testLogNormalizedSampleNameReading" })
public void testLogNormalizedMatrixReading() throws IOException {
    final HDF5File reader = new HDF5File(TEST_PON);
    final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
    final List<String> targets = pon.getTargetNames();
    final List<String> samples = pon.getPanelSampleNames();
    final RealMatrix actual = pon.getLogNormalizedCounts();
    Assert.assertNotNull(actual);
    Assert.assertEquals(actual.getRowDimension(), targets.size());
    Assert.assertEquals(actual.getColumnDimension(), samples.size());
    final RealMatrix expected = readDoubleMatrix(TEST_PON_LOG_NORMALS);
    MathObjectAsserts.assertRealMatrixEquals(actual, expected);
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) HDF5File(org.broadinstitute.hdf5.HDF5File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 38 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method assertPreTangentNormalizedValues.

/**
     * This code reconstructs the expected pre-tangent normalization counts given the input,
     * and then compares against the actual pre-tangent output.
     * <p>
     * This method does not use any of the components that does the actual computation
     * in production, so that the calculation is independent.
     * </p>
     * <p>
     * This code also use an alternative way to calculate it to reduce overlap even further.
     * </p>
     * @param preTangentNormalized actual output.
     * @param factorNormalized input.
     */
private void assertPreTangentNormalizedValues(final ReadCountCollection factorNormalized, final ReadCountCollection preTangentNormalized) {
    final double epsilon = PCATangentNormalizationUtils.EPSILON;
    final RealMatrix outCounts = preTangentNormalized.counts();
    final RealMatrix inCounts = factorNormalized.counts();
    final double[] columnMeans = GATKProtectedMathUtils.columnMeans(inCounts);
    Assert.assertTrue(DoubleStream.of(columnMeans).allMatch(d -> d < 0.5));
    final double[][] expected = new double[inCounts.getRowDimension()][inCounts.getColumnDimension()];
    final double[] columnValues = new double[inCounts.getRowDimension()];
    for (int i = 0; i < columnMeans.length; i++) {
        for (int j = 0; j < inCounts.getRowDimension(); j++) {
            final double inValue = inCounts.getEntry(j, i);
            final double lowBoundedInValue = Math.max(epsilon, inValue / columnMeans[i]);
            final double outValue = Math.log(lowBoundedInValue) / Math.log(2);
            expected[j][i] = outValue;
            columnValues[j] = outValue;
        }
        Arrays.sort(columnValues);
        final int midIndex = columnValues.length >> 1;
        final double median = columnValues.length % 2 == 1 ? columnValues[midIndex] : (columnValues[midIndex] + columnValues[1 + midIndex]) * .5;
        for (int j = 0; j < inCounts.getRowDimension(); j++) {
            expected[j][i] -= median;
            Assert.assertEquals(outCounts.getEntry(j, i), expected[j][i], 0.000001, " Row " + j + " col " + i);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) DataProvider(org.testng.annotations.DataProvider) DefaultRealMatrixPreservingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor) Test(org.testng.annotations.Test) Function(java.util.function.Function) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) Assert(org.testng.Assert) TableUtils(org.broadinstitute.hellbender.utils.tsv.TableUtils) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) HDF5File(org.broadinstitute.hdf5.HDF5File) ExomeStandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.ExomeStandardArgumentDefinitions) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) PCATangentNormalizationUtils(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationUtils) IOException(java.io.IOException) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) PCATangentNormalizationResult(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationResult) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 39 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method testNameOnlyCountsInputRunOneSample.

@Test
public void testNameOnlyCountsInputRunOneSample() throws IOException {
    final File factorNormalizedOutput = createTempFile("test", ".txt");
    final File tangentNormalizationOutput = createTempFile("tangent-", ".txt");
    final File betaHatsOutput = createTempFile("tangent-", ".bhats");
    final File preTangentNormalizationOutput = createTempFile("pre-tn-", ".txt");
    final String[] arguments = { "-" + NormalizeSomaticReadCounts.READ_COUNTS_FILE_SHORT_NAME, TARGET_NAME_ONLY_READ_COUNTS_INPUT_ONE_SAMPLE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.PON_FILE_SHORT_NAME, TEST_PON.getAbsolutePath(), "-" + NormalizeSomaticReadCounts.FACTOR_NORMALIZED_COUNTS_SHORT_NAME, factorNormalizedOutput.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TANGENT_NORMALIZED_COUNTS_FILE_SHORT_NAME, tangentNormalizationOutput.getAbsolutePath(), "-" + NormalizeSomaticReadCounts.TANGENT_BETA_HATS_SHORT_NAME, betaHatsOutput.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.PRE_TANGENT_NORMALIZED_COUNTS_FILE_SHORT_NAME, preTangentNormalizationOutput.getAbsolutePath() };
    runCommandLine(arguments);
    final ReadCountCollection input = ReadCountCollectionUtils.parse(TARGET_NAME_ONLY_READ_COUNTS_INPUT_ONE_SAMPLE);
    final ReadCountCollection factorNormalized = ReadCountCollectionUtils.parse(factorNormalizedOutput);
    final ReadCountCollection tangentNormalized = ReadCountCollectionUtils.parse(tangentNormalizationOutput);
    final ReadCountCollection preTangentNormalized = ReadCountCollectionUtils.parse(preTangentNormalizationOutput);
    final RealMatrix betaHats = readBetaHats(betaHatsOutput, input);
    Assert.assertFalse(factorNormalized.targets().stream().anyMatch(t -> t.getInterval() != null));
    Assert.assertEquals(factorNormalized.columnNames(), input.columnNames());
    Assert.assertEquals(tangentNormalized.columnNames(), input.columnNames());
    Assert.assertEquals(preTangentNormalized.columnNames(), input.columnNames());
    Assert.assertEquals(factorNormalized.targets().stream().collect(Collectors.toSet()), input.targets().stream().collect(Collectors.toSet()));
    assertFactorNormalizedValues(input, factorNormalized);
    assertPreTangentNormalizedValues(factorNormalized, preTangentNormalized);
    assertBetaHats(preTangentNormalized, betaHats, TEST_PON);
    assertBetaHatsRobustToOutliers(preTangentNormalized, TEST_PON);
    assertTangentNormalized(tangentNormalized, preTangentNormalized, betaHats, TEST_PON);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) DataProvider(org.testng.annotations.DataProvider) DefaultRealMatrixPreservingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor) Test(org.testng.annotations.Test) Function(java.util.function.Function) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) Assert(org.testng.Assert) TableUtils(org.broadinstitute.hellbender.utils.tsv.TableUtils) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) HDF5File(org.broadinstitute.hdf5.HDF5File) ExomeStandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.ExomeStandardArgumentDefinitions) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) PCATangentNormalizationUtils(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationUtils) IOException(java.io.IOException) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) PCATangentNormalizationResult(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationResult) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) HDF5File(org.broadinstitute.hdf5.HDF5File) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 40 with RealMatrix

use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method assertBetaHatsRobustToOutliers.

/**
     * Asserts that the calculation of beta hats is not significantly affected by zero-coverage outlier counts
     * We perform this check by randomly setting some coverages to zero in copy ratio space (-infinity in log space).
     * betaHats imputes 0 in log space (1 in copy ratio space) whenever coverage is below a certain low threshold
     * and should thus be robust to this type of noise.
     */
private void assertBetaHatsRobustToOutliers(final ReadCountCollection preTangentNormalized, final File ponFile) {
    try (final HDF5File ponReader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final List<String> ponTargets = pon.getPanelTargetNames();
        final RealMatrix input = reorderTargetsToPoNOrder(preTangentNormalized, ponTargets);
        // randomly set some entries to zero in copy-ratio space (-infinity in log space)
        final Random random = new Random(13);
        final double noiseProportion = 0.01;
        final RealMatrix noisyInput = input.copy();
        noisyInput.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

            @Override
            public double visit(final int row, final int column, final double value) {
                return random.nextDouble() < noiseProportion ? Double.NEGATIVE_INFINITY : value;
            }
        });
        final RealMatrix betaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), input, PCATangentNormalizationUtils.EPSILON);
        final RealMatrix noisyBetaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), noisyInput, PCATangentNormalizationUtils.EPSILON);
        final RealMatrix difference = betaHats.subtract(noisyBetaHats);
        difference.walkInOptimizedOrder(new DefaultRealMatrixPreservingVisitor() {

            @Override
            public void visit(final int row, int column, double value) {
                Assert.assertEquals(value, 0, 0.01);
            }
        });
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) DefaultRealMatrixPreservingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)259 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)158 Test (org.testng.annotations.Test)86 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)60 IntStream (java.util.stream.IntStream)50 Collectors (java.util.stream.Collectors)48 Median (org.apache.commons.math3.stat.descriptive.rank.Median)42 HDF5File (org.broadinstitute.hdf5.HDF5File)42 File (java.io.File)40 List (java.util.List)37 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)36 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)36 ArrayList (java.util.ArrayList)32 Assert (org.testng.Assert)32 IOException (java.io.IOException)30 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)30 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)30 DoubleStream (java.util.stream.DoubleStream)28 Logger (org.apache.logging.log4j.Logger)27 Utils (org.broadinstitute.hellbender.utils.Utils)27