Search in sources:

Example 91 with RealMatrix

Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

In the class ReadCountCollectionUtilsUnitTest, method testReadTargetNameOnlyFormattedFile.

/**
 * Parses a read-count file whose only target-describing column is the target name and
 * checks the resulting collection.
 *
 * <p>The file written here has two comment lines, a header with the two sample columns
 * placed before the name column, and two data rows. The test then asserts the column
 * names, the target names, that every target interval is {@code null}, and each entry of
 * the 2 x 2 count matrix.</p>
 *
 * @throws IOException if the temporary file cannot be created, written or parsed.
 */
@Test
public void testReadTargetNameOnlyFormattedFile() throws IOException {
    final File testFile = createTempFile();
    // try-with-resources closes (and therefore flushes) the writer even if writing fails part-way,
    // so the file handle is never leaked.
    try (final PrintWriter writer = new PrintWriter(testFile)) {
        writer.println("## comment 1");
        writer.println("## comment 2");
        writer.println("SAMPLE2\tSAMPLE1\t" + TargetTableColumn.NAME.toString());
        writer.println("1.1\t2.2\ttgt_0");
        writer.println("-1.1E-7\t-2.2E-8\ttgt_1");
    }
    final ReadCountCollection subject = ReadCountCollectionUtils.parse(testFile);
    Assert.assertNotNull(subject);
    // Sample columns must come back in the order they appear in the header.
    Assert.assertEquals(subject.columnNames(), Arrays.asList("SAMPLE2", "SAMPLE1"));
    Assert.assertEquals(subject.targets().stream().map(Target::getName).collect(Collectors.toList()), Arrays.asList("tgt_0", "tgt_1"));
    // No coordinate columns were written, so no target is expected to carry an interval.
    Assert.assertEquals(subject.targets().stream().map(Target::getInterval).collect(Collectors.toList()), Arrays.asList(null, null));
    Assert.assertEquals(subject.targets().size(), 2);
    final RealMatrix counts = subject.counts();
    Assert.assertEquals(counts.getRowDimension(), 2);
    Assert.assertEquals(counts.getColumnDimension(), 2);
    Assert.assertEquals(counts.getEntry(0, 0), 1.1, 0.0001);
    Assert.assertEquals(counts.getEntry(0, 1), 2.2, 0.0001);
    Assert.assertEquals(counts.getEntry(1, 0), -1.1E-7, 0.000000001);
    Assert.assertEquals(counts.getEntry(1, 1), -2.2E-8, 0.000000001);
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) File(java.io.File) PrintWriter(java.io.PrintWriter) Test(org.testng.annotations.Test)

Example 92 with RealMatrix

Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

In the class ReadCountCollectionUtilsUnitTest, method testReadFullFormattedFile.

/**
 * Parses a read-count file that carries the full set of target columns followed by two
 * sample columns, and checks the resulting collection.
 *
 * <p>The file written here has two comment lines, a header made of
 * {@code CONTIG_START_END_NAME} plus the two sample names, and two data rows. The test
 * then asserts the column names, the target names, the target intervals and each entry
 * of the 2 x 2 count matrix.</p>
 *
 * @throws IOException if the temporary file cannot be created, written or parsed.
 */
@Test
public void testReadFullFormattedFile() throws IOException {
    final File testFile = createTempFile();
    // try-with-resources closes (and therefore flushes) the writer even if writing fails part-way,
    // so the file handle is never leaked.
    try (final PrintWriter writer = new PrintWriter(testFile)) {
        writer.println("## comment 1");
        writer.println("## comment 2");
        writer.println(CONTIG_START_END_NAME + "\tSAMPLE1\tSAMPLE2");
        writer.println("1\t100\t200\ttgt_0\t1.1\t2.2");
        writer.println("2\t200\t300\ttgt_1\t-1.1E-7\t-2.2E-8");
    }
    final ReadCountCollection subject = ReadCountCollectionUtils.parse(testFile);
    Assert.assertNotNull(subject);
    Assert.assertEquals(subject.columnNames(), Arrays.asList("SAMPLE1", "SAMPLE2"));
    Assert.assertEquals(subject.targets().stream().map(Target::getName).collect(Collectors.toList()), Arrays.asList("tgt_0", "tgt_1"));
    // Each data row's first three fields are expected to come back as the target's interval.
    Assert.assertEquals(subject.targets().stream().map(Target::getInterval).collect(Collectors.toList()), Arrays.asList(new SimpleInterval("1", 100, 200), new SimpleInterval("2", 200, 300)));
    Assert.assertEquals(subject.targets().size(), 2);
    final RealMatrix counts = subject.counts();
    Assert.assertEquals(counts.getRowDimension(), 2);
    Assert.assertEquals(counts.getColumnDimension(), 2);
    Assert.assertEquals(counts.getEntry(0, 0), 1.1, 0.0001);
    Assert.assertEquals(counts.getEntry(0, 1), 2.2, 0.0001);
    Assert.assertEquals(counts.getEntry(1, 0), -1.1E-7, 0.000000001);
    Assert.assertEquals(counts.getEntry(1, 1), -2.2E-8, 0.000000001);
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) File(java.io.File) PrintWriter(java.io.PrintWriter) Test(org.testng.annotations.Test)

Example 93 with RealMatrix

Use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.

In the class ReadCountCollectionUtilsUnitTest, method testExtremeMedianColumnsData.

/**
 * Checks {@code ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts} against an
 * independent calculation done in the test.
 *
 * <p>The test computes the median of every column, derives the {@code percentile} and
 * {@code 100 - percentile} percentiles of those medians, and expects exactly the columns
 * whose median lies within that closed range to survive, in their original relative order
 * and with unchanged counts.</p>
 *
 * @param readCount  the read counts to filter.
 * @param percentile the percentile passed to the method under test.
 */
@Test(dataProvider = "readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final Median median = new Median();
    final RealMatrix counts = readCount.counts();
    final double[] columnMedians = IntStream.range(0, counts.getColumnDimension()).mapToDouble(i -> median.evaluate(counts.getColumn(i))).toArray();
    final double top = new Percentile(100 - percentile).evaluate(columnMedians);
    final double bottom = new Percentile(percentile).evaluate(columnMedians);
    // Primitive flags plus a running count: one pass, no boxing.
    final boolean[] toBeKept = new boolean[columnMedians.length];
    int toBeKeptCount = 0;
    for (int i = 0; i < columnMedians.length; i++) {
        toBeKept[i] = columnMedians[i] <= top && columnMedians[i] >= bottom;
        if (toBeKept[i]) {
            toBeKeptCount++;
        }
    }
    final ReadCountCollection result = ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), toBeKeptCount);
    // Position the next surviving column must occupy in the result; enforces order preservation.
    int nextIndex = 0;
    for (int i = 0; i < toBeKept.length; i++) {
        if (toBeKept[i]) {
            final int index = result.columnNames().indexOf(readCount.columnNames().get(i));
            Assert.assertEquals(index, nextIndex++);
            // TestNG's argument order is (actual, expected): the filtered result is the actual value.
            Assert.assertEquals(result.counts().getColumn(index), counts.getColumn(i));
        } else {
            // A dropped column must not appear in the result at all.
            Assert.assertEquals(result.columnNames().indexOf(readCount.columnNames().get(i)), -1);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Test(org.testng.annotations.Test)

Example 94 with RealMatrix

Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

In the class GCCorrector, method correctCoverage.

/**
 * Produces a GC-corrected copy of the given read counts.
 *
 * <p>A separate {@code GCCorrector} is built for each column from that column's counts and
 * the per-target GC content. Every entry of a copy of the count matrix is passed through
 * its column's corrector, and each column is then rescaled so that its L1 norm equals the
 * L1 norm of the corresponding input column.</p>
 *
 * @param inputCounts       raw coverage before GC correction
 * @param gcContentByTarget array of gc contents, one per target of the input
 * @return GC-corrected coverage, with the same targets and column names as the input
 */
public static ReadCountCollection correctCoverage(final ReadCountCollection inputCounts, final double[] gcContentByTarget) {
    final int sampleCount = inputCounts.columnNames().size();

    // The GC bias curve is sample specific, so build one corrector per column.
    final List<GCCorrector> correctorBySample = IntStream.range(0, sampleCount)
            .mapToObj(sample -> new GCCorrector(gcContentByTarget, inputCounts.counts().getColumnVector(sample)))
            .collect(Collectors.toList());

    // Work on a copy so that the caller's matrix is left untouched.
    final RealMatrix adjusted = inputCounts.counts().copy();
    adjusted.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int row, int col, double value) {
            final GCCorrector corrector = correctorBySample.get(col);
            return corrector.correctedCoverage(value, gcContentByTarget[row]);
        }
    });

    // Rescale every column so that total coverage before and after correction agree,
    // i.e. the average correction factor is 1.0.
    final double[] scaleBySample = new double[sampleCount];
    for (int sample = 0; sample < sampleCount; sample++) {
        final double rawTotal = inputCounts.counts().getColumnVector(sample).getL1Norm();
        final double correctedTotal = adjusted.getColumnVector(sample).getL1Norm();
        scaleBySample[sample] = rawTotal / correctedTotal;
    }
    adjusted.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int row, int col, double value) {
            return value * scaleBySample[col];
        }
    });

    return new ReadCountCollection(inputCounts.targets(), inputCounts.columnNames(), adjusted);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) List(java.util.List) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Example 95 with RealMatrix

Use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.

In the class CNLOHCaller, method sumOverSegments.

/**
 * Collects the per-segment arrays held by the RDD into a matrix (one row per RDD element)
 * and returns the result of {@code GATKProtectedMathUtils.columnSums} on that matrix.
 *
 * @param eZskBySeg RDD whose elements become the rows of the matrix.
 * @return the column sums of the assembled matrix.
 */
private double[] sumOverSegments(final JavaRDD<double[]> eZskBySeg) {
    // Rows are gathered on the driver; the original author notes the layout as S x K.
    final double[][] rowsBySegment = eZskBySeg.collect().toArray(new double[0][]);
    return GATKProtectedMathUtils.columnSums(MatrixUtils.createRealMatrix(rowsBySegment));
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)259 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)158 Test (org.testng.annotations.Test)86 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)60 IntStream (java.util.stream.IntStream)50 Collectors (java.util.stream.Collectors)48 Median (org.apache.commons.math3.stat.descriptive.rank.Median)42 HDF5File (org.broadinstitute.hdf5.HDF5File)42 File (java.io.File)40 List (java.util.List)37 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)36 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)36 ArrayList (java.util.ArrayList)32 Assert (org.testng.Assert)32 IOException (java.io.IOException)30 Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)30 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)30 DoubleStream (java.util.stream.DoubleStream)28 Logger (org.apache.logging.log4j.Logger)27 Utils (org.broadinstitute.hellbender.utils.Utils)27