Search in sources :

Example 81 with DataProvider

use of org.testng.annotations.DataProvider in project gatk by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method tooManyZerosData.

/**
 * Supplies one randomly populated {@link ReadCountCollection} per zero probability.
 *
 * <p>Each cell is set to exactly {@code 0.0} when a uniform draw is at or below the
 * zero probability; otherwise it is set to a second uniform draw. The random generator
 * uses a fixed seed, so the same matrices are produced on every run.
 *
 * @return one single-element argument array per zero probability.
 */
@DataProvider(name = "tooManyZerosData")
public Object[][] tooManyZerosData() {
    final double[] zeroProbabilities = new double[] { .001, .01, .02, 0.1 };
    final List<Object[]> result = new ArrayList<>();
    final Random rdn = new Random(13);
    final int columnCount = 100;
    final int targetCount = 100;
    final List<String> columnNames = IntStream.range(0, columnCount).mapToObj(i -> "sample_" + (i + 1)).collect(Collectors.toList());
    final List<Target> targets = IntStream.range(0, targetCount).mapToObj(i -> new Target("target_" + (i + 1))).collect(Collectors.toList());
    for (final double zeroProbability : zeroProbabilities) {
        // The tests consuming this data index matrix rows by target and matrix columns by
        // column name, so the matrix must be targetCount x columnCount. Allocating it the
        // other way round only worked because both sizes happen to be equal.
        final double[][] counts = new double[targetCount][columnCount];
        for (final double[] row : counts) {
            for (int j = 0; j < row.length; j++) {
                row[j] = rdn.nextDouble() <= zeroProbability ? 0.0 : rdn.nextDouble();
            }
        }
        final ReadCountCollection readCounts = new ReadCountCollection(targets, columnNames, new Array2DRowRealMatrix(counts, false));
        result.add(new Object[] { readCounts });
    }
    return result.toArray(new Object[result.size()][]);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) ArrayList(java.util.ArrayList) Random(java.util.Random) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DataProvider(org.testng.annotations.DataProvider)

Example 82 with DataProvider

use of org.testng.annotations.DataProvider in project gatk by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method testRemoveTargetsWithTooManyZeros.

/**
 * Checks {@code removeTargetsWithTooManyZeros} for every zero threshold below the
 * largest per-target zero count found in the input.
 *
 * <p>For each threshold the test expects either a {@code UserException.BadInput}
 * (when no target would survive) or a result that keeps exactly the targets whose
 * zero count is within the threshold, in their original relative order.
 */
@Test(dataProvider = "tooManyZerosData")
public void testRemoveTargetsWithTooManyZeros(final ReadCountCollection readCount) {
    final RealMatrix matrix = readCount.counts();

    // Count the exact zeros in every row (one row per target).
    final int[] zerosPerTarget = new int[matrix.getRowDimension()];
    for (int row = 0; row < zerosPerTarget.length; row++) {
        int zeros = 0;
        for (final double value : matrix.getRow(row)) {
            if (value == 0.0) {
                zeros++;
            }
        }
        zerosPerTarget[row] = zeros;
    }
    final int largestZeroCount = IntStream.of(zerosPerTarget).max().getAsInt();

    for (int threshold = 0; threshold < largestZeroCount; threshold++) {
        int survivors = 0;
        for (final int zeros : zerosPerTarget) {
            if (zeros <= threshold) {
                survivors++;
            }
        }

        if (survivors == 0) {
            // Removing every target is expected to be rejected.
            boolean rejected = false;
            try {
                ReadCountCollectionUtils.removeTargetsWithTooManyZeros(readCount, threshold, false, NULL_LOGGER);
            } catch (final UserException.BadInput expected) {
                rejected = true;
            }
            if (!rejected) {
                Assert.fail("expects an exception");
            }
            continue;
        }

        final ReadCountCollection filtered = ReadCountCollectionUtils.removeTargetsWithTooManyZeros(readCount, threshold, false, NULL_LOGGER);
        Assert.assertEquals(filtered.targets().size(), survivors);

        // Walk the original targets; kept ones must appear consecutively from index 0.
        int expectedPosition = 0;
        for (int i = 0; i < readCount.targets().size(); i++) {
            final Target original = readCount.targets().get(i);
            final int position = filtered.targets().indexOf(original);
            if (zerosPerTarget[i] > threshold) {
                Assert.assertEquals(position, -1);
            } else {
                Assert.assertTrue(position >= 0, " " + zerosPerTarget[i] + " " + threshold);
                Assert.assertEquals(position, expectedPosition++);
            }
        }
        Assert.assertEquals(expectedPosition, survivors);
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException) Test(org.testng.annotations.Test)

Example 83 with DataProvider

use of org.testng.annotations.DataProvider in project gatk by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method testRemoveColumnsWithTooManyZeros.

/**
 * Checks {@code removeColumnsWithTooManyZeros} for every zero threshold below the
 * largest per-column zero count found in the input.
 *
 * <p>For each threshold the test expects either a {@code UserException.BadInput}
 * (when no column would survive) or a result that keeps exactly the columns whose
 * zero count is within the threshold, in their original relative order.
 */
@Test(dataProvider = "tooManyZerosData")
public void testRemoveColumnsWithTooManyZeros(final ReadCountCollection readCount) {
    final RealMatrix counts = readCount.counts();
    final int[] numberOfZeros = IntStream.range(0, counts.getColumnDimension()).map(i -> (int) DoubleStream.of(counts.getColumn(i)).filter(d -> d == 0.0).count()).toArray();
    final int maximumNumberOfZeros = IntStream.of(numberOfZeros).max().getAsInt();
    for (int maxZeros = 0; maxZeros < maximumNumberOfZeros; maxZeros++) {
        final int maxZerosThres = maxZeros;
        final int expectedRemainingCount = (int) IntStream.of(numberOfZeros).filter(i -> i <= maxZerosThres).count();
        if (expectedRemainingCount == 0) {
            try {
                ReadCountCollectionUtils.removeColumnsWithTooManyZeros(readCount, maxZeros, false, NULL_LOGGER);
            } catch (final UserException.BadInput ex) {
                // expected.
                continue;
            }
            Assert.fail("expects an exception");
        }
        final ReadCountCollection rc = ReadCountCollectionUtils.removeColumnsWithTooManyZeros(readCount, maxZeros, false, NULL_LOGGER);
        Assert.assertEquals(rc.columnNames().size(), expectedRemainingCount);
        final int[] newIndices = new int[expectedRemainingCount];
        int nextIndex = 0;
        for (int i = 0; i < readCount.columnNames().size(); i++) {
            final String name = readCount.columnNames().get(i);
            final int newIndex = rc.columnNames().indexOf(name);
            if (numberOfZeros[i] <= maxZeros) {
                Assert.assertTrue(newIndex >= 0);
                // Record the position in the RESULT. Recording the original index i made
                // the ordering check below vacuous, because i always increases.
                newIndices[nextIndex++] = newIndex;
            } else {
                Assert.assertEquals(newIndex, -1);
            }
        }
        Assert.assertEquals(nextIndex, expectedRemainingCount);
        // Kept columns must retain their original relative order in the result.
        for (int i = 1; i < newIndices.length; i++) {
            Assert.assertTrue(newIndices[i - 1] < newIndices[i]);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException) Test(org.testng.annotations.Test)

Example 84 with DataProvider

use of org.testng.annotations.DataProvider in project gatk by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method testImputeZeroCounts.

/**
 * Checks that {@code imputeZeroCountsAsTargetMedians} replaces each exact-zero entry
 * with the median of the non-zero entries in the same row and leaves every other
 * entry unchanged.
 *
 * <p>The expected matrix is built from the counts before the call and then compared,
 * entry by entry, with the counts read back from the same collection afterwards.
 */
@Test(dataProvider = "tooManyZerosData")
public void testImputeZeroCounts(final ReadCountCollection readCounts) {
    final RealMatrix before = readCounts.counts();
    final Median medianCalculator = new Median();
    final int rowCount = before.getRowDimension();

    // Median of the non-zero values in each row, computed before anything is modified.
    final double[] nonZeroMedians = new double[rowCount];
    for (int row = 0; row < rowCount; row++) {
        final double[] nonZeroValues = DoubleStream.of(before.getRow(row)).filter(v -> v != 0.0).toArray();
        nonZeroMedians[row] = medianCalculator.evaluate(nonZeroValues);
    }

    final double[][] expected = new double[rowCount][];
    // Snapshot of the input values, used only in the failure message.
    final double[][] snapshot = before.getData();
    for (int row = 0; row < expected.length; row++) {
        final double[] values = before.getRow(row).clone();
        for (int col = 0; col < values.length; col++) {
            if (values[col] == 0.0) {
                values[col] = nonZeroMedians[row];
            }
        }
        expected[row] = values;
    }

    ReadCountCollectionUtils.imputeZeroCountsAsTargetMedians(readCounts, NULL_LOGGER);

    final RealMatrix after = readCounts.counts();
    Assert.assertEquals(after.getColumnDimension(), expected[0].length);
    Assert.assertEquals(after.getRowDimension(), expected.length);
    for (int row = 0; row < expected.length; row++) {
        final double[] expectedRow = expected[row];
        for (int col = 0; col < expectedRow.length; col++) {
            Assert.assertEquals(after.getEntry(row, col), expectedRow[col], "i,j == " + row + "," + col + " " + snapshot[row][col]);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Test(org.testng.annotations.Test)

Example 85 with DataProvider

use of org.testng.annotations.DataProvider in project gatk by broadinstitute.

the class ReadCountCollectionUtilsUnitTest method testExtremeMedianColumnsData.

/**
 * Checks {@code removeColumnsWithExtremeMedianCounts} against an independently
 * computed expectation.
 *
 * <p>A column is expected to be kept when its median lies between the
 * {@code percentile} and {@code 100 - percentile} percentiles of all column medians
 * (bounds included). Kept columns must appear in the result in their original
 * relative order with unchanged values; all other columns must be absent.
 */
@Test(dataProvider = "readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final Median medianCalculator = new Median();
    final RealMatrix counts = readCount.counts();

    final double[] columnMedians = new double[counts.getColumnDimension()];
    for (int col = 0; col < columnMedians.length; col++) {
        columnMedians[col] = medianCalculator.evaluate(counts.getColumn(col));
    }

    final double upperBound = new Percentile(100 - percentile).evaluate(columnMedians);
    final double lowerBound = new Percentile(percentile).evaluate(columnMedians);

    // Decide, per column, whether its median falls inside the inclusive bounds.
    final boolean[] keep = new boolean[columnMedians.length];
    int keepCount = 0;
    for (int col = 0; col < columnMedians.length; col++) {
        final double columnMedian = columnMedians[col];
        keep[col] = columnMedian <= upperBound && columnMedian >= lowerBound;
        if (keep[col]) {
            keepCount++;
        }
    }

    final ReadCountCollection result = ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), keepCount);

    int expectedPosition = 0;
    for (int col = 0; col < keep.length; col++) {
        final int position = result.columnNames().indexOf(readCount.columnNames().get(col));
        if (!keep[col]) {
            Assert.assertEquals(position, -1);
            continue;
        }
        Assert.assertEquals(position, expectedPosition++);
        Assert.assertEquals(counts.getColumn(col), result.counts().getColumn(position));
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) Level(org.apache.logging.log4j.Level) Test(org.testng.annotations.Test) Random(java.util.Random) ArrayList(java.util.ArrayList) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) PrintWriter(java.io.PrintWriter) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOException(java.io.IOException) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Test(org.testng.annotations.Test)

Aggregations

DataProvider (org.testng.annotations.DataProvider)391 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)99 ArrayList (java.util.ArrayList)93 Test (org.testng.annotations.Test)85 File (java.io.File)74 List (java.util.List)72 Assert (org.testng.Assert)67 URIDetails (com.linkedin.restli.internal.testutils.URIDetails)65 Collectors (java.util.stream.Collectors)61 HashMap (java.util.HashMap)57 IntStream (java.util.stream.IntStream)54 Random (java.util.Random)50 RealMatrix (org.apache.commons.math3.linear.RealMatrix)44 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)41 IOException (java.io.IOException)39 ByteString (com.linkedin.data.ByteString)37 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)34 Stream (java.util.stream.Stream)31 DoubleStream (java.util.stream.DoubleStream)29 HashSet (java.util.HashSet)28