Search in sources :

Example 11 with Array2DRowRealMatrix

use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk by broadinstitute.

the class HDF5LibraryUnitTest method createMatrixOfGaussianValues.

/**
 * Builds a {@code numRows} x {@code numCols} matrix in which every entry is replaced by a draw
 * from {@code nextGaussian(mean, sigma)}.
 * <p>
 *     The generator is re-seeded with the same constant on each call, presumably so that the
 *     generated test data is reproducible between runs.
 * </p>
 *
 * @param numRows number of rows of the resulting matrix.
 * @param numCols number of columns of the resulting matrix.
 * @param mean mean passed to the Gaussian generator.
 * @param sigma standard deviation passed to the Gaussian generator.
 * @return the newly created matrix, filled with the generated values.
 */
private RealMatrix createMatrixOfGaussianValues(int numRows, int numCols, final double mean, final double sigma) {
    final RealMatrix gaussianMatrix = new Array2DRowRealMatrix(numRows, numCols);
    final RandomDataGenerator gaussianSource = new RandomDataGenerator();
    gaussianSource.reSeed(337337337);

    // The visitor ignores the current entry and its position; it simply overwrites it with a new draw.
    final DefaultRealMatrixChangingVisitor overwriteWithGaussianDraw = new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(int row, int column, double value) {
            return gaussianSource.nextGaussian(mean, sigma);
        }
    };
    gaussianMatrix.walkInOptimizedOrder(overwriteWithGaussianDraw);
    return gaussianMatrix;
}
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 12 with Array2DRowRealMatrix

use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollection method subsetTargets.

/**
 * Subsets the targets in the read-count collection.
 * <p>
 *     Creates a brand-new read-count collection. Changes in the new read-count collection
 *     counts won't affect this read-count collection and vice-versa.
 * </p>
 * <p>
 *     When {@code targetsToKeep} has as many elements as this collection has targets, the result
 *     keeps all targets and column names and carries a copy of the counts.
 * </p>
 *
 * @param targetsToKeep the new target subset.
 * @return never {@code null}. The order of targets in the result is guaranteed to
 *  follow the original order of targets. The order of count columns is guaranteed to
 *  follow the original order of count columns.
 * @throws IllegalArgumentException if {@code targetsToKeep}:
 * <ul>
 *     <li>is {@code null},</li>
 *     <li>contains {@code null}s</li>
 *     <li>or contains targets that are not part of the read-count collection</li>
 * </ul>
 * An empty {@code targetsToKeep} is also rejected (by the {@code Utils.nonEmpty} check).
 */
public ReadCountCollection subsetTargets(final Set<Target> targetsToKeep) {
    Utils.nonNull(targetsToKeep, "the input target set cannot be null");
    Utils.nonEmpty(targetsToKeep, "the input target subset size must be greater than 0");
    if (!new HashSet<>(targets).containsAll(targetsToKeep)) {
        throw unknownTargetsToKeep(targetsToKeep);
    }
    // Every requested target is known at this point, so an equal size means "keep everything".
    if (targetsToKeep.size() == targets.size()) {
        return new ReadCountCollection(targets, columnNames, counts.copy(), false);
    }
    // Indices are collected in ascending order, which preserves the original target order.
    final int[] targetsToKeepIndices = IntStream.range(0, targets.size()).filter(i -> targetsToKeep.contains(targets.get(i))).toArray();
    final List<Target> resultTargets = Arrays.stream(targetsToKeepIndices).mapToObj(targets::get).collect(Collectors.toList());
    // compose the new counts:
    // only the outer array is allocated here; each row slot is filled with the array returned by getRow,
    // so pre-allocating the inner rows would just create arrays that are discarded immediately.
    final double[][] resultCounts = new double[targetsToKeepIndices.length][];
    for (int i = 0; i < resultCounts.length; i++) {
        resultCounts[i] = counts.getRow(targetsToKeepIndices[i]);
    }
    return new ReadCountCollection(Collections.unmodifiableList(resultTargets), columnNames, new Array2DRowRealMatrix(resultCounts), false);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) java.util(java.util) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) Nonnull(javax.annotation.Nonnull) Collectors(java.util.stream.Collectors) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Serializable(java.io.Serializable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix)

Example 13 with Array2DRowRealMatrix

use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.

the class ReadCountCollection method subsetColumns.

/**
 * Returns a read-count collection restricted to the requested count columns.
 *
 * <p>
 *     The result is a brand-new read-count collection: changes in the new instance won't affect
 *     this one and vice-versa. Kept columns appear in the same relative order as in this collection.
 * </p>
 *
 * @param columnsToKeep column names to keep in the result read-count collection.
 * @return never {@code null}.
 */
public ReadCountCollection subsetColumns(final Set<String> columnsToKeep) {
    Utils.nonNull(columnsToKeep, "the set of input columns to keep cannot be null.");
    Utils.nonEmpty(columnsToKeep, "the number of columns to keep must be greater than 0");
    final Set<String> knownColumnNames = new HashSet<>(columnNames);
    if (!knownColumnNames.containsAll(columnsToKeep)) {
        throw unknownColumnToKeepNames(columnsToKeep);
    }
    // All requested names are known, so matching sizes means every column is kept.
    if (columnsToKeep.size() == columnNames.size()) {
        return new ReadCountCollection(targets, columnNames, counts.copy(), false);
    }

    // Single pass over the original columns, recording the index and name of each one to keep.
    final List<Integer> keptIndices = new ArrayList<>();
    final List<String> keptNames = new ArrayList<>();
    for (int columnIndex = 0; columnIndex < columnNames.size(); columnIndex++) {
        final String columnName = columnNames.get(columnIndex);
        if (columnsToKeep.contains(columnName)) {
            keptIndices.add(columnIndex);
            keptNames.add(columnName);
        }
    }

    // Copy the kept columns, left to right, into a freshly allocated matrix.
    final RealMatrix keptCounts = new Array2DRowRealMatrix(counts.getRowDimension(), keptIndices.size());
    int destinationColumn = 0;
    for (final int sourceColumn : keptIndices) {
        keptCounts.setColumn(destinationColumn, counts.getColumn(sourceColumn));
        destinationColumn++;
    }
    return new ReadCountCollection(targets, Collections.unmodifiableList(keptNames), keptCounts, false);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) java.util(java.util) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) Nonnull(javax.annotation.Nonnull) Collectors(java.util.stream.Collectors) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Serializable(java.io.Serializable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix)

Example 14 with Array2DRowRealMatrix

use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.

the class CopyRatioSegmenterUnitTest method testChromosomesOnDifferentSegments.

/**
 * Generates data for three contigs that all share the same hidden state and checks that the
 * segmenter still reports segments on three distinct contigs.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueLog2CopyRatios = { -2.0, 0.0, 1.7 };
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;

    // randomly set positions, contig by contig, consuming the generator in chr1, chr2, chr3 order
    final int chainLength = 100;
    final double quarterMemoryLength = trueMemoryLength / 4;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, quarterMemoryLength);
    for (final String contig : new String[] { "chr2", "chr3" }) {
        positions.addAll(randomPositions(contig, chainLength, rng, quarterMemoryLength));
    }

    // fix everything to the same state 2: one observation per position, all from the same copy ratio
    final int trueState = 2;
    final double fixedCopyRatio = trueLog2CopyRatios[trueState];
    final double[] observations = new double[positions.size()];
    for (int idx = 0; idx < observations.length; idx++) {
        observations[idx] = generateData(trueStandardDeviation, fixedCopyRatio, rng);
    }

    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(observations));
    final List<ModeledSegment> segments = new CopyRatioSegmenter(10, rcc).getModeledSegments();

    // check that each chromosome has at least one segment
    final long distinctContigs = segments.stream().map(ModeledSegment::getContig).distinct().count();
    final int numDifferentContigsInSegments = (int) distinctContigs;
    Assert.assertEquals(numDifferentContigsInSegments, 3);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) java.util(java.util) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) Test(org.testng.annotations.Test) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Target(org.broadinstitute.hellbender.tools.exome.Target) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 15 with Array2DRowRealMatrix

use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.

the class HDF5PCACoveragePoNCreationUtilsUnitTest method readCountAndPercentileData.

// this is duplicated from ReadCountCollectionUtilsUnitTest
/**
 * Provides one test case per percentile value: a read-count collection filled with random
 * counts, paired with the percentile.
 * <p>
 *     All cases share the same targets and column names and draw from a single {@link Random}
 *     created with a fixed seed.
 * </p>
 *
 * @return an array of {@code { ReadCountCollection, percentile }} pairs.
 */
@DataProvider(name = "readCountAndPercentileData")
public Object[][] readCountAndPercentileData() {
    final double[] percentiles = new double[] { 1.0, 2.5, 5.0, 10.0, 25.0 };
    final List<Object[]> result = new ArrayList<>();
    final Random rdn = new Random(13);
    final int columnCount = 100;
    final int targetCount = 100;
    final List<String> columnNames = IntStream.range(0, columnCount).mapToObj(i -> "sample_" + (i + 1)).collect(Collectors.toList());
    final List<Target> targets = IntStream.range(0, targetCount).mapToObj(i -> new Target("target_" + (i + 1))).collect(Collectors.toList());
    for (final double percentile : percentiles) {
        // Rows are targets and columns are count columns, so the matrix is targetCount x columnCount.
        // The two were previously swapped, which only worked because both constants are equal.
        final double[][] counts = new double[targetCount][columnCount];
        for (int i = 0; i < counts.length; i++) {
            for (int j = 0; j < counts[i].length; j++) {
                counts[i][j] = rdn.nextDouble();
            }
        }
        // copyArray = false: the freshly built local array is handed over without another copy.
        final ReadCountCollection readCounts = new ReadCountCollection(targets, columnNames, new Array2DRowRealMatrix(counts, false));
        result.add(new Object[] { readCounts, percentile });
    }
    return result.toArray(new Object[result.size()][]);
}
Also used : IntStream(java.util.stream.IntStream) SVD(org.broadinstitute.hellbender.utils.svd.SVD) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Level(org.apache.logging.log4j.Level) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) Test(org.testng.annotations.Test) Random(java.util.Random) OptionalInt(java.util.OptionalInt) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ArrayList(java.util.ArrayList) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Pair(org.apache.commons.lang3.tuple.Pair) Message(org.apache.logging.log4j.message.Message) Assert(org.testng.Assert) Median(org.apache.commons.math3.stat.descriptive.rank.Median) HDF5File(org.broadinstitute.hdf5.HDF5File) Marker(org.apache.logging.log4j.Marker) AbstractLogger(org.apache.logging.log4j.spi.AbstractLogger) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) DoubleStream(java.util.stream.DoubleStream) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Stream(java.util.stream.Stream) Target(org.broadinstitute.hellbender.tools.exome.Target) SVDFactory(org.broadinstitute.hellbender.utils.svd.SVDFactory) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ArrayList(java.util.ArrayList) Target(org.broadinstitute.hellbender.tools.exome.Target) 
Random(java.util.Random) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DataProvider(org.testng.annotations.DataProvider)

Aggregations

Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)141 RealMatrix (org.apache.commons.math3.linear.RealMatrix)101 Test (org.testng.annotations.Test)60 IntStream (java.util.stream.IntStream)31 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 File (java.io.File)27 Collectors (java.util.stream.Collectors)25 ArrayList (java.util.ArrayList)24 Assert (org.testng.Assert)24 List (java.util.List)22 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)22 Target (org.broadinstitute.hellbender.tools.exome.Target)18 java.util (java.util)15 Random (java.util.Random)14 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)14 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)14 DataProvider (org.testng.annotations.DataProvider)14 Stream (java.util.stream.Stream)13 Arrays (java.util.Arrays)12 DoubleStream (java.util.stream.DoubleStream)12