use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk by broadinstitute.
the class HDF5LibraryUnitTest method createMatrixOfGaussianValues.
/**
 * Builds a {@code numRows} x {@code numCols} matrix in which every entry has been replaced
 * by a Gaussian draw.
 * <p>
 * The random data generator is re-seeded with the same fixed seed on every call.
 * </p>
 *
 * @param numRows number of rows of the returned matrix.
 * @param numCols number of columns of the returned matrix.
 * @param mean mean passed to every Gaussian draw.
 * @param sigma standard deviation passed to every Gaussian draw.
 * @return a newly created matrix filled with the drawn values.
 */
private RealMatrix createMatrixOfGaussianValues(int numRows, int numCols, final double mean, final double sigma) {
    final RandomDataGenerator gaussianSource = new RandomDataGenerator();
    gaussianSource.reSeed(337337337);

    // The incoming entry value is ignored: each visited entry is overwritten with a new draw.
    final DefaultRealMatrixChangingVisitor overwriteWithDraw = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double value) {
            return gaussianSource.nextGaussian(mean, sigma);
        }
    };

    final RealMatrix gaussianMatrix = new Array2DRowRealMatrix(numRows, numCols);
    gaussianMatrix.walkInOptimizedOrder(overwriteWithDraw);
    return gaussianMatrix;
}
use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.
the class ReadCountCollection method subsetTargets.
/**
 * Subsets the targets in the read-count collection.
 * <p>
 * Creates a brand-new read-count collection. Changes in the new read-count collection
 * counts won't affect this read-count collection and vice-versa.
 * </p>
 *
 * @param targetsToKeep the new target subset.
 * @return never {@code null}. The order of targets in the result is guaranteed to
 *  follow the original order of targets. The order of count columns is guaranteed to
 *  follow the original order of count columns.
 * @throws IllegalArgumentException if {@code targetsToKeep}:
 *   <ul>
 *     <li>is {@code null},</li>
 *     <li>contains {@code null}s</li>
 *     <li>or contains targets that are not part of the read-count collection</li>
 *   </ul>
 */
public ReadCountCollection subsetTargets(final Set<Target> targetsToKeep) {
    Utils.nonNull(targetsToKeep, "the input target set cannot be null");
    Utils.nonEmpty(targetsToKeep, "the input target subset size must be greater than 0");
    if (!new HashSet<>(targets).containsAll(targetsToKeep)) {
        throw unknownTargetsToKeep(targetsToKeep);
    }
    if (targetsToKeep.size() == targets.size()) {
        // Every target is kept: only the counts need to be duplicated.
        return new ReadCountCollection(targets, columnNames, counts.copy(), false);
    }
    // Indices are collected in increasing order, which preserves the original target order.
    final int[] targetsToKeepIndices = IntStream.range(0, targets.size())
            .filter(i -> targetsToKeep.contains(targets.get(i)))
            .toArray();
    final List<Target> resultTargets = Arrays.stream(targetsToKeepIndices)
            .mapToObj(targets::get)
            .collect(Collectors.toList());

    // compose the new counts:
    // only the outer array is allocated here; every row slot receives the array handed back
    // by getRow, so pre-allocating the inner rows would only create garbage.
    final double[][] resultCounts = new double[targetsToKeepIndices.length][];
    for (int i = 0; i < resultCounts.length; i++) {
        resultCounts[i] = counts.getRow(targetsToKeepIndices[i]);
    }
    // resultCounts never escapes this method, so the matrix can wrap it directly (copyArray = false)
    // instead of deep-copying it a second time. This relies on getRow returning a newly allocated
    // array, as the Commons Math RealMatrix implementations do.
    return new ReadCountCollection(Collections.unmodifiableList(resultTargets), columnNames,
            new Array2DRowRealMatrix(resultCounts, false), false);
}
use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.
the class ReadCountCollection method subsetColumns.
/**
 * Produces a new read-count collection restricted to a subset of the count columns.
 *
 * <p>
 * The returned instance holds its own counts matrix: modifying the counts of either
 * instance does not alter the other.
 * </p>
 *
 * @param columnsToKeep names of the count columns to retain. It is checked with
 *  {@code Utils.nonNull} and {@code Utils.nonEmpty}, and every name in it must be one of
 *  this collection's column names.
 * @return never {@code null}. The retained columns keep the relative order they have in
 *  this collection.
 */
public ReadCountCollection subsetColumns(final Set<String> columnsToKeep) {
    Utils.nonNull(columnsToKeep, "the set of input columns to keep cannot be null.");
    Utils.nonEmpty(columnsToKeep, "the number of columns to keep must be greater than 0");

    final boolean allRequestedAreKnown = new HashSet<>(columnNames).containsAll(columnsToKeep);
    if (!allRequestedAreKnown) {
        throw unknownColumnToKeepNames(columnsToKeep);
    }

    final boolean keepsEveryColumn = columnNames.size() == columnsToKeep.size();
    if (keepsEveryColumn) {
        // Nothing to drop: hand back a collection backed by a copy of the counts.
        return new ReadCountCollection(targets, columnNames, counts.copy(), false);
    }

    // Positions of the retained columns, in increasing (i.e. original) order.
    final int[] keptIndices = IntStream.range(0, columnNames.size())
            .filter(index -> columnsToKeep.contains(columnNames.get(index)))
            .toArray();
    final List<String> keptNames = Arrays.stream(keptIndices)
            .mapToObj(index -> columnNames.get(index))
            .collect(Collectors.toList());

    // Copy the retained columns, one at a time, into consecutive columns of a new matrix.
    final RealMatrix keptCounts = new Array2DRowRealMatrix(counts.getRowDimension(), keptIndices.length);
    int destination = 0;
    for (final int sourceIndex : keptIndices) {
        keptCounts.setColumn(destination, counts.getColumn(sourceIndex));
        destination++;
    }
    return new ReadCountCollection(targets, Collections.unmodifiableList(keptNames), keptCounts, false);
}
use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.
the class CopyRatioSegmenterUnitTest method testChromosomesOnDifferentSegments.
/**
 * Generates data for three contigs that all share a single log2 copy ratio and checks that the
 * modeled segments returned by the segmenter span three distinct contigs.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;
    final double[] trueLog2CopyRatios = { -2.0, 0.0, 1.7 };

    // every observation is generated from the same state (index 2)
    final int trueState = 2;
    final double sharedLog2CopyRatio = trueLog2CopyRatios[trueState];

    // random positions: chainLength of them on each of chr1, chr2 and chr3, in that order
    final int chainLength = 100;
    final double quarterMemoryLength = trueMemoryLength / 4;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, quarterMemoryLength);
    for (final String contig : new String[] { "chr2", "chr3" }) {
        positions.addAll(randomPositions(contig, chainLength, rng, quarterMemoryLength));
    }

    // one observation per position
    final double[] observations = new double[positions.size()];
    for (int n = 0; n < observations.length; n++) {
        observations[n] = generateData(trueStandardDeviation, sharedLog2CopyRatio, rng);
    }

    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    // a one-dimensional array yields a single-column matrix: one count column named "SAMPLE"
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"),
            new Array2DRowRealMatrix(observations));
    final List<ModeledSegment> segments = new CopyRatioSegmenter(10, rcc).getModeledSegments();

    //check that each chromosome has at least one segment
    final long distinctContigs = segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals((int) distinctContigs, 3);
}
use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.
the class HDF5PCACoveragePoNCreationUtilsUnitTest method readCountAndPercentileData.
// this is duplicated from ReadCountCollectionUtilsUnitTest
/**
 * Supplies one test case per percentile: a read-count collection filled with pseudo-random
 * counts (fixed seed) paired with the percentile value.
 *
 * @return an array of {@code { ReadCountCollection, percentile }} pairs.
 */
@DataProvider(name = "readCountAndPercentileData")
public Object[][] readCountAndPercentileData() {
    final double[] percentiles = new double[] { 1.0, 2.5, 5.0, 10.0, 25.0 };
    final List<Object[]> result = new ArrayList<>();
    final Random rdn = new Random(13);
    final int columnCount = 100;
    final int targetCount = 100;
    final List<String> columnNames = IntStream.range(0, columnCount)
            .mapToObj(i -> "sample_" + (i + 1))
            .collect(Collectors.toList());
    final List<Target> targets = IntStream.range(0, targetCount)
            .mapToObj(i -> new Target("target_" + (i + 1)))
            .collect(Collectors.toList());
    for (final double percentile : percentiles) {
        // Rows are targets and columns are count columns, so the array is targetCount x columnCount.
        // Both counts are equal here, so the generated values are the same either way, but this
        // orientation stays correct if the two sizes ever diverge.
        final double[][] counts = new double[targetCount][columnCount];
        for (int i = 0; i < counts.length; i++) {
            for (int j = 0; j < counts[0].length; j++) {
                counts[i][j] = rdn.nextDouble();
            }
        }
        // counts is local to this iteration, so the matrix wraps it without copying.
        final ReadCountCollection readCounts = new ReadCountCollection(targets, columnNames,
                new Array2DRowRealMatrix(counts, false));
        result.add(new Object[] { readCounts, percentile });
    }
    return result.toArray(new Object[result.size()][]);
}
Aggregations