Use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.
Class HDF5PCACoveragePoNCreationUtilsUnitTest, method readCountOnlyData.
/**
 * Supplies randomly filled read-count collections to tests.
 *
 * <p>
 * Produces {@code repeats} test cases, each a single-element argument array holding a
 * {@link ReadCountCollection} whose counts are drawn from a fixed-seed {@link Random},
 * so the generated data is the same on every run.
 * </p>
 *
 * @return one {@code Object[]} per generated collection; never {@code null}.
 */
@DataProvider(name = "readCountOnlyData")
public Object[][] readCountOnlyData() {
    final int repeats = 4;
    final List<Object[]> result = new ArrayList<>(repeats);
    final Random rdn = new Random(13);
    final int columnCount = 100;
    final int targetCount = 100;
    final List<String> columnNames = IntStream.range(0, columnCount)
            .mapToObj(i -> "sample_" + (i + 1))
            .collect(Collectors.toList());
    final List<Target> targets = IntStream.range(0, targetCount)
            .mapToObj(i -> new Target("target_" + (i + 1)))
            .collect(Collectors.toList());
    for (int k = 0; k < repeats; k++) {
        // The counts matrix is laid out with one row per target and one column per
        // sample, so the backing array must be [targetCount][columnCount]. The two
        // sizes are currently equal, which is why the previous swapped order went
        // unnoticed; the fill order (row-major) and hence the values are unchanged.
        final double[][] counts = new double[targetCount][columnCount];
        for (final double[] row : counts) {
            for (int j = 0; j < row.length; j++) {
                row[j] = rdn.nextDouble();
            }
        }
        // copyArray == false: the matrix wraps the array directly instead of copying it.
        final ReadCountCollection readCounts = new ReadCountCollection(targets, columnNames, new Array2DRowRealMatrix(counts, false));
        result.add(new Object[] { readCounts });
    }
    return result.toArray(new Object[result.size()][]);
}
Use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk-protected by broadinstitute.
Class CopyRatioSegmenterUnitTest, method testSegmentation.
/**
 * Simulates log2 copy-ratio data from a known {@link CopyRatioHMM}, segments it with
 * {@link CopyRatioSegmenter}, and checks that the per-target segment means stay close
 * (on average) to the true per-target log2 copy ratios.
 */
@Test
public void testSegmentation() {
    // Fixed seed so that the simulated chain and data are reproducible.
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));

    // Parameters of the model the data is generated from.
    final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
    final List<Double> trueLog2CopyRatios = Arrays.asList(-2.0, 0.0, 1.4);
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;
    final CopyRatioHMM trueModel = new CopyRatioHMM(trueLog2CopyRatios, trueWeights, trueMemoryLength, trueStandardDeviation);

    // Simulate positions, hidden states and the noisy observations, in that order.
    final int chainLength = 10000;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    final List<Integer> trueStates = trueModel.generateHiddenStateChain(positions);
    final List<Double> trueLog2CopyRatioSequence = trueStates.stream()
            .map(trueLog2CopyRatios::get)
            .collect(Collectors.toList());
    final List<Double> data = trueLog2CopyRatioSequence.stream()
            .map(trueValue -> generateData(trueStandardDeviation, trueValue, rng))
            .collect(Collectors.toList());

    // Wrap the observations as a single-sample read-count collection
    // (a one-dimensional array yields a single-column matrix).
    final List<Target> targets = positions.stream()
            .map(Target::new)
            .collect(Collectors.toList());
    final double[] observations = data.stream().mapToDouble(Double::doubleValue).toArray();
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(observations));

    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(10, rcc);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();

    // Expand every segment mean to one entry per target it covers.
    final double[] segmentCopyRatios = segments.stream()
            .flatMap(segment -> Collections.nCopies((int) segment.getTargetCount(), segment.getSegmentMeanInLog2CRSpace()).stream())
            .mapToDouble(mean -> mean)
            .toArray();

    final double averageCopyRatioError = IntStream.range(0, trueLog2CopyRatioSequence.size())
            .mapToDouble(index -> Math.abs(segmentCopyRatios[index] - trueLog2CopyRatioSequence.get(index)))
            .average()
            .getAsDouble();
    Assert.assertEquals(averageCopyRatioError, 0, 0.025);
}
Use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk by broadinstitute.
Class ReadCountCollection, method subsetColumns.
/**
 * Builds a new read-count collection restricted to the requested count columns.
 *
 * <p>
 * The result always carries its own counts matrix (either a copy of the full matrix
 * when every column is kept, or a freshly populated one otherwise), so changes to the
 * counts of the new instance won't affect this one and vice-versa. Kept columns appear
 * in the same relative order as in this collection.
 * </p>
 *
 * <p>
 * The input must be non-{@code null}, non-empty and contain only names of columns
 * present in this collection; otherwise the call fails with an exception.
 * </p>
 *
 * @param columnsToKeep column names to keep in the result read-count collection.
 * @return never {@code null}.
 */
public ReadCountCollection subsetColumns(final Set<String> columnsToKeep) {
    Utils.nonNull(columnsToKeep, "the set of input columns to keep cannot be null.");
    Utils.nonEmpty(columnsToKeep, "the number of columns to keep must be greater than 0");

    final boolean allRequestedAreKnown = new HashSet<>(columnNames).containsAll(columnsToKeep);
    if (!allRequestedAreKnown) {
        throw unknownColumnToKeepNames(columnsToKeep);
    }

    // Every column requested: nothing to select, just hand back a copy of the counts.
    if (columnsToKeep.size() == columnNames.size()) {
        return new ReadCountCollection(targets, columnNames, counts.copy(), false);
    }

    // Indices of the kept columns, in this collection's column order.
    final int[] keptIndices = IntStream.range(0, columnNames.size())
            .filter(index -> columnsToKeep.contains(columnNames.get(index)))
            .toArray();

    final List<String> keptNames = new ArrayList<>(keptIndices.length);
    final RealMatrix keptCounts = new Array2DRowRealMatrix(counts.getRowDimension(), keptIndices.length);
    for (int destination = 0; destination < keptIndices.length; destination++) {
        final int source = keptIndices[destination];
        keptNames.add(columnNames.get(source));
        keptCounts.setColumn(destination, counts.getColumn(source));
    }
    return new ReadCountCollection(targets, Collections.unmodifiableList(keptNames), keptCounts, false);
}
Use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk by broadinstitute.
Class ReadCountCollection, method arrangeTargets.
/**
 * Rearrange the targets so that they are in a particular order.
 *
 * <p>
 * The result has one row per element of {@code targetsInOrder}, each copied from the
 * row of the corresponding target in this collection; the columns are unchanged.
 * </p>
 *
 * @param targetsInOrder the targets to include in the result, in the desired order.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 *     <ul>
 *         <li>{@code targetsInOrder} is {@code null},</li>
 *         <li>is empty,</li>
 *         <li>it contains {@code null},</li>
 *         <li>contains any target not present in this collection.</li>
 *     </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix arrangedCounts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());

    // Look-up from each target of this collection to its row index.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }

    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: without this check the "not present" message
        // below would dereference the null target and fail with a NullPointerException
        // instead of the documented argument-validation failure.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null");
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        arrangedCounts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, arrangedCounts, false);
}
Use of org.apache.commons.math3.linear.Array2DRowRealMatrix in project gatk by broadinstitute.
Class ReadCountCollectionUtils, method readCounts.
/**
 * Consumes every remaining record from the table reader and assembles the
 * corresponding read-count collection.
 *
 * <p>
 * Fails with a {@link UserException.BadInput} if the same target is added twice or if
 * the source yields no records at all.
 * </p>
 *
 * @param sourceName the source name (used in error messages).
 * @param tableReader the source table-reader.
 * @param columnNames the name of the columns.
 * @return never {@code null}.
 * @throws IOException if there is a low level IO error.
 */
private static ReadCountCollection readCounts(final String sourceName, final TableReader<ReadCountRecord> tableReader, final List<String> columnNames) throws IOException {
    final Buffer buffer = new Buffer();

    // readRecord() returns null once the input is exhausted.
    for (ReadCountRecord record = tableReader.readRecord(); record != null; record = tableReader.readRecord()) {
        final Target target = record.getTarget();
        final double[] lineCounts = record.getDoubleCounts();
        final boolean added = buffer.add(target, lineCounts);
        if (!added) {
            throw new UserException.BadInput(String.format("duplicated target with name %s in %s", target.getName(), sourceName));
        }
    }

    if (buffer.getTargets().isEmpty()) {
        throw new UserException.BadInput("there is no counts (zero targets) in the input source " + sourceName);
    }

    // copyArray == false: the matrix wraps the buffer's array rather than copying it.
    final RealMatrix countsMatrix = new Array2DRowRealMatrix(buffer.getCounts(), false);
    return new ReadCountCollection(buffer.getTargets(), columnNames, countsMatrix);
}
Aggregations