use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class ReadCountCollectionUtils method performWriting.
/**
 * Writes a read-count collection through the given table writer.
 * <p>
 * Every header comment is emitted first, in array order. After that one record is written per
 * target; the record for the i-th target carries the i-th row of the collection's count matrix.
 * </p>
 *
 * @param collection the read counts to write.
 * @param tableWriter the writer that receives the comments and records.
 * @param headerComments comment lines to emit before any record.
 * @throws IOException declared because the writer calls may fail.
 */
private static void performWriting(ReadCountCollection collection, TableWriter<ReadCountRecord> tableWriter, String[] headerComments) throws IOException {
    // Header comments precede all records.
    for (int commentIndex = 0; commentIndex < headerComments.length; commentIndex++) {
        tableWriter.writeComment(headerComments[commentIndex]);
    }

    final List<Target> targetList = collection.targets();
    final RealMatrix countMatrix = collection.counts();

    // Rows of the count matrix are matched to targets by position.
    int rowIndex = 0;
    for (final Target target : targetList) {
        final double[] rowCounts = countMatrix.getRow(rowIndex);
        tableWriter.writeRecord(new ReadCountRecord(target, rowCounts));
        rowIndex++;
    }
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class Nd4jApacheAdapterUtils method convertINDArrayToApacheMatrix.
/**
 * Converts a rank-2 {@link INDArray} into an Apache Commons Math {@link RealMatrix}.
 * <p>
 * If the input is a view it is duplicated first, and the raw data buffer of the resulting array
 * is then read as a flat {@code double[]}. That flat buffer is unpacked as row-major when the
 * array's ordering is {@code 'c'}; any other ordering is treated as column-major ({@code 'f'}).
 * </p>
 *
 * @param matrix rank-2 INDArray
 * @return a {@link BlockRealMatrix} holding the same entries
 */
public static RealMatrix convertINDArrayToApacheMatrix(@Nonnull final INDArray matrix) {
    Utils.validateArg(matrix.rank() == 2, "Input rank is not 2 (not matrix)");
    final int[] shape = matrix.shape();
    final int numRows = shape[0];
    final int numColumns = shape[1];

    // Work on a standalone copy when handed a view; presumably so that the raw buffer
    // corresponds to exactly this matrix's entries.
    final INDArray standalone;
    if (matrix.isView()) {
        standalone = matrix.dup();
    } else {
        standalone = matrix;
    }

    final double[] flatData = standalone.data().asDouble();
    final boolean rowMajor = standalone.ordering() == 'c';

    final double[][] entries;
    if (rowMajor) {
        entries = monoToBiDiArrayRowMajor(flatData, numRows, numColumns);
    } else {
        /* ordering == 'f' */
        entries = monoToBiDiArrayColumnMajor(flatData, numRows, numColumns);
    }
    return new BlockRealMatrix(entries);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class ReadCountCollectionUtils method removeColumnsWithTooManyZeros.
/**
 * Drops the count columns that contain too many zero counts.
 * <p>
 * A column is kept when the number of zeros reported by {@code countZeroes} for it is at most
 * {@code maximumColumnZeros}. When at least one (but not every) column is dropped, the result is
 * obtained by subsetting the input to the kept columns, preserving their original order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumColumnZeros maximum number of counts equal to 0 per column tolerated.
 * @param roundToInteger forwarded to {@code countZeroes}; presumably controls whether counts are
 *                       rounded before being compared with zero (confirm against that helper).
 * @param logger receives an info message describing how many columns were dropped.
 * @return never {@code null}. It is the input instance itself when no column needs to be
 *         dropped.
 * @throws UserException.BadInput if every column exceeds the tolerated number of zeros.
 */
@VisibleForTesting
public static ReadCountCollection removeColumnsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumColumnZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix countMatrix = readCounts.counts();
    final int numColumns = countMatrix.getColumnDimension();

    // Insertion-ordered so the surviving columns keep their original relative order.
    final Set<String> retainedColumns = new LinkedHashSet<>();
    for (int column = 0; column < numColumns; column++) {
        final int zeroCount = countZeroes(countMatrix.getColumn(column), roundToInteger);
        if (zeroCount <= maximumColumnZeros) {
            retainedColumns.add(readCounts.columnNames().get(column));
        }
    }

    final int totalColumns = readCounts.columnNames().size();
    final int droppedColumns = totalColumns - retainedColumns.size();

    if (droppedColumns == 0) {
        // Nothing to remove: hand back the very same instance.
        logger.info(String.format("There were no columns with a large number of targets with zero counts (<= %d of %d) to drop", maximumColumnZeros, readCounts.targets().size()));
        return readCounts;
    }
    if (droppedColumns == totalColumns) {
        throw new UserException.BadInput("The number of zeros per count column is too large resulting in all count columns to be dropped");
    }

    final double droppedPercentage = ((double) (droppedColumns) / totalColumns) * 100;
    logger.info(String.format("Some counts columns dropped (%d out of %d, %.2f%%) as they had too many targets with zeros (> %d of %d)", droppedColumns, totalColumns, droppedPercentage, maximumColumnZeros, readCounts.targets().size()));
    return readCounts.subsetColumns(retainedColumns);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class GermlineCNVCallerIntegrationTest method reportCopyNumberSummaryStatistics.
/**
 * Compares called copy numbers against a truth table and logs aggregate statistics.
 * <p>
 * The called copy numbers are read from the Viterbi copy-ratio file located under
 * {@code posteriorsOutputPath}. Per-sample concordance statistics are computed and then combined
 * across samples with a {@link Mean} calculator before being logged at info level.
 * </p>
 * <p>
 * Note: {@link ReadCountCollection} is (ab)used here as a container for copy numbers.
 * </p>
 *
 * @param posteriorsOutputPath directory containing the Viterbi copy-ratio output file
 * @param truthCopyNumberFile file holding the truth copy numbers
 * @param targets targets used to load the truth table and to label the called matrix
 * @param sexGenotypeDataCollection source of each sample's sex genotype, looked up by sample name
 */
private void reportCopyNumberSummaryStatistics(@Nonnull final File posteriorsOutputPath, @Nonnull final File truthCopyNumberFile, @Nonnull final List<Target> targets, @Nonnull final SexGenotypeDataCollection sexGenotypeDataCollection) {
    final ReadCountCollection truthCopyNumberCollection = loadTruthCopyNumberTable(truthCopyNumberFile, targets);

    /* load the called copy numbers and label them with the truth table's sample names */
    final File viterbiFile = new File(posteriorsOutputPath, CoverageModelGlobalConstants.COPY_RATIO_VITERBI_FILENAME);
    final RealMatrix calledCopyNumberMatrix = Nd4jApacheAdapterUtils.convertINDArrayToApacheMatrix(
            Nd4jIOUtils.readNDArrayMatrixFromTextFile(viterbiFile));
    final ReadCountCollection calledCopyNumberCollection = new ReadCountCollection(
            targets, truthCopyNumberCollection.columnNames(), calledCopyNumberMatrix);
    final int numSamples = calledCopyNumberCollection.columnNames().size();

    /* sex genotype of every sample, in column order */
    final List<String> sexGenotypeBySample = truthCopyNumberCollection.columnNames().stream()
            .map(sampleName -> sexGenotypeDataCollection.getSampleSexGenotypeData(sampleName).getSexGenotype())
            .collect(Collectors.toList());

    /* per-sample concordance between truth and calls */
    final List<SampleCopyNumberSummaryStatistics> perSampleStatistics = IntStream.range(0, numSamples)
            .mapToObj(sampleIndex -> calculateSampleCopyNumberConcordance(
                    truthCopyNumberCollection, calledCopyNumberCollection, sampleIndex,
                    sexGenotypeBySample.get(sampleIndex)))
            .collect(Collectors.toList());

    /* aggregate across samples; the aggregating statistic is the mean */
    final AbstractUnivariateStatistic aggregator = new Mean();
    final ConfusionRates homDelMeanRates = ConfusionMatrix.getConfusionRates(
            perSampleStatistics.stream()
                    .map(stats -> stats.homozygousDeletionConfusionMatrix)
                    .collect(Collectors.toList()),
            aggregator);
    final ConfusionRates hetDelMeanRates = ConfusionMatrix.getConfusionRates(
            perSampleStatistics.stream()
                    .map(stats -> stats.heterozygousDeletionConfusionMatrix)
                    .collect(Collectors.toList()),
            aggregator);
    final ConfusionRates dupMeanRates = ConfusionMatrix.getConfusionRates(
            perSampleStatistics.stream()
                    .map(stats -> stats.duplicationConfusionMatrix)
                    .collect(Collectors.toList()),
            aggregator);
    final double absoluteConcordance = Concordance.getCollectionConcordance(
            perSampleStatistics.stream()
                    .map(stats -> stats.absoluteCopyNumberConcordance)
                    .collect(Collectors.toList()),
            aggregator);

    /* log */
    logger.info("Homozygous deletion statistics: " + homDelMeanRates.toString());
    logger.info("Heterozygous deletion statistics: " + hetDelMeanRates.toString());
    logger.info("Duplication statistics: " + dupMeanRates.toString());
    logger.info(String.format("Absolute copy number calling concordance: %f", absoluteConcordance));
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class GCBiasSimulatedData method simulatedData.
/**
 * Builds a simulated read-count collection whose counts are dominated by GC bias.
 * <p>
 * Each target gets a GC content drawn as {@code 0.5 + 0.2 * gaussian} and clamped to
 * {@code [0.05, 0.95]}. Every count is that target's value on {@code QUADRATIC_GC_BIAS_CURVE}
 * multiplied by a factor in {@code [1.0, 1.01)}, so noise remaining after GC correction should
 * be nearly zero. The random generator uses a fixed seed (13).
 * </p>
 * <p>
 * Public so that the integration test can use it.
 * </p>
 *
 * @param numTargets number of targets (matrix rows) to simulate
 * @param numSamples number of samples (matrix columns) to simulate
 * @return the simulated read counts paired with the GC content of each target
 */
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> targets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> sampleNames = SimulatedSamples.phonySamples(numSamples);
    final Random rng = new Random(13);

    // GC content per target, clamped into [0.05, 0.95].
    final double[] gcContentByTarget = IntStream.range(0, numTargets)
            .mapToDouble(targetIndex -> 0.5 + 0.2 * rng.nextGaussian())
            .map(gc -> Math.max(Math.min(gc, 0.95), 0.05))
            .toArray();

    final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget)
            .map(gc -> QUADRATIC_GC_BIAS_CURVE.apply(gc))
            .toArray();

    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final DefaultRealMatrixChangingVisitor biasedCountFiller = new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int row, final int column, final double currentValue) {
            final double nonGcFactor = 1.0 + 0.01 * rng.nextDouble();
            return gcBiasByTarget[row] * nonGcFactor;
        }
    };
    final RealMatrix countMatrix = new Array2DRowRealMatrix(numTargets, numSamples);
    countMatrix.walkInOptimizedOrder(biasedCountFiller);

    return new ImmutablePair<>(new ReadCountCollection(targets, sampleNames, countMatrix), gcContentByTarget);
}
Aggregations