use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.
the class HDF5PCACoveragePoNUnitTest method testNormalizedPcovReading.
@Test(dependsOnMethods = { "testTargetNameReading", "testSampleNameReading" })
public void testNormalizedPcovReading() throws IOException {
final HDF5File reader = new HDF5File(TEST_PON);
final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
final List<String> targets = pon.getTargetNames();
final List<String> samples = pon.getSampleNames();
final RealMatrix actual = pon.getNormalizedCounts();
Assert.assertNotNull(actual);
Assert.assertEquals(actual.getRowDimension(), targets.size());
Assert.assertEquals(actual.getColumnDimension(), samples.size());
final RealMatrix expected = readDoubleMatrix(TEST_PON_NORMALIZED_PCOV);
MathObjectAsserts.assertRealMatrixEquals(actual, expected);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk-protected by broadinstitute.
the class HDF5PCACoveragePoNUnitTest method testLogNormalizedMatrixReading.
@Test(dependsOnMethods = { "testTargetNameReading", "testLogNormalizedSampleNameReading" })
public void testLogNormalizedMatrixReading() throws IOException {
final HDF5File reader = new HDF5File(TEST_PON);
final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
final List<String> targets = pon.getTargetNames();
final List<String> samples = pon.getPanelSampleNames();
final RealMatrix actual = pon.getLogNormalizedCounts();
Assert.assertNotNull(actual);
Assert.assertEquals(actual.getRowDimension(), targets.size());
Assert.assertEquals(actual.getColumnDimension(), samples.size());
final RealMatrix expected = readDoubleMatrix(TEST_PON_LOG_NORMALS);
MathObjectAsserts.assertRealMatrixEquals(actual, expected);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class NormalizeSomaticReadCountsIntegrationTest method assertPreTangentNormalizedValues.
/**
* This code reconstructs the expected pre-tangent normalization counts given the input,
* and then compares against the actual pre-tangent output.
* <p>
* This method does not use any of the components that does the actual computation
* in production, so that the calculation is independent.
* </p>
* <p>
* This code also use an alternative way to calculate it to reduce overlap even further.
* </p>
* @param preTangentNormalized actual output.
* @param factorNormalized input.
*/
private void assertPreTangentNormalizedValues(final ReadCountCollection factorNormalized, final ReadCountCollection preTangentNormalized) {
final double epsilon = PCATangentNormalizationUtils.EPSILON;
final RealMatrix outCounts = preTangentNormalized.counts();
final RealMatrix inCounts = factorNormalized.counts();
final double[] columnMeans = GATKProtectedMathUtils.columnMeans(inCounts);
Assert.assertTrue(DoubleStream.of(columnMeans).allMatch(d -> d < 0.5));
final double[][] expected = new double[inCounts.getRowDimension()][inCounts.getColumnDimension()];
final double[] columnValues = new double[inCounts.getRowDimension()];
for (int i = 0; i < columnMeans.length; i++) {
for (int j = 0; j < inCounts.getRowDimension(); j++) {
final double inValue = inCounts.getEntry(j, i);
final double lowBoundedInValue = Math.max(epsilon, inValue / columnMeans[i]);
final double outValue = Math.log(lowBoundedInValue) / Math.log(2);
expected[j][i] = outValue;
columnValues[j] = outValue;
}
Arrays.sort(columnValues);
final int midIndex = columnValues.length >> 1;
final double median = columnValues.length % 2 == 1 ? columnValues[midIndex] : (columnValues[midIndex] + columnValues[1 + midIndex]) * .5;
for (int j = 0; j < inCounts.getRowDimension(); j++) {
expected[j][i] -= median;
Assert.assertEquals(outCounts.getEntry(j, i), expected[j][i], 0.000001, " Row " + j + " col " + i);
}
}
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class NormalizeSomaticReadCountsIntegrationTest method testNameOnlyCountsInputRunOneSample.
@Test
public void testNameOnlyCountsInputRunOneSample() throws IOException {
final File factorNormalizedOutput = createTempFile("test", ".txt");
final File tangentNormalizationOutput = createTempFile("tangent-", ".txt");
final File betaHatsOutput = createTempFile("tangent-", ".bhats");
final File preTangentNormalizationOutput = createTempFile("pre-tn-", ".txt");
final String[] arguments = { "-" + NormalizeSomaticReadCounts.READ_COUNTS_FILE_SHORT_NAME, TARGET_NAME_ONLY_READ_COUNTS_INPUT_ONE_SAMPLE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.PON_FILE_SHORT_NAME, TEST_PON.getAbsolutePath(), "-" + NormalizeSomaticReadCounts.FACTOR_NORMALIZED_COUNTS_SHORT_NAME, factorNormalizedOutput.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TANGENT_NORMALIZED_COUNTS_FILE_SHORT_NAME, tangentNormalizationOutput.getAbsolutePath(), "-" + NormalizeSomaticReadCounts.TANGENT_BETA_HATS_SHORT_NAME, betaHatsOutput.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.PRE_TANGENT_NORMALIZED_COUNTS_FILE_SHORT_NAME, preTangentNormalizationOutput.getAbsolutePath() };
runCommandLine(arguments);
final ReadCountCollection input = ReadCountCollectionUtils.parse(TARGET_NAME_ONLY_READ_COUNTS_INPUT_ONE_SAMPLE);
final ReadCountCollection factorNormalized = ReadCountCollectionUtils.parse(factorNormalizedOutput);
final ReadCountCollection tangentNormalized = ReadCountCollectionUtils.parse(tangentNormalizationOutput);
final ReadCountCollection preTangentNormalized = ReadCountCollectionUtils.parse(preTangentNormalizationOutput);
final RealMatrix betaHats = readBetaHats(betaHatsOutput, input);
Assert.assertFalse(factorNormalized.targets().stream().anyMatch(t -> t.getInterval() != null));
Assert.assertEquals(factorNormalized.columnNames(), input.columnNames());
Assert.assertEquals(tangentNormalized.columnNames(), input.columnNames());
Assert.assertEquals(preTangentNormalized.columnNames(), input.columnNames());
Assert.assertEquals(factorNormalized.targets().stream().collect(Collectors.toSet()), input.targets().stream().collect(Collectors.toSet()));
assertFactorNormalizedValues(input, factorNormalized);
assertPreTangentNormalizedValues(factorNormalized, preTangentNormalized);
assertBetaHats(preTangentNormalized, betaHats, TEST_PON);
assertBetaHatsRobustToOutliers(preTangentNormalized, TEST_PON);
assertTangentNormalized(tangentNormalized, preTangentNormalized, betaHats, TEST_PON);
}
use of org.apache.commons.math3.linear.RealMatrix in project gatk by broadinstitute.
the class NormalizeSomaticReadCountsIntegrationTest method assertBetaHatsRobustToOutliers.
/**
* Asserts that the calculation of beta hats is not significantly affected by zero-coverage outlier counts
* We perform this check by randomly setting some coverages to zero in copy ratio space (-infinity in log space).
* betaHats imputes 0 in log space (1 in copy ratio space) whenever coverage is below a certain low threshold
* and should thus be robust to this type of noise.
*/
private void assertBetaHatsRobustToOutliers(final ReadCountCollection preTangentNormalized, final File ponFile) {
try (final HDF5File ponReader = new HDF5File(ponFile)) {
final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
final List<String> ponTargets = pon.getPanelTargetNames();
final RealMatrix input = reorderTargetsToPoNOrder(preTangentNormalized, ponTargets);
// randomly set some entries to zero in copy-ratio space (-infinity in log space)
final Random random = new Random(13);
final double noiseProportion = 0.01;
final RealMatrix noisyInput = input.copy();
noisyInput.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
@Override
public double visit(final int row, final int column, final double value) {
return random.nextDouble() < noiseProportion ? Double.NEGATIVE_INFINITY : value;
}
});
final RealMatrix betaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), input, PCATangentNormalizationUtils.EPSILON);
final RealMatrix noisyBetaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), noisyInput, PCATangentNormalizationUtils.EPSILON);
final RealMatrix difference = betaHats.subtract(noisyBetaHats);
difference.walkInOptimizedOrder(new DefaultRealMatrixPreservingVisitor() {
@Override
public void visit(final int row, int column, double value) {
Assert.assertEquals(value, 0, 0.01);
}
});
}
}
Aggregations