Usage of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk project:
class CreatePanelOfNormals, method writeTargetWeightsFile.
/**
 * Reads the per-target variances stored in an HDF5 panel-of-normals file and
 * writes the corresponding target weights (the reciprocal of each variance)
 * to a file that can be read in by R CBS.
 *
 * @param ponFile never {@code null}, HDF5 PoN file
 * @param outputFile never {@code null}, output file
 */
private static void writeTargetWeightsFile(final File ponFile, final File outputFile) {
    IOUtils.canReadFile(ponFile);
    try (final HDF5File hdf5 = new HDF5File(ponFile, HDF5File.OpenMode.READ_ONLY)) {
        final HDF5PCACoveragePoN panel = new HDF5PCACoveragePoN(hdf5);
        final double[] variances = panel.getTargetVariances();
        // Weight of each target is the inverse of its variance.
        final double[] weights = new double[variances.length];
        for (int k = 0; k < variances.length; k++) {
            weights[k] = 1 / variances[k];
        }
        ParamUtils.writeValuesToFile(weights, outputFile);
    }
}
Usage of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk project:
class NormalizeSomaticReadCounts, method doWork.
/**
 * Tangent-normalizes the input proportional coverage profile against the
 * panel of normals and writes the normalization results to the configured
 * output files.
 *
 * @return the string {@code "SUCCESS"} on completion
 * @throws UserException.HardwareFeatureException if the native HDF5 library cannot be loaded
 */
@Override
protected Object doWork() {
    // Note: passing null means using the default temp dir for the native library.
    if (!new HDF5Library().load(null)) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    IOUtils.canReadFile(ponFile);
    try (final HDF5File hdf5PoNFile = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(hdf5PoNFile, logger);
        final TargetCollection<Target> targetCollection = readTargetCollection(targetFile);
        final ReadCountCollection proportionalCoverageProfile = readInputReadCounts(readCountsFile, targetCollection);
        final PCATangentNormalizationResult tangentNormalizationResult = pon.normalize(proportionalCoverageProfile);
        tangentNormalizationResult.write(getCommandLine(), tangentNormalizationOutFile, preTangentNormalizationOutFile, betaHatsOutFile, fntOutFile);
        return "SUCCESS";
    }
}
Usage of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk-protected project:
class CreatePanelOfNormalsIntegrationTest, method assertRamPoNDuplicate.
// Opens the HDF5 PoN written to disk and delegates to the in-memory duplicate assertion overload.
private void assertRamPoNDuplicate(final File outputFile) {
    try (final HDF5File ponHdf5 = new HDF5File(outputFile)) {
        assertRamPoNDuplicate(new HDF5PCACoveragePoN(ponHdf5));
    }
}
Usage of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk-protected project:
class NormalizeSomaticReadCountsIntegrationTest, method assertBetaHats.
/**
 * Asserts that a collection of beta-hats corresponds to the expected value given
 * the input pre-tangent normalization matrix and the PoN file.
 *
 * @param preTangentNormalized pre-tangent-normalized counts, reordered here into PoN target order
 * @param actual the beta-hat matrix produced by the code under test (one column per sample)
 * @param ponFile HDF5 panel-of-normals file providing the reduced-panel pseudo-inverse
 */
private void assertBetaHats(final ReadCountCollection preTangentNormalized, final RealMatrix actual, final File ponFile) {
    Assert.assertEquals(actual.getColumnDimension(), preTangentNormalized.columnNames().size());
    final double epsilon = PCATangentNormalizationUtils.EPSILON;
    try (final HDF5File ponReader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final List<String> ponTargets = pon.getPanelTargetNames();
        // Rows must be in the PoN's target order before any entry-wise comparison.
        final RealMatrix inCounts = reorderTargetsToPoNOrder(preTangentNormalized, ponTargets);
        // Obtain the subset of relevant targets used to calculate the beta-hats:
        // a target row is relevant for column i when its count exceeds 1 + log2(epsilon).
        final int[][] betaHatTargets = new int[inCounts.getColumnDimension()][];
        for (int i = 0; i < inCounts.getColumnDimension(); i++) {
            final List<Integer> relevantTargets = new ArrayList<>();
            for (int j = 0; j < inCounts.getRowDimension(); j++) {
                // Math.log(epsilon) / Math.log(2) converts the epsilon threshold to log2.
                if (inCounts.getEntry(j, i) > 1 + (Math.log(epsilon) / Math.log(2))) {
                    relevantTargets.add(j);
                }
            }
            betaHatTargets[i] = relevantTargets.stream().mapToInt(Integer::intValue).toArray();
        }
        // Calculate beta-hats per column and check against the actual values.
        final RealMatrix normalsInv = pon.getReducedPanelPInverseCounts();
        Assert.assertEquals(actual.getRowDimension(), normalsInv.getRowDimension());
        final RealMatrix normalsInvT = normalsInv.transpose();
        for (int i = 0; i < inCounts.getColumnDimension(); i++) {
            // Row vector of column i's counts restricted to that column's relevant targets.
            final RealMatrix inValues = inCounts.getColumnMatrix(i).transpose().getSubMatrix(new int[] { 0 }, betaHatTargets[i]);
            // Matching rows of the transposed pseudo-inverse, keeping all of its columns.
            final RealMatrix normalValues = normalsInvT.getSubMatrix(betaHatTargets[i], IntStream.range(0, normalsInvT.getColumnDimension()).toArray());
            final RealMatrix betaHats = inValues.multiply(normalValues);
            for (int j = 0; j < actual.getRowDimension(); j++) {
                Assert.assertEquals(actual.getEntry(j, i), betaHats.getEntry(0, j), 0.000001, "Col " + i + " row " + j);
            }
        }
    }
}
Usage of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk-protected project:
class NormalizeSomaticReadCountsIntegrationTest, method assertTangentNormalized.
// Verifies that the actual tangent-normalized counts equal
// preTangentNormalized - reducedPanel * betaHats, after reordering both
// matrices into the PoN's target order.
private void assertTangentNormalized(final ReadCountCollection actualReadCounts, final ReadCountCollection preTangentNormalized, final RealMatrix betaHats, final File ponFile) {
    try (final HDF5File reader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
        final RealMatrix input = reorderTargetsToPoNOrder(preTangentNormalized, pon.getPanelTargetNames());
        final RealMatrix observed = reorderTargetsToPoNOrder(actualReadCounts, pon.getPanelTargetNames());
        // Expected result is the residual after projecting onto the reduced panel.
        final RealMatrix projection = pon.getReducedPanelCounts().multiply(betaHats);
        final RealMatrix expected = input.subtract(projection);
        Assert.assertEquals(observed.getRowDimension(), expected.getRowDimension());
        Assert.assertEquals(observed.getColumnDimension(), expected.getColumnDimension());
        for (int row = 0; row < observed.getRowDimension(); row++) {
            Assert.assertEquals(observed.getRow(row), expected.getRow(row));
        }
    }
}
Aggregations