Use of org.broadinstitute.hdf5.HDF5File in project gatk by broadinstitute:
the class NormalizeSomaticReadCountsIntegrationTest, method assertFactorNormalizedValues.
/**
 * Verifies that the factor-normalized counts equal the input counts divided,
 * row by row, by the per-target factor stored in the test PoN.
 *
 * @param input            original read counts, never {@code null}
 * @param factorNormalized factor-normalized read counts to check, never {@code null}
 */
private void assertFactorNormalizedValues(final ReadCountCollection input, final ReadCountCollection factorNormalized) {
try (final HDF5File ponReader = new HDF5File(TEST_PON)) {
final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
final double[] targetFactors = pon.getTargetFactors();
// Map each PoN target name to its position so factors can be looked up by name.
final List<String> ponTargets = pon.getTargetNames();
final Map<String, Integer> indexByTargetName = new HashMap<>(ponTargets.size());
for (int idx = 0; idx < ponTargets.size(); idx++) {
indexByTargetName.put(ponTargets.get(idx), idx);
}
final RealMatrix before = input.counts();
final RealMatrix after = factorNormalized.counts();
// Every normalized entry must match input / factor for that row's target.
for (int row = 0; row < after.getRowDimension(); row++) {
final String targetName = factorNormalized.targets().get(row).getName();
final double factor = targetFactors[indexByTargetName.get(targetName)];
final double[] beforeRow = before.getRow(row);
final double[] afterRow = after.getRow(row);
for (int col = 0; col < beforeRow.length; col++) {
Assert.assertEquals(afterRow[col], beforeRow[col] / factor, 0.0000001, "" + row + " , " + col);
}
}
}
}
Use of org.broadinstitute.hdf5.HDF5File in project gatk by broadinstitute:
the class PoNTestUtils, method assertEquivalentPoN.
/**
 * Asserts that two panel-of-normals HDF5 files hold effectively identical PoNs.
 *
 * <p>Both files are opened read-only and compared via the PoN-level overload.</p>
 *
 * @param left never {@code null}, must be readable
 * @param right never {@code null}, must be readable
 */
public static void assertEquivalentPoN(final File left, final File right) {
// Fail fast if either file is missing or unreadable.
IOUtils.canReadFile(left);
IOUtils.canReadFile(right);
try (final HDF5File firstFile = new HDF5File(left);
final HDF5File secondFile = new HDF5File(right)) {
final HDF5PCACoveragePoN firstPoN = new HDF5PCACoveragePoN(firstFile);
final HDF5PCACoveragePoN secondPoN = new HDF5PCACoveragePoN(secondFile);
assertEquivalentPoN(firstPoN, secondPoN);
}
}
Use of org.broadinstitute.hdf5.HDF5File in project gatk-protected by broadinstitute:
the class CreatePanelOfNormals, method writeTargetWeightsFile.
/**
 * Read target variances from an HDF5 PoN file and write the corresponding target
 * weights (the reciprocal of each variance) to a file that can be read in by R CBS.
 *
 * @param ponFile never {@code null}, HDF5 PoN file, must be readable
 * @param outputFile never {@code null}, output file
 */
private static void writeTargetWeightsFile(final File ponFile, final File outputFile) {
IOUtils.canReadFile(ponFile);
try (final HDF5File hdf5 = new HDF5File(ponFile, HDF5File.OpenMode.READ_ONLY)) {
final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(hdf5);
// Weight of each target is the inverse of its variance across the panel.
final double[] variances = pon.getTargetVariances();
final double[] weights = new double[variances.length];
for (int i = 0; i < variances.length; i++) {
weights[i] = 1 / variances[i];
}
ParamUtils.writeValuesToFile(weights, outputFile);
}
}
Use of org.broadinstitute.hdf5.HDF5File in project gatk-protected by broadinstitute:
the class CreatePanelOfNormals, method runPipeline.
/**
 * Builds the coverage panel of normals.
 *
 * <p>Flow: load the native HDF5 library, default the optional output paths, validate
 * arguments, then either (a) build a QC PoN first, blacklist samples with arm-level
 * events, and rebuild/reduce the final PoN accordingly, or (b) when QC is disabled,
 * build the PoN directly. Unless this is a dry run, a target-weights file for R CBS
 * is written at the end.</p>
 *
 * @param ctx Spark context used for the PoN creation and QC steps
 * @throws UserException.HardwareFeatureException if the HDF5 native library cannot be loaded
 */
@Override
protected void runPipeline(final JavaSparkContext ctx) {
if (!new HDF5Library().load(null)) {
//Note: passing null means using the default temp dir.
throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
}
// Default the optional output files next to the main output file.
if (blacklistOutFile == null) {
blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
}
if (targetWeightsOutFile == null) {
targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
}
// Check parameters and load values to meet the backend PoN creation interface
validateArguments();
final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
// Create the PoN, including QC, if specified.
if (!isNoQc && !dryRun) {
logger.info("QC: Beginning creation of QC PoN...");
// The QC PoN is built into a temp file; only the blacklist and final PoN are kept.
final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
logger.info("QC: QC PoN created...");
logger.info("QC: Collecting suspicious samples...");
try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
if (!failingSampleNames.isEmpty()) {
// Suspicious samples found: rebuild the PoN from scratch without them.
logger.info("QC: Suspicious sample list created...");
logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
} else {
// No suspicious samples: reuse the QC PoN and just redo the reduction to save time.
logger.info("QC: No suspicious samples found ...");
logger.info("Creating final PoN only redo'ing the reduction step ...");
HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
}
}
} else {
logger.info("Creating PoN directly (skipping QC)...");
HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
}
if (!dryRun) {
logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
writeTargetWeightsFile(outFile, targetWeightsOutFile);
}
logger.info("Done...");
}
Use of org.broadinstitute.hdf5.HDF5File in project gatk-protected by broadinstitute:
the class NormalizeSomaticReadCounts, method doWork.
/**
 * Tangent-normalizes the input proportional coverage against the panel of normals.
 *
 * <p>Loads the HDF5 native library, reads the PoN and the input read counts
 * (restricted to the optional target collection), runs PoN normalization, and
 * writes all result files.</p>
 *
 * @return the string {@code "SUCCESS"} on completion
 * @throws UserException.HardwareFeatureException if the HDF5 native library cannot be loaded
 */
@Override
protected Object doWork() {
if (!new HDF5Library().load(null)) {
//Note: passing null means using the default temp dir.
throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
}
IOUtils.canReadFile(ponFile);
try (final HDF5File hdf5PoNFile = new HDF5File(ponFile)) {
final PCACoveragePoN pon = new HDF5PCACoveragePoN(hdf5PoNFile, logger);
final TargetCollection<Target> targetCollection = readTargetCollection(targetFile);
final ReadCountCollection proportionalCoverageProfile = readInputReadCounts(readCountsFile, targetCollection);
// Removed a stray empty statement (";") that followed this call in the original.
final PCATangentNormalizationResult tangentNormalizationResult = pon.normalize(proportionalCoverageProfile);
tangentNormalizationResult.write(getCommandLine(), tangentNormalizationOutFile, preTangentNormalizationOutFile, betaHatsOutFile, fntOutFile);
return "SUCCESS";
}
}
Aggregations