Use of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk project.
From the class CreatePanelOfNormals, method runPipeline:
/**
 * Runs the panel-of-normals (PoN) creation pipeline on Spark.
 *
 * <p>Steps: (1) fail fast if the native HDF5 library cannot be loaded; (2) derive default
 * output paths for the blacklist and target-weights files; (3) optionally build a QC PoN,
 * flag suspicious samples with arm-level events, and rebuild the final PoN without them;
 * (4) write the target-weights file unless this is a dry run.
 *
 * @param ctx the Spark context used for PoN creation and QC
 * @throws UserException.HardwareFeatureException if the HDF5 native library is unavailable
 */
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    if (!new HDF5Library().load(null)) {
        //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    // Default the optional output files relative to the main PoN output path.
    if (blacklistOutFile == null) {
        blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
    }
    if (targetWeightsOutFile == null) {
        targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
    }
    // Check parameters and load values to meet the backend PoN creation interface
    validateArguments();
    final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
    final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
    // Create the PoN, including QC, if specified.
    if (!isNoQc && !dryRun) {
        logger.info("QC: Beginning creation of QC PoN...");
        // The QC PoN is written to a temp file; only the final PoN goes to outFile.
        final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
        logger.info("QC: QC PoN created...");
        logger.info("QC: Collecting suspicious samples...");
        try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
            final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
            final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
            ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
            if (!failingSampleNames.isEmpty()) {
                // Suspicious samples found: rebuild the PoN from scratch without them.
                logger.info("QC: Suspicious sample list created...");
                logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
                HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
            } else {
                // No suspicious samples were found, so just redo the PoN reduction to save time.
                logger.info("QC: No suspicious samples found ...");
                logger.info("Creating final PoN only redo'ing the reduction step ...");
                HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
            }
        }
    } else {
        logger.info("Creating PoN directly (skipping QC)...");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
    }
    if (!dryRun) {
        logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
        writeTargetWeightsFile(outFile, targetWeightsOutFile);
    }
    logger.info("Done...");
}
Use of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk-protected project.
From the class NormalizeSomaticReadCountsIntegrationTest, method assertFactorNormalizedValues:
/**
 * Asserts that every entry of {@code factorNormalized} equals the corresponding entry of
 * {@code input} divided by the target factor stored in the test PoN for that row's target.
 */
private void assertFactorNormalizedValues(final ReadCountCollection input, final ReadCountCollection factorNormalized) {
    try (final HDF5File ponReader = new HDF5File(TEST_PON)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final double[] targetFactors = pon.getTargetFactors();
        // Build a name -> index lookup over the PoN's targets.
        final List<String> ponTargets = pon.getTargetNames();
        final Map<String, Integer> ponTargetIndexes = new HashMap<>(ponTargets.size());
        int index = 0;
        for (final String targetName : ponTargets) {
            ponTargetIndexes.put(targetName, index++);
        }
        final RealMatrix inputCounts = input.counts();
        final RealMatrix factorNormalizedCounts = factorNormalized.counts();
        for (int i = 0; i < factorNormalizedCounts.getRowDimension(); i++) {
            final String targetName = factorNormalized.targets().get(i).getName();
            final double factor = targetFactors[ponTargetIndexes.get(targetName)];
            final double[] inputRow = inputCounts.getRow(i);
            final double[] outputRow = factorNormalizedCounts.getRow(i);
            for (int j = 0; j < inputRow.length; j++) {
                Assert.assertEquals(outputRow[j], inputRow[j] / factor, 0.0000001, "" + i + " , " + j);
            }
        }
    }
}
Use of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk project.
From the class HDF5PCACoveragePoNCreationUtilsUnitTest, method testCalculateVarianceCalculation:
@Test
public void testCalculateVarianceCalculation() {
    // The target variances stored in the PoN must match the row variances recomputed
    // from the Spark tangent-normalized counts.
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    try (final HDF5File ponHDF5File = new HDF5File(TEST_FULL_PON_FILE)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponHDF5File);
        final double[] storedVariances = pon.getTargetVariances();
        final PCATangentNormalizationResult sparkResult = pon.normalizeNormalsInPoN(ctx);
        final double[] recomputedVariances = MatrixSummaryUtils.getRowVariances(sparkResult.getTangentNormalized().counts());
        PoNTestUtils.assertEqualsDoubleArrays(storedVariances, recomputedVariances);
    }
}
Use of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk project.
From the class HDF5PCACoveragePoNCreationUtilsUnitTest, method testCalculateVariance:
@Test
public void testCalculateVariance() {
    // Checks that the post-projection target variances are correct. Ground truth was
    // acquired through a breakpoint (to capture input values), some text parsing, and
    // matlab; this simply verifies the variances computed from the dummy PoN's matrix.
    final int numEigensamples = 20;
    final double[] gtTargetVariances = ParamUtils.readValuesFromFile(GT_TARGET_VAR_FILE);
    final File ponFile = PoNTestUtils.createDummyHDF5FilePoN(TEST_PCOV_FILE, numEigensamples);
    try (final HDF5File ponHDF5File = new HDF5File(ponFile)) {
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(ponHDF5File);
        PoNTestUtils.assertEqualsDoubleArrays(pon.getTargetVariances(), gtTargetVariances, 1E-4);
    }
}
Use of org.broadinstitute.hdf5.HDF5File in the broadinstitute/gatk project.
From the class HDF5PCACoveragePoNUnitTest, method testTargetNameReading:
@Test
public void testTargetNameReading() throws IOException {
    // Use try-with-resources so the HDF5 file handle is released even when an assertion
    // fails — the original manual close() leaked the reader on test failure.
    try (final HDF5File reader = new HDF5File(TEST_PON)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(reader);
        final List<String> targetNames = pon.getTargetNames();
        final List<String> expected = readLines(TEST_PON_TARGETS);
        Assert.assertNotNull(targetNames);
        Assert.assertEquals(targetNames, expected);
    }
}
Aggregations