Use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk-protected by broadinstitute — class PoNTestUtils, method assertEquivalentPoN:
/**
 * Make sure that two PoNs are effectively the same.
 *
 * <p>Opens both HDF5 files, wraps each in an {@link HDF5PCACoveragePoN} view, and delegates the
 * field-by-field comparison to the {@code assertEquivalentPoN(PCACoveragePoN, PCACoveragePoN)}
 * overload. Both files are closed via try-with-resources.</p>
 *
 * @param left never {@code null}, readable HDF5 PoN file
 * @param right never {@code null}, readable HDF5 PoN file
 */
public static void assertEquivalentPoN(final File left, final File right) {
    // canReadFile throws if the file is missing or unreadable, so this doubles as validation.
    IOUtils.canReadFile(left);
    IOUtils.canReadFile(right);
    try (final HDF5File leftFile = new HDF5File(left);
         final HDF5File rightFile = new HDF5File(right)) {
        final HDF5PCACoveragePoN leftPoN = new HDF5PCACoveragePoN(leftFile);
        final HDF5PCACoveragePoN rightPoN = new HDF5PCACoveragePoN(rightFile);
        // Delegates to the in-memory overload (defined elsewhere in this class).
        assertEquivalentPoN(leftPoN, rightPoN);
    }
}
Use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk by broadinstitute — class CreatePanelOfNormalsIntegrationTest, method assertBasicPoNAssumptions:
/**
 * Asserts structural invariants of a freshly created PoN file.
 *
 * @param ponFile never {@code null}, HDF5 PoN file to check
 * @param initialTargetsFileUsedToCreatePoN the raw-target file the PoN was built from,
 *        or {@code null} to skip the raw-target round-trip check
 */
private void assertBasicPoNAssumptions(final File ponFile, final File initialTargetsFileUsedToCreatePoN) {
    try (final HDF5File ponHDF5File = new HDF5File(ponFile)) {
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(ponHDF5File);
        // Panel targets are a (possibly proper) subset of the filtered targets ...
        Assert.assertTrue(pon.getTargets().size() >= pon.getPanelTargets().size());
        // ... and filtering must have removed at least one raw target.
        Assert.assertTrue(pon.getRawTargets().size() > pon.getTargets().size());
        // Each name list must be parallel to its corresponding target list.
        // (assertEquals gives a useful failure message, unlike assertTrue(a == b).)
        Assert.assertEquals(pon.getTargetNames().size(), pon.getTargets().size());
        // BUG FIX: original compared getPanelTargetNames() against itself (always true).
        Assert.assertEquals(pon.getPanelTargetNames().size(), pon.getPanelTargets().size());
        // BUG FIX: original compared getRawTargetNames() against itself (always true).
        Assert.assertEquals(pon.getRawTargetNames().size(), pon.getRawTargets().size());
        if (initialTargetsFileUsedToCreatePoN != null) {
            final TargetCollection<Target> tc = TargetArgumentCollection.readTargetCollection(initialTargetsFileUsedToCreatePoN);
            Assert.assertEquals(pon.getRawTargets().size(), tc.targetCount());
            // Check that the raw targets round-trip unchanged, element by element.
            // IntStream.range replaces the original commons-lang IntRange detour.
            Assert.assertTrue(IntStream.range(0, pon.getRawTargets().size())
                    .allMatch(i -> pon.getRawTargets().get(i).equals(tc.target(i))));
        }
    }
}
Use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk by broadinstitute — class PoNTestUtils, method assertEquivalentPoN:
/**
 * Asserts that the PoNs stored in the two given HDF5 files are effectively identical.
 *
 * @param left never {@code null}, readable HDF5 PoN file
 * @param right never {@code null}, readable HDF5 PoN file
 */
public static void assertEquivalentPoN(final File left, final File right) {
    // Validate readability up front; canReadFile throws on failure.
    IOUtils.canReadFile(left);
    IOUtils.canReadFile(right);
    try (final HDF5File leftFile = new HDF5File(left);
         final HDF5File rightFile = new HDF5File(right)) {
        // Wrap each file in a PoN view and delegate to the in-memory comparison overload.
        assertEquivalentPoN(new HDF5PCACoveragePoN(leftFile), new HDF5PCACoveragePoN(rightFile));
    }
}
Use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk-protected by broadinstitute — class CreatePanelOfNormals, method writeTargetWeightsFile:
/**
 * Read target variances from an HDF5 PoN file and write the corresponding target weights
 * to a file that can be read in by R CBS.
 *
 * <p>Each weight is the reciprocal of the target's variance.</p>
 *
 * @param ponFile never {@code null}, HDF5 PoN file
 * @param outputFile never {@code null}, output file
 */
private static void writeTargetWeightsFile(final File ponFile, final File outputFile) {
    IOUtils.canReadFile(ponFile);
    try (final HDF5File file = new HDF5File(ponFile, HDF5File.OpenMode.READ_ONLY)) {
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(file);
        final double[] variances = pon.getTargetVariances();
        // Weight each target by the inverse of its variance.
        final double[] targetWeights = new double[variances.length];
        for (int i = 0; i < variances.length; i++) {
            targetWeights[i] = 1 / variances[i];
        }
        ParamUtils.writeValuesToFile(targetWeights, outputFile);
    }
}
Use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk-protected by broadinstitute — class CreatePanelOfNormals, method runPipeline:
/**
 * Main PoN-creation pipeline: optionally builds a QC PoN to blacklist suspicious samples,
 * then builds the final PoN and writes the target-weights side output.
 *
 * <p>Side effects: writes {@code outFile}, {@code blacklistOutFile} (when QC runs), and
 * {@code targetWeightsOutFile} (unless {@code dryRun}).</p>
 *
 * @param ctx Spark context used by the PoN creation utilities
 */
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    // HDF5 is required for all PoN I/O below; fail fast if the native library won't load.
    if (!new HDF5Library().load(null)) {
        //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    // Derive default paths for the side outputs from the main output path.
    if (blacklistOutFile == null) {
        blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
    }
    if (targetWeightsOutFile == null) {
        targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
    }
    // Check parameters and load values to meet the backend PoN creation interface
    validateArguments();
    final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
    final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
    // Create the PoN, including QC, if specified.
    if (!isNoQc && !dryRun) {
        // QC pass: build a temporary PoN with a fixed eigensample count, then scan it for
        // samples showing arm-level events; those samples are blacklisted from the final PoN.
        logger.info("QC: Beginning creation of QC PoN...");
        final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
        logger.info("QC: QC PoN created...");
        logger.info("QC: Collecting suspicious samples...");
        try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
            final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
            final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
            // The blacklist is written even when empty, so downstream tools always find the file.
            ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
            // If no suspicious samples were found, just redo the PoN reduction to save time.
            if (failingSampleNames.size() != 0) {
                logger.info("QC: Suspicious sample list created...");
                logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
                // Rebuild from scratch, excluding the failing samples this time.
                HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
            } else {
                logger.info("QC: No suspicious samples found ...");
                logger.info("Creating final PoN only redo'ing the reduction step ...");
                // Reuse the QC PoN's heavy lifting; only the reduction step is redone with the
                // user-requested eigensample count.
                HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
            }
        }
    } else {
        // QC skipped (either by flag or because this is a dry run): build the final PoN directly.
        logger.info("Creating PoN directly (skipping QC)...");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
    }
    if (!dryRun) {
        logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
        writeTargetWeightsFile(outFile, targetWeightsOutFile);
    }
    logger.info("Done...");
}
Aggregations