Example 6 with HDF5Library

Use of org.broadinstitute.hdf5.HDF5Library in project gatk-protected by broadinstitute.

From the class CreatePanelOfNormals, method runPipeline:

@Override
protected void runPipeline(final JavaSparkContext ctx) {
    // Note: passing null to load() means using the default temp dir for the native library.
    if (!new HDF5Library().load(null)) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. "
                + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    if (blacklistOutFile == null) {
        blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
    }
    if (targetWeightsOutFile == null) {
        targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
    }
    // Check parameters and load values to meet the backend PoN creation interface
    validateArguments();
    final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
    final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
    // Create the PoN, including QC, if specified.
    if (!isNoQc && !dryRun) {
        logger.info("QC:  Beginning creation of QC PoN...");
        final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets,
                new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget,
                columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
        logger.info("QC:  QC PoN created...");
        logger.info("QC:  Collecting suspicious samples...");
        try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
            final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
            final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
            ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
            // If suspicious samples were found, rebuild the final PoN without them;
            // otherwise just redo the PoN reduction to save time.
            if (!failingSampleNames.isEmpty()) {
                logger.info("QC:  Suspicious sample list created...");
                logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
                HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets,
                        failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget,
                        columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
            } else {
                logger.info("QC:  No suspicious samples found ...");
                logger.info("Creating final PoN only redo'ing the reduction step ...");
                HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
            }
        }
    } else {
        logger.info("Creating PoN directly (skipping QC)...");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets,
                new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget,
                columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
    }
    if (!dryRun) {
        logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
        writeTargetWeightsFile(outFile, targetWeightsOutFile);
    }
    logger.info("Done...");
}
Also used: HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN), PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN), HDF5Library (org.broadinstitute.hdf5.HDF5Library), OptionalInt (java.util.OptionalInt), HDF5File (org.broadinstitute.hdf5.HDF5File), File (java.io.File)
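
All five tools in these examples open with the same native-library guard, so it is a natural candidate for extraction into a shared helper. The sketch below is a minimal illustration of that refactoring; the class Hdf5Support and its ensureLoaded method are hypothetical, not part of GATK, and it assumes HDF5Library.load takes a java.io.File temp directory (null meaning the default temp dir, per the comment in the sources above).

import java.io.File;

import org.broadinstitute.hdf5.HDF5Library;

// Hypothetical helper (not part of GATK) that centralizes the load-guard
// idiom repeated at the top of each tool above.
public final class Hdf5Support {

    private Hdf5Support() {
    }

    // Attempts to load the native HDF5 library, extracting it to tempDir
    // (or the JVM default temp dir when tempDir is null), and fails fast
    // when the platform is unsupported.
    public static void ensureLoaded(final File tempDir) {
        if (!new HDF5Library().load(tempDir)) {
            // The GATK tools throw UserException.HardwareFeatureException here;
            // a plain RuntimeException keeps this sketch self-contained.
            throw new RuntimeException("Cannot load the required HDF5 library. "
                    + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
        }
    }
}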

Example 7 with HDF5Library

Use of org.broadinstitute.hdf5.HDF5Library in project gatk-protected by broadinstitute.

From the class NormalizeSomaticReadCounts, method doWork:

@Override
protected Object doWork() {
    // Note: passing null to load() means using the default temp dir for the native library.
    if (!new HDF5Library().load(null)) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. "
                + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    IOUtils.canReadFile(ponFile);
    try (final HDF5File hdf5PoNFile = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(hdf5PoNFile, logger);
        final TargetCollection<Target> targetCollection = readTargetCollection(targetFile);
        final ReadCountCollection proportionalCoverageProfile = readInputReadCounts(readCountsFile, targetCollection);
        final PCATangentNormalizationResult tangentNormalizationResult = pon.normalize(proportionalCoverageProfile);
        tangentNormalizationResult.write(getCommandLine(), tangentNormalizationOutFile, preTangentNormalizationOutFile, betaHatsOutFile, fntOutFile);
        return "SUCCESS";
    }
}
Also used: HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN), PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN), PCATangentNormalizationResult (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationResult), HDF5Library (org.broadinstitute.hdf5.HDF5Library), HDF5File (org.broadinstitute.hdf5.HDF5File)
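
Note that doWork above opens the PoN with the single-argument HDF5File constructor, while CreatePanelOfNormals passes HDF5File.OpenMode.READ_ONLY explicitly. A minimal sketch of the read-only access pattern, with the mode spelled out and a placeholder pon.hd5 path (the path and class name are illustrative only):

import java.io.File;

import org.broadinstitute.hdf5.HDF5File;

// Illustrative sketch of the read-only PoN access pattern used above.
public class PonReadSketch {

    public static void main(final String[] args) {
        final File ponFile = new File("pon.hd5");  // placeholder path
        // try-with-resources releases the HDF5 handle even if an exception
        // is thrown mid-read, which matters for native-backed files.
        try (final HDF5File hdf5PoNFile = new HDF5File(ponFile, HDF5File.OpenMode.READ_ONLY)) {
            // ... wrap hdf5PoNFile in an HDF5PCACoveragePoN and call
            // pon.normalize(...) as in the doWork method above ...
        }
    }
}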

Example 8 with HDF5Library

Use of org.broadinstitute.hdf5.HDF5Library in project gatk by broadinstitute.

From the class AllelicCNV, method runPipeline:

@Override
protected void runPipeline(final JavaSparkContext ctx) {
    validateArguments();
    // Note: passing null to load() means using the default temp dir for the native library.
    if (!new HDF5Library().load(null)) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. "
                + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    final String originalLogLevel = (ctx.getLocalProperty("logLevel") != null) ? ctx.getLocalProperty("logLevel") : "INFO";
    ctx.setLogLevel("WARN");
    //the string after the final slash in the output prefix (which may be an absolute file path) will be used as the sample name
    final String sampleName = outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1);
    logger.info("Starting workflow for " + sampleName + "...");
    //make Genome from input target coverages and SNP counts
    logger.info("Loading input files...");
    final Genome genome = new Genome(tangentNormalizedCoverageFile, snpCountsFile);
    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    //load target-coverage segments from input file
    final List<ModeledSegment> targetSegmentsWithCalls = SegmentUtils.readModeledSegmentsFromSegmentFile(targetSegmentsFile);
    logger.info("Number of input target-coverage segments: " + targetSegmentsWithCalls.size());
    //merge copy-neutral and uncalled segments (unless disabled) and fix up target-segment start breakpoints
    logger.info("Preparing target-coverage segments...");
    final List<SimpleInterval> targetSegments = prepareTargetSegments(genome, targetSegmentsWithCalls);
    //segment SNPs using CBS on per-SNP MLE minor allele fraction iteratively until convergence
    //(final segment file is output as a side effect)
    logger.info("Performing SNP segmentation...");
    final List<SimpleInterval> snpSegments = performSNPSegmentationStep(sampleName, genome);
    //combine SNP and target-coverage segments
    logger.info("Combining SNP and target-coverage segments...");
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, genome);
    logger.info("Number of segments after segment union: " + unionedSegments.size());
    final File unionedSegmentsFile = new File(outputPrefix + "-" + UNION_SEG_FILE_TAG + SEGMENT_FILE_SUFFIX);
    SegmentUtils.writeSegmentFileWithNumTargetsAndNumSNPs(unionedSegmentsFile, unionedSegments, genome);
    logSegmentFileWrittenMessage(unionedSegmentsFile);
    //small-segment merging (note that X and Y are always small segments and dropped, since GATK CNV drops them)
    logger.info("Merging small segments...");
    final SegmentedGenome segmentedGenomeWithSmallSegments = new SegmentedGenome(unionedSegments, genome);
    final SegmentedGenome segmentedGenome = segmentedGenomeWithSmallSegments.mergeSmallSegments(smallSegmentTargetNumberThreshold);
    logger.info("Number of segments after small-segment merging: " + segmentedGenome.getSegments().size());
    //initial MCMC model fitting performed by ACNVModeller constructor
    final ACNVModeller modeller = new ACNVModeller(segmentedGenome, allelicPoN, numSamplesCopyRatio, numBurnInCopyRatio, numSamplesAlleleFraction, numBurnInAlleleFraction, ctx);
    //write initial segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, INITIAL_FIT_FILE_TAG);
    //similar-segment merging (segment files are output for each merge iteration)
    logger.info("Merging similar segments...");
    performSimilarSegmentMergingStep(modeller);
    //write final segments and parameters to file
    writeACNVModeledSegmentAndParameterFiles(modeller, FINAL_FIT_FILE_TAG);
    ctx.setLogLevel(originalLogLevel);
    logger.info("SUCCESS: Allelic CNV run complete for sample " + sampleName + ".");
}
Also used: AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals), HDF5Library (org.broadinstitute.hdf5.HDF5Library), SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval), File (java.io.File)
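
One portability note on runPipeline above: the sample name is taken from everything after the last "/" in outputPrefix, which assumes Unix-style separators. A platform-neutral alternative using java.nio is sketched below; this is an illustration, not the GATK code, and the class name SampleNames is hypothetical.

import java.nio.file.Paths;

// Hypothetical alternative to
//   outputPrefix.substring(outputPrefix.lastIndexOf("/") + 1)
// that also handles Windows separators and prefixes with no separator at all.
public final class SampleNames {

    private SampleNames() {
    }

    public static String fromOutputPrefix(final String outputPrefix) {
        // getFileName() returns the last path component, so "results/sample1"
        // yields "sample1", matching the substring-based derivation above.
        return Paths.get(outputPrefix).getFileName().toString();
    }

    public static void main(final String[] args) {
        System.out.println(fromOutputPrefix("results/sample1"));  // prints sample1
    }
}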

Example 9 with HDF5Library

Use of org.broadinstitute.hdf5.HDF5Library in project gatk by broadinstitute.

From the class CalculatePulldownPhasePosteriors, method doWork:

@Override
public Object doWork() {
    // Note: passing null to load() means using the default temp dir for the native library.
    if (!new HDF5Library().load(null)) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. "
                + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    //read counts, segments, and parameters from files
    final AllelicCountCollection counts = new AllelicCountCollection(snpCountsFile);
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final AlleleFractionState state = reconstructState(segments, parametersFile);
    //load allelic-bias panel of normals if provided
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    //calculate phase posteriors
    final List<SimpleInterval> unmodeledSegments = segments.stream().map(ACNVModeledSegment::getInterval).collect(Collectors.toList());
    final AllelicCountWithPhasePosteriorsCollection countsWithPhasePosteriors = calculatePhasePosteriors(counts, unmodeledSegments, state, allelicPoN);
    //write phase posteriors to file with same verbosity as input file
    countsWithPhasePosteriors.write(outputFile, counts.getVerbosity());
    return "SUCCESS";
}
Also used: AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals), AllelicCountWithPhasePosteriorsCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountWithPhasePosteriorsCollection), HDF5Library (org.broadinstitute.hdf5.HDF5Library), AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection), SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)
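
Both this tool and AllelicCNV above fall back to AllelicPanelOfNormals.EMPTY_PON through the same null-checked ternary. The same default can be expressed with java.util.Optional; the sketch below is equivalent under the assumption that AllelicPanelOfNormals.read accepts the java.io.File passed in these tools.

import java.io.File;
import java.util.Optional;

import org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals;

// Sketch of an Optional-based equivalent of the ternary used above:
//   allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile)
//                          : AllelicPanelOfNormals.EMPTY_PON
public final class AllelicPoNLoader {

    private AllelicPoNLoader() {
    }

    public static AllelicPanelOfNormals loadOrEmpty(final File allelicPoNFile) {
        // map() runs only for a non-null file, so read() never sees null;
        // orElse() supplies the empty-PoN default.
        return Optional.ofNullable(allelicPoNFile)
                .map(AllelicPanelOfNormals::read)
                .orElse(AllelicPanelOfNormals.EMPTY_PON);
    }
}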

Example 10 with HDF5Library

Use of org.broadinstitute.hdf5.HDF5Library in project gatk by broadinstitute.

From the class CreatePanelOfNormals, method runPipeline (the same method as in Example 6, here from the gatk project):

@Override
protected void runPipeline(final JavaSparkContext ctx) {
    // Note: passing null to load() means using the default temp dir for the native library.
    if (!new HDF5Library().load(null)) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. "
                + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    if (blacklistOutFile == null) {
        blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
    }
    if (targetWeightsOutFile == null) {
        targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
    }
    // Check parameters and load values to meet the backend PoN creation interface
    validateArguments();
    final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
    final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
    // Create the PoN, including QC, if specified.
    if (!isNoQc && !dryRun) {
        logger.info("QC:  Beginning creation of QC PoN...");
        final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets,
                new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget,
                columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
        logger.info("QC:  QC PoN created...");
        logger.info("QC:  Collecting suspicious samples...");
        try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
            final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
            final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
            ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
            // If suspicious samples were found, rebuild the final PoN without them;
            // otherwise just redo the PoN reduction to save time.
            if (!failingSampleNames.isEmpty()) {
                logger.info("QC:  Suspicious sample list created...");
                logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
                HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets,
                        failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget,
                        columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
            } else {
                logger.info("QC:  No suspicious samples found ...");
                logger.info("Creating final PoN only redo'ing the reduction step ...");
                HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
            }
        }
    } else {
        logger.info("Creating PoN directly (skipping QC)...");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets,
                new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget,
                columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
    }
    if (!dryRun) {
        logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
        writeTargetWeightsFile(outFile, targetWeightsOutFile);
    }
    logger.info("Done...");
}
Also used: HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN), PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN), HDF5Library (org.broadinstitute.hdf5.HDF5Library), OptionalInt (java.util.OptionalInt), HDF5File (org.broadinstitute.hdf5.HDF5File), File (java.io.File)
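
The body of parseNumberOfEigensamples is not shown in these excerpts, only that it turns numberOfEigensamplesString into an OptionalInt. A plausible reconstruction is sketched below; the "auto" sentinel and the validation are guesses for illustration, not confirmed by the GATK sources.

import java.util.OptionalInt;

// Hypothetical reconstruction of parseNumberOfEigensamples (the real GATK
// implementation is not shown above). Assumes an "auto" sentinel requests
// automatic selection, expressed as an empty OptionalInt.
public final class EigensampleArg {

    private EigensampleArg() {
    }

    public static OptionalInt parse(final String numberOfEigensamplesString) {
        if (numberOfEigensamplesString == null
                || "auto".equalsIgnoreCase(numberOfEigensamplesString)) {
            return OptionalInt.empty();
        }
        final int n = Integer.parseInt(numberOfEigensamplesString);
        if (n <= 0) {
            throw new IllegalArgumentException("Number of eigensamples must be positive: " + n);
        }
        return OptionalInt.of(n);
    }
}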

Aggregations

HDF5Library (org.broadinstitute.hdf5.HDF5Library): 10 usages
AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals): 6 usages
File (java.io.File): 4 usages
HDF5File (org.broadinstitute.hdf5.HDF5File): 4 usages
HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN): 4 usages
PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN): 4 usages
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 4 usages
OptionalInt (java.util.OptionalInt): 2 usages
AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection): 2 usages
AllelicCountWithPhasePosteriorsCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountWithPhasePosteriorsCollection): 2 usages
AllelicPanelOfNormalsCreator (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormalsCreator): 2 usages
PCATangentNormalizationResult (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationResult): 2 usages