Search in sources :

Example 11 with HDF5PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk-protected by broadinstitute.

the class PoNTestUtils method assertEquivalentPoN.

/**
 * Asserts that the PoNs stored in two HDF5 files are effectively the same.
 *
 * <p>Each file is first passed through {@code IOUtils.canReadFile}; both files are then
 * opened as {@code HDF5File}s, wrapped in {@code HDF5PCACoveragePoN} instances and handed
 * to the PoN-based overload of this method. Both HDF5 files are closed on exit.
 *
 * @param left never {@code null}
 * @param right never {@code null}
 */
public static void assertEquivalentPoN(final File left, final File right) {
    IOUtils.canReadFile(left);
    IOUtils.canReadFile(right);
    try (final HDF5File leftHDF5 = new HDF5File(left);
         final HDF5File rightHDF5 = new HDF5File(right)) {
        // Wrap each open file and delegate the actual comparison to the PoN overload.
        assertEquivalentPoN(new HDF5PCACoveragePoN(leftHDF5), new HDF5PCACoveragePoN(rightHDF5));
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 12 with HDF5PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk by broadinstitute.

the class CreatePanelOfNormalsIntegrationTest method assertBasicPoNAssumptions.

/**
 * Checks basic size invariants of the PoN stored in the given HDF5 file.
 *
 * <p>Asserted invariants:
 * <ul>
 *   <li>there are at least as many targets as panel targets;</li>
 *   <li>there are strictly more raw targets than targets;</li>
 *   <li>each name list (targets, panel targets, raw targets) has the same size as its
 *       corresponding target list;</li>
 *   <li>when an initial targets file is supplied, the raw targets match the targets read
 *       from that file, element by element and in the same order.</li>
 * </ul>
 *
 * @param ponFile HDF5 PoN file to inspect
 * @param initialTargetsFileUsedToCreatePoN targets file the PoN was created from, or
 *        {@code null} to skip the raw-target comparison
 */
private void assertBasicPoNAssumptions(final File ponFile, final File initialTargetsFileUsedToCreatePoN) {
    try (final HDF5File ponHDF5File = new HDF5File(ponFile)) {
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(ponHDF5File);
        Assert.assertTrue(pon.getTargets().size() >= pon.getPanelTargets().size());
        Assert.assertTrue(pon.getRawTargets().size() > pon.getTargets().size());
        // Every name list must be exactly as long as the target list it describes.
        Assert.assertEquals(pon.getTargetNames().size(), pon.getTargets().size());
        Assert.assertEquals(pon.getPanelTargetNames().size(), pon.getPanelTargets().size());
        Assert.assertEquals(pon.getRawTargetNames().size(), pon.getRawTargets().size());
        if (initialTargetsFileUsedToCreatePoN != null) {
            final TargetCollection<Target> tc = TargetArgumentCollection.readTargetCollection(initialTargetsFileUsedToCreatePoN);
            Assert.assertEquals(pon.getRawTargets().size(), tc.targetCount());
            // Check that the raw targets are the same (half-open range, so an empty list yields no indices)
            Assert.assertTrue(IntStream.range(0, pon.getRawTargets().size()).allMatch(i -> pon.getRawTargets().get(i).equals(tc.target(i))));
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) DataProvider(org.testng.annotations.DataProvider) FileUtils(org.apache.commons.io.FileUtils) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) File(java.io.File) ArrayList(java.util.ArrayList) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) List(java.util.List) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) UserException(org.broadinstitute.hellbender.exceptions.UserException) Assert(org.testng.Assert) HDF5File(org.broadinstitute.hdf5.HDF5File) RamPCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.RamPCACoveragePoN) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) IntRange(org.apache.commons.lang.math.IntRange) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) IntRange(org.apache.commons.lang.math.IntRange) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 13 with HDF5PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk by broadinstitute.

the class PoNTestUtils method assertEquivalentPoN.

/**
 * Verifies that two PoN files hold effectively the same panel of normals.
 *
 * <p>Both inputs go through {@code IOUtils.canReadFile} before being opened. The opened
 * HDF5 files are wrapped as {@code HDF5PCACoveragePoN} objects and compared by the
 * PoN-based overload; the files are closed automatically afterwards.
 *
 * @param left never {@code null}
 * @param right never {@code null}
 */
public static void assertEquivalentPoN(final File left, final File right) {
    IOUtils.canReadFile(left);
    IOUtils.canReadFile(right);
    try (final HDF5File leftReader = new HDF5File(left);
         final HDF5File rightReader = new HDF5File(right)) {
        final HDF5PCACoveragePoN expectedPoN = new HDF5PCACoveragePoN(leftReader);
        final HDF5PCACoveragePoN actualPoN = new HDF5PCACoveragePoN(rightReader);
        // The comparison itself lives in the overload that takes PoN objects.
        assertEquivalentPoN(expectedPoN, actualPoN);
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 14 with HDF5PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk-protected by broadinstitute.

the class CreatePanelOfNormals method writeTargetWeightsFile.

/**
 * Reads the target variances from an HDF5 PoN file and writes their reciprocals
 * (the target weights) to a file that can be read in by R CBS.
 *
 * <p>The PoN file is opened read-only and closed before this method returns.
 *
 * @param ponFile       never {@code null}, HDF5 PoN file
 * @param outputFile    never {@code null}, output file
 */
private static void writeTargetWeightsFile(final File ponFile, final File outputFile) {
    IOUtils.canReadFile(ponFile);
    try (final HDF5File ponReader = new HDF5File(ponFile, HDF5File.OpenMode.READ_ONLY)) {
        final HDF5PCACoveragePoN panel = new HDF5PCACoveragePoN(ponReader);
        // Weight of a target = 1 / variance of that target.
        final DoubleStream inverseVariances = DoubleStream.of(panel.getTargetVariances())
                .map(variance -> 1 / variance);
        ParamUtils.writeValuesToFile(inverseVariances.toArray(), outputFile);
    }
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HDF5PCACoveragePoNCreationUtils(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoNCreationUtils) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) ArgumentCollection(org.broadinstitute.barclay.argparser.ArgumentCollection) OptionalInt(java.util.OptionalInt) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) ArrayList(java.util.ArrayList) DoubleStream(java.util.stream.DoubleStream) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) List(java.util.List) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) UserException(org.broadinstitute.hellbender.exceptions.UserException) CoveragePoNQCUtils(org.broadinstitute.hellbender.tools.pon.coverage.CoveragePoNQCUtils) Utils(org.broadinstitute.hellbender.utils.Utils) HDF5File(org.broadinstitute.hdf5.HDF5File) SparkToggleCommandLineProgram(org.broadinstitute.hellbender.utils.SparkToggleCommandLineProgram) HDF5Library(org.broadinstitute.hdf5.HDF5Library) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 15 with HDF5PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN in project gatk-protected by broadinstitute.

the class CreatePanelOfNormals method runPipeline.

/**
 * Creates the panel of normals (PoN) and, unless this is a dry run, the target weights file.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>load the HDF5 library, failing with a {@code UserException.HardwareFeatureException}
 *       if it cannot be loaded;</li>
 *   <li>default the blacklist and target-weights output files from {@code outFile} when
 *       they were not set;</li>
 *   <li>validate arguments, read the targets and parse the number of eigensamples;</li>
 *   <li>if QC is enabled and this is not a dry run: build a temporary QC PoN, write the
 *       names of samples with arm-level events to the blacklist file, and then either
 *       re-create the PoN without those samples or, when none were found, only redo the
 *       reduction step from the QC PoN; otherwise create the PoN directly;</li>
 *   <li>unless this is a dry run, write the target weights file from the final PoN.</li>
 * </ol>
 *
 * @param ctx Spark context handed through to the PoN creation and QC utilities
 */
@Override
protected void runPipeline(final JavaSparkContext ctx) {
    //Note: passing null means using the default temp dir.
    final boolean hdf5Loaded = new HDF5Library().load(null);
    if (!hdf5Loaded) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }

    // Derive any output file that was not set from the main output file.
    if (blacklistOutFile == null) {
        blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
    }
    if (targetWeightsOutFile == null) {
        targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
    }

    // Check parameters and load values to meet the backend PoN creation interface
    validateArguments();
    final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
    final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);

    // Create the PoN, including QC, if specified.
    final boolean skipQc = isNoQc || dryRun;
    if (skipQc) {
        logger.info("Creating PoN directly (skipping QC)...");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
    } else {
        logger.info("QC:  Beginning creation of QC PoN...");
        final File qcPonFile = IOUtils.createTempFile("qc-pon-", ".hd5");
        HDF5PCACoveragePoNCreationUtils.create(ctx, qcPonFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
        logger.info("QC:  QC PoN created...");
        logger.info("QC:  Collecting suspicious samples...");
        try (final HDF5File qcReader = new HDF5File(qcPonFile, HDF5File.OpenMode.READ_ONLY)) {
            final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(qcReader);
            final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
            ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
            if (failingSampleNames.isEmpty()) {
                // If no suspicious samples were found, just redo the PoN reduction to save time.
                logger.info("QC:  No suspicious samples found ...");
                logger.info("Creating final PoN only redo'ing the reduction step ...");
                HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, qcPonFile, outFile, HDF5File.OpenMode.CREATE);
            } else {
                // Suspicious samples exist: rebuild the PoN with them excluded.
                logger.info("QC:  Suspicious sample list created...");
                logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
                HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
            }
        }
    }

    if (!dryRun) {
        logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
        writeTargetWeightsFile(outFile, targetWeightsOutFile);
    }
    logger.info("Done...");
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5Library(org.broadinstitute.hdf5.HDF5Library) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) OptionalInt(java.util.OptionalInt) HDF5File(org.broadinstitute.hdf5.HDF5File) File(java.io.File) HDF5File(org.broadinstitute.hdf5.HDF5File)

Aggregations

HDF5File (org.broadinstitute.hdf5.HDF5File)20 HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN)20 PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN)16 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)8 RealMatrix (org.apache.commons.math3.linear.RealMatrix)8 File (java.io.File)6 HDF5Library (org.broadinstitute.hdf5.HDF5Library)6 ArrayList (java.util.ArrayList)4 List (java.util.List)4 OptionalInt (java.util.OptionalInt)4 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)4 UserException (org.broadinstitute.hellbender.exceptions.UserException)4 DoubleStream (java.util.stream.DoubleStream)2 IntStream (java.util.stream.IntStream)2 FileUtils (org.apache.commons.io.FileUtils)2 IntRange (org.apache.commons.lang.math.IntRange)2 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)2 DefaultRealMatrixPreservingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor)2 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)2 Argument (org.broadinstitute.barclay.argparser.Argument)2