Example 56 with HDF5File

use of org.broadinstitute.hdf5.HDF5File in project gatk-protected by broadinstitute.

the class CreatePanelOfNormalsIntegrationTest method assertBasicPoNAssumptions.

private void assertBasicPoNAssumptions(final File ponFile, final File initialTargetsFileUsedToCreatePoN) {
    try (final HDF5File ponHDF5File = new HDF5File(ponFile)) {
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(ponHDF5File);
        // Size invariants: panel targets are a subset of the kept targets, which are a subset of the raw targets.
        Assert.assertTrue(pon.getTargets().size() >= pon.getPanelTargets().size());
        Assert.assertTrue(pon.getRawTargets().size() > pon.getTargets().size());
        // Each target list must agree in size with its corresponding name list.
        Assert.assertTrue(pon.getTargetNames().size() == pon.getTargets().size());
        Assert.assertTrue(pon.getPanelTargetNames().size() == pon.getPanelTargets().size());
        Assert.assertTrue(pon.getRawTargetNames().size() == pon.getRawTargets().size());
        if (initialTargetsFileUsedToCreatePoN != null) {
            final TargetCollection<Target> tc = TargetArgumentCollection.readTargetCollection(initialTargetsFileUsedToCreatePoN);
            Assert.assertEquals(pon.getRawTargets().size(), tc.targetCount());
            // Check that the raw targets are the same
            Assert.assertTrue(IntStream.range(0, pon.getRawTargets().size()).allMatch(i -> pon.getRawTargets().get(i).equals(tc.target(i))));
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) DataProvider(org.testng.annotations.DataProvider) FileUtils(org.apache.commons.io.FileUtils) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) File(java.io.File) ArrayList(java.util.ArrayList) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) List(java.util.List) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) UserException(org.broadinstitute.hellbender.exceptions.UserException) Assert(org.testng.Assert) HDF5File(org.broadinstitute.hdf5.HDF5File) RamPCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.RamPCACoveragePoN) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) IntRange(org.apache.commons.lang.math.IntRange)

Example 57 with HDF5File

use of org.broadinstitute.hdf5.HDF5File in project gatk by broadinstitute.

the class AllelicPanelOfNormals method read.

/**
 * Reads an allelic panel of normals from an HDF5 or tab-separated file (file type is automatically detected).
 * Tab-separated files should have global hyperparameter values alpha and beta specified by comment lines
 * denoted by {@code GLOBAL_ALPHA_COMMENT_STRING} and {@code GLOBAL_BETA_COMMENT_STRING}:
 * <p>
 *     #GLOBAL_ALPHA=...<br>
 *     #GLOBAL_BETA=...
 * </p>
 * followed by lines specifying hyperparameter values at each site,
 * with column headers as in {@link AllelicPanelOfNormalsTableColumn}:
 * <p>
 *     CONTIG \t POSITION \t ALPHA \t BETA
 * </p>
 *
 * Note that we opt for a static read method as opposed to a constructor that takes a file parameter because
 * the former allows us to return the static {@code EMPTY_PON} if the allelic panel of normals is not present in
 * an HDF5 file.
 *
 * @param inputFile    HDF5 file containing a coverage panel of normals created by {@link CreatePanelOfNormals}
 *                     and an allelic panel of normals created and set by {@link CreateAllelicPanelOfNormals}
 *                     ({@code EMPTY_PON} is returned if the latter was never set), or a tab-separated file that
 *                     contains global hyperparameters in comment lines followed by hyperparameter values at each site
 */
public static AllelicPanelOfNormals read(final File inputFile) {
    IOUtils.canReadFile(inputFile);
    if (isHDF5File(inputFile)) {
        //construct from HDF5 file
        try (final HDF5File hdf5File = new HDF5File(inputFile)) {
            final AllelicPanelOfNormals allelicPoN = HDF5AllelicPoNUtils.read(hdf5File);
            logger.info(String.format("Loaded allelic panel of normals from HDF5 file: %s.", inputFile));
            return allelicPoN;
        }
    } else {
        //construct from TSV file
        try (final AllelicPanelOfNormalsReader reader = new AllelicPanelOfNormalsReader(inputFile)) {
            //parse comment lines for global hyperparameter values
            final AllelicPanelOfNormals.HyperparameterValues globalHyperparameterValues = parseGlobalHyperparameterValues(inputFile);
            //parse data lines for local hyperparameter values
            final Map<SimpleInterval, HyperparameterValues> siteToHyperparameterValuesMap = new HashMap<>();
            reader.stream().forEach(s -> siteToHyperparameterValuesMap.put(s.getKey(), s.getValue()));
            final AllelicPanelOfNormals allelicPoN = new AllelicPanelOfNormals(globalHyperparameterValues, siteToHyperparameterValuesMap);
            logger.info(String.format("Loaded allelic panel of normals from TSV file: %s.", inputFile));
            return allelicPoN;
        } catch (final IOException | UncheckedIOException ex) {
            throw new UserException.CouldNotReadInputFile(inputFile, ex);
        }
    }
}
Also used : UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) CreateAllelicPanelOfNormals(org.broadinstitute.hellbender.tools.exome.CreateAllelicPanelOfNormals) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) HDF5File(org.broadinstitute.hdf5.HDF5File)
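
To make the accepted inputs concrete, here is a minimal hedged usage sketch; the path and hyperparameter values are hypothetical, and the comment-line keys follow the GLOBAL_ALPHA/GLOBAL_BETA convention documented in the javadoc above.

// A tab-separated file accepted by read() would look like this (hypothetical values):
//   #GLOBAL_ALPHA=10.0
//   #GLOBAL_BETA=5.0
//   CONTIG  POSITION  ALPHA  BETA
//   1       1000      12.1   4.9
//   2       2000      11.8   5.2
final File input = new File("allelic-pon.tsv");
// read() detects the file type, so the same call handles an HDF5 PoN file;
// EMPTY_PON is returned if an HDF5 input lacks an allelic panel of normals.
final AllelicPanelOfNormals pon = AllelicPanelOfNormals.read(input);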

Example 58 with HDF5File

use of org.broadinstitute.hdf5.HDF5File in project gatk by broadinstitute.

the class HDF5PCACoveragePoN method write.

/**
 * Write all of the coverage PoN fields to HDF5.
 */
public static void write(final File outFile, final HDF5File.OpenMode openMode, final List<Target> rawTargets, final ReadCountCollection normalizedCounts, final ReadCountCollection logNormalizedCounts, final double[] targetFactors, final double[] targetVariances, final ReductionResult reduction) {
    Utils.nonNull(outFile);
    Utils.nonNull(normalizedCounts);
    Utils.nonNull(logNormalizedCounts);
    Utils.nonNull(rawTargets);
    Utils.nonNull(targetFactors);
    Utils.nonNull(targetVariances);
    Utils.nonNull(reduction);
    try (final HDF5File file = new HDF5File(outFile, openMode)) {
        logger.info("Creating " + outFile.getAbsolutePath() + "...");
        final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(file);
        logger.info("Setting version number (" + CURRENT_PON_VERSION + ")...");
        pon.setVersion(CURRENT_PON_VERSION);
        final List<Target> targets = normalizedCounts.targets();
        final List<Target> panelTargets = logNormalizedCounts.targets();
        logger.info("Setting targets ...");
        pon.setTargets(targets);
        logger.info("Setting raw targets ...");
        pon.setRawTargets(rawTargets);
        logger.info("Setting reduced panel targets ...");
        pon.setPanelTargets(panelTargets);
        logger.info("Setting target factors (" + targetFactors.length + ") ...");
        pon.setTargetFactors(targetFactors);
        logger.info("Setting target variances...");
        pon.setTargetVariances(targetVariances);
        logger.info("Setting normalized counts (" + normalizedCounts.counts().getRowDimension() + " x " + normalizedCounts.counts().getColumnDimension() + ") (T)...");
        pon.setNormalizedCounts(normalizedCounts.counts());
        logger.info("Setting log-normalized counts (" + logNormalizedCounts.counts().getRowDimension() + " x " + logNormalizedCounts.counts().getColumnDimension() + ") (T) ...");
        pon.setLogNormalizedCounts(logNormalizedCounts.counts());
        logger.info("Setting log-normalized pseudoinverse (" + reduction.getPseudoInverse().getRowDimension() + " x " + reduction.getPseudoInverse().getColumnDimension() + ") ...");
        pon.setLogNormalizedPInverseCounts(reduction.getPseudoInverse());
        logger.info("Setting reduced panel counts (" + reduction.getReducedCounts().getRowDimension() + " x " + reduction.getReducedCounts().getColumnDimension() + ") (T) ...");
        pon.setReducedPanelCounts(reduction.getReducedCounts());
        logger.info("Setting reduced panel pseudoinverse (" + reduction.getReducedPseudoInverse().getRowDimension() + " x " + reduction.getReducedPseudoInverse().getColumnDimension() + ") ...");
        pon.setReducedPanelPInverseCounts(reduction.getReducedPseudoInverse());
        final List<String> targetNames = normalizedCounts.targets().stream().map(Target::getName).collect(Collectors.toList());
        final List<String> rawTargetNames = rawTargets.stream().map(Target::getName).collect(Collectors.toList());
        final List<String> panelTargetNames = logNormalizedCounts.targets().stream().map(Target::getName).collect(Collectors.toList());
        logger.info("Setting target names ...");
        pon.setTargetNames(targetNames);
        logger.info("Setting raw target names ...");
        pon.setRawTargetNames(rawTargetNames);
        logger.info("Setting reduced target names ...");
        pon.setPanelTargetNames(panelTargetNames);
        final List<String> sampleNames = normalizedCounts.columnNames();
        final List<String> panelSampleNames = logNormalizedCounts.columnNames();
        logger.info("Setting sample names ...");
        pon.setSampleNames(sampleNames);
        logger.info("Setting reduced sample names ...");
        pon.setPanelSampleNames(panelSampleNames);
    }
}
Also used : Target(org.broadinstitute.hellbender.tools.exome.Target) HDF5File(org.broadinstitute.hdf5.HDF5File)
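
The getters on HDF5PCACoveragePoN mirror these setters, so a written PoN can be read back directly. A minimal hedged read-back sketch, assuming a hypothetical path "pon.hd5" produced by the write() above:

try (final HDF5File file = new HDF5File(new File("pon.hd5"), HDF5File.OpenMode.READ_ONLY)) {
    final HDF5PCACoveragePoN pon = new HDF5PCACoveragePoN(file);
    final List<Target> targets = pon.getTargets();          // stored via setTargets above
    final double[] targetFactors = pon.getTargetFactors();  // stored via setTargetFactors above
}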

Example 59 with HDF5File

use of org.broadinstitute.hdf5.HDF5File in project gatk by broadinstitute.

the class HDF5PCACoveragePoNCreationUtils method redoReduction.

/**
 * Creates a new PoN file using the values from a given PoN file, redoing only the reduction step.
 *
 * Note that the reduction is redone even if the input and the desired output have the same number of eigensamples.
 *
 * @param ctx  {@code null} is okay if not using Spark
 * @param newNumberOfEigensamples  desired number of eigensamples in the new PoN reduction
 * @param inputHDF5Filename  input PoN file
 * @param outputHDF5Filename  output PoN file, which will be identical to the input except for the fields
 *                            regarding the PoN reduction
 * @param openMode  desired {@link HDF5File.OpenMode} (an exception is thrown if {@code HDF5File.OpenMode.READ_ONLY})
 */
public static void redoReduction(final JavaSparkContext ctx, final OptionalInt newNumberOfEigensamples, final File inputHDF5Filename, final File outputHDF5Filename, final HDF5File.OpenMode openMode) {
    Utils.nonNull(newNumberOfEigensamples);
    IOUtils.canReadFile(inputHDF5Filename);
    Utils.nonNull(outputHDF5Filename);
    if (inputHDF5Filename.getAbsolutePath().equals(outputHDF5Filename.getAbsolutePath())) {
        throw new UserException.CouldNotCreateOutputFile(outputHDF5Filename, "Cannot create a new PoN overwriting an old one.");
    }
    try (final HDF5File ponReader = new HDF5File(inputHDF5Filename, HDF5File.OpenMode.READ_ONLY)) {
        final PCACoveragePoN inputPoN = new HDF5PCACoveragePoN(ponReader);
        final ReadCountCollection normalizedCounts = new ReadCountCollection(inputPoN.getTargets(), inputPoN.getSampleNames(), inputPoN.getNormalizedCounts());
        final ReadCountCollection logNormalizedCounts = new ReadCountCollection(inputPoN.getPanelTargets(), inputPoN.getPanelSampleNames(), inputPoN.getLogNormalizedCounts());
        final ReductionResult newReduction = calculateReducedPanelAndPInverses(logNormalizedCounts, newNumberOfEigensamples, logger, ctx);
        final List<String> panelTargetNames = logNormalizedCounts.targets().stream().map(Target::getName).collect(Collectors.toList());
        final double[] targetVariances = calculateTargetVariances(normalizedCounts, panelTargetNames, newReduction, ctx);
        HDF5PCACoveragePoN.write(outputHDF5Filename, openMode, inputPoN.getRawTargets(), normalizedCounts, logNormalizedCounts, inputPoN.getTargetFactors(), targetVariances, newReduction);
    }
}
Also used : HDF5File(org.broadinstitute.hdf5.HDF5File)
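
For reference, a hedged invocation sketch; the paths and eigensample count are hypothetical. Per the javadoc, ctx may be null when Spark is not used, and a READ_ONLY open mode would cause an exception:

HDF5PCACoveragePoNCreationUtils.redoReduction(
        null,                             // no Spark context
        OptionalInt.of(5),                // hypothetical eigensample count
        new File("input-pon.hd5"),        // hypothetical input PoN
        new File("output-pon.hd5"),       // must not resolve to the same path as the input
        HDF5File.OpenMode.CREATE);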

Example 60 with HDF5File

use of org.broadinstitute.hdf5.HDF5File in project gatk by broadinstitute.

the class HDF5LibraryUnitTest method testMakeNaNDouble.

@Test()
public void testMakeNaNDouble() throws IOException {
    final File testFile = File.createTempFile("hdf5", ".hd5");
    HDF5File file = new HDF5File(testFile, HDF5File.OpenMode.CREATE);
    file.makeGroup("test-group/double-group");
    Assert.assertTrue(file.makeDouble("test-group/double-group/my-double", Double.NaN));
    System.err.println(testFile);
    file.close();
    final long time = System.currentTimeMillis();
    Assert.assertTrue(testFile.length() > 0);
    Assert.assertTrue(testFile.lastModified() <= time);
    file = new HDF5File(testFile, HDF5File.OpenMode.READ_ONLY);
    final double theDouble = file.readDouble("test-group/double-group/my-double");
    Assert.assertTrue(Double.isNaN(theDouble));
    file.close();
}
Also used : HDF5File(org.broadinstitute.hdf5.HDF5File) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
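
The test above closes its handles manually; an equivalent hedged sketch using try-with-resources, the pattern favored by the other examples, would be:

final File testFile = File.createTempFile("hdf5", ".hd5");
try (final HDF5File out = new HDF5File(testFile, HDF5File.OpenMode.CREATE)) {
    out.makeGroup("test-group/double-group");
    out.makeDouble("test-group/double-group/my-double", Double.NaN);
}
try (final HDF5File in = new HDF5File(testFile, HDF5File.OpenMode.READ_ONLY)) {
    // NaN round-trips through the stored HDF5 double, as the test asserts.
    Assert.assertTrue(Double.isNaN(in.readDouble("test-group/double-group/my-double")));
}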

Aggregations

HDF5File (org.broadinstitute.hdf5.HDF5File) 82
Test (org.testng.annotations.Test) 58
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 56
File (java.io.File) 32
Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix) 24
RealMatrix (org.apache.commons.math3.linear.RealMatrix) 24
HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) 20
BeforeTest (org.testng.annotations.BeforeTest) 20
PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) 16
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 10
HDF5Library (org.broadinstitute.hdf5.HDF5Library) 6
UserException (org.broadinstitute.hellbender.exceptions.UserException) 6
ArrayList (java.util.ArrayList) 4
List (java.util.List) 4
OptionalInt (java.util.OptionalInt) 4
StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) 4
IOException (java.io.IOException) 2
UncheckedIOException (java.io.UncheckedIOException) 2
DoubleStream (java.util.stream.DoubleStream) 2
IntStream (java.util.stream.IntStream) 2