Search in sources :

Example 6 with PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN in project gatk by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method assertBetaHatsRobustToOutliers.

/**
     * Asserts that the calculation of beta hats is not significantly affected by zero-coverage outlier counts
     * We perform this check by randomly setting some coverages to zero in copy ratio space (-infinity in log space).
     * betaHats imputes 0 in log space (1 in copy ratio space) whenever coverage is below a certain low threshold
     * and should thus be robust to this type of noise.
     */
private void assertBetaHatsRobustToOutliers(final ReadCountCollection preTangentNormalized, final File ponFile) {
    try (final HDF5File ponReader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final List<String> ponTargets = pon.getPanelTargetNames();
        final RealMatrix input = reorderTargetsToPoNOrder(preTangentNormalized, ponTargets);
        // randomly set some entries to zero in copy-ratio space (-infinity in log space)
        final Random random = new Random(13);
        final double noiseProportion = 0.01;
        final RealMatrix noisyInput = input.copy();
        noisyInput.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

            @Override
            public double visit(final int row, final int column, final double value) {
                return random.nextDouble() < noiseProportion ? Double.NEGATIVE_INFINITY : value;
            }
        });
        final RealMatrix betaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), input, PCATangentNormalizationUtils.EPSILON);
        final RealMatrix noisyBetaHats = PCATangentNormalizationUtils.calculateBetaHats(pon.getReducedPanelPInverseCounts(), noisyInput, PCATangentNormalizationUtils.EPSILON);
        final RealMatrix difference = betaHats.subtract(noisyBetaHats);
        difference.walkInOptimizedOrder(new DefaultRealMatrixPreservingVisitor() {

            @Override
            public void visit(final int row, int column, double value) {
                Assert.assertEquals(value, 0, 0.01);
            }
        });
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) DefaultRealMatrixPreservingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 7 with PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN in project gatk by broadinstitute.

the class CreatePanelOfNormalsIntegrationTest method assertRamPoNDuplicate.

/**
     * Assert that we can create a valid ram pon duplicate.
     *
     * @param pon never {@code null}
     */
private void assertRamPoNDuplicate(final PCACoveragePoN pon) {
    final PCACoveragePoN ramPoN = new RamPCACoveragePoN(pon);
    PoNTestUtils.assertEqualsMatrix(pon.getLogNormalizedCounts(), ramPoN.getLogNormalizedCounts(), false);
    PoNTestUtils.assertEqualsMatrix(pon.getLogNormalizedPInverseCounts(), ramPoN.getLogNormalizedPInverseCounts(), false);
    PoNTestUtils.assertEqualsMatrix(pon.getNormalizedCounts(), ramPoN.getNormalizedCounts(), false);
    PoNTestUtils.assertEqualsMatrix(pon.getReducedPanelCounts(), ramPoN.getReducedPanelCounts(), false);
    PoNTestUtils.assertEqualsMatrix(pon.getReducedPanelPInverseCounts(), ramPoN.getReducedPanelPInverseCounts(), false);
    PoNTestUtils.assertEqualsDoubleArrays(pon.getTargetFactors(), ramPoN.getTargetFactors());
    final List<Target> ponPanelTargets = pon.getPanelTargets();
    final List<Target> ponTargets = pon.getTargets();
    final List<Target> ponRawTargets = pon.getRawTargets();
    final List<Target> ramPoNPanelTargets = ramPoN.getPanelTargets();
    final List<Target> ramPoNTargets = ramPoN.getTargets();
    final List<Target> ramPoNRawTargets = ramPoN.getRawTargets();
    Assert.assertEquals(ponPanelTargets.size(), ramPoNPanelTargets.size());
    Assert.assertEquals(ponTargets.size(), ramPoNTargets.size());
    // Make sure every target is the same
    Assert.assertTrue(IntStream.range(0, ponRawTargets.size()).boxed().allMatch(i -> ponRawTargets.get(i).equals(ramPoNRawTargets.get(i))));
    Assert.assertTrue(IntStream.range(0, ponTargets.size()).boxed().allMatch(i -> ponTargets.get(i).equals(ramPoNTargets.get(i))));
    Assert.assertTrue(IntStream.range(0, ponPanelTargets.size()).boxed().allMatch(i -> ponPanelTargets.get(i).equals(ramPoNPanelTargets.get(i))));
    // Make sure every sample name is the same
    Assert.assertTrue(IntStream.range(0, pon.getSampleNames().size()).boxed().allMatch(i -> pon.getSampleNames().get(i).equals(ramPoN.getSampleNames().get(i))));
    Assert.assertTrue(IntStream.range(0, pon.getPanelSampleNames().size()).boxed().allMatch(i -> pon.getPanelSampleNames().get(i).equals(ramPoN.getPanelSampleNames().get(i))));
    // Make sure every target name is the same
    Assert.assertTrue(IntStream.range(0, pon.getTargetNames().size()).boxed().allMatch(i -> pon.getTargetNames().get(i).equals(ramPoN.getTargetNames().get(i))));
    Assert.assertTrue(IntStream.range(0, pon.getPanelTargetNames().size()).boxed().allMatch(i -> pon.getPanelTargetNames().get(i).equals(ramPoN.getPanelTargetNames().get(i))));
}
Also used : IntStream(java.util.stream.IntStream) DataProvider(org.testng.annotations.DataProvider) FileUtils(org.apache.commons.io.FileUtils) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) File(java.io.File) ArrayList(java.util.ArrayList) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) List(java.util.List) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) UserException(org.broadinstitute.hellbender.exceptions.UserException) Assert(org.testng.Assert) HDF5File(org.broadinstitute.hdf5.HDF5File) RamPCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.RamPCACoveragePoN) PoNTestUtils(org.broadinstitute.hellbender.tools.pon.PoNTestUtils) IntRange(org.apache.commons.lang.math.IntRange) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) RamPCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.RamPCACoveragePoN) RamPCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.RamPCACoveragePoN)

Example 8 with PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN in project gatk by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method assertTangentNormalized.

private void assertTangentNormalized(final ReadCountCollection actualReadCounts, final ReadCountCollection preTangentNormalized, final RealMatrix betaHats, final File ponFile) {
    try (final HDF5File ponReader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final RealMatrix inCounts = reorderTargetsToPoNOrder(preTangentNormalized, pon.getPanelTargetNames());
        final RealMatrix actual = reorderTargetsToPoNOrder(actualReadCounts, pon.getPanelTargetNames());
        final RealMatrix ponMat = pon.getReducedPanelCounts();
        final RealMatrix projection = ponMat.multiply(betaHats);
        final RealMatrix expected = inCounts.subtract(projection);
        Assert.assertEquals(actual.getRowDimension(), expected.getRowDimension());
        Assert.assertEquals(actual.getColumnDimension(), expected.getColumnDimension());
        for (int i = 0; i < actual.getRowDimension(); i++) {
            Assert.assertEquals(actual.getRow(i), expected.getRow(i));
        }
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 9 with PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN in project gatk by broadinstitute.

the class NormalizeSomaticReadCountsIntegrationTest method assertBetaHats.

/**
     * Asserts that a collection of beta-hats corresponds to the expected value given
     * the input pre-tangent normalization matrix and the PoN file.
     */
private void assertBetaHats(final ReadCountCollection preTangentNormalized, final RealMatrix actual, final File ponFile) {
    Assert.assertEquals(actual.getColumnDimension(), preTangentNormalized.columnNames().size());
    final double epsilon = PCATangentNormalizationUtils.EPSILON;
    try (final HDF5File ponReader = new HDF5File(ponFile)) {
        final PCACoveragePoN pon = new HDF5PCACoveragePoN(ponReader);
        final List<String> ponTargets = pon.getPanelTargetNames();
        final RealMatrix inCounts = reorderTargetsToPoNOrder(preTangentNormalized, ponTargets);
        // obtain subset of relevant targets to calculate the beta-hats;
        final int[][] betaHatTargets = new int[inCounts.getColumnDimension()][];
        for (int i = 0; i < inCounts.getColumnDimension(); i++) {
            final List<Integer> relevantTargets = new ArrayList<>();
            for (int j = 0; j < inCounts.getRowDimension(); j++) {
                if (inCounts.getEntry(j, i) > 1 + (Math.log(epsilon) / Math.log(2))) {
                    relevantTargets.add(j);
                }
            }
            betaHatTargets[i] = relevantTargets.stream().mapToInt(Integer::intValue).toArray();
        }
        // calculate beta-hats per column and check with actual values.
        final RealMatrix normalsInv = pon.getReducedPanelPInverseCounts();
        Assert.assertEquals(actual.getRowDimension(), normalsInv.getRowDimension());
        final RealMatrix normalsInvT = normalsInv.transpose();
        for (int i = 0; i < inCounts.getColumnDimension(); i++) {
            final RealMatrix inValues = inCounts.getColumnMatrix(i).transpose().getSubMatrix(new int[] { 0 }, betaHatTargets[i]);
            final RealMatrix normalValues = normalsInvT.getSubMatrix(betaHatTargets[i], IntStream.range(0, normalsInvT.getColumnDimension()).toArray());
            final RealMatrix betaHats = inValues.multiply(normalValues);
            for (int j = 0; j < actual.getRowDimension(); j++) {
                Assert.assertEquals(actual.getEntry(j, i), betaHats.getEntry(0, j), 0.000001, "Col " + i + " row " + j);
            }
        }
    }
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5File(org.broadinstitute.hdf5.HDF5File)

Example 10 with PCACoveragePoN

use of org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN in project gatk-protected by broadinstitute.

the class CreatePanelOfNormals method runPipeline.

@Override
protected void runPipeline(final JavaSparkContext ctx) {
    if (!new HDF5Library().load(null)) {
        //Note: passing null means using the default temp dir.
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }
    if (blacklistOutFile == null) {
        blacklistOutFile = new File(outFile + BLACKLIST_FILE_APPEND);
    }
    if (targetWeightsOutFile == null) {
        targetWeightsOutFile = new File(outFile + TARGET_WEIGHTS_FILE_APPEND);
    }
    // Check parameters and load values to meet the backend PoN creation interface
    validateArguments();
    final TargetCollection<Target> targets = targetArguments.readTargetCollection(true);
    final OptionalInt numberOfEigensamples = parseNumberOfEigensamples(numberOfEigensamplesString);
    // Create the PoN, including QC, if specified.
    if (!isNoQc && !dryRun) {
        logger.info("QC:  Beginning creation of QC PoN...");
        final File outputQCFile = IOUtils.createTempFile("qc-pon-", ".hd5");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outputQCFile, HDF5File.OpenMode.READ_WRITE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, OptionalInt.of(NUM_QC_EIGENSAMPLES), dryRun);
        logger.info("QC:  QC PoN created...");
        logger.info("QC:  Collecting suspicious samples...");
        try (final HDF5File ponReader = new HDF5File(outputQCFile, HDF5File.OpenMode.READ_ONLY)) {
            final PCACoveragePoN qcPoN = new HDF5PCACoveragePoN(ponReader);
            final List<String> failingSampleNames = CoveragePoNQCUtils.retrieveSamplesWithArmLevelEvents(qcPoN, ctx);
            ParamUtils.writeStringListToFile(failingSampleNames, blacklistOutFile);
            // If no suspicious samples were found, just redo the PoN reduction to save time.
            if (failingSampleNames.size() != 0) {
                logger.info("QC:  Suspicious sample list created...");
                logger.info("Creating final PoN with " + failingSampleNames.size() + " suspicious samples removed...");
                HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, failingSampleNames, targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
            } else {
                logger.info("QC:  No suspicious samples found ...");
                logger.info("Creating final PoN only redo'ing the reduction step ...");
                HDF5PCACoveragePoNCreationUtils.redoReduction(ctx, numberOfEigensamples, outputQCFile, outFile, HDF5File.OpenMode.CREATE);
            }
        }
    } else {
        logger.info("Creating PoN directly (skipping QC)...");
        HDF5PCACoveragePoNCreationUtils.create(ctx, outFile, HDF5File.OpenMode.CREATE, inputFile, targets, new ArrayList<>(), targetFactorThreshold, maximumPercentZerosInColumn, maximumPercentZerosInTarget, columnExtremeThresholdPercentile, outlierTruncatePercentileThresh, numberOfEigensamples, dryRun);
    }
    if (!dryRun) {
        logger.info("Writing target weights file to " + targetWeightsOutFile + "...");
        writeTargetWeightsFile(outFile, targetWeightsOutFile);
    }
    logger.info("Done...");
}
Also used : HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) HDF5Library(org.broadinstitute.hdf5.HDF5Library) HDF5PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN) PCACoveragePoN(org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN) OptionalInt(java.util.OptionalInt) HDF5File(org.broadinstitute.hdf5.HDF5File) File(java.io.File) HDF5File(org.broadinstitute.hdf5.HDF5File)

Aggregations

HDF5File (org.broadinstitute.hdf5.HDF5File)14 HDF5PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.HDF5PCACoveragePoN)14 PCACoveragePoN (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCACoveragePoN)14 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)8 RealMatrix (org.apache.commons.math3.linear.RealMatrix)8 File (java.io.File)4 HDF5Library (org.broadinstitute.hdf5.HDF5Library)4 ArrayList (java.util.ArrayList)2 List (java.util.List)2 OptionalInt (java.util.OptionalInt)2 IntStream (java.util.stream.IntStream)2 FileUtils (org.apache.commons.io.FileUtils)2 IntRange (org.apache.commons.lang.math.IntRange)2 DefaultRealMatrixChangingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)2 DefaultRealMatrixPreservingVisitor (org.apache.commons.math3.linear.DefaultRealMatrixPreservingVisitor)2 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)2 StandardArgumentDefinitions (org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions)2 UserException (org.broadinstitute.hellbender.exceptions.UserException)2 PoNTestUtils (org.broadinstitute.hellbender.tools.pon.PoNTestUtils)2 PCATangentNormalizationResult (org.broadinstitute.hellbender.tools.pon.coverage.pca.PCATangentNormalizationResult)2