Search in sources :

Example 21 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class CoverageModelParameters method write.

/**
     * Writes the model to disk.
     *
     * @param outputPath model output path
     */
public static void write(@Nonnull CoverageModelParameters model, @Nonnull final String outputPath) {
    /* create output directory if it doesn't exist */
    createOutputPath(Utils.nonNull(outputPath, "The output path string must be non-null"));
    /* write targets list */
    final File targetListFile = new File(outputPath, CoverageModelGlobalConstants.TARGET_LIST_OUTPUT_FILE);
    TargetWriter.writeTargetsToFile(targetListFile, model.getTargetList());
    final List<String> targetNames = model.getTargetList().stream().map(Target::getName).collect(Collectors.toList());
    /* write target mean bias to file */
    final File targetMeanBiasFile = new File(outputPath, CoverageModelGlobalConstants.TARGET_MEAN_LOG_BIAS_OUTPUT_FILE);
    Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getTargetMeanLogBias().transpose(), targetMeanBiasFile, MEAN_LOG_BIAS_MATRIX_NAME, targetNames, null);
    /* write target unexplained variance to file */
    final File targetUnexplainedVarianceFile = new File(outputPath, CoverageModelGlobalConstants.TARGET_UNEXPLAINED_VARIANCE_OUTPUT_FILE);
    Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getTargetUnexplainedVariance().transpose(), targetUnexplainedVarianceFile, TARGET_UNEXPLAINED_VARIANCE_MATRIX_NAME, targetNames, null);
    if (model.isBiasCovariatesEnabled()) {
        /* write mean bias covariates to file */
        final List<String> meanBiasCovariatesNames = IntStream.range(0, model.getNumLatents()).mapToObj(li -> String.format(BIAS_COVARIATE_COLUMN_NAME_FORMAT, li)).collect(Collectors.toList());
        final File meanBiasCovariatesFile = new File(outputPath, CoverageModelGlobalConstants.MEAN_BIAS_COVARIATES_OUTPUT_FILE);
        Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getMeanBiasCovariates(), meanBiasCovariatesFile, MEAN_BIAS_COVARIATES_MATRIX_NAME, targetNames, meanBiasCovariatesNames);
        /* write norm_2 of mean bias covariates to file */
        final INDArray WTW = model.getMeanBiasCovariates().transpose().mmul(model.getMeanBiasCovariates());
        final double[] biasCovariatesNorm2 = IntStream.range(0, model.getNumLatents()).mapToDouble(li -> WTW.getDouble(li, li)).toArray();
        final File biasCovariatesNorm2File = new File(outputPath, CoverageModelGlobalConstants.MEAN_BIAS_COVARIATES_NORM2_OUTPUT_FILE);
        Nd4jIOUtils.writeNDArrayMatrixToTextFile(Nd4j.create(biasCovariatesNorm2, new int[] { 1, model.getNumLatents() }), biasCovariatesNorm2File, MEAN_BIAS_COVARIATES_NORM_2_MATRIX_NAME, null, meanBiasCovariatesNames);
        /* if ARD is enabled, write the ARD coefficients and covariance of W as well */
        if (model.isARDEnabled()) {
            final File biasCovariatesARDCoefficientsFile = new File(outputPath, CoverageModelGlobalConstants.BIAS_COVARIATES_ARD_COEFFICIENTS_OUTPUT_FILE);
            Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getBiasCovariateARDCoefficients(), biasCovariatesARDCoefficientsFile, BIAS_COVARIATES_ARD_COEFFICIENTS_MATRIX_NAME, null, meanBiasCovariatesNames);
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) NDArrayIndex(org.nd4j.linalg.indexing.NDArrayIndex) Nd4jIOUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils) Nd4j(org.nd4j.linalg.factory.Nd4j) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Pair(org.apache.commons.lang3.tuple.Pair) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TargetWriter(org.broadinstitute.hellbender.tools.exome.TargetWriter) Utils(org.broadinstitute.hellbender.utils.Utils) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) INDArray(org.nd4j.linalg.api.ndarray.INDArray)

Example 22 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.

the class CoverageModelParameters method generateRandomModel.

/**
     * Generates random coverage model parameters.
     *
     * @param targetList list of targets
     * @param numLatents number of latent variables
     * @param seed random seed
     * @param randomMeanLogBiasStandardDeviation std of mean log bias (mean is set to 0)
     * @param randomBiasCovariatesStandardDeviation std of bias covariates (mean is set to 0)
     * @param randomMaxUnexplainedVariance max value of unexplained variance (samples are taken from a uniform
     *                                     distribution [0, {@code randomMaxUnexplainedVariance}])
     * @param initialBiasCovariatesARDCoefficients initial row vector of ARD coefficients
     * @return an instance of {@link CoverageModelParameters}
     */
public static CoverageModelParameters generateRandomModel(final List<Target> targetList, final int numLatents, final long seed, final double randomMeanLogBiasStandardDeviation, final double randomBiasCovariatesStandardDeviation, final double randomMaxUnexplainedVariance, final INDArray initialBiasCovariatesARDCoefficients) {
    Utils.validateArg(numLatents >= 0, "Dimension of the bias space must be non-negative");
    Utils.validateArg(randomBiasCovariatesStandardDeviation >= 0, "Standard deviation of random bias covariates" + " must be non-negative");
    Utils.validateArg(randomMeanLogBiasStandardDeviation >= 0, "Standard deviation of random mean log bias" + " must be non-negative");
    Utils.validateArg(randomMaxUnexplainedVariance >= 0, "Max random unexplained variance must be non-negative");
    Utils.validateArg(initialBiasCovariatesARDCoefficients == null || numLatents > 0 && initialBiasCovariatesARDCoefficients.length() == numLatents, "If ARD is enabled, the dimension" + " of the bias latent space must be positive and match the length of ARD coeffecient vector");
    final boolean biasCovariatesEnabled = numLatents > 0;
    final int numTargets = targetList.size();
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(seed));
    /* Gaussian random for mean log bias */
    final INDArray initialMeanLogBias = Nd4j.create(getNormalRandomNumbers(numTargets, 0, randomMeanLogBiasStandardDeviation, rng), new int[] { 1, numTargets });
    /* Uniform random for unexplained variance */
    final INDArray initialUnexplainedVariance = Nd4j.create(getUniformRandomNumbers(numTargets, 0, randomMaxUnexplainedVariance, rng), new int[] { 1, numTargets });
    final INDArray initialMeanBiasCovariates;
    if (biasCovariatesEnabled) {
        /* Gaussian random for bias covariates */
        initialMeanBiasCovariates = Nd4j.create(getNormalRandomNumbers(numTargets * numLatents, 0, randomBiasCovariatesStandardDeviation, rng), new int[] { numTargets, numLatents });
    } else {
        initialMeanBiasCovariates = null;
    }
    return new CoverageModelParameters(targetList, initialMeanLogBias, initialUnexplainedVariance, initialMeanBiasCovariates, initialBiasCovariatesARDCoefficients);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) RandomGenerator(org.apache.commons.math3.random.RandomGenerator)

Example 23 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.

the class ReCapSegCaller method calculateT.

private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) {
    //Get the segments that are likely copy neutral.
    // Math.abs removed to mimic python...
    final List<ModeledSegment> copyNeutralSegments = segments.stream().filter(s -> s.getSegmentMean() < COPY_NEUTRAL_CUTOFF).collect(Collectors.toList());
    // Get the targets that correspond to the copyNeutralSegments... note that individual targets, due to noise,
    //  can be far away from copy neutral
    final TargetCollection<ReadCountRecord.SingleSampleRecord> targetsWithCoverage = new HashedListTargetCollection<>(tangentNormalizedCoverage.records().stream().map(ReadCountRecord::asSingleSampleRecord).collect(Collectors.toList()));
    final double[] copyNeutralTargetsCopyRatio = copyNeutralSegments.stream().flatMap(s -> targetsWithCoverage.targets(s).stream()).mapToDouble(ReadCountRecord.SingleSampleRecord::getCount).toArray();
    final double meanCopyNeutralTargets = new Mean().evaluate(copyNeutralTargetsCopyRatio);
    final double sigmaCopyNeutralTargets = new StandardDeviation().evaluate(copyNeutralTargetsCopyRatio);
    // Now we filter outliers by only including those w/in 2 standard deviations.
    final double[] filteredCopyNeutralTargetsCopyRatio = Arrays.stream(copyNeutralTargetsCopyRatio).filter(c -> Math.abs(c - meanCopyNeutralTargets) < sigmaCopyNeutralTargets * Z_THRESHOLD).toArray();
    return new StandardDeviation().evaluate(filteredCopyNeutralTargetsCopyRatio);
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Arrays(java.util.Arrays) List(java.util.List) Logger(org.apache.logging.log4j.Logger) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) Utils(org.broadinstitute.hellbender.utils.Utils) LogManager(org.apache.logging.log4j.LogManager) Collectors(java.util.stream.Collectors) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)

Example 24 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.

the class GermlineCNVCallerIntegrationTest method reportCopyNumberSummaryStatistics.

/* Shame on me for using {@link ReadCountCollection} to store copy numbers! */
private void reportCopyNumberSummaryStatistics(@Nonnull final File posteriorsOutputPath, @Nonnull final File truthCopyNumberFile, @Nonnull final List<Target> targets, @Nonnull final SexGenotypeDataCollection sexGenotypeDataCollection) {
    final ReadCountCollection truthCopyNumberCollection = loadTruthCopyNumberTable(truthCopyNumberFile, targets);
    final RealMatrix calledCopyNumberMatrix = Nd4jApacheAdapterUtils.convertINDArrayToApacheMatrix(Nd4jIOUtils.readNDArrayMatrixFromTextFile(new File(posteriorsOutputPath, CoverageModelGlobalConstants.COPY_RATIO_VITERBI_FILENAME)));
    final ReadCountCollection calledCopyNumberCollection = new ReadCountCollection(targets, truthCopyNumberCollection.columnNames(), calledCopyNumberMatrix);
    final int numSamples = calledCopyNumberCollection.columnNames().size();
    final List<String> sampleSexGenotypes = truthCopyNumberCollection.columnNames().stream().map(sampleName -> sexGenotypeDataCollection.getSampleSexGenotypeData(sampleName).getSexGenotype()).collect(Collectors.toList());
    final List<SampleCopyNumberSummaryStatistics> sampleSummaryStatisticsList = IntStream.range(0, numSamples).mapToObj(si -> calculateSampleCopyNumberConcordance(truthCopyNumberCollection, calledCopyNumberCollection, si, sampleSexGenotypes.get(si))).collect(Collectors.toList());
    /* calculation various summary statistics */
    final AbstractUnivariateStatistic calculator = new Mean();
    final ConfusionRates homDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.homozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
    final ConfusionRates hetDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.heterozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
    final ConfusionRates dupMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.duplicationConfusionMatrix).collect(Collectors.toList()), calculator);
    final double absoluteConcordance = Concordance.getCollectionConcordance(sampleSummaryStatisticsList.stream().map(ss -> ss.absoluteCopyNumberConcordance).collect(Collectors.toList()), calculator);
    /* log */
    logger.info("Homozygous deletion statistics: " + homDelMedianRates.toString());
    logger.info("Heterozygous deletion statistics: " + hetDelMedianRates.toString());
    logger.info("Duplication statistics: " + dupMedianRates.toString());
    logger.info(String.format("Absolute copy number calling concordance: %f", absoluteConcordance));
}
Also used : BeforeSuite(org.testng.annotations.BeforeSuite) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) GermlinePloidyAnnotatedTargetCollection(org.broadinstitute.hellbender.tools.exome.sexgenotyper.GermlinePloidyAnnotatedTargetCollection) ArrayUtils(org.apache.commons.lang3.ArrayUtils) Test(org.testng.annotations.Test) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ContigGermlinePloidyAnnotationTableReader(org.broadinstitute.hellbender.tools.exome.sexgenotyper.ContigGermlinePloidyAnnotationTableReader) AbstractUnivariateStatistic(org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) TargetTableReader(org.broadinstitute.hellbender.tools.exome.TargetTableReader) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) Nonnull(javax.annotation.Nonnull) LinkedHashSet(java.util.LinkedHashSet) MathIllegalArgumentException(org.apache.commons.math3.exception.MathIllegalArgumentException) Nd4jApacheAdapterUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jApacheAdapterUtils) SexGenotypeDataCollection(org.broadinstitute.hellbender.tools.exome.sexgenotyper.SexGenotypeDataCollection) Collection(java.util.Collection) Nd4jIOUtils(org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils) IOException(java.io.IOException) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest) Collectors(java.util.stream.Collectors) File(java.io.File) CoverageModelArgumentCollection(org.broadinstitute.hellbender.tools.coveragemodel.CoverageModelArgumentCollection) List(java.util.List) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SparkToggleCommandLineProgram(org.broadinstitute.hellbender.utils.SparkToggleCommandLineProgram) CoverageModelGlobalConstants(org.broadinstitute.hellbender.tools.coveragemodel.CoverageModelGlobalConstants) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) AbstractUnivariateStatistic(org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic) RealMatrix(org.apache.commons.math3.linear.RealMatrix) File(java.io.File)

Example 25 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.

the class SliceSamplerUnitTest method testSliceSamplingOfPeakedBetaDistribution.

/**
     * Test slice sampling of a peaked beta distribution as an example of sampling of a bounded random variable.
     * Checks that input mean and variance are recovered by 10000 samples to a relative error of 0.5% and 2%,
     * respectively.
     */
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
    rng.setSeed(RANDOM_SEED);
    final double alpha = 10.;
    final double beta = 4.;
    final BetaDistribution betaDistribution = new BetaDistribution(alpha, beta);
    final Function<Double, Double> betaLogPDF = betaDistribution::logDensity;
    final double xInitial = 0.5;
    final double xMin = 0.;
    final double xMax = 1.;
    final double width = 0.1;
    final int numSamples = 10000;
    final SliceSampler betaSampler = new SliceSampler(rng, betaLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(betaSampler.sample(xInitial, numSamples));
    final double mean = betaDistribution.getNumericalMean();
    final double variance = betaDistribution.getNumericalVariance();
    final double sampleMean = new Mean().evaluate(samples);
    final double sampleVariance = new Variance().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleVariance, variance), 0., 0.02);
}
Also used : BetaDistribution(org.apache.commons.math3.distribution.BetaDistribution) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Test(org.testng.annotations.Test)

Aggregations

Test (org.testng.annotations.Test)27 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)23 List (java.util.List)17 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)16 RealMatrix (org.apache.commons.math3.linear.RealMatrix)14 ArrayList (java.util.ArrayList)12 Collectors (java.util.stream.Collectors)12 StandardDeviation (org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)12 Utils (org.broadinstitute.hellbender.utils.Utils)12 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)10 Arrays (java.util.Arrays)10 IntStream (java.util.stream.IntStream)10 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)10 WeightedObservedPoint (org.apache.commons.math3.fitting.WeightedObservedPoint)10 Logger (org.apache.logging.log4j.Logger)10 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)10 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 Function (java.util.function.Function)9 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)9