use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.
the class CoverageModelParameters method write.
/**
* Writes the model to disk.
*
* @param outputPath model output path
*/
public static void write(@Nonnull CoverageModelParameters model, @Nonnull final String outputPath) {
/* create output directory if it doesn't exist */
createOutputPath(Utils.nonNull(outputPath, "The output path string must be non-null"));
/* write targets list */
final File targetListFile = new File(outputPath, CoverageModelGlobalConstants.TARGET_LIST_OUTPUT_FILE);
TargetWriter.writeTargetsToFile(targetListFile, model.getTargetList());
final List<String> targetNames = model.getTargetList().stream().map(Target::getName).collect(Collectors.toList());
/* write target mean bias to file */
final File targetMeanBiasFile = new File(outputPath, CoverageModelGlobalConstants.TARGET_MEAN_LOG_BIAS_OUTPUT_FILE);
Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getTargetMeanLogBias().transpose(), targetMeanBiasFile, MEAN_LOG_BIAS_MATRIX_NAME, targetNames, null);
/* write target unexplained variance to file */
final File targetUnexplainedVarianceFile = new File(outputPath, CoverageModelGlobalConstants.TARGET_UNEXPLAINED_VARIANCE_OUTPUT_FILE);
Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getTargetUnexplainedVariance().transpose(), targetUnexplainedVarianceFile, TARGET_UNEXPLAINED_VARIANCE_MATRIX_NAME, targetNames, null);
if (model.isBiasCovariatesEnabled()) {
/* write mean bias covariates to file */
final List<String> meanBiasCovariatesNames = IntStream.range(0, model.getNumLatents()).mapToObj(li -> String.format(BIAS_COVARIATE_COLUMN_NAME_FORMAT, li)).collect(Collectors.toList());
final File meanBiasCovariatesFile = new File(outputPath, CoverageModelGlobalConstants.MEAN_BIAS_COVARIATES_OUTPUT_FILE);
Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getMeanBiasCovariates(), meanBiasCovariatesFile, MEAN_BIAS_COVARIATES_MATRIX_NAME, targetNames, meanBiasCovariatesNames);
/* write norm_2 of mean bias covariates to file */
final INDArray WTW = model.getMeanBiasCovariates().transpose().mmul(model.getMeanBiasCovariates());
final double[] biasCovariatesNorm2 = IntStream.range(0, model.getNumLatents()).mapToDouble(li -> WTW.getDouble(li, li)).toArray();
final File biasCovariatesNorm2File = new File(outputPath, CoverageModelGlobalConstants.MEAN_BIAS_COVARIATES_NORM2_OUTPUT_FILE);
Nd4jIOUtils.writeNDArrayMatrixToTextFile(Nd4j.create(biasCovariatesNorm2, new int[] { 1, model.getNumLatents() }), biasCovariatesNorm2File, MEAN_BIAS_COVARIATES_NORM_2_MATRIX_NAME, null, meanBiasCovariatesNames);
/* if ARD is enabled, write the ARD coefficients and covariance of W as well */
if (model.isARDEnabled()) {
final File biasCovariatesARDCoefficientsFile = new File(outputPath, CoverageModelGlobalConstants.BIAS_COVARIATES_ARD_COEFFICIENTS_OUTPUT_FILE);
Nd4jIOUtils.writeNDArrayMatrixToTextFile(model.getBiasCovariateARDCoefficients(), biasCovariatesARDCoefficientsFile, BIAS_COVARIATES_ARD_COEFFICIENTS_MATRIX_NAME, null, meanBiasCovariatesNames);
}
}
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk-protected by broadinstitute.
the class CoverageModelParameters method generateRandomModel.
/**
* Generates random coverage model parameters.
*
* @param targetList list of targets
* @param numLatents number of latent variables
* @param seed random seed
* @param randomMeanLogBiasStandardDeviation std of mean log bias (mean is set to 0)
* @param randomBiasCovariatesStandardDeviation std of bias covariates (mean is set to 0)
* @param randomMaxUnexplainedVariance max value of unexplained variance (samples are taken from a uniform
* distribution [0, {@code randomMaxUnexplainedVariance}])
* @param initialBiasCovariatesARDCoefficients initial row vector of ARD coefficients
* @return an instance of {@link CoverageModelParameters}
*/
public static CoverageModelParameters generateRandomModel(final List<Target> targetList, final int numLatents, final long seed, final double randomMeanLogBiasStandardDeviation, final double randomBiasCovariatesStandardDeviation, final double randomMaxUnexplainedVariance, final INDArray initialBiasCovariatesARDCoefficients) {
Utils.validateArg(numLatents >= 0, "Dimension of the bias space must be non-negative");
Utils.validateArg(randomBiasCovariatesStandardDeviation >= 0, "Standard deviation of random bias covariates" + " must be non-negative");
Utils.validateArg(randomMeanLogBiasStandardDeviation >= 0, "Standard deviation of random mean log bias" + " must be non-negative");
Utils.validateArg(randomMaxUnexplainedVariance >= 0, "Max random unexplained variance must be non-negative");
Utils.validateArg(initialBiasCovariatesARDCoefficients == null || numLatents > 0 && initialBiasCovariatesARDCoefficients.length() == numLatents, "If ARD is enabled, the dimension" + " of the bias latent space must be positive and match the length of ARD coeffecient vector");
final boolean biasCovariatesEnabled = numLatents > 0;
final int numTargets = targetList.size();
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(seed));
/* Gaussian random for mean log bias */
final INDArray initialMeanLogBias = Nd4j.create(getNormalRandomNumbers(numTargets, 0, randomMeanLogBiasStandardDeviation, rng), new int[] { 1, numTargets });
/* Uniform random for unexplained variance */
final INDArray initialUnexplainedVariance = Nd4j.create(getUniformRandomNumbers(numTargets, 0, randomMaxUnexplainedVariance, rng), new int[] { 1, numTargets });
final INDArray initialMeanBiasCovariates;
if (biasCovariatesEnabled) {
/* Gaussian random for bias covariates */
initialMeanBiasCovariates = Nd4j.create(getNormalRandomNumbers(numTargets * numLatents, 0, randomBiasCovariatesStandardDeviation, rng), new int[] { numTargets, numLatents });
} else {
initialMeanBiasCovariates = null;
}
return new CoverageModelParameters(targetList, initialMeanLogBias, initialUnexplainedVariance, initialMeanBiasCovariates, initialBiasCovariatesARDCoefficients);
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.
the class ReCapSegCaller method calculateT.
private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) {
//Get the segments that are likely copy neutral.
// Math.abs removed to mimic python...
final List<ModeledSegment> copyNeutralSegments = segments.stream().filter(s -> s.getSegmentMean() < COPY_NEUTRAL_CUTOFF).collect(Collectors.toList());
// Get the targets that correspond to the copyNeutralSegments... note that individual targets, due to noise,
// can be far away from copy neutral
final TargetCollection<ReadCountRecord.SingleSampleRecord> targetsWithCoverage = new HashedListTargetCollection<>(tangentNormalizedCoverage.records().stream().map(ReadCountRecord::asSingleSampleRecord).collect(Collectors.toList()));
final double[] copyNeutralTargetsCopyRatio = copyNeutralSegments.stream().flatMap(s -> targetsWithCoverage.targets(s).stream()).mapToDouble(ReadCountRecord.SingleSampleRecord::getCount).toArray();
final double meanCopyNeutralTargets = new Mean().evaluate(copyNeutralTargetsCopyRatio);
final double sigmaCopyNeutralTargets = new StandardDeviation().evaluate(copyNeutralTargetsCopyRatio);
// Now we filter outliers by only including those w/in 2 standard deviations.
final double[] filteredCopyNeutralTargetsCopyRatio = Arrays.stream(copyNeutralTargetsCopyRatio).filter(c -> Math.abs(c - meanCopyNeutralTargets) < sigmaCopyNeutralTargets * Z_THRESHOLD).toArray();
return new StandardDeviation().evaluate(filteredCopyNeutralTargetsCopyRatio);
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.
the class GermlineCNVCallerIntegrationTest method reportCopyNumberSummaryStatistics.
/* Shame on me for using {@link ReadCountCollection} to store copy numbers! */
private void reportCopyNumberSummaryStatistics(@Nonnull final File posteriorsOutputPath, @Nonnull final File truthCopyNumberFile, @Nonnull final List<Target> targets, @Nonnull final SexGenotypeDataCollection sexGenotypeDataCollection) {
final ReadCountCollection truthCopyNumberCollection = loadTruthCopyNumberTable(truthCopyNumberFile, targets);
final RealMatrix calledCopyNumberMatrix = Nd4jApacheAdapterUtils.convertINDArrayToApacheMatrix(Nd4jIOUtils.readNDArrayMatrixFromTextFile(new File(posteriorsOutputPath, CoverageModelGlobalConstants.COPY_RATIO_VITERBI_FILENAME)));
final ReadCountCollection calledCopyNumberCollection = new ReadCountCollection(targets, truthCopyNumberCollection.columnNames(), calledCopyNumberMatrix);
final int numSamples = calledCopyNumberCollection.columnNames().size();
final List<String> sampleSexGenotypes = truthCopyNumberCollection.columnNames().stream().map(sampleName -> sexGenotypeDataCollection.getSampleSexGenotypeData(sampleName).getSexGenotype()).collect(Collectors.toList());
final List<SampleCopyNumberSummaryStatistics> sampleSummaryStatisticsList = IntStream.range(0, numSamples).mapToObj(si -> calculateSampleCopyNumberConcordance(truthCopyNumberCollection, calledCopyNumberCollection, si, sampleSexGenotypes.get(si))).collect(Collectors.toList());
/* calculation various summary statistics */
final AbstractUnivariateStatistic calculator = new Mean();
final ConfusionRates homDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.homozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
final ConfusionRates hetDelMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.heterozygousDeletionConfusionMatrix).collect(Collectors.toList()), calculator);
final ConfusionRates dupMedianRates = ConfusionMatrix.getConfusionRates(sampleSummaryStatisticsList.stream().map(ss -> ss.duplicationConfusionMatrix).collect(Collectors.toList()), calculator);
final double absoluteConcordance = Concordance.getCollectionConcordance(sampleSummaryStatisticsList.stream().map(ss -> ss.absoluteCopyNumberConcordance).collect(Collectors.toList()), calculator);
/* log */
logger.info("Homozygous deletion statistics: " + homDelMedianRates.toString());
logger.info("Heterozygous deletion statistics: " + hetDelMedianRates.toString());
logger.info("Duplication statistics: " + dupMedianRates.toString());
logger.info(String.format("Absolute copy number calling concordance: %f", absoluteConcordance));
}
use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.
the class SliceSamplerUnitTest method testSliceSamplingOfPeakedBetaDistribution.
/**
* Test slice sampling of a peaked beta distribution as an example of sampling of a bounded random variable.
* Checks that input mean and variance are recovered by 10000 samples to a relative error of 0.5% and 2%,
* respectively.
*/
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
rng.setSeed(RANDOM_SEED);
final double alpha = 10.;
final double beta = 4.;
final BetaDistribution betaDistribution = new BetaDistribution(alpha, beta);
final Function<Double, Double> betaLogPDF = betaDistribution::logDensity;
final double xInitial = 0.5;
final double xMin = 0.;
final double xMax = 1.;
final double width = 0.1;
final int numSamples = 10000;
final SliceSampler betaSampler = new SliceSampler(rng, betaLogPDF, xMin, xMax, width);
final double[] samples = Doubles.toArray(betaSampler.sample(xInitial, numSamples));
final double mean = betaDistribution.getNumericalMean();
final double variance = betaDistribution.getNumericalVariance();
final double sampleMean = new Mean().evaluate(samples);
final double sampleVariance = new Variance().evaluate(samples);
Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
Assert.assertEquals(relativeError(sampleVariance, variance), 0., 0.02);
}
Aggregations