Search in sources :

Example 21 with Pair

use of org.apache.commons.math3.util.Pair in project gatk by broadinstitute.

the class GCBiasSimulatedData method simulatedData.

// visible for the integration test
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> phonyTargets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> phonySamples = SimulatedSamples.phonySamples(numSamples);
    final Random random = new Random(13);
    final double[] gcContentByTarget = IntStream.range(0, numTargets).mapToDouble(n -> 0.5 + 0.2 * random.nextGaussian()).map(x -> Math.min(x, 0.95)).map(x -> Math.max(x, 0.05)).toArray();
    final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget).map(QUADRATIC_GC_BIAS_CURVE::apply).toArray();
    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final RealMatrix counts = new Array2DRowRealMatrix(numTargets, numSamples);
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {

        @Override
        public double visit(final int target, final int column, final double value) {
            return gcBiasByTarget[target] * (1.0 + 0.01 * random.nextDouble());
        }
    });
    final ReadCountCollection rcc = new ReadCountCollection(phonyTargets, phonySamples, counts);
    return new ImmutablePair<>(rcc, gcContentByTarget);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) Arrays(java.util.Arrays) org.broadinstitute.hellbender.tools.exome(org.broadinstitute.hellbender.tools.exome) IOException(java.io.IOException) Random(java.util.Random) Function(java.util.function.Function) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) File(java.io.File) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Collections(java.util.Collections) Random(java.util.Random) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor)

Example 22 with Pair

use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.

the class CNLOHCaller method calcNewRhos.

private double[] calcNewRhos(final List<ACNVModeledSegment> segments, final List<double[][][]> responsibilitiesBySeg, final double lambda, final double[] rhos, final int[] mVals, final int[] nVals, final JavaSparkContext ctx) {
    // Since, we pass in the entire responsibilities matrix, we need the correct index for each rho.  That, and the
    //  fact that this is a univariate objective function, means we need to create an instance for each rho.  And
    //  then we blast across Spark.
    final List<Pair<? extends Function<Double, Double>, SearchInterval>> objectives = IntStream.range(0, rhos.length).mapToObj(i -> new Pair<>(new Function<Double, Double>() {

        @Override
        public Double apply(Double rho) {
            return calculateESmnObjective(rho, segments, responsibilitiesBySeg, mVals, nVals, lambda, i);
        }
    }, new SearchInterval(0.0, 1.0, rhos[i]))).collect(Collectors.toList());
    final JavaRDD<Pair<? extends Function<Double, Double>, SearchInterval>> objectivesRDD = ctx.parallelize(objectives);
    final List<Double> resultsAsDouble = objectivesRDD.map(objective -> optimizeIt(objective.getFirst(), objective.getSecond())).collect();
    return resultsAsDouble.stream().mapToDouble(Double::doubleValue).toArray();
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SearchInterval(org.apache.commons.math3.optim.univariate.SearchInterval) RealVector(org.apache.commons.math3.linear.RealVector) Function(java.util.function.Function) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) GammaDistribution(org.apache.commons.math3.distribution.GammaDistribution) Gamma(org.apache.commons.math3.special.Gamma) ACNVModeledSegment(org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) BaseAbstractUnivariateIntegrator(org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator) GoalType(org.apache.commons.math3.optim.nonlinear.scalar.GoalType) MatrixUtils(org.apache.commons.math3.linear.MatrixUtils) UnivariateObjectiveFunction(org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) SimpsonIntegrator(org.apache.commons.math3.analysis.integration.SimpsonIntegrator) JavaRDD(org.apache.spark.api.java.JavaRDD) GATKProtectedMathUtils(org.broadinstitute.hellbender.utils.GATKProtectedMathUtils) Pair(org.apache.commons.math3.util.Pair) Collectors(java.util.stream.Collectors) BrentOptimizer(org.apache.commons.math3.optim.univariate.BrentOptimizer) Serializable(java.io.Serializable) List(java.util.List) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HomoSapiensConstants(org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants) MaxEval(org.apache.commons.math3.optim.MaxEval) LogManager(org.apache.logging.log4j.LogManager) Function(java.util.function.Function) UnivariateObjectiveFunction(org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) SearchInterval(org.apache.commons.math3.optim.univariate.SearchInterval) Pair(org.apache.commons.math3.util.Pair)

Example 23 with Pair

use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.

the class ArtifactStatisticsScorer method calculateArtifactPValue.

/** p-value for being an artifact
     *
     * @param totalAltAlleleCount total number of alt reads
     * @param artifactAltAlleleCount alt read counts in a pair orientation that supports an artifact
     * @param biasP believed bias p value for a binomial distribution in artifact variants
     * @return p-value for this being an artifact
     */
public static double calculateArtifactPValue(final int totalAltAlleleCount, final int artifactAltAlleleCount, final double biasP) {
    ParamUtils.isPositiveOrZero(biasP, "bias parameter must be positive or zero.");
    ParamUtils.isPositiveOrZero(totalAltAlleleCount, "total alt allele count must be positive or zero.");
    ParamUtils.isPositiveOrZero(artifactAltAlleleCount, "artifact supporting alt allele count must be positive or zero.");
    ParamUtils.isPositiveOrZero(totalAltAlleleCount - artifactAltAlleleCount, "Total alt count must be same or greater than the artifact alt count.");
    return new BinomialDistribution(null, totalAltAlleleCount, biasP).cumulativeProbability(artifactAltAlleleCount);
}
Also used : BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution)

Example 24 with Pair

use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.

the class TargetCoverageSexGenotypeCalculator method processReadCountsAndTargets.

/**
     * Processes raw read counts and targets:
     * <dl>
     *     <dt> If more than one sample is present in the collection, filters out fully uncovered targets
     *     from read counts and removes the uncovered targets from the target list</dt>
     *
     *     <dt> Otherwise, does nothing and warns the user
     *     </dt>
     * </dl>
     *
     * @param rawReadCounts raw read count collection
     * @param targetList user provided target list
     * @return pair of processed read counts and targets
     */
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
    final ReadCountCollection finalReadCounts;
    final List<Target> finalTargetList;
    /* remove totally uncovered targets */
    if (rawReadCounts.columnNames().size() > 1) {
        finalReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
        final Set<Target> targetSetFromProcessedReadCounts = new HashSet<>(finalReadCounts.targets());
        finalTargetList = targetList.stream().filter(targetSetFromProcessedReadCounts::contains).collect(Collectors.toList());
    } else {
        final long numUncoveredTargets = rawReadCounts.records().stream().filter(rec -> (int) rec.getDouble(0) == 0).count();
        final long numAllTargets = rawReadCounts.targets().size();
        logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically" + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
        finalReadCounts = rawReadCounts;
        finalTargetList = targetList;
    }
    return ImmutablePair.of(finalReadCounts, finalTargetList);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Median(org.apache.commons.math3.stat.descriptive.rank.Median) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) LogManager(org.apache.logging.log4j.LogManager) Nonnull(javax.annotation.Nonnull) Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Example 25 with Pair

use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.

the class HetPulldownCalculator method isPileupHetCompatible.

/**
     * Returns true if the distribution of major and other base-pair counts from a pileup at a locus is compatible with
     * allele fraction of 0.5.
     *
     * <p>
     *     Compatibility is defined by a p-value threshold.  That is, compute the two-sided p-value of observing
     *     a number of major read counts out of a total number of reads, assuming the given heterozygous
     *     allele fraction.  If the p-value is less than the given threshold, then reject the null hypothesis
     *     that the heterozygous allele fraction is 0.5 (i.e., SNP is likely to be homozygous) and return false,
     *     otherwise return true.
     * </p>
     * @param baseCounts        base-pair counts
     * @param totalBaseCount    total base-pair counts (excluding N, etc.)
     * @param pvalThreshold     p-value threshold for two-sided binomial test (should be in [0, 1], but no check is performed)
     * @return                  boolean compatibility with heterozygous allele fraction
     */
@VisibleForTesting
protected static boolean isPileupHetCompatible(final Nucleotide.Counter baseCounts, final int totalBaseCount, final double pvalThreshold) {
    final int majorReadCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).max().getAsInt();
    if (majorReadCount == 0 || totalBaseCount - majorReadCount == 0) {
        return false;
    }
    final double pval = new BinomialTest().binomialTest(totalBaseCount, majorReadCount, HET_ALLELE_FRACTION, AlternativeHypothesis.TWO_SIDED);
    return pval >= pvalThreshold;
}
Also used : BinomialTest(org.apache.commons.math3.stat.inference.BinomialTest) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

Pair (android.support.v4.util.Pair)32 ArrayList (java.util.ArrayList)17 IntStream (java.util.stream.IntStream)16 View (android.view.View)14 Collectors (java.util.stream.Collectors)14 ActivityOptionsCompat (android.support.v4.app.ActivityOptionsCompat)12 Logger (org.apache.logging.log4j.Logger)12 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)12 Intent (android.content.Intent)10 IOException (java.io.IOException)10 java.util (java.util)10 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)10 Pair (org.apache.commons.lang3.tuple.Pair)10 RealMatrix (org.apache.commons.math3.linear.RealMatrix)10 Pair (org.apache.commons.math3.util.Pair)10 File (java.io.File)9 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)8 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 org.broadinstitute.hellbender.tools.exome (org.broadinstitute.hellbender.tools.exome)8 VisibleForTesting (com.google.common.annotations.VisibleForTesting)7