Search in sources :

Example 6 with RealVector

use of org.apache.commons.math3.linear.RealVector in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtils method hasSuspiciousContigs.

/**
     *  Given a single sample tangent normalization (or other coverage profile), determine whether any contig looks like
     *   it has an arm level event (defined as 25% (or more) of the contig amplified/deleted)
     *
     * @param singleSampleTangentNormalized Tangent normalized data for a single sample.
     * @return never {@code null}
     */
private static Boolean hasSuspiciousContigs(final ReadCountCollection singleSampleTangentNormalized, final Map<String, Double> contigToMedian) {
    final List<String> allContigsPresent = retrieveAllContigsPresent(singleSampleTangentNormalized);
    for (String contig : allContigsPresent) {
        final ReadCountCollection oneContigReadCountCollection = singleSampleTangentNormalized.subsetTargets(singleSampleTangentNormalized.targets().stream().filter(t -> t.getContig().equals(contig)).collect(Collectors.toSet()));
        final RealVector counts = oneContigReadCountCollection.counts().getColumnVector(0);
        for (int i = 0; i < 4; i++) {
            final RealVector partitionCounts = counts.getSubVector(i * counts.getDimension() / 4, counts.getDimension() / 4);
            final double[] partitionArray = DoubleStream.of(partitionCounts.toArray()).map(d -> Math.pow(2, d)).sorted().toArray();
            double median = new Median().evaluate(partitionArray);
            final double medianShiftInCRSpace = contigToMedian.getOrDefault(contig, 1.0) - 1.0;
            median -= medianShiftInCRSpace;
            if ((median > AMP_THRESHOLD) || (median < DEL_THRESHOLD)) {
                logger.info("Suspicious contig: " + singleSampleTangentNormalized.columnNames().get(0) + " " + contig + " (" + median + " -- " + i + ")");
                return true;
            }
        }
    }
    return false;
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Example 7 with RealVector

use of org.apache.commons.math3.linear.RealVector in project gatk by broadinstitute.

the class CNLOHCaller method calcEffectivePhis.

protected double[] calcEffectivePhis(final double E_alpha, final double[] responsibilitiesByRho) {
    final double sumResponsibilities = MathUtils.sum(responsibilitiesByRho);
    final double[] result = new double[responsibilitiesByRho.length];
    final int k = responsibilitiesByRho.length;
    // Initialize all pseudocounts to 1, except for index 0, which is 20;
    //  This artificially increases the odds for a rho = 0.
    final RealVector pseudocounts = new ArrayRealVector(responsibilitiesByRho.length);
    pseudocounts.set(1.0);
    pseudocounts.setEntry(0, 20.0);
    final double sumPseudocounts = MathUtils.sum(pseudocounts.toArray());
    final double term2 = Gamma.digamma(E_alpha + sumPseudocounts + sumResponsibilities);
    for (int i = 0; i < result.length; i++) {
        final double term1 = Gamma.digamma(E_alpha / k + pseudocounts.getEntry(i) + responsibilitiesByRho[i]);
        result[i] = Math.exp(term1 - term2);
    }
    return result;
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector)

Aggregations

RealVector (org.apache.commons.math3.linear.RealVector)7 ArrayRealVector (org.apache.commons.math3.linear.ArrayRealVector)5 Median (org.apache.commons.math3.stat.descriptive.rank.Median)2 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)2 FisherInformationMatrix (gdsc.smlm.fitting.FisherInformationMatrix)1 GradientCalculator (gdsc.smlm.fitting.nonlinear.gradient.GradientCalculator)1 ExtendedNonLinearFunction (gdsc.smlm.function.ExtendedNonLinearFunction)1 MultivariateMatrixFunctionWrapper (gdsc.smlm.function.MultivariateMatrixFunctionWrapper)1 MultivariateVectorFunctionWrapper (gdsc.smlm.function.MultivariateVectorFunctionWrapper)1 NonLinearFunction (gdsc.smlm.function.NonLinearFunction)1 ValueProcedure (gdsc.smlm.function.ValueProcedure)1 Gaussian2DFunction (gdsc.smlm.function.gaussian.Gaussian2DFunction)1 ConvergenceException (org.apache.commons.math3.exception.ConvergenceException)1 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)1 TooManyIterationsException (org.apache.commons.math3.exception.TooManyIterationsException)1 LeastSquaresBuilder (org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder)1 Optimum (org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum)1 LeastSquaresProblem (org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem)1 LevenbergMarquardtOptimizer (org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer)1 ValueAndJacobianFunction (org.apache.commons.math3.fitting.leastsquares.ValueAndJacobianFunction)1