Search in sources :

Example 31 with RealVector

use of org.apache.commons.math3.linear.RealVector in project GDSC-SMLM by aherbert.

the class TrackPopulationAnalysis method extractTrackData.

/**
 * Extract the track data. This extracts different descriptors of the track using a rolling local
 * window.
 *
 * <p>Distances are converted to {@code unit} using the provided converter and time units are
 * converted from frame to seconds (s). The diffusion coefficients is in unit^2/s.
 *
 * <p>If categories are added they are remapped to be a natural sequence starting from 0.
 *
 * @param tracks the tracks
 * @param distanceConverter the distance converter
 * @param deltaT the time step of each frame in seconds (delta T)
 * @param hasCategory if true add the category from the result to the end of the data
 * @return the track data (track lengths and descriptors)
 */
private Pair<int[], double[][]> extractTrackData(List<Trace> tracks, TypeConverter<DistanceUnit> distanceConverter, double deltaT, boolean hasCategory) {
    final List<double[]> data = new LocalList<>(tracks.size());
    double[] x = new double[0];
    double[] y = new double[0];
    final int wm1 = settings.window - 1;
    final int valuesLength = hasCategory ? 5 : 4;
    final int mid = settings.window / 2;
    final MsdFitter msdFitter = new MsdFitter(settings, deltaT);
    final double significance = settings.significance;
    final int[] fitResult = new int[4];
    // Factor for the diffusion coefficient: 1/N * 1/(2*dimensions*deltaT) = 1 / 4Nt
    // with N the number of points to average.
    final double diffusionCoefficientFactor = 1.0 / (4 * wm1 * deltaT);
    // Used for the standard deviations
    final Statistics statsX = new Statistics();
    final Statistics statsY = new Statistics();
    final Ticker ticker = ImageJUtils.createTicker(tracks.size(), 1, "Computing track features...");
    // Collect the track categories. Later these are renumbered to a natural sequence from 0.
    final TIntHashSet catSet = new TIntHashSet();
    // final StoredDataStatistics statsAlpha = new StoredDataStatistics(tracks.size());
    // Process each track
    final TIntArrayList lengths = new TIntArrayList(tracks.size());
    for (final Trace track : tracks) {
        // Get xy coordinates
        final int size = track.size();
        if (x.length < size) {
            x = new double[size];
            y = new double[size];
        }
        for (int i = 0; i < size; i++) {
            final PeakResult peak = track.get(i);
            x[i] = distanceConverter.convert(peak.getXPosition());
            y[i] = distanceConverter.convert(peak.getYPosition());
        }
        final int smwm1 = size - wm1;
        final int previousSize = data.size();
        for (int k = 0; k < smwm1; k++) {
            final double[] values = new double[valuesLength];
            data.add(values);
            // middle of even sized windows is between two localisations.
            if (hasCategory) {
                final int cat = track.get(k + mid).getCategory();
                values[4] = cat;
                catSet.add(cat);
            }
            // First point in window = k
            // Last point in window = k + w - 1 = k + wm1
            final int end = k + wm1;
            // 1. Anomalous exponent.
            msdFitter.setData(x, y);
            try {
                msdFitter.fit(k, null);
                // statsAlpha.add(msdFitter.alpha);
                int fitIndex = msdFitter.positiveSlope ? 0 : 2;
                // If better then this is anomalous diffusion
                final double alpha = msdFitter.lvmSolution2.getPoint().getEntry(2);
                values[0] = alpha;
                if (msdFitter.pValue > significance) {
                    fitIndex++;
                }
                fitResult[fitIndex]++;
            // values[0] = msdFitter.alpha;
            // // Debug
            // if (
            // // msdFitter.pValue < 0.2
            // msdFitter.pValue < 0.2 && values[0] < 0
            // // !msdFitter.positiveSlope
            // ) {
            // final RealVector p = msdFitter.lvmSolution2.getPoint();
            // final String title = "anomalous exponent";
            // final Plot plot = new Plot(title, "time (s)", "MSD (um^2)");
            // final double[] t = SimpleArrayUtils.newArray(msdFitter.s.length, deltaT, deltaT);
            // plot.addLabel(0, 0, msdFitter.lvmSolution2.getPoint().toString() + " p="
            // + msdFitter.pValue + ". " + msdFitter.lvmSolution1.getPoint().toString());
            // plot.addPoints(t, msdFitter.s, Plot.CROSS);
            // plot.addPoints(t, msdFitter.model2.value(p).getFirst().toArray(), Plot.LINE);
            // plot.setColor(Color.BLUE);
            // plot.addPoints(t,
            // msdFitter.model1.value(msdFitter.lvmSolution1.getPoint()).getFirst().toArray(),
            // Plot.LINE);
            // plot.setColor(Color.RED);
            // final double[] yy = Arrays.stream(t).map(msdFitter.reg::predict).toArray();
            // plot.addPoints(t, yy, Plot.CIRCLE);
            // System.out.printf("%s : %s", msdFitter.lvmSolution2.getPoint(), values[0]);
            // ImageJUtils.display(title, plot, ImageJUtils.NO_TO_FRONT);
            // System.out.println();
            // }
            } catch (TooManyIterationsException | ConvergenceException ex) {
                if (settings.debug) {
                    ImageJUtils.log("Failed to fit anomalous exponent: " + ex.getMessage());
                }
                // Ignore this and leave as Brownian motion
                values[0] = 1.0;
            }
            // Referenced papers:
            // Hozé, N. H., D. (2017) Statistical methods for large ensembles of super-resolution
            // stochastic single particle trajectories in cell biology.
            // Annual Review of Statistics and Its Application 4, 189-223
            // 
            // Amitai, A., Seeber, A., Gasser, S. M. & Holcman, D. (2017) Visualization of Chromatin
            // Decompaction and Break Site Extrusion as Predicted by Statistical Polymer
            // Modeling of Single-Locus Trajectories. Cell reports 18, 1200-1214
            // 2. Effective diffusion coefficient (Hozé, eq 10).
            // This is the average squared jump distance between successive points
            // divided by 1 / (2 * dimensions * deltaT), i.e. 1 / 4t.
            double sum = 0;
            for (int i = k; i < end; i++) {
                sum += MathUtils.distance2(x[i], y[i], x[i + 1], y[i + 1]);
            }
            values[1] = sum * diffusionCoefficientFactor;
            // 3. Length of confinement (Amitai et al, eq 1).
            // Compute the average of the standard deviation of the position in each dimension.
            statsX.reset();
            statsY.reset();
            for (int i = k; i <= end; i++) {
                statsX.add(x[i]);
                statsY.add(y[i]);
            }
            values[2] = (statsX.getStandardDeviation() + statsY.getStandardDeviation()) / 2;
            // 4. Magnitude of drift vector (Hozé, eq 9).
            // Note: The drift field is given as the expected distance between successive points, i.e.
            // the average step. Since all track windows are the same length this is the same
            // as the distance between the first and last point divided by the number of points.
            // The drift field in each dimension is combined to create a drift norm, i.e. Euclidean
            // distance.
            values[3] = MathUtils.distance(x[k], y[k], x[end], y[end]) / wm1;
        }
        lengths.add(data.size() - previousSize);
        ticker.tick();
    }
    ImageJUtils.finished();
    if (settings.debug) {
        ImageJUtils.log("  +Slope, significant:   %d", fitResult[0]);
        ImageJUtils.log("  +Slope, insignificant: %d", fitResult[1]);
        ImageJUtils.log("  -Slope, significant:   %d", fitResult[2]);
        ImageJUtils.log("  -Slope, insignificant: %d", fitResult[3]);
    }
    ImageJUtils.log("Insignificant anomalous exponents: %d / %d", fitResult[1] + fitResult[3], data.size());
    // System.out.println(statsAlpha.getStatistics().toString());
    final double[][] trackData = data.toArray(new double[0][0]);
    if (hasCategory) {
        final int[] categories = catSet.toArray();
        Arrays.sort(categories);
        // Only remap if non-compact (i.e. not 0 to n)
        final int max = categories[categories.length - 1];
        if (categories[0] != 0 || max + 1 != categories.length) {
            final int[] remap = new int[max + 1];
            for (int i = 0; i < categories.length; i++) {
                remap[categories[i]] = i;
            }
            final int end = valuesLength - 1;
            for (final double[] values : trackData) {
                values[end] = remap[(int) values[end]];
            }
        }
    }
    return Pair.create(lengths.toArray(), trackData);
}
Also used : Ticker(uk.ac.sussex.gdsc.core.logging.Ticker) Statistics(uk.ac.sussex.gdsc.core.utils.Statistics) TIntHashSet(gnu.trove.set.hash.TIntHashSet) TIntArrayList(gnu.trove.list.array.TIntArrayList) PeakResult(uk.ac.sussex.gdsc.smlm.results.PeakResult) AttributePeakResult(uk.ac.sussex.gdsc.smlm.results.AttributePeakResult) Trace(uk.ac.sussex.gdsc.smlm.results.Trace) LocalList(uk.ac.sussex.gdsc.core.utils.LocalList) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) TooManyIterationsException(org.apache.commons.math3.exception.TooManyIterationsException)

Example 32 with RealVector

use of org.apache.commons.math3.linear.RealVector in project gatk-protected by broadinstitute.

the class CNLOHCaller method calcEffectivePhis.

protected double[] calcEffectivePhis(final double E_alpha, final double[] responsibilitiesByRho) {
    final double sumResponsibilities = MathUtils.sum(responsibilitiesByRho);
    final double[] result = new double[responsibilitiesByRho.length];
    final int k = responsibilitiesByRho.length;
    // Initialize all pseudocounts to 1, except for index 0, which is 20;
    //  This artificially increases the odds for a rho = 0.
    final RealVector pseudocounts = new ArrayRealVector(responsibilitiesByRho.length);
    pseudocounts.set(1.0);
    pseudocounts.setEntry(0, 20.0);
    final double sumPseudocounts = MathUtils.sum(pseudocounts.toArray());
    final double term2 = Gamma.digamma(E_alpha + sumPseudocounts + sumResponsibilities);
    for (int i = 0; i < result.length; i++) {
        final double term1 = Gamma.digamma(E_alpha / k + pseudocounts.getEntry(i) + responsibilitiesByRho[i]);
        result[i] = Math.exp(term1 - term2);
    }
    return result;
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector)

Example 33 with RealVector

use of org.apache.commons.math3.linear.RealVector in project gatk by broadinstitute.

the class CNLOHCaller method calcEffectivePhis.

protected double[] calcEffectivePhis(final double E_alpha, final double[] responsibilitiesByRho) {
    final double sumResponsibilities = MathUtils.sum(responsibilitiesByRho);
    final double[] result = new double[responsibilitiesByRho.length];
    final int k = responsibilitiesByRho.length;
    // Initialize all pseudocounts to 1, except for index 0, which is 20;
    //  This artificially increases the odds for a rho = 0.
    final RealVector pseudocounts = new ArrayRealVector(responsibilitiesByRho.length);
    pseudocounts.set(1.0);
    pseudocounts.setEntry(0, 20.0);
    final double sumPseudocounts = MathUtils.sum(pseudocounts.toArray());
    final double term2 = Gamma.digamma(E_alpha + sumPseudocounts + sumResponsibilities);
    for (int i = 0; i < result.length; i++) {
        final double term1 = Gamma.digamma(E_alpha / k + pseudocounts.getEntry(i) + responsibilitiesByRho[i]);
        result[i] = Math.exp(term1 - term2);
    }
    return result;
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector)

Example 34 with RealVector

use of org.apache.commons.math3.linear.RealVector in project gatk by broadinstitute.

the class Nd4jApacheAdapterUtilsUnitTest method assertINDArrayToApacheVectorCorrectness.

public void assertINDArrayToApacheVectorCorrectness(final INDArray arr) {
    final RealVector result = Nd4jApacheAdapterUtils.convertINDArrayToApacheVector(arr);
    final RealVector expected = new ArrayRealVector(arr.dup().data().asDouble());
    Assert.assertEquals(result.getDimension(), expected.getDimension());
    ArrayAsserts.assertArrayEquals(result.toArray(), expected.toArray(), EPS);
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector) ArrayRealVector(org.apache.commons.math3.linear.ArrayRealVector)

Example 35 with RealVector

use of org.apache.commons.math3.linear.RealVector in project gatk-protected by broadinstitute.

the class CoveragePoNQCUtils method hasSuspiciousContigs.

/**
     *  Given a single sample tangent normalization (or other coverage profile), determine whether any contig looks like
     *   it has an arm level event (defined as 25% (or more) of the contig amplified/deleted)
     *
     * @param singleSampleTangentNormalized Tangent normalized data for a single sample.
     * @return never {@code null}
     */
private static Boolean hasSuspiciousContigs(final ReadCountCollection singleSampleTangentNormalized, final Map<String, Double> contigToMedian) {
    final List<String> allContigsPresent = retrieveAllContigsPresent(singleSampleTangentNormalized);
    for (String contig : allContigsPresent) {
        final ReadCountCollection oneContigReadCountCollection = singleSampleTangentNormalized.subsetTargets(singleSampleTangentNormalized.targets().stream().filter(t -> t.getContig().equals(contig)).collect(Collectors.toSet()));
        final RealVector counts = oneContigReadCountCollection.counts().getColumnVector(0);
        for (int i = 0; i < 4; i++) {
            final RealVector partitionCounts = counts.getSubVector(i * counts.getDimension() / 4, counts.getDimension() / 4);
            final double[] partitionArray = DoubleStream.of(partitionCounts.toArray()).map(d -> Math.pow(2, d)).sorted().toArray();
            double median = new Median().evaluate(partitionArray);
            final double medianShiftInCRSpace = contigToMedian.getOrDefault(contig, 1.0) - 1.0;
            median -= medianShiftInCRSpace;
            if ((median > AMP_THRESHOLD) || (median < DEL_THRESHOLD)) {
                logger.info("Suspicious contig: " + singleSampleTangentNormalized.columnNames().get(0) + " " + contig + " (" + median + " -- " + i + ")");
                return true;
            }
        }
    }
    return false;
}
Also used : RealVector(org.apache.commons.math3.linear.RealVector) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) Median(org.apache.commons.math3.stat.descriptive.rank.Median)

Aggregations

RealVector (org.apache.commons.math3.linear.RealVector)41 ArrayRealVector (org.apache.commons.math3.linear.ArrayRealVector)30 RealMatrix (org.apache.commons.math3.linear.RealMatrix)22 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)10 Test (org.junit.jupiter.api.Test)5 ConvergenceException (org.apache.commons.math3.exception.ConvergenceException)4 TooManyIterationsException (org.apache.commons.math3.exception.TooManyIterationsException)4 VectorPool (nars.rl.horde.math.VectorPool)3 LeastSquaresBuilder (org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder)3 Optimum (org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum)3 LeastSquaresProblem (org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem)3 LevenbergMarquardtOptimizer (org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer)3 DecompositionSolver (org.apache.commons.math3.linear.DecompositionSolver)3 SingularValueDecomposition (org.apache.commons.math3.linear.SingularValueDecomposition)3 CombinedAttributeValues (org.knime.base.node.mine.treeensemble2.data.BinaryNominalSplitsPCA.CombinedAttributeValues)3 DoubleDoubleBiPredicate (uk.ac.sussex.gdsc.test.api.function.DoubleDoubleBiPredicate)3 File (java.io.File)2 Comparator (java.util.Comparator)2 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)2 ValueAndJacobianFunction (org.apache.commons.math3.fitting.leastsquares.ValueAndJacobianFunction)2