use of org.apache.commons.math3.linear.RealVector in project GDSC-SMLM by aherbert.
the class TrackPopulationAnalysis method extractTrackData.
/**
* Extract the track data. This extracts different descriptors of the track using a rolling local
* window.
*
* <p>Distances are converted to {@code unit} using the provided converter and time units are
* converted from frame to seconds (s). The diffusion coefficients is in unit^2/s.
*
* <p>If categories are added they are remapped to be a natural sequence starting from 0.
*
* @param tracks the tracks
* @param distanceConverter the distance converter
* @param deltaT the time step of each frame in seconds (delta T)
* @param hasCategory if true add the category from the result to the end of the data
* @return the track data (track lengths and descriptors)
*/
private Pair<int[], double[][]> extractTrackData(List<Trace> tracks, TypeConverter<DistanceUnit> distanceConverter, double deltaT, boolean hasCategory) {
final List<double[]> data = new LocalList<>(tracks.size());
double[] x = new double[0];
double[] y = new double[0];
final int wm1 = settings.window - 1;
final int valuesLength = hasCategory ? 5 : 4;
final int mid = settings.window / 2;
final MsdFitter msdFitter = new MsdFitter(settings, deltaT);
final double significance = settings.significance;
final int[] fitResult = new int[4];
// Factor for the diffusion coefficient: 1/N * 1/(2*dimensions*deltaT) = 1 / 4Nt
// with N the number of points to average.
final double diffusionCoefficientFactor = 1.0 / (4 * wm1 * deltaT);
// Used for the standard deviations
final Statistics statsX = new Statistics();
final Statistics statsY = new Statistics();
final Ticker ticker = ImageJUtils.createTicker(tracks.size(), 1, "Computing track features...");
// Collect the track categories. Later these are renumbered to a natural sequence from 0.
final TIntHashSet catSet = new TIntHashSet();
// final StoredDataStatistics statsAlpha = new StoredDataStatistics(tracks.size());
// Process each track
final TIntArrayList lengths = new TIntArrayList(tracks.size());
for (final Trace track : tracks) {
// Get xy coordinates
final int size = track.size();
if (x.length < size) {
x = new double[size];
y = new double[size];
}
for (int i = 0; i < size; i++) {
final PeakResult peak = track.get(i);
x[i] = distanceConverter.convert(peak.getXPosition());
y[i] = distanceConverter.convert(peak.getYPosition());
}
final int smwm1 = size - wm1;
final int previousSize = data.size();
for (int k = 0; k < smwm1; k++) {
final double[] values = new double[valuesLength];
data.add(values);
// middle of even sized windows is between two localisations.
if (hasCategory) {
final int cat = track.get(k + mid).getCategory();
values[4] = cat;
catSet.add(cat);
}
// First point in window = k
// Last point in window = k + w - 1 = k + wm1
final int end = k + wm1;
// 1. Anomalous exponent.
msdFitter.setData(x, y);
try {
msdFitter.fit(k, null);
// statsAlpha.add(msdFitter.alpha);
int fitIndex = msdFitter.positiveSlope ? 0 : 2;
// If better then this is anomalous diffusion
final double alpha = msdFitter.lvmSolution2.getPoint().getEntry(2);
values[0] = alpha;
if (msdFitter.pValue > significance) {
fitIndex++;
}
fitResult[fitIndex]++;
// values[0] = msdFitter.alpha;
// // Debug
// if (
// // msdFitter.pValue < 0.2
// msdFitter.pValue < 0.2 && values[0] < 0
// // !msdFitter.positiveSlope
// ) {
// final RealVector p = msdFitter.lvmSolution2.getPoint();
// final String title = "anomalous exponent";
// final Plot plot = new Plot(title, "time (s)", "MSD (um^2)");
// final double[] t = SimpleArrayUtils.newArray(msdFitter.s.length, deltaT, deltaT);
// plot.addLabel(0, 0, msdFitter.lvmSolution2.getPoint().toString() + " p="
// + msdFitter.pValue + ". " + msdFitter.lvmSolution1.getPoint().toString());
// plot.addPoints(t, msdFitter.s, Plot.CROSS);
// plot.addPoints(t, msdFitter.model2.value(p).getFirst().toArray(), Plot.LINE);
// plot.setColor(Color.BLUE);
// plot.addPoints(t,
// msdFitter.model1.value(msdFitter.lvmSolution1.getPoint()).getFirst().toArray(),
// Plot.LINE);
// plot.setColor(Color.RED);
// final double[] yy = Arrays.stream(t).map(msdFitter.reg::predict).toArray();
// plot.addPoints(t, yy, Plot.CIRCLE);
// System.out.printf("%s : %s", msdFitter.lvmSolution2.getPoint(), values[0]);
// ImageJUtils.display(title, plot, ImageJUtils.NO_TO_FRONT);
// System.out.println();
// }
} catch (TooManyIterationsException | ConvergenceException ex) {
if (settings.debug) {
ImageJUtils.log("Failed to fit anomalous exponent: " + ex.getMessage());
}
// Ignore this and leave as Brownian motion
values[0] = 1.0;
}
// Referenced papers:
// Hozé, N. H., D. (2017) Statistical methods for large ensembles of super-resolution
// stochastic single particle trajectories in cell biology.
// Annual Review of Statistics and Its Application 4, 189-223
//
// Amitai, A., Seeber, A., Gasser, S. M. & Holcman, D. (2017) Visualization of Chromatin
// Decompaction and Break Site Extrusion as Predicted by Statistical Polymer
// Modeling of Single-Locus Trajectories. Cell reports 18, 1200-1214
// 2. Effective diffusion coefficient (Hozé, eq 10).
// This is the average squared jump distance between successive points
// divided by 1 / (2 * dimensions * deltaT), i.e. 1 / 4t.
double sum = 0;
for (int i = k; i < end; i++) {
sum += MathUtils.distance2(x[i], y[i], x[i + 1], y[i + 1]);
}
values[1] = sum * diffusionCoefficientFactor;
// 3. Length of confinement (Amitai et al, eq 1).
// Compute the average of the standard deviation of the position in each dimension.
statsX.reset();
statsY.reset();
for (int i = k; i <= end; i++) {
statsX.add(x[i]);
statsY.add(y[i]);
}
values[2] = (statsX.getStandardDeviation() + statsY.getStandardDeviation()) / 2;
// 4. Magnitude of drift vector (Hozé, eq 9).
// Note: The drift field is given as the expected distance between successive points, i.e.
// the average step. Since all track windows are the same length this is the same
// as the distance between the first and last point divided by the number of points.
// The drift field in each dimension is combined to create a drift norm, i.e. Euclidean
// distance.
values[3] = MathUtils.distance(x[k], y[k], x[end], y[end]) / wm1;
}
lengths.add(data.size() - previousSize);
ticker.tick();
}
ImageJUtils.finished();
if (settings.debug) {
ImageJUtils.log(" +Slope, significant: %d", fitResult[0]);
ImageJUtils.log(" +Slope, insignificant: %d", fitResult[1]);
ImageJUtils.log(" -Slope, significant: %d", fitResult[2]);
ImageJUtils.log(" -Slope, insignificant: %d", fitResult[3]);
}
ImageJUtils.log("Insignificant anomalous exponents: %d / %d", fitResult[1] + fitResult[3], data.size());
// System.out.println(statsAlpha.getStatistics().toString());
final double[][] trackData = data.toArray(new double[0][0]);
if (hasCategory) {
final int[] categories = catSet.toArray();
Arrays.sort(categories);
// Only remap if non-compact (i.e. not 0 to n)
final int max = categories[categories.length - 1];
if (categories[0] != 0 || max + 1 != categories.length) {
final int[] remap = new int[max + 1];
for (int i = 0; i < categories.length; i++) {
remap[categories[i]] = i;
}
final int end = valuesLength - 1;
for (final double[] values : trackData) {
values[end] = remap[(int) values[end]];
}
}
}
return Pair.create(lengths.toArray(), trackData);
}
use of org.apache.commons.math3.linear.RealVector in project gatk-protected by broadinstitute.
the class CNLOHCaller method calcEffectivePhis.
protected double[] calcEffectivePhis(final double E_alpha, final double[] responsibilitiesByRho) {
final double sumResponsibilities = MathUtils.sum(responsibilitiesByRho);
final double[] result = new double[responsibilitiesByRho.length];
final int k = responsibilitiesByRho.length;
// Initialize all pseudocounts to 1, except for index 0, which is 20;
// This artificially increases the odds for a rho = 0.
final RealVector pseudocounts = new ArrayRealVector(responsibilitiesByRho.length);
pseudocounts.set(1.0);
pseudocounts.setEntry(0, 20.0);
final double sumPseudocounts = MathUtils.sum(pseudocounts.toArray());
final double term2 = Gamma.digamma(E_alpha + sumPseudocounts + sumResponsibilities);
for (int i = 0; i < result.length; i++) {
final double term1 = Gamma.digamma(E_alpha / k + pseudocounts.getEntry(i) + responsibilitiesByRho[i]);
result[i] = Math.exp(term1 - term2);
}
return result;
}
use of org.apache.commons.math3.linear.RealVector in project gatk by broadinstitute.
the class CNLOHCaller method calcEffectivePhis.
protected double[] calcEffectivePhis(final double E_alpha, final double[] responsibilitiesByRho) {
final double sumResponsibilities = MathUtils.sum(responsibilitiesByRho);
final double[] result = new double[responsibilitiesByRho.length];
final int k = responsibilitiesByRho.length;
// Initialize all pseudocounts to 1, except for index 0, which is 20;
// This artificially increases the odds for a rho = 0.
final RealVector pseudocounts = new ArrayRealVector(responsibilitiesByRho.length);
pseudocounts.set(1.0);
pseudocounts.setEntry(0, 20.0);
final double sumPseudocounts = MathUtils.sum(pseudocounts.toArray());
final double term2 = Gamma.digamma(E_alpha + sumPseudocounts + sumResponsibilities);
for (int i = 0; i < result.length; i++) {
final double term1 = Gamma.digamma(E_alpha / k + pseudocounts.getEntry(i) + responsibilitiesByRho[i]);
result[i] = Math.exp(term1 - term2);
}
return result;
}
use of org.apache.commons.math3.linear.RealVector in project gatk by broadinstitute.
the class Nd4jApacheAdapterUtilsUnitTest method assertINDArrayToApacheVectorCorrectness.
public void assertINDArrayToApacheVectorCorrectness(final INDArray arr) {
final RealVector result = Nd4jApacheAdapterUtils.convertINDArrayToApacheVector(arr);
final RealVector expected = new ArrayRealVector(arr.dup().data().asDouble());
Assert.assertEquals(result.getDimension(), expected.getDimension());
ArrayAsserts.assertArrayEquals(result.toArray(), expected.toArray(), EPS);
}
use of org.apache.commons.math3.linear.RealVector in project gatk-protected by broadinstitute.
the class CoveragePoNQCUtils method hasSuspiciousContigs.
/**
* Given a single sample tangent normalization (or other coverage profile), determine whether any contig looks like
* it has an arm level event (defined as 25% (or more) of the contig amplified/deleted)
*
* @param singleSampleTangentNormalized Tangent normalized data for a single sample.
* @return never {@code null}
*/
private static Boolean hasSuspiciousContigs(final ReadCountCollection singleSampleTangentNormalized, final Map<String, Double> contigToMedian) {
final List<String> allContigsPresent = retrieveAllContigsPresent(singleSampleTangentNormalized);
for (String contig : allContigsPresent) {
final ReadCountCollection oneContigReadCountCollection = singleSampleTangentNormalized.subsetTargets(singleSampleTangentNormalized.targets().stream().filter(t -> t.getContig().equals(contig)).collect(Collectors.toSet()));
final RealVector counts = oneContigReadCountCollection.counts().getColumnVector(0);
for (int i = 0; i < 4; i++) {
final RealVector partitionCounts = counts.getSubVector(i * counts.getDimension() / 4, counts.getDimension() / 4);
final double[] partitionArray = DoubleStream.of(partitionCounts.toArray()).map(d -> Math.pow(2, d)).sorted().toArray();
double median = new Median().evaluate(partitionArray);
final double medianShiftInCRSpace = contigToMedian.getOrDefault(contig, 1.0) - 1.0;
median -= medianShiftInCRSpace;
if ((median > AMP_THRESHOLD) || (median < DEL_THRESHOLD)) {
logger.info("Suspicious contig: " + singleSampleTangentNormalized.columnNames().get(0) + " " + contig + " (" + median + " -- " + i + ")");
return true;
}
}
}
return false;
}
Aggregations