use of org.apache.commons.math3.linear.RealVector in project pyramid by cheng-li.
the class GMMDemo method main.
public static void main(String[] args) throws Exception {
List<String> lines = FileUtils.readLines(new File("/Users/chengli/Dropbox/Shared/CS6220DM/2_cluster_EM_mixt/HW2/mnist_features.txt"));
Collections.shuffle(lines);
int dim = lines.get(0).split(" ").length;
int rows = 100;
RealMatrix data = new Array2DRowRealMatrix(rows, dim);
for (int i = 0; i < rows; i++) {
String[] split = lines.get(i).split(" ");
for (int j = 0; j < dim; j++) {
data.setEntry(i, j, Double.parseDouble(split[j]) + Math.random());
}
}
double[] mins = new double[data.getColumnDimension()];
double[] maxs = new double[data.getColumnDimension()];
double[] vars = new double[data.getColumnDimension()];
for (int j = 0; j < data.getColumnDimension(); j++) {
RealVector column = data.getColumnVector(j);
mins[j] = column.getMinValue();
maxs[j] = column.getMaxValue();
DescriptiveStatistics stats = new DescriptiveStatistics(column.toArray());
vars[j] = stats.getVariance();
}
// DataSet dataSet = DataSetBuilder.getBuilder()
// .numDataPoints(rows)
// .numFeatures(data.getColumnDimension())
// .build();
// for (int i=0;i<dataSet.getNumDataPoints();i++){
// for (int j=0;j<dataSet.getNumFeatures();j++){
// if (data.getEntry(i,j)>255.0/2){
// dataSet.setFeatureValue(i,j,1);
// } else {
// dataSet.setFeatureValue(i,j,0);
// }
//
// }
// }
//
// int numComponents = 10;
//
//
// BM bm = BMSelector.select(dataSet, numComponents, 100);
// System.out.println(Arrays.toString(bm.getMixtureCoefficients()));
// StringBuilder stringBuilder = new StringBuilder();
// for (int k=0;k<numComponents;k++){
// for (int d=0;d<dataSet.getNumFeatures();d++){
// stringBuilder.append(bm.getDistributions()[k][d].getP());
// if (d!=dataSet.getNumFeatures()-1){
// stringBuilder.append(",");
// }
// }
// stringBuilder.append("\n");
// }
// FileUtils.writeStringToFile(new File("/Users/chengli/tmp/gmm/bm"),stringBuilder.toString());
// BMTrainer bmTrainer = BMSelector.selectTrainer(dataSet,numComponents,50);
//
// GMM gmm = new GMM(dim,numComponents, data);
//
// GMMTrainer trainer = new GMMTrainer(data, gmm);
//
// trainer.setGammas(bmTrainer.getGammas());
// trainer.mStep();
//
// for (int i=1;i<=5;i++){
// System.out.println("iteration = "+i);
// trainer.iterate();
// double logLikelihood = IntStream.range(0,rows).parallel()
// .mapToDouble(j->gmm.logDensity(data.getRowVector(j))).sum();
// System.out.println("log likelihood = "+logLikelihood);
// Serialization.serialize(gmm, "/Users/chengli/tmp/gmm/model_iter_"+i);
// for (int k=0;k<gmm.getNumComponents();k++){
// FileUtils.writeStringToFile(new File("/Users/chengli/tmp/gmm/mean_iter_"+i+"_component_"+(k+1)),
// gmm.getGaussianDistributions()[k].getMean().toString().replace("{","")
// .replace("}","").replace(";",","));
// }
// }
//
// FileUtils.writeStringToFile(new File("/Users/chengli/tmp/gmm/modeltext"), gmm.toString());
// GMM gmm = (GMM) Serialization.deserialize("/Users/chengli/tmp/gmm/model_3");
}
use of org.apache.commons.math3.linear.RealVector in project GDSC-SMLM by aherbert.
the class TrackPopulationAnalysisTest method canComputeFbmDiffusionFunction.
@Test
void canComputeFbmDiffusionFunction() {
final int size = 10;
final double delta = 1e-6;
final DoubleDoubleBiPredicate test = TestHelper.doublesAreClose(1e-5);
for (final double t : new double[] { 0.8, 1, 1.2 }) {
final MultivariateJacobianFunction f = new FbmDiffusionFunction(size, t);
for (final double d : new double[] { 0.8, 0.9, 1, 1.1, 1.2 }) {
for (final double s : new double[] { 2, 20 }) {
for (final double alpha : new double[] { 1e-2, 0.8, 0.9, 1, 1.1, 1.2, 2.0 }) {
// Check the value and Jacobian
final RealVector point = new ArrayRealVector(new double[] { d, s, alpha }, false);
final Pair<RealVector, RealMatrix> p = f.value(point);
final double[] value = p.getFirst().toArray();
Assertions.assertEquals(size, value.length);
final double a1 = alpha + 1;
final double a2 = alpha + 2;
final double ta = Math.pow(t, alpha);
for (int n = 1; n <= size; n++) {
// MSD = [4Dt^a / (a+2)(a+1)] * [(n+1)^(a+2) + (n-1)^(a+2) - 2n^(a+2)]
// - [8Dt^a / (a+2)(a+1)] + 4s^2
// assume t=1.
final double msd = (4 * d * ta / (a2 * a1)) * (Math.pow(n + 1, a2) + Math.pow(n - 1, a2) - 2 * Math.pow(n, a2)) - 8 * d * ta / (a2 * a1) + 4 * s * s;
TestAssertions.assertTest(msd, value[n - 1], test, "value");
}
// Columns of the Jacobian
final double[] dfda1 = p.getSecond().getColumn(0);
final double[] dfdb1 = p.getSecond().getColumn(1);
final double[] dfdc1 = p.getSecond().getColumn(2);
point.setEntry(0, d - delta);
RealVector v1 = f.value(point).getFirst();
point.setEntry(0, d + delta);
RealVector v2 = f.value(point).getFirst();
final double[] dfda = v2.subtract(v1).mapDivide(2 * delta).toArray();
point.setEntry(0, d);
point.setEntry(1, s - delta);
v1 = f.value(point).getFirst();
point.setEntry(1, s + delta);
v2 = f.value(point).getFirst();
final double[] dfdb = v2.subtract(v1).mapDivide(2 * delta).toArray();
point.setEntry(1, s);
point.setEntry(2, alpha - delta);
v1 = f.value(point).getFirst();
point.setEntry(2, alpha + delta);
v2 = f.value(point).getFirst();
final double[] dfdc = v2.subtract(v1).mapDivide(2 * delta).toArray();
// Element-by-element relative error
TestAssertions.assertArrayTest(dfda, dfda1, test, "jacobian dfda");
TestAssertions.assertArrayTest(dfdb, dfdb1, test, "jacobian dfdb");
TestAssertions.assertArrayTest(dfdc, dfdc1, test, "jacobian dfdc");
}
}
}
}
}
use of org.apache.commons.math3.linear.RealVector in project GDSC-SMLM by aherbert.
the class TrackPopulationAnalysisTest method canComputeBrownianModelUsingFbmFunction.
@Test
void canComputeBrownianModelUsingFbmFunction() {
final int size = 10;
final DoubleDoubleBiPredicate test = TestHelper.doublesAreClose(1e-5);
final double alpha = 1.0;
for (final double t : new double[] { 0.8, 1, 1.2 }) {
final BrownianDiffusionFunction f1 = new BrownianDiffusionFunction(size, t);
final FbmDiffusionFunction f2 = new FbmDiffusionFunction(size, t);
for (final double d : new double[] { 0.8, 0.9, 1, 1.1, 1.2 }) {
for (final double s : new double[] { 2, 20 }) {
// Check the value and Jacobian
final RealVector point1 = new ArrayRealVector(new double[] { d, s, alpha }, false);
final Pair<RealVector, RealMatrix> p1 = f1.value(point1);
final double[] value1 = p1.getFirst().toArray();
final RealVector point2 = new ArrayRealVector(new double[] { d, s, alpha }, false);
final Pair<RealVector, RealMatrix> p2 = f2.value(point2);
final double[] value2 = p2.getFirst().toArray();
TestAssertions.assertArrayTest(value1, value2, test, "value");
final double[] dfda1 = p1.getSecond().getColumn(0);
final double[] dfdb1 = p1.getSecond().getColumn(1);
final double[] dfda2 = p2.getSecond().getColumn(0);
final double[] dfdb2 = p2.getSecond().getColumn(1);
TestAssertions.assertArrayTest(dfda1, dfda2, test, "jacobian dfda");
TestAssertions.assertArrayTest(dfdb1, dfdb2, test, "jacobian dfdb");
}
}
}
}
use of org.apache.commons.math3.linear.RealVector in project GDSC-SMLM by aherbert.
the class ApacheLvmFitter method computeFit.
@Override
public FitStatus computeFit(double[] y, final double[] fx, double[] a, double[] parametersVariance) {
final int n = y.length;
try {
// Different convergence thresholds seem to have no effect on the resulting fit, only the
// number of
// iterations for convergence
final double initialStepBoundFactor = 100;
final double costRelativeTolerance = 1e-10;
final double parRelativeTolerance = 1e-10;
final double orthoTolerance = 1e-10;
final double threshold = Precision.SAFE_MIN;
// Extract the parameters to be fitted
final double[] initialSolution = getInitialSolution(a);
// TODO - Pass in more advanced stopping criteria.
// Create the target and weight arrays
final double[] yd = new double[n];
// final double[] w = new double[n];
for (int i = 0; i < n; i++) {
yd[i] = y[i];
// w[i] = 1;
}
final LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer(initialStepBoundFactor, costRelativeTolerance, parRelativeTolerance, orthoTolerance, threshold);
// @formatter:off
final LeastSquaresBuilder builder = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(getMaxEvaluations()).start(initialSolution).target(yd);
if (function instanceof ExtendedNonLinearFunction && ((ExtendedNonLinearFunction) function).canComputeValuesAndJacobian()) {
// Compute together, or each individually
builder.model(new ValueAndJacobianFunction() {
final ExtendedNonLinearFunction fun = (ExtendedNonLinearFunction) function;
@Override
public Pair<RealVector, RealMatrix> value(RealVector point) {
final double[] p = point.toArray();
final org.apache.commons.lang3.tuple.Pair<double[], double[][]> result = fun.computeValuesAndJacobian(p);
return new Pair<>(new ArrayRealVector(result.getKey(), false), new Array2DRowRealMatrix(result.getValue(), false));
}
@Override
public RealVector computeValue(double[] params) {
return new ArrayRealVector(fun.computeValues(params), false);
}
@Override
public RealMatrix computeJacobian(double[] params) {
return new Array2DRowRealMatrix(fun.computeJacobian(params), false);
}
});
} else {
// Compute separately
builder.model(new MultivariateVectorFunctionWrapper((NonLinearFunction) function, a, n), new MultivariateMatrixFunctionWrapper((NonLinearFunction) function, a, n));
}
final LeastSquaresProblem problem = builder.build();
final Optimum optimum = optimizer.optimize(problem);
final double[] parameters = optimum.getPoint().toArray();
setSolution(a, parameters);
iterations = optimum.getIterations();
evaluations = optimum.getEvaluations();
if (parametersVariance != null) {
// Set up the Jacobian.
final RealMatrix j = optimum.getJacobian();
// Compute transpose(J)J.
final RealMatrix jTj = j.transpose().multiply(j);
final double[][] data = (jTj instanceof Array2DRowRealMatrix) ? ((Array2DRowRealMatrix) jTj).getDataRef() : jTj.getData();
final FisherInformationMatrix m = new FisherInformationMatrix(data);
setDeviations(parametersVariance, m);
}
// Compute function value
if (fx != null) {
final ValueFunction function = (ValueFunction) this.function;
function.initialise0(a);
function.forEach(new ValueProcedure() {
int index;
@Override
public void execute(double value) {
fx[index++] = value;
}
});
}
// As this is unweighted then we can do this to get the sum of squared residuals
// This is the same as optimum.getCost() * optimum.getCost(); The getCost() function
// just computes the dot product anyway.
value = optimum.getResiduals().dotProduct(optimum.getResiduals());
} catch (final TooManyEvaluationsException ex) {
return FitStatus.TOO_MANY_EVALUATIONS;
} catch (final TooManyIterationsException ex) {
return FitStatus.TOO_MANY_ITERATIONS;
} catch (final ConvergenceException ex) {
// Occurs when QR decomposition fails - mark as a singular non-linear model (no solution)
return FitStatus.SINGULAR_NON_LINEAR_MODEL;
} catch (final Exception ex) {
// TODO - Find out the other exceptions from the fitter and add return values to match.
return FitStatus.UNKNOWN;
}
return FitStatus.OK;
}
use of org.apache.commons.math3.linear.RealVector in project GDSC-SMLM by aherbert.
the class TraceDiffusion method fitMsd.
/**
* Fit the MSD using a linear fit that must pass through 0,0.
*
* <p>Update the plot by adding the fit line.
*
* @param x the x
* @param y the y
* @param title the title
* @param plot the plot
* @return [D, precision]
*/
private double[] fitMsd(double[] x, double[] y, String title, Plot plot) {
// The Weimann paper (Plos One e64287) fits:
// MSD(n dt) = 4D n dt + 4s^2
// n = number of jumps
// dt = time difference between frames
// s = localisation precision
// Thus we should fit an intercept as well.
// From the fit D = gradient / (4*exposureTime)
// D
double diffCoeff = 0;
double intercept = 0;
double precision = 0;
final LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer();
Optimum lvmSolution;
double ic = Double.NaN;
// Fit with no intercept
try {
final LinearFunction function = new LinearFunction(x, y, clusteringSettings.getFitLength());
final double[] parameters = new double[] { function.guess() };
// @formatter:off
final LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(parameters).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, function::jacobian).build();
// @formatter:on
lvmSolution = optimizer.optimize(problem);
final double ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
// double ss = 0;
// double[] obs = function.getY();
// double[] exp = lvmSolution.getValue();
// for (int i = 0; i < obs.length; i++)
// ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
ic = getAkaikeInformationCriterionFromResiduals(ss, function.getY().length, 1);
final double gradient = lvmSolution.getPoint().getEntry(0);
diffCoeff = gradient / 4;
ImageJUtils.log("Linear fit (%d points) : Gradient = %s, D = %s um^2/s, SS = %s, " + "IC = %s (%d evaluations)", function.getY().length, MathUtils.rounded(gradient, 4), MathUtils.rounded(diffCoeff, 4), MathUtils.rounded(ss), MathUtils.rounded(ic), lvmSolution.getEvaluations());
} catch (final TooManyIterationsException ex) {
ImageJUtils.log("Failed to fit : Too many iterations (%s)", ex.getMessage());
} catch (final ConvergenceException ex) {
ImageJUtils.log("Failed to fit : %s", ex.getMessage());
}
// Fit with intercept.
// Optionally include the intercept (which is the estimated precision).
final boolean fitIntercept = true;
try {
final LinearFunctionWithIntercept function = new LinearFunctionWithIntercept(x, y, clusteringSettings.getFitLength(), fitIntercept);
// @formatter:off
final LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(function.guess()).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, function::jacobian).build();
// @formatter:on
lvmSolution = optimizer.optimize(problem);
final RealVector residuals = lvmSolution.getResiduals();
final double ss = residuals.dotProduct(residuals);
// double ss = 0;
// double[] obs = function.getY();
// double[] exp = lvmSolution.getValue();
// for (int i = 0; i < obs.length; i++)
// ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
final double ic2 = getAkaikeInformationCriterionFromResiduals(ss, function.getY().length, 2);
final double gradient = lvmSolution.getPoint().getEntry(0);
final double s = lvmSolution.getPoint().getEntry(1);
final double intercept2 = 4 * s * s;
if (ic2 < ic || Double.isNaN(ic)) {
if (settings.debugFitting) {
// Convert fitted precision in um to nm
ImageJUtils.log("Linear fit with intercept (%d points) : Gradient = %s, Intercept = %s, " + "D = %s um^2/s, precision = %s nm, SS = %s, IC = %s (%d evaluations)", function.getY().length, MathUtils.rounded(gradient, 4), MathUtils.rounded(intercept2, 4), MathUtils.rounded(gradient / 4, 4), MathUtils.rounded(s * 1000, 4), MathUtils.rounded(ss), MathUtils.rounded(ic2), lvmSolution.getEvaluations());
}
intercept = intercept2;
diffCoeff = gradient / 4;
precision = s;
}
} catch (final TooManyIterationsException ex) {
ImageJUtils.log("Failed to fit with intercept : Too many iterations (%s)", ex.getMessage());
} catch (final ConvergenceException ex) {
ImageJUtils.log("Failed to fit with intercept : %s", ex.getMessage());
}
if (clusteringSettings.getMsdCorrection()) {
// i.e. the intercept is allowed to be a small negative.
try {
// This function fits the jump distance (n) not the time (nt) so update x
final double[] x2 = new double[x.length];
for (int i = 0; i < x2.length; i++) {
x2[i] = x[i] / exposureTime;
}
final LinearFunctionWithMsdCorrectedIntercept function = new LinearFunctionWithMsdCorrectedIntercept(x2, y, clusteringSettings.getFitLength(), fitIntercept);
// @formatter:off
final LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(function.guess()).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, function::jacobian).build();
// @formatter:on
lvmSolution = optimizer.optimize(problem);
final RealVector residuals = lvmSolution.getResiduals();
final double ss = residuals.dotProduct(residuals);
// double ss = 0;
// double[] obs = function.getY();
// double[] exp = lvmSolution.getValue();
// for (int i = 0; i < obs.length; i++)
// ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
final double ic2 = getAkaikeInformationCriterionFromResiduals(ss, function.getY().length, 2);
double gradient = lvmSolution.getPoint().getEntry(0);
final double s = lvmSolution.getPoint().getEntry(1);
final double intercept2 = 4 * s * s - gradient / 3;
// Q. Is this working?
// Try fixed precision fitting. Is the gradient correct?
// Revisit all the equations to see if they are wrong.
// Try adding the x[0] datapoint using the precision.
// Change the formula to not be linear at x[0] and to just fit the precision, i.e. the
// intercept2 = 4 * s * s - gradient / 3 is wrong as the
// equation is not linear below n=1.
// Incorporate the exposure time into the gradient to allow comparison to other fits
gradient /= exposureTime;
if (ic2 < ic || Double.isNaN(ic)) {
if (settings.debugFitting) {
// Convert fitted precision in um to nm
ImageJUtils.log("Linear fit with MSD corrected intercept (%d points) : Gradient = %s, " + "Intercept = %s, D = %s um^2/s, precision = %s nm, SS = %s, " + "IC = %s (%d evaluations)", function.getY().length, MathUtils.rounded(gradient, 4), MathUtils.rounded(intercept2, 4), MathUtils.rounded(gradient / 4, 4), MathUtils.rounded(s * 1000, 4), MathUtils.rounded(ss), MathUtils.rounded(ic2), lvmSolution.getEvaluations());
}
intercept = intercept2;
diffCoeff = gradient / 4;
precision = s;
}
} catch (final TooManyIterationsException ex) {
ImageJUtils.log("Failed to fit with intercept : Too many iterations (%s)", ex.getMessage());
} catch (final ConvergenceException ex) {
ImageJUtils.log("Failed to fit with intercept : %s", ex.getMessage());
}
}
// Add the fit to the plot
if (diffCoeff > 0) {
plot.setColor(Color.magenta);
plot.drawLine(0, intercept, x[x.length - 1], 4 * diffCoeff * x[x.length - 1] + intercept);
display(title, plot);
checkTraceSettings(diffCoeff);
}
return new double[] { diffCoeff, precision };
}
Aggregations