
Example 6 with Cluster

use of org.apache.commons.math3.ml.clustering.Cluster in project GDSC-SMLM by aherbert.

the class PCPALMClusters method fitBinomial.

/**
	 * Fit a zero-truncated Binomial to the cumulative histogram
	 * 
	 * @param histogramData
	 *            the histogram data
	 * @return the fitted parameters [N, p], or null if the fit failed
	 */
private double[] fitBinomial(HistogramData histogramData) {
    // Get the mean and sum of the input histogram
    double mean;
    double sum = 0;
    count = 0;
    for (int i = 0; i < histogramData.histogram[1].length; i++) {
        count += histogramData.histogram[1][i];
        sum += histogramData.histogram[1][i] * i;
    }
    mean = sum / count;
    String name = "Zero-truncated Binomial distribution";
    Utils.log("Mean cluster size = %s", Utils.rounded(mean));
    Utils.log("Fitting cumulative " + name);
    // Convert to a normalised double array for the binomial fitter
    double[] histogram = new double[histogramData.histogram[1].length];
    for (int i = 0; i < histogramData.histogram[1].length; i++) histogram[i] = histogramData.histogram[1][i] / count;
    // Plot the cumulative histogram
    String title = TITLE + " Cumulative Distribution";
    Plot2 plot = null;
    if (showCumulativeHistogram) {
        // Create a cumulative histogram for fitting
        double[] cumulativeHistogram = new double[histogram.length];
        sum = 0;
        for (int i = 0; i < histogram.length; i++) {
            sum += histogram[i];
            cumulativeHistogram[i] = sum;
        }
        double[] values = Utils.newArray(histogram.length, 0.0, 1.0);
        plot = new Plot2(title, "N", "Cumulative Probability", values, cumulativeHistogram);
        plot.setLimits(0, histogram.length - 1, 0, 1.05);
        plot.addPoints(values, cumulativeHistogram, Plot2.CIRCLE);
        Utils.display(title, plot);
    }
    // Do fitting for different N
    double bestSS = Double.POSITIVE_INFINITY;
    double[] parameters = null;
    int worse = 0;
    int N = histogram.length - 1;
    int min = minN;
    final boolean customRange = (minN > 1) || (maxN > 0);
    if (min > N)
        min = N;
    if (maxN > 0 && N > maxN)
        N = maxN;
    Utils.log("Fitting N from %d to %d%s", min, N, (customRange) ? " (custom-range)" : "");
    // Since N varies in integer steps, fit for n = min, min+1, ...
    // until the SS stops improving (i.e. is worse than the best
    // score several times in succession)
    BinomialFitter bf = new BinomialFitter(new IJLogger());
    bf.setMaximumLikelihood(maximumLikelihood);
    for (int n = min; n <= N; n++) {
        PointValuePair solution = bf.fitBinomial(histogram, mean, n, true);
        if (solution == null)
            continue;
        double p = solution.getPointRef()[0];
        Utils.log("Fitted %s : N=%d, p=%s. SS=%g", name, n, Utils.rounded(p), solution.getValue());
        if (bestSS > solution.getValue()) {
            bestSS = solution.getValue();
            parameters = new double[] { n, p };
            worse = 0;
        } else if (bestSS < Double.POSITIVE_INFINITY) {
            if (++worse >= 3)
                break;
        }
        if (showCumulativeHistogram)
            addToPlot(n, p, title, plot, new Color((float) n / N, 0, 1f - (float) n / N));
    }
    // Add the best fit in magenta
    if (showCumulativeHistogram && parameters != null)
        addToPlot((int) parameters[0], parameters[1], title, plot, Color.magenta);
    return parameters;
}
Also used : Color(java.awt.Color) BinomialFitter(gdsc.smlm.fitting.BinomialFitter) Plot2(ij.gui.Plot2) ClusterPoint(gdsc.core.clustering.ClusterPoint) IJLogger(gdsc.core.ij.IJLogger) PointValuePair(org.apache.commons.math3.optim.PointValuePair)
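
For reference, a zero-truncated binomial assigns zero probability to k = 0 and renormalises the remaining mass, which is why the fit starts from the non-empty clusters. A minimal sketch of that shape, using the real commons-math3 BinomialDistribution (the helper itself is hypothetical; how BinomialFitter evaluates the model internally is not shown by this example):

import org.apache.commons.math3.distribution.BinomialDistribution;

public class ZeroTruncatedBinomial {
    /** Hypothetical helper: PMF of a zero-truncated Binomial(n, p). */
    static double probability(int k, int n, double p) {
        if (k < 1 || k > n)
            return 0;
        BinomialDistribution binom = new BinomialDistribution(n, p);
        // Drop the k = 0 mass and renormalise what remains.
        return binom.probability(k) / (1 - binom.probability(0));
    }

    public static void main(String[] args) {
        // Probabilities for N = 5, p = 0.3 sum to 1 over k = 1..5.
        for (int k = 1; k <= 5; k++)
            System.out.printf("P(X=%d) = %f%n", k, probability(k, 5, 0.3));
    }
}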

Example 7 with Cluster

use of org.apache.commons.math3.ml.clustering.Cluster in project GDSC-SMLM by aherbert.

the class PCPALMFitting method fitEmulsionModel.

/**
	 * Fits the correlation curve with r>0 to the emulsion model using the estimated density and precision. Parameters
	 * must be fit within a tolerance of the starting values.
	 * 
	 * @param gr
	 *            The correlation curve g(r)
	 * @param sigmaS
	 *            The estimated precision
	 * @param proteinDensity
	 *            The estimated protein density
	 * @param resultColour
	 *            The colour used to record the result
	 * @return The fitted parameters [precision, density, domainRadius, domainDensity, coherence]
	 */
private double[] fitEmulsionModel(double[][] gr, double sigmaS, double proteinDensity, String resultColour) {
    final EmulsionModelFunctionGradient function = new EmulsionModelFunctionGradient();
    emulsionModel = function;
    log("Fitting %s: Estimated precision = %f nm, estimated protein density = %g um^-2", emulsionModel.getName(), sigmaS, proteinDensity * 1e6);
    emulsionModel.setLogging(true);
    for (int i = offset; i < gr[0].length; i++) {
        // Only fit the curve above the estimated resolution (points below it will be subject to error)
        if (gr[0][i] > sigmaS * fitAboveEstimatedPrecision)
            emulsionModel.addPoint(gr[0][i], gr[1][i]);
    }
    double[] parameters;
    // The model is: sigma, density, range, amplitude, alpha
    double[] initialSolution = new double[] { sigmaS, proteinDensity, sigmaS * 5, 1, sigmaS * 5 };
    int evaluations = 0;
    // Constrain the fitting to be close to the estimated precision (sigmaS) and protein density.
    // LVM fitting does not support constrained fitting so use a bounded optimiser.
    SumOfSquaresModelFunction emulsionModelMulti = new SumOfSquaresModelFunction(emulsionModel);
    double[] x = emulsionModelMulti.x;
    double[] y = emulsionModelMulti.y;
    // Range should be equal to the first time the g(r) curve crosses 1
    for (int i = 0; i < x.length; i++) {
        if (y[i] < 1) {
            initialSolution[4] = initialSolution[2] = (i > 0) ? (x[i - 1] + x[i]) * 0.5 : x[i];
            break;
        }
    }
    // Put some bounds around the initial guess. Use the fitting tolerance (in %) if provided.
    double limit = (fittingTolerance > 0) ? 1 + fittingTolerance / 100 : 2;
    double[] lB = new double[] { initialSolution[0] / limit, initialSolution[1] / limit, 0, 0, 0 };
    // The amplitude and range should not extend beyond the limits of the g(r) curve.
    // TODO - Find out the expected range for the alpha parameter.  
    double[] uB = new double[] { initialSolution[0] * limit, initialSolution[1] * limit, Maths.max(x), Maths.max(gr[1]), Maths.max(x) * 2 };
    log("Fitting %s using a bounded search: %s < precision < %s & %s < density < %s", emulsionModel.getName(), Utils.rounded(lB[0], 4), Utils.rounded(uB[0], 4), Utils.rounded(lB[1] * 1e6, 4), Utils.rounded(uB[1] * 1e6, 4));
    PointValuePair constrainedSolution = runBoundedOptimiser(gr, initialSolution, lB, uB, emulsionModelMulti);
    if (constrainedSolution == null)
        return null;
    parameters = constrainedSolution.getPointRef();
    evaluations = boundedEvaluations;
    // Re-fit using a Levenberg-Marquardt (LVM) gradient optimiser
    if (useLSE) {
        log("Re-fitting %s using a gradient optimisation", emulsionModel.getName());
        LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer();
        Optimum lvmSolution;
        try {
            //@formatter:off
            LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(parameters).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, new MultivariateMatrixFunction() {

                public double[][] value(double[] point) throws IllegalArgumentException {
                    return function.jacobian(point);
                }
            }).build();
            //@formatter:on
            lvmSolution = optimizer.optimize(problem);
            evaluations += lvmSolution.getEvaluations();
            double ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
            if (ss < constrainedSolution.getValue()) {
                log("Re-fitting %s improved the SS from %s to %s (-%s%%)", emulsionModel.getName(), Utils.rounded(constrainedSolution.getValue(), 4), Utils.rounded(ss, 4), Utils.rounded(100 * (constrainedSolution.getValue() - ss) / constrainedSolution.getValue(), 4));
                parameters = lvmSolution.getPoint().toArray();
            }
        } catch (TooManyIterationsException e) {
            log("Failed to re-fit %s: Too many iterations (%s)", emulsionModel.getName(), e.getMessage());
        } catch (ConvergenceException e) {
            log("Failed to re-fit %s: %s", emulsionModel.getName(), e.getMessage());
        }
    }
    emulsionModel.setLogging(false);
    // Ensure the width is positive
    parameters[0] = Math.abs(parameters[0]);
    //parameters[2] = Math.abs(parameters[2]);
    double ss = 0;
    double[] obs = emulsionModel.getY();
    double[] exp = emulsionModel.value(parameters);
    for (int i = 0; i < obs.length; i++) ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
    ic3 = Maths.getAkaikeInformationCriterionFromResiduals(ss, emulsionModel.size(), parameters.length);
    final double fitSigmaS = parameters[0];
    final double fitProteinDensity = parameters[1];
    //The radius of the cluster domain
    final double domainRadius = parameters[2];
    //The density of the cluster domain
    final double domainDensity = parameters[3];
    //The coherence length between circles
    final double coherence = parameters[4];
    // This is from the PC-PALM paper. It may not be correct for the emulsion model.
    final double nCluster = 2 * domainDensity * Math.PI * domainRadius * domainRadius * fitProteinDensity;
    double e1 = parameterDrift(sigmaS, fitSigmaS);
    double e2 = parameterDrift(proteinDensity, fitProteinDensity);
    log("  %s fit: SS = %f. cAIC = %f. %d evaluations", emulsionModel.getName(), ss, ic3, evaluations);
    log("  %s parameters:", emulsionModel.getName());
    log("    Average precision = %s nm (%s%%)", Utils.rounded(fitSigmaS, 4), Utils.rounded(e1, 4));
    log("    Average protein density = %s um^-2 (%s%%)", Utils.rounded(fitProteinDensity * 1e6, 4), Utils.rounded(e2, 4));
    log("    Domain radius = %s nm", Utils.rounded(domainRadius, 4));
    log("    Domain density = %s", Utils.rounded(domainDensity, 4));
    log("    Domain coherence = %s", Utils.rounded(coherence, 4));
    log("    nCluster = %s", Utils.rounded(nCluster, 4));
    // Check the fitted parameters are within tolerance of the initial estimates
    valid2 = true;
    if (fittingTolerance > 0 && (Math.abs(e1) > fittingTolerance || Math.abs(e2) > fittingTolerance)) {
        log("  Failed to fit %s within tolerance (%s%%): Average precision = %f nm (%s%%), average protein density = %g um^-2 (%s%%)", emulsionModel.getName(), Utils.rounded(fittingTolerance, 4), fitSigmaS, Utils.rounded(e1, 4), fitProteinDensity * 1e6, Utils.rounded(e2, 4));
        valid2 = false;
    }
    // Check extra parameters. Domain radius should be higher than the precision. Density should be positive
    if (domainRadius < fitSigmaS) {
        log("  Failed to fit %s: Domain radius is smaller than the average precision (%s < %s)", emulsionModel.getName(), Utils.rounded(domainRadius, 4), Utils.rounded(fitSigmaS, 4));
        valid2 = false;
    }
    if (domainDensity < 0) {
        log("  Failed to fit %s: Domain density is negative (%s)", emulsionModel.getName(), Utils.rounded(domainDensity, 4));
        valid2 = false;
    }
    if (ic3 > ic1) {
        log("  Failed to fit %s - Information Criterion has increased %s%%", emulsionModel.getName(), Utils.rounded((100 * (ic3 - ic1) / ic1), 4));
        valid2 = false;
    }
    addResult(emulsionModel.getName(), resultColour, valid2, fitSigmaS, fitProteinDensity, domainRadius, domainDensity, nCluster, coherence, ic3);
    return parameters;
}
Also used : PointValuePair(org.apache.commons.math3.optim.PointValuePair) LeastSquaresBuilder(org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder) Optimum(org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum) LevenbergMarquardtOptimizer(org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer) DiagonalMatrix(org.apache.commons.math3.linear.DiagonalMatrix) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) TooManyIterationsException(org.apache.commons.math3.exception.TooManyIterationsException) LeastSquaresProblem(org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem) MultivariateMatrixFunction(org.apache.commons.math3.analysis.MultivariateMatrixFunction)
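
The corrected Akaike information criterion compared above (ic3 against ic1) is conventionally computed from the residual sum of squares. The sketch below shows one common definition; the exact formula inside Maths.getAkaikeInformationCriterionFromResiduals is an assumption, not confirmed by this example:

/**
 * Corrected AIC from a residual sum of squares (assumed form).
 *
 * @param ss the residual sum of squares
 * @param n the number of fitted data points
 * @param k the number of model parameters
 */
static double akaikeInformationCriterionFromResiduals(double ss, int n, int k) {
    // Least-squares AIC plus the small-sample correction term.
    double aic = n * Math.log(ss / n) + 2.0 * k;
    return aic + (2.0 * k * (k + 1)) / (double) (n - k - 1);
}

Lower values indicate a better trade-off between fit quality and parameter count, which is why the fit is rejected above when ic3 exceeds ic1.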

Example 8 with Cluster

use of org.apache.commons.math3.ml.clustering.Cluster in project hadoop by apache.

the class TestClusterTopology method testCountNumNodes.

/**
   * Test the count of available nodes with an exclude list
   */
@Test
public void testCountNumNodes() throws Exception {
    // create the topology
    NetworkTopology cluster = NetworkTopology.getInstance(new Configuration());
    NodeElement node1 = getNewNode("node1", "/d1/r1");
    cluster.add(node1);
    NodeElement node2 = getNewNode("node2", "/d1/r2");
    cluster.add(node2);
    NodeElement node3 = getNewNode("node3", "/d1/r3");
    cluster.add(node3);
    NodeElement node4 = getNewNode("node4", "/d1/r4");
    cluster.add(node4);
    // create exclude list
    List<Node> excludedNodes = new ArrayList<Node>();
    assertEquals("4 nodes should be available", 4, cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes));
    NodeElement deadNode = getNewNode("node5", "/d1/r2");
    excludedNodes.add(deadNode);
    assertEquals("4 nodes should be available with extra excluded Node", 4, cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes));
    // add one existing node to exclude list
    excludedNodes.add(node4);
    assertEquals("excluded nodes with ROOT scope should be considered", 3, cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes));
    assertEquals("excluded nodes without ~ scope should be considered", 2, cluster.countNumOfAvailableNodes("~" + deadNode.getNetworkLocation(), excludedNodes));
    assertEquals("excluded nodes with rack scope should be considered", 1, cluster.countNumOfAvailableNodes(deadNode.getNetworkLocation(), excludedNodes));
    // adding the node in excluded scope to excluded list
    excludedNodes.add(node2);
    assertEquals("excluded nodes with ~ scope should be considered", 2, cluster.countNumOfAvailableNodes("~" + deadNode.getNetworkLocation(), excludedNodes));
    // get the count for a non-existent scope
    assertEquals("No nodes should be considered for a non-existent scope", 0, cluster.countNumOfAvailableNodes("/non-exist", excludedNodes));
    // remove a node from the cluster
    cluster.remove(node1);
    assertEquals("1 node should be available", 1, cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) ChiSquareTest(org.apache.commons.math3.stat.inference.ChiSquareTest) Test(org.junit.Test)
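
Two details the assertions depend on: a scope prefixed with "~" counts nodes outside that scope, and an excluded node only reduces the count if it actually exists in the topology (hence the dead node5 changes nothing at ROOT). The getNewNode factory used throughout is roughly the following sketch; the NodeElement inner class and its exact body are assumptions:

private NodeElement getNewNode(String name, String rackLocation) {
    // NodeElement is the test's own Node implementation (body assumed);
    // setNetworkLocation comes from the org.apache.hadoop.net.Node interface.
    NodeElement node = new NodeElement(name);
    node.setNetworkLocation(rackLocation);
    return node;
}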

Example 9 with Cluster

use of org.apache.commons.math3.ml.clustering.Cluster in project cruise-control by linkedin.

the class ClusterModel method sanityCheck.

/**
 * (1) Check whether each load in the cluster contains exactly the number of windows defined by the Load.
 * (2) Check whether the sums of the loads in the cluster / rack / broker / replica are consistent with each other.
 */
public void sanityCheck() {
    // SANITY CHECK #1: Each load in the cluster must contain exactly the number of windows defined by the Load.
    Map<String, Integer> errorMsgAndNumWindows = new HashMap<>();
    int expectedNumWindows = _load.numWindows();
    // Check leadership loads.
    for (Map.Entry<Integer, Load> entry : _potentialLeadershipLoadByBrokerId.entrySet()) {
        int brokerId = entry.getKey();
        Load load = entry.getValue();
        if (load.numWindows() != expectedNumWindows && broker(brokerId).replicas().size() != 0) {
            errorMsgAndNumWindows.put("Leadership(" + brokerId + ")", load.numWindows());
        }
    }
    // Check rack loads.
    for (Rack rack : _racksById.values()) {
        if (rack.load().numWindows() != expectedNumWindows && rack.replicas().size() != 0) {
            errorMsgAndNumWindows.put("Rack(id:" + rack.id() + ")", rack.load().numWindows());
        }
        // Check the host load.
        for (Host host : rack.hosts()) {
            if (host.load().numWindows() != expectedNumWindows && host.replicas().size() != 0) {
                errorMsgAndNumWindows.put("Host(id:" + host.name() + ")", host.load().numWindows());
            }
            // Check broker loads.
            for (Broker broker : rack.brokers()) {
                if (broker.load().numWindows() != expectedNumWindows && broker.replicas().size() != 0) {
                    errorMsgAndNumWindows.put("Broker(id:" + broker.id() + ")", broker.load().numWindows());
                }
                // Check replica loads.
                for (Replica replica : broker.replicas()) {
                    if (replica.load().numWindows() != expectedNumWindows) {
                        errorMsgAndNumWindows.put("Replica(id:" + replica.topicPartition() + "-" + broker.id() + ")", replica.load().numWindows());
                    }
                }
            }
        }
    }
    StringBuilder exceptionMsg = new StringBuilder();
    for (Map.Entry<String, Integer> entry : errorMsgAndNumWindows.entrySet()) {
        exceptionMsg.append(String.format("[%s: %d]%n", entry.getKey(), entry.getValue()));
    }
    if (exceptionMsg.length() > 0) {
        throw new IllegalArgumentException("Loads must have all have " + expectedNumWindows + " windows. Following " + "loads violate this constraint with specified number of windows: " + exceptionMsg);
    }
    // SANITY CHECK #2: Sum of loads in the cluster / rack / broker / replica must be consistent with each other.
    String prologueErrorMsg = "Inconsistent load distribution.";
    // Check equality of sum of the replica load to their broker load for each resource.
    for (Broker broker : brokers()) {
        for (Resource resource : Resource.values()) {
            double sumOfReplicaUtilization = 0.0;
            for (Replica replica : broker.replicas()) {
                sumOfReplicaUtilization += replica.load().expectedUtilizationFor(resource);
            }
            if (AnalyzerUtils.compare(sumOfReplicaUtilization, broker.load().expectedUtilizationFor(resource), resource) != 0) {
                throw new IllegalArgumentException(prologueErrorMsg + " Broker utilization for " + resource + " is different " + "from the total replica utilization in the broker with id: " + broker.id() + ". Sum of the replica utilization: " + sumOfReplicaUtilization + ", broker utilization: " + broker.load().expectedUtilizationFor(resource));
            }
        }
    }
    // Check equality of sum of the broker load to their rack load for each resource.
    Map<Resource, Double> sumOfRackUtilizationByResource = new HashMap<>();
    for (Rack rack : _racksById.values()) {
        Map<Resource, Double> sumOfHostUtilizationByResource = new HashMap<>();
        for (Host host : rack.hosts()) {
            for (Resource resource : Resource.values()) {
                sumOfHostUtilizationByResource.putIfAbsent(resource, 0.0);
                double sumOfBrokerUtilization = 0.0;
                for (Broker broker : host.brokers()) {
                    sumOfBrokerUtilization += broker.load().expectedUtilizationFor(resource);
                }
                Double hostUtilization = host.load().expectedUtilizationFor(resource);
                if (AnalyzerUtils.compare(sumOfBrokerUtilization, hostUtilization, resource) != 0) {
                    throw new IllegalArgumentException(prologueErrorMsg + " Host utilization for " + resource + " is different " + "from the total broker utilization in the host : " + host.name() + ". Sum of the brokers: " + sumOfBrokerUtilization + ", host utilization: " + hostUtilization);
                }
                sumOfHostUtilizationByResource.put(resource, sumOfHostUtilizationByResource.get(resource) + hostUtilization);
            }
        }
        // Check equality of sum of the host load to the rack load for each resource.
        for (Map.Entry<Resource, Double> entry : sumOfHostUtilizationByResource.entrySet()) {
            Resource resource = entry.getKey();
            double sumOfHostsUtil = entry.getValue();
            sumOfRackUtilizationByResource.putIfAbsent(resource, 0.0);
            Double rackUtilization = rack.load().expectedUtilizationFor(resource);
            if (AnalyzerUtils.compare(rackUtilization, sumOfHostsUtil, resource) != 0) {
                throw new IllegalArgumentException(prologueErrorMsg + " Rack utilization for " + resource + " is different " + "from the total host utilization in rack" + rack.id() + " . Sum of the hosts: " + sumOfHostsUtil + ", rack utilization: " + rack.load().expectedUtilizationFor(resource));
            }
            sumOfRackUtilizationByResource.put(resource, sumOfRackUtilizationByResource.get(resource) + sumOfHostUtilizationByResource.get(resource));
        }
    }
    // Check equality of sum of the rack load to the cluster load for each resource.
    for (Map.Entry<Resource, Double> entry : sumOfRackUtilizationByResource.entrySet()) {
        Resource resource = entry.getKey();
        double sumOfRackUtil = entry.getValue();
        if (AnalyzerUtils.compare(_load.expectedUtilizationFor(resource), sumOfRackUtil, resource) != 0) {
            throw new IllegalArgumentException(prologueErrorMsg + " Cluster utilization for " + resource + " is different " + "from the total rack utilization in the cluster. Sum of the racks: " + sumOfRackUtil + ", cluster utilization: " + _load.expectedUtilizationFor(resource));
        }
    }
    // Check equality of the sum of the leadership load to the sum of the load of leader at each broker.
    for (Broker broker : brokers()) {
        double sumOfLeaderOfReplicaUtilization = 0.0;
        for (Replica replica : broker.replicas()) {
            sumOfLeaderOfReplicaUtilization += partition(replica.topicPartition()).leader().load().expectedUtilizationFor(Resource.NW_OUT);
        }
        if (AnalyzerUtils.compare(sumOfLeaderOfReplicaUtilization, _potentialLeadershipLoadByBrokerId.get(broker.id()).expectedUtilizationFor(Resource.NW_OUT), Resource.NW_OUT) != 0) {
            throw new IllegalArgumentException(prologueErrorMsg + " Leadership utilization for " + Resource.NW_OUT + " is different from the total utilization leader of replicas in the broker" + " with id: " + broker.id() + " Expected: " + sumOfLeaderOfReplicaUtilization + " Received: " + _potentialLeadershipLoadByBrokerId.get(broker.id()).expectedUtilizationFor(Resource.NW_OUT) + ".");
        }
        for (Resource resource : Resource.values()) {
            if (resource == Resource.CPU) {
                continue;
            }
            double leaderSum = broker.leaderReplicas().stream().mapToDouble(r -> r.load().expectedUtilizationFor(resource)).sum();
            double cachedLoad = broker.leadershipLoadForNwResources().expectedUtilizationFor(resource);
            if (AnalyzerUtils.compare(leaderSum, cachedLoad, resource) != 0) {
                throw new IllegalArgumentException(prologueErrorMsg + " Leadership load for resource " + resource + " is " + cachedLoad + " but recomputed sum is " + leaderSum + ".");
            }
        }
    }
}
Also used : SortedSet(java.util.SortedSet) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Gson(com.google.gson.Gson) Map(java.util.Map) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) ModelGeneration(com.linkedin.kafka.cruisecontrol.monitor.ModelGeneration) OutputStream(java.io.OutputStream) TopicPartition(org.apache.kafka.common.TopicPartition) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint) Set(java.util.Set) AnalyzerUtils(com.linkedin.kafka.cruisecontrol.analyzer.AnalyzerUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TreeMap(java.util.TreeMap) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) Variance(org.apache.commons.math3.stat.descriptive.moment.Variance) Collections(java.util.Collections) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Resource(com.linkedin.kafka.cruisecontrol.common.Resource) BalancingConstraint(com.linkedin.kafka.cruisecontrol.analyzer.BalancingConstraint) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap)
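
Every consistency check above funnels through AnalyzerUtils.compare, which tolerates small floating-point drift rather than demanding exact equality. A sketch of the assumed semantics (the real method resolves a tolerance per Resource; the epsilon parameter here is a stand-in for that lookup):

// Assumed semantics: equality within a resource-specific tolerance.
static int compare(double d1, double d2, double epsilon) {
    if (Math.abs(d1 - d2) <= epsilon)
        return 0; // close enough: no IllegalArgumentException is thrown
    return d1 > d2 ? 1 : -1;
}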

Example 10 with Cluster

use of org.apache.commons.math3.ml.clustering.Cluster in project chordatlas by twak.

the class Prof method findProfileLines.

/**
 * We find an initial base offset, then cluster the start points of all
 * (clean) profiles. If any are a good distance from the initial base, we
 * add those as their own profile lines.
 *
 * The original line is offset by the remaining data.
 */
public static List<SuperLine> findProfileLines(Collection<Prof> profiles, Line3d line) {
    List<SuperLine> out = new ArrayList<>();
    // PaintThing.debug.clear();
    SuperLine superLine = new SuperLine(line.start.x, line.start.z, line.end.x, line.end.z);
    double outLen = superLine.length();
    double min = Double.MAX_VALUE, max = -Double.MAX_VALUE;
    Cache<Prof, Double> vLength = new Cache<Prof, Double>() {

        @Override
        public Double create(Prof i) {
            return i.verticalLength(0.5);
        }
    };
    double vLen = profiles.stream().mapToDouble(p -> vLength.get(p)).sum();
    boolean useVertical = vLen / profiles.size() > 1;
    class Wrapper implements Clusterable {

        double[] pt;

        public Wrapper(Point2d pt) {
            this.pt = new double[] { pt.x, pt.y };
        }

        @Override
        public double[] getPoint() {
            return pt;
        }
    }
    List<Wrapper> toCluster = new ArrayList<>();
    List<Double> baseLineOffset = new ArrayList<>();
    for (Prof p : profiles) {
        // alternative threshold: vLen / (5*profiles.size())
        if (useVertical && vLength.get(p) < 1)
            continue;
        Prof clean = p.parameterize();
        Point2d pt = clean.get(0);
        Point3d pt3 = clean.to3d(pt);
        double ppram = superLine.findPPram(new Point2d(pt3.x, pt3.z));
        baseLineOffset.add(pt.x);
        toCluster.add(new Wrapper(new Point2d(pt.x, ppram * outLen)));
        min = Math.min(min, ppram);
        max = Math.max(max, ppram);
    }
    if (min == max || toCluster.isEmpty())
        return out;
    if (true) {
        baseLineOffset.sort(Double::compareTo);
        double modeBaselineOffset = baseLineOffset.get(baseLineOffset.size() / 2);
        DBSCANClusterer<Wrapper> cr = new DBSCANClusterer<>(1.5, 0);
        List<Cluster<Wrapper>> results = cr.cluster(toCluster);
        Iterator<Cluster<Wrapper>> cit = results.iterator();
        while (cit.hasNext()) {
            Cluster<Wrapper> cw = cit.next();
            if (cw.getPoints().size() < 2 / TweedSettings.settings.profileHSampleDist) {
                cit.remove();
                double cMeanY = cw.getPoints().stream().mapToDouble(x -> x.pt[1]).average().getAsDouble();
                double bestDist = Double.MAX_VALUE;
                Cluster<Wrapper> bestWrapper = null;
                for (Cluster<Wrapper> near : results) {
                    double meanY = near.getPoints().stream().mapToDouble(x -> x.pt[1]).average().getAsDouble();
                    double dist = Math.abs(meanY - cMeanY);
                    if (dist < bestDist) {
                        bestDist = dist;
                        bestWrapper = near;
                    }
                }
                if (bestWrapper != null)
                    bestWrapper.getPoints().addAll(cw.getPoints());
            }
        }
        {
            baseLineOffset.clear();
            int c = 0;
            for (Cluster<Wrapper> cw : results) {
                double[] minMax = cw.getPoints().stream().map(p -> new double[] { p.pt[1] }).collect(new InAxDoubleArray());
                double[] offsetA = cw.getPoints().stream().mapToDouble(p -> p.pt[0]).sorted().toArray();
                double offset = offsetA[offsetA.length / 2];
                if (offset - modeBaselineOffset < 1) {
                    for (Wrapper w : cw.getPoints()) baseLineOffset.add(w.pt[0]);
                    continue;
                }
                SuperLine sl = new SuperLine(superLine.fromPPram(minMax[0] / outLen), superLine.fromPPram(minMax[1] / outLen));
                sl.moveLeft(offset);
                out.add(sl);
                List<Point2d> pts = cw.getPoints().stream().map(w -> new Point2d(w.pt[0], w.pt[1])).collect(Collectors.toList());
                PaintThing.debug(Rainbow.getColour(c++), 1, pts);
            }
        }
    }
    Point2d nStart = superLine.fromPPram(min), nEnd = superLine.fromPPram(max);
    superLine.start = nStart;
    superLine.end = nEnd;
    baseLineOffset.sort(Double::compare);
    if (!baseLineOffset.isEmpty())
        superLine.moveLeft(baseLineOffset.get(baseLineOffset.size() / 2));
    out.add(0, superLine);
    return out;
}
Also used : ConsecutivePairs(org.twak.utils.collections.ConsecutivePairs) Matrix4d(javax.vecmath.Matrix4d) ConsecutiveItPairs(org.twak.utils.collections.ConsecutiveItPairs) SliceParameters(org.twak.viewTrace.SliceParameters) Arrayz(org.twak.utils.collections.Arrayz) Node(com.jme3.scene.Node) ObjRead(org.twak.utils.geom.ObjRead) Map(java.util.Map) Cache(org.twak.utils.Cache) Material(com.jme3.material.Material) Point3d(javax.vecmath.Point3d) VertexBuffer(com.jme3.scene.VertexBuffer) IdentityHashMap(java.util.IdentityHashMap) Collection(java.util.Collection) Line(org.twak.utils.Line) FindLines(org.twak.viewTrace.FindLines) Set(java.util.Set) Vector2d(javax.vecmath.Vector2d) LinearForm(org.twak.utils.geom.LinearForm) Collectors(java.util.stream.Collectors) List(java.util.List) Rainbow(org.twak.utils.ui.Rainbow) Line3d(org.twak.utils.geom.Line3d) Mesh(com.jme3.scene.Mesh) Geometry(com.jme3.scene.Geometry) DBSCANClusterer(org.apache.commons.math3.ml.clustering.DBSCANClusterer) LinearForm3D(org.twak.utils.geom.LinearForm3D) Bin(org.twak.viewTrace.Bin) Pair(org.twak.utils.Pair) Vector3d(javax.vecmath.Vector3d) Clusterable(org.apache.commons.math3.ml.clustering.Clusterable) Tweed(org.twak.tweed.Tweed) ArrayList(java.util.ArrayList) TweedSettings(org.twak.tweed.TweedSettings) HashSet(java.util.HashSet) PanMouseAdaptor(org.twak.utils.PanMouseAdaptor) Graphics2D(java.awt.Graphics2D) ICanPaintU(org.twak.utils.PaintThing.ICanPaintU) Mathz(org.twak.utils.Mathz) PaintThing(org.twak.utils.PaintThing) Iterator(java.util.Iterator) Point2d(javax.vecmath.Point2d) SuperLine(org.twak.viewTrace.SuperLine) Cluster(org.apache.commons.math3.ml.clustering.Cluster) ColorRGBA(com.jme3.math.ColorRGBA) Comparator(java.util.Comparator) InAxDoubleArray(org.twak.utils.streams.InAxDoubleArray) Collections(java.util.Collections) ObjSlice(org.twak.viewTrace.ObjSlice) ArrayList(java.util.ArrayList) Cluster(org.apache.commons.math3.ml.clustering.Cluster) Clusterable(org.apache.commons.math3.ml.clustering.Clusterable) InAxDoubleArray(org.twak.utils.streams.InAxDoubleArray) Point2d(javax.vecmath.Point2d) Point3d(javax.vecmath.Point3d) SuperLine(org.twak.viewTrace.SuperLine) List(java.util.List) ArrayList(java.util.ArrayList) DBSCANClusterer(org.apache.commons.math3.ml.clustering.DBSCANClusterer) Cache(org.twak.utils.Cache)
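
The commons-math3 clustering API used above is compact and worth seeing in isolation. The following self-contained example uses the library's real DBSCANClusterer, Cluster and DoublePoint types with the same (eps = 1.5, minPts = 0) settings as findProfileLines; the sample coordinates are invented:

import java.util.Arrays;
import java.util.List;

import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.DBSCANClusterer;
import org.apache.commons.math3.ml.clustering.DoublePoint;

public class DbscanDemo {
    public static void main(String[] args) {
        // Two well-separated groups; points within eps = 1.5 of a neighbour join its cluster.
        List<DoublePoint> points = Arrays.asList(
            new DoublePoint(new double[] { 0.0, 0.0 }),
            new DoublePoint(new double[] { 0.5, 0.1 }),
            new DoublePoint(new double[] { 10.0, 0.0 }),
            new DoublePoint(new double[] { 10.4, 0.2 }));
        DBSCANClusterer<DoublePoint> clusterer = new DBSCANClusterer<>(1.5, 0);
        for (Cluster<DoublePoint> cluster : clusterer.cluster(points))
            System.out.println(cluster.getPoints());
    }
}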

Aggregations

ArrayList (java.util.ArrayList)7 ClusterPoint (gdsc.core.clustering.ClusterPoint)5 Plot2 (ij.gui.Plot2)3 WeightedObservedPoint (org.apache.commons.math3.fitting.WeightedObservedPoint)3 PointValuePair (org.apache.commons.math3.optim.PointValuePair)3 Material (com.jme3.material.Material)2 ColorRGBA (com.jme3.math.ColorRGBA)2 Geometry (com.jme3.scene.Geometry)2 Mesh (com.jme3.scene.Mesh)2 Node (com.jme3.scene.Node)2 VertexBuffer (com.jme3.scene.VertexBuffer)2 TDoubleArrayList (gnu.trove.list.array.TDoubleArrayList)2 PrintWriter (java.io.PrintWriter)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 Collectors (java.util.stream.Collectors)2