Search in sources :

Example 11 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

the class PROCLUS method run.

/**
 * Runs PROCLUS on the given relation in three phases: initialization
 * (sampling and greedy medoid candidate selection), an iterative medoid
 * search, and a final refinement of dimensions and assignments.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Clustering with one top-level cluster per refined cluster, named
 *         {@code cluster_1}, {@code cluster_2}, ... in iteration order
 * @throws IllegalStateException if the dimensionality of the relation is
 *         smaller than the parameter {@code l}
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    if (dimensionality < l) {
        throw new IllegalStateException("Dimensionality of data < parameter l! (" + dimensionality + " < " + l + ")");
    }
    DistanceQuery<V> distFunc = database.getDistanceQuery(relation, SquaredEuclideanDistanceFunction.STATIC);
    RangeQuery<V> rangeQuery = database.getRangeQuery(distFunc);
    final Random random = rnd.getSingleThreadedRandom();

    // ---- Phase 1: initialization ----
    if (LOG.isVerbose()) {
        LOG.verbose("1. Initialization phase...");
    }
    // Neither the sample nor the candidate set may exceed the relation size.
    final int relationSize = relation.size();
    final int sampleSize = Math.min(relationSize, k_i * k);
    DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
    final int medoidSize = Math.min(relationSize, m_i * k);
    ArrayDBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
    if (LOG.isDebugging()) {
        LOG.debugFine("sampleSize " + sampleSize + '\n' //
            + "sampleSet " + sampleSet + '\n' //
            + "medoidSize " + medoidSize + '\n' //
            + "m " + medoids);
    }

    // ---- Phase 2: iterative medoid search ----
    if (LOG.isVerbose()) {
        LOG.verbose("2. Iterative phase...");
    }
    double lowestObjective = Double.POSITIVE_INFINITY;
    ArrayDBIDs bestMedoids = null;
    DBIDs badMedoids = null;
    ArrayDBIDs currentMedoids = initialSet(medoids, k, random);
    if (LOG.isDebugging()) {
        LOG.debugFine("m_c " + currentMedoids);
    }
    IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
    ArrayList<PROCLUSCluster> clusters = null;
    // Counts iterations since the last improvement; the search stops once
    // this reaches 10.
    int sinceImprovement = 0;
    while (sinceImprovement < 10) {
        long[][] medoidDimensions = findDimensions(currentMedoids, relation, distFunc, rangeQuery);
        clusters = assignPoints(currentMedoids, medoidDimensions, relation);
        final double objective = evaluateClusters(clusters, medoidDimensions, relation);
        if (objective < lowestObjective) {
            // New best medoid set: remember it and restart the counter.
            sinceImprovement = 0;
            lowestObjective = objective;
            bestMedoids = currentMedoids;
            badMedoids = computeBadMedoids(currentMedoids, clusters, (int) (relation.size() * 0.1 / k));
        }
        currentMedoids = computeM_current(medoids, bestMedoids, badMedoids, random);
        sinceImprovement++;
        if (cprogress != null) {
            cprogress.setProcessed(clusters.size(), LOG);
        }
    }
    LOG.setCompleted(cprogress);

    // ---- Phase 3: refinement ----
    if (LOG.isVerbose()) {
        LOG.verbose("3. Refinement phase...");
    }
    List<Pair<double[], long[]>> refinedDimensions = findDimensions(clusters, relation);
    List<PROCLUSCluster> finalClusters = finalAssignment(refinedDimensions, relation);

    // ---- Wrap the refined clusters into the result object ----
    Clustering<SubspaceModel> result = new Clustering<>("ProClus clustering", "proclus-clustering");
    int clusterNumber = 0;
    for (PROCLUSCluster c : finalClusters) {
        clusterNumber++;
        Cluster<SubspaceModel> cluster = new Cluster<>(c.objectIDs);
        cluster.setModel(new SubspaceModel(new Subspace(c.getDimensions()), c.centroid));
        cluster.setName("cluster_" + clusterNumber);
        result.addToplevelCluster(cluster);
    }
    return result;
}
Also used : ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) AbstractProjectedClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Random(java.util.Random) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 12 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

the class KMeansBatchedLloyd method run.

/**
 * Runs batched Lloyd-style k-means: the relation is randomly split into
 * {@code blocks} parts, and in every iteration each part is assigned to its
 * nearest mean, with the means updated after each part.
 *
 * Iteration stops when a full pass over all parts changed no assignment, or
 * when {@code maxiter} iterations have been run ({@code maxiter <= 0} means
 * no limit).
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Clustering with one top-level cluster per non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

    // Setup cluster assignment store; each set is presized to twice the
    // average cluster size.
    final int capacityHint = (int) (relation.size() * 2. / k);
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet(capacityHint));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);

    // Scratch buffers, reset before every batch.
    double[][] meanshift = new double[k][dim];
    int[] changesize = new int[k];
    double[] varsum = new double[k];

    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(prog);
        boolean anyChange = false;
        FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Batch", parts.length, LOG) : null;
        for (ArrayDBIDs part : parts) {
            // Initialize new means scratch space.
            for (double[] shift : meanshift) {
                Arrays.fill(shift, 0.);
            }
            Arrays.fill(changesize, 0);
            Arrays.fill(varsum, 0.);
            // Non-short-circuit OR: the assignment must run for every batch.
            anyChange |= assignToNearestCluster(relation, part, means, meanshift, changesize, clusters, assignment, varsum);
            // Recompute means.
            updateMeans(means, meanshift, clusters, changesize);
            LOG.incrementProcessed(pprog);
        }
        LOG.ensureCompleted(pprog);
        // NOTE(review): varsum was zeroed before the last batch, so this looks
        // like it only covers the final batch of the pass - confirm intent.
        logVarstat(varstat, varsum);
        // Stop if no cluster assignment changed; the counter is deliberately
        // not advanced for the terminating pass.
        if (!anyChange) {
            break;
        }
        iteration++;
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Wrap result, leaving out clusters that ended up empty.
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    final int numClusters = clusters.size();
    for (int c = 0; c < numClusters; c++) {
        DBIDs members = clusters.get(c);
        if (members.size() != 0) {
            result.addToplevelCluster(new Cluster<>(members, new KMeansModel(means[c], varsum[c])));
        }
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 13 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

the class KMediansLloyd method run.

/**
 * Runs Lloyd-style k-medians: objects are repeatedly assigned to their
 * nearest median and the medians recomputed, until an assignment pass
 * changes nothing or {@code maxiter} iterations have been run
 * ({@code maxiter <= 0} means no limit).
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Clustering with one top-level cluster per median; an empty
 *         clustering if the relation contains no objects
 */
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    // Nothing to cluster: return an empty result right away.
    if (relation.size() <= 0) {
        return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    }
    // Choose initial medians
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

    // Setup cluster assignment store; each set is presized to twice the
    // average cluster size.
    final int capacityHint = (int) (relation.size() * 2. / k);
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet(capacityHint));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] distsum = new double[k];

    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(prog);
        // Stop if no cluster assignment changed; the counter is deliberately
        // not advanced for the terminating pass.
        if (!assignToNearestCluster(relation, centers, clusters, assignment, distsum)) {
            break;
        }
        // Recompute medians.
        centers = medians(clusters, centers, relation);
        iteration++;
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Wrap result: one cluster per median, empty clusters included.
    Clustering<MeanModel> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    final int numClusters = clusters.size();
    for (int c = 0; c < numClusters; c++) {
        result.addToplevelCluster(new Cluster<>(clusters.get(c), new MeanModel(centers[c])));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 14 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

the class LogPanel method getOrCreateProgressBar.

/**
 * Get an existing or create a new progress bar.
 *
 * The lookup and the creation happen under the same lock on the progress bar
 * map. Previously the lookup was done before entering the synchronized block
 * and not repeated inside it, so two threads reporting the same progress
 * concurrently could each create and register their own bar.
 *
 * @param prog Progress
 * @return Associated progress bar.
 * @throws RuntimeException if the progress record is of an unsupported type
 */
private JProgressBar getOrCreateProgressBar(Progress prog) {
    synchronized (pbarmap) {
        JProgressBar pbar = pbarmap.get(prog);
        if (pbar != null) {
            return pbar;
        }
        // Add a new progress bar.
        if (prog instanceof FiniteProgress) {
            pbar = new JProgressBar(0, ((FiniteProgress) prog).getTotal());
            pbar.setStringPainted(true);
        } else if (prog instanceof IndefiniteProgress) {
            // No known total: show an indeterminate bar.
            pbar = new JProgressBar();
            pbar.setIndeterminate(true);
            pbar.setStringPainted(true);
        } else if (prog instanceof MutableProgress) {
            pbar = new JProgressBar(0, ((MutableProgress) prog).getTotal());
            pbar.setStringPainted(true);
        } else {
            throw new RuntimeException("Unsupported progress record");
        }
        pbarmap.put(prog, pbar);
        // The component is attached to the panel via the Swing event queue;
        // invokeLater only enqueues the task, so it does not block under the lock.
        final JProgressBar pbar2 = pbar;
        SwingUtilities.invokeLater(new Runnable() {

            @Override
            public void run() {
                addProgressBar(pbar2);
            }
        });
        return pbar;
    }
}
Also used : MutableProgress(de.lmu.ifi.dbs.elki.logging.progress.MutableProgress) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) JProgressBar(javax.swing.JProgressBar)

Example 15 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

the class SNNClustering method run.

/**
 * Perform SNN clustering.
 *
 * If the relation holds fewer than {@code minpts} objects, every object is
 * put into the noise set. Otherwise clusters are expanded from each object
 * that has not been processed yet. The result contains one top-level cluster
 * per found cluster, followed by the noise cluster.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public Clustering<Model> run(Database database, Relation<O> relation) {
    SimilarityQuery<O> snnInstance = similarityFunction.instantiate(relation);
    FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null;
    IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;

    // Reset the per-run state.
    resultList = new ArrayList<>();
    noise = DBIDUtil.newHashSet();
    processedIDs = DBIDUtil.newHashSet(relation.size());

    if (relation.size() < minpts) {
        // Too few objects to form any cluster: everything is noise.
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            noise.add(it);
            if (objprog != null && clusprog != null) {
                objprog.setProcessed(noise.size(), LOG);
                clusprog.setProcessed(resultList.size(), LOG);
            }
        }
    } else {
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            if (!processedIDs.contains(it)) {
                expandCluster(snnInstance, it, objprog, clusprog);
                // Everything is clustered and nothing is noise: stop early.
                final boolean allProcessed = processedIDs.size() == relation.size();
                if (allProcessed && noise.size() == 0) {
                    break;
                }
            }
            if (objprog != null && clusprog != null) {
                objprog.setProcessed(processedIDs.size(), LOG);
                clusprog.setProcessed(resultList.size(), LOG);
            }
        }
    }

    // Finish progress logging
    LOG.ensureCompleted(objprog);
    LOG.setCompleted(clusprog);

    // Build the result: found clusters first, the noise cluster last.
    Clustering<Model> result = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
    for (ModifiableDBIDs members : resultList) {
        result.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
    }
    result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
    return result;
}
Also used : IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)28 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)20 ArrayList (java.util.ArrayList)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)13 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)11 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)11 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 Model (de.lmu.ifi.dbs.elki.data.model.Model)4 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)3 AbstractProjectedClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering)2 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2