Search in sources :

Example 1 with PAMInitialMeans

Use of de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.PAMInitialMeans in project elki by elki-project.

The run method of the class KMedoidsEM.

/**
 * Clusters the relation with k-medoids: starting from the medoids chosen by
 * the configured initializer, it alternates between picking, per cluster, the
 * member with the smallest summed distance to the cluster members as new
 * medoid, and reassigning the objects via {@code assignToNearestCluster},
 * until no medoid changes anymore.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation holding the objects to cluster
 * @return Clustering with one top-level cluster per medoid; an empty
 *         clustering if the relation contains no objects
 */
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
    }
    // A precomputed distance matrix is only enforced for the PAM
    // initialization, which is slow; otherwise a regular query is used.
    final DistanceQuery<V> distQ;
    if (initializer instanceof PAMInitialMeans) {
        distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG);
    } else {
        distQ = database.getDistanceQuery(relation, getDistanceFunction());
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    // Initial medoids, as selected by the initializer.
    ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, relation.getDBIDs(), distQ));
    DBIDArrayMIter medoidIter = medoids.iter();
    // Per-cluster cost values; filled by assignToNearestCluster and used
    // below as the threshold a replacement medoid has to beat.
    double[] clusterCosts = new double[k];
    // One member set per cluster, each seeded with its own medoid.
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        HashSetModifiableDBIDs members = DBIDUtil.newHashSet(relation.size() / k);
        members.add(medoidIter.seek(c));
        clusters.add(members);
    }
    // First assignment of the objects to the clusters.
    // TODO: reuse this information, from the build phase, when possible?
    double initialCost = assignToNearestCluster(medoidIter, clusterCosts, clusters, distQ);
    if (LOG.isStatistics()) {
        LOG.statistics(new DoubleStatistic(KEY + ".iteration-" + 0 + ".cost", initialCost));
    }
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medoids EM iteration", LOG) : null;
    int iteration = 0;
    DBIDVar best = DBIDUtil.newVar();
    boolean changed;
    do {
        changed = false;
        // Medoid update: visit each cluster together with its current medoid.
        int c = 0;
        medoidIter.seek(0);
        while (medoidIter.valid()) {
            ModifiableDBIDs members = clusters.get(c);
            best.unset();
            double bestCost = clusterCosts[c];
            for (DBIDIter cand = members.iter(); cand.valid(); cand.advance()) {
                // The current medoid is not a candidate for replacing itself.
                if (!DBIDUtil.equal(medoidIter, cand)) {
                    // Summed distance from the candidate to all cluster members.
                    double total = 0;
                    for (DBIDIter other = members.iter(); other.valid(); other.advance()) {
                        total += distQ.distance(cand, other);
                    }
                    if (total < bestCost) {
                        best.set(cand);
                        bestCost = total;
                    }
                }
            }
            if (best.isSet() && !DBIDUtil.equal(medoidIter, best)) {
                changed = true;
                assert (members.contains(best));
                medoids.set(c, best);
                clusterCosts[c] = bestCost;
            }
            medoidIter.advance();
            c++;
        }
        // Reassign only if at least one medoid was replaced; otherwise the
        // loop condition terminates the iteration.
        if (changed) {
            double cost = assignToNearestCluster(medoidIter, clusterCosts, clusters, distQ);
            ++iteration;
            if (LOG.isStatistics()) {
                LOG.statistics(new DoubleStatistic(KEY + ".iteration-" + iteration + ".cost", cost));
            }
            LOG.incrementProcessed(prog);
        }
    } while (changed);
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Build the result: one top-level cluster per medoid, in medoid order.
    Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
    for (DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) {
        ModifiableDBIDs members = clusters.get(it.getOffset());
        MedoidModel model = new MedoidModel(DBIDUtil.deref(it));
        Cluster<MedoidModel> cluster = new Cluster<>(members, model);
        result.addToplevelCluster(cluster);
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) PAMInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.PAMInitialMeans) MedoidModel(de.lmu.ifi.dbs.elki.data.model.MedoidModel) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Aggregations

PAMInitialMeans (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.PAMInitialMeans)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)1 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)1 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)1 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)1 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)1 ArrayList (java.util.ArrayList)1