Search in sources :

Example 16 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the elki project by elki-project.

In class CanopyPreClustering, method run.

/**
 * Perform canopy pre-clustering on a relation.
 *
 * One object at a time is taken out of the pool of remaining objects and used
 * as canopy center. Every remaining object whose distance to the center is
 * not larger than t1 joins the canopy; objects with distance at most t2 are
 * additionally removed from the pool, so they cannot start a canopy later.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Clustering with one cluster per canopy, each carrying its center
 *         object as prototype
 */
public Clustering<PrototypeModel<O>> run(Database database, Relation<O> relation) {
    // Negated form on purpose: anything that does not satisfy t1 >= t2 aborts.
    if (!(t1 >= t2)) {
        throw new AbortException("T1 must be at least as large as T2.");
    }
    DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
    // Pool of objects that may still become a canopy center:
    ModifiableDBIDs remaining = DBIDUtil.newHashSet(relation.getDBIDs());
    ArrayList<Cluster<PrototypeModel<O>>> canopies = new ArrayList<>();
    final int total = relation.size();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", total, LOG) : null;
    DBIDVar center = DBIDUtil.newVar();
    while (!remaining.isEmpty()) {
        // Take the next center out of the pool.
        remaining.pop(center);
        // The canopy always contains its own center.
        ModifiableDBIDs members = DBIDUtil.newArray();
        members.add(center);
        // Test every object still in the pool against this center.
        DBIDMIter cand = remaining.iter();
        while (cand.valid()) {
            final double d = distq.distance(center, cand);
            // Inclusion threshold (kept as negation of "d > t1").
            if (!(d > t1)) {
                members.add(cand);
                // Removal threshold: close enough to leave the pool for good.
                if (d <= t2) {
                    cand.remove();
                }
            }
            cand.advance();
        }
        // TODO: remember the central object using a CanopyModel?
        canopies.add(new Cluster<>(members, new SimplePrototypeModel<>(relation.get(center))));
        if (progress != null) {
            // Everything no longer in the pool counts as processed.
            progress.setProcessed(total - remaining.size(), LOG);
        }
    }
    LOG.ensureCompleted(progress);
    return new Clustering<>("Canopy clustering", "canopy-clustering", canopies);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 17 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the elki project by elki-project.

In class AffinityPropagationClusteringAlgorithm, method run.

/**
 * Perform affinity propagation clustering.
 *
 * The method alternates between updating the responsibility matrix {@code r}
 * and the availability matrix {@code a}, both damped with {@code lambda}
 * (weight of the old value). After each round, every object is assigned to
 * the index maximizing {@code a[i][j] + r[i][j]}, preferring itself on ties.
 * Iteration stops after {@code maxiter} rounds, or once the assignment did
 * not change for {@code convergence} consecutive rounds.
 *
 * Objects are then grouped by their assigned lead. Groups whose lead is
 * itself assigned to a different object are merged into the group reached by
 * following the assignment chain. Groups with a single member are collected
 * into one separate cluster.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    // assignment[i] is the array offset of the lead object chosen for object i.
    int[] assignment = new int[size];
    double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
    double[][] r = new double[size][size];
    double[][] a = new double[size][size];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
    MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
    // Number of consecutive iterations without any assignment change.
    int inactive = 0;
    for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
        // Update responsibility matrix:
        for (int i = 0; i < size; i++) {
            double[] ai = a[i], ri = r[i], si = s[i];
            // Find the two largest values (as initially maxk == i)
            double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
            int maxk = -1;
            for (int k = 0; k < size; k++) {
                double val = ai[k] + si[k];
                if (val > max1) {
                    max2 = max1;
                    max1 = val;
                    maxk = k;
                } else if (val > max2) {
                    max2 = val;
                }
            }
            // With the maximum value known, update r:
            for (int k = 0; k < size; k++) {
                // Subtract the largest competing value: for the maximum itself,
                // that is the second largest.
                double val = si[k] - ((k != maxk) ? max1 : max2);
                ri[k] = ri[k] * lambda + val * (1. - lambda);
            }
        }
        // Update availability matrix
        for (int k = 0; k < size; k++) {
            // Compute sum of max(0, r_ik) for all i.
            // For r_kk, don't apply the max.
            double colposum = 0.;
            for (int i = 0; i < size; i++) {
                if (i == k || r[i][k] > 0.) {
                    colposum += r[i][k];
                }
            }
            for (int i = 0; i < size; i++) {
                double val = colposum;
                // Adjust column sum by the one extra term.
                if (i == k || r[i][k] > 0.) {
                    val -= r[i][k];
                }
                if (i != k && val > 0.) {
                    // min
                    val = 0.;
                }
                a[i][k] = a[i][k] * lambda + val * (1 - lambda);
            }
        }
        // Recompute assignments, counting how many objects changed their lead.
        int changed = 0;
        for (int i = 0; i < size; i++) {
            double[] ai = a[i], ri = r[i];
            double max = Double.NEGATIVE_INFINITY;
            int maxj = -1;
            for (int j = 0; j < size; j++) {
                double v = ai[j] + ri[j];
                // On ties, prefer the object itself (i == j).
                if (v > max || (i == j && v >= max)) {
                    max = v;
                    maxj = j;
                }
            }
            if (assignment[i] != maxj) {
                changed += 1;
                assignment[i] = maxj;
            }
        }
        inactive = (changed > 0) ? 0 : (inactive + 1);
        LOG.incrementProcessed(prog);
        if (aprog != null) {
            aprog.setProcessed(size - changed, LOG);
        }
    }
    if (aprog != null) {
        aprog.setProcessed(aprog.getTotal(), LOG);
    }
    LOG.setCompleted(prog);
    // Cluster map, by lead object
    Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
    DBIDArrayIter i1 = ids.iter();
    for (int i = 0; i1.valid(); i1.advance(), i++) {
        int c = assignment[i];
        // Add to cluster members:
        ModifiableDBIDs cids = map.get(c);
        if (cids == null) {
            cids = DBIDUtil.newArray();
            map.put(c, cids);
        }
        cids.add(i1);
    }
    // If we stopped early, the cluster lead might be in a different cluster.
    for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
        final int key = entry.getIntKey();
        int targetkey = key;
        ModifiableDBIDs tids = null;
        // Chase arrows: follow the assignment chain until a cluster that is
        // still present in the map is found, or a self-assigned lead is reached.
        // The condition must test tids (the lookup result); ids is never null
        // here, so testing it would skip the chase and disable the merge below.
        while (tids == null && assignment[targetkey] != targetkey) {
            targetkey = assignment[targetkey];
            tids = map.get(targetkey);
        }
        // Merge into the target cluster, unless the chain led back to ourselves.
        if (tids != null && targetkey != key) {
            tids.addDBIDs(entry.getValue());
            iter.remove();
        }
    }
    Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
    ModifiableDBIDs noise = DBIDUtil.newArray();
    for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
        // Position the array iterator on the lead object of this cluster.
        i1.seek(entry.getIntKey());
        if (entry.getValue().size() > 1) {
            MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
            clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
        } else {
            // Single-member clusters are collected separately.
            noise.add(i1);
        }
    }
    if (noise.size() > 0) {
        // The first collected object serves as medoid of this cluster.
        // NOTE(review): the boolean flag presumably marks the cluster as noise - confirm against Cluster.
        MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
        clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
    }
    return clustering;
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) MedoidModel(de.lmu.ifi.dbs.elki.data.model.MedoidModel) MutableProgress(de.lmu.ifi.dbs.elki.logging.progress.MutableProgress) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 18 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the elki project by elki-project.

In class SameSizeKMeansAlgorithm, method initialAssignment.

/**
 * Assign every object to a cluster, filling each cluster up to at most
 * ceil(n / k) objects.
 *
 * Objects are processed in descending order of their {@code Meta} priority.
 * Each object is added to the cluster given by its {@code primary} index.
 * Whenever a cluster reaches the maximum size, the not yet assigned objects
 * that preferred this cluster get a new {@code primary} among the clusters
 * that still have room, and the remaining objects are sorted again.
 *
 * @param clusters Cluster member lists, indexed by cluster number; objects are
 *        added to these
 * @param metas Per-object meta data; read for ordering and updated when the
 *        preferred cluster of an object changes
 * @param ids Objects to assign
 * @return The objects as array, in the order in which they were processed
 */
protected ArrayModifiableDBIDs initialAssignment(List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, DBIDs ids) {
    // Build a sorted list of objects, by descending distance delta
    ArrayModifiableDBIDs tids = DBIDUtil.newArray(ids);
    // Our desired cluster size:
    // rounded up
    final int maxsize = (tids.size() + k - 1) / k;
    // Comparator: sort by largest benefit of assigning to preferred cluster.
    final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {

        @Override
        public int compare(DBIDRef o1, DBIDRef o2) {
            Meta c1 = metas.get(o1), c2 = metas.get(o2);
            // Negated, to obtain descending order by priority.
            return -Double.compare(c1.priority(), c2.priority());
        }
    };
    // We will use this iterator below. It allows seeking!
    DBIDArrayIter id = tids.iter();
    // Initialization phase:
    // "start" is the number of objects already assigned; it is only advanced
    // inside the loop body, one step per assigned object.
    for (int start = 0; start < tids.size(); ) {
        // Sort (again) only the part that has not been assigned yet.
        tids.sort(start, tids.size(), comp);
        for (id.seek(start); id.valid(); id.advance()) {
            Meta c = metas.get(id);
            // Assigning to best cluster - which cannot be full yet!
            // NOTE(review): the assertion below also admits size == maxsize,
            // although the comment above says the cluster is not full - confirm.
            ModifiableDBIDs cluster = clusters.get(c.primary);
            assert (cluster.size() <= maxsize);
            cluster.add(id);
            start++;
            // Now the cluster may have become completely filled:
            if (cluster.size() == maxsize) {
                final int full = c.primary;
                // Refresh the not yet assigned objects where necessary:
                // This continues with the same iterator, so it runs over
                // exactly the objects after the one just assigned.
                for (id.advance(); id.valid(); id.advance()) {
                    Meta ca = metas.get(id);
                    if (ca.primary == full) {
                        // Update the best index:
                        for (int i = 0; i < k; i++) {
                            // Skip the cluster that just filled up, and any other full cluster.
                            if (i == full || clusters.get(i).size() >= maxsize) {
                                continue;
                            }
                            // The first admissible cluster is always taken
                            // (primary is still "full"); afterwards only
                            // clusters with a smaller distance replace it.
                            if (ca.primary == full || ca.dists[i] < ca.dists[ca.primary]) {
                                ca.primary = i;
                            }
                        }
                        // Changed.
                        metas.put(id, ca);
                    }
                }
                // not really necessary - iterator is at end anyway.
                break;
            }
        }
    // Note: we expect Candidate.a == cluster the object is assigned to!
    }
    return tids;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) Comparator(java.util.Comparator)

Example 19 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the elki project by elki-project.

In class UKMeans, method run.

/**
 * Cluster the uncertain objects of a relation.
 *
 * Initial means are the centers of mass of randomly sampled objects. The
 * method then alternates between assigning objects to clusters and
 * recomputing the means, until no assignment changes or the iteration limit
 * (if positive) is reached. Empty clusters are left out of the result.
 *
 * @param database the Database
 * @param relation the Relation
 * @return Clustering result
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
    // Nothing to cluster: return an empty result.
    if (relation.size() <= 0) {
        return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    }
    // Initial means: centers of mass of a random sample.
    DBIDs seeds = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
    List<double[]> means = new ArrayList<>(k);
    DBIDIter seed = seeds.iter();
    while (seed.valid()) {
        means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(seed).getCenterOfMass()));
        seed.advance();
    }
    // One member set per cluster.
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    IndefiniteProgress progress = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
    DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
    // A non-positive maxiter means: no iteration limit.
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(progress);
        final boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
        logVarstat(varstat, varsum);
        if (!changed) {
            // Converged: the counter is not incremented for this final pass.
            break;
        }
        means = means(clusters, means, relation);
        iteration++;
    }
    LOG.setCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Build the result from all non-empty clusters.
    Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    final int numclusters = clusters.size();
    for (int c = 0; c < numclusters; c++) {
        DBIDs members = clusters.get(c);
        if (!members.isEmpty()) {
            result.addToplevelCluster(new Cluster<>(members, new KMeansModel(means.get(c), varsum[c])));
        }
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 20 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in the elki project by elki-project.

In class UKMeans, method means.

/**
 * Compute the new mean of every cluster from the centers of mass of its
 * members.
 *
 * A cluster without members keeps its previous mean.
 *
 * @param clusters the clusters to compute the means
 * @param means the recent means
 * @param database the database containing the vectors
 * @return the mean vectors of the given clusters in the given database
 */
protected List<double[]> means(List<? extends ModifiableDBIDs> clusters, List<double[]> means, Relation<DiscreteUncertainObject> database) {
    List<double[]> updated = new ArrayList<>(k);
    for (int c = 0; c < k; c++) {
        ModifiableDBIDs members = clusters.get(c);
        if (members.size() <= 0) {
            // Keep degenerated means as-is for now.
            updated.add(means.get(c));
            continue;
        }
        DBIDIter it = members.iter();
        // Start the sum with the first member ...
        double[] sum = ArrayLikeUtil.toPrimitiveDoubleArray(database.get(it).getCenterOfMass());
        // ... and add the remaining members component-wise.
        for (it.advance(); it.valid(); it.advance()) {
            NumberVector center = database.get(it).getCenterOfMass();
            for (int d = 0; d < sum.length; d++) {
                sum[d] += center.doubleValue(d);
            }
        }
        // Scale the sum by 1/size to obtain the mean.
        timesEquals(sum, 1.0 / members.size());
        updated.add(sum);
    }
    return updated;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ArrayList(java.util.ArrayList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) 80, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 44, Clustering (de.lmu.ifi.dbs.elki.data.Clustering) 30, ArrayList (java.util.ArrayList) 30, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 28, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) 18, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) 15, WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) 14, IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) 14, DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) 12, LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) 12, Model (de.lmu.ifi.dbs.elki.data.model.Model) 11, DBID (de.lmu.ifi.dbs.elki.database.ids.DBID) 11, KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel) 10, StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) 10, Cluster (de.lmu.ifi.dbs.elki.data.Cluster) 9, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) 9, HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) 8, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList) 8, ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel) 7