Search in sources :

Example 41 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class PAMInitialMeans method chooseInitialMedoids.

/**
 * Greedily selects {@code k} initial medoids from {@code ids}.
 * <p>
 * The first medoid is the object with the smallest sum of distances to all
 * objects. Every further medoid is the not-yet-chosen object that minimizes
 * the sum, over all objects, of the smaller of "distance to this candidate"
 * and "distance to the nearest medoid chosen so far".
 *
 * @param k number of medoids to choose
 * @param ids objects to choose from
 * @param distQ distance query used for all pairwise distances
 * @return the chosen medoids
 * @throws AbortException if, in some round, no candidate produced a sum
 *         strictly below positive infinity
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    final int hints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP;
    ArrayModifiableDBIDs medoids = DBIDUtil.newArray(k);
    DBIDVar winner = DBIDUtil.newVar();
    // Three per-object buffers that are rotated by reference instead of copied:
    // nearest   - distance of each object to its closest medoid so far,
    // roundBest - the same, as if the best candidate of the current round were added,
    // scratch   - work area for the candidate currently being evaluated.
    WritableDoubleDataStore nearest = DataStoreUtil.makeDoubleStorage(ids, hints);
    WritableDoubleDataStore roundBest = DataStoreUtil.makeDoubleStorage(ids, hints);
    WritableDoubleDataStore scratch = DataStoreUtil.makeDoubleStorage(ids, hints);

    // Round 0: the object with the smallest total distance to everything.
    {
        double lowest = Double.POSITIVE_INFINITY;
        FiniteProgress firstProg = LOG.isVerbose() ? new FiniteProgress("Choosing initial mean", ids.size(), LOG) : null;
        for (DBIDIter cand = ids.iter(); cand.valid(); cand.advance()) {
            double total = 0;
            for (DBIDIter other = ids.iter(); other.valid(); other.advance()) {
                final double dist = distQ.distance(cand, other);
                total += dist;
                scratch.putDouble(other, dist);
            }
            if (total < lowest) {
                lowest = total;
                winner.set(cand);
                // Keep the distances just computed: exchange nearest and scratch.
                WritableDoubleDataStore swap = nearest;
                nearest = scratch;
                scratch = swap;
            }
            LOG.incrementProcessed(firstProg);
        }
        LOG.ensureCompleted(firstProg);
        medoids.add(winner);
    }
    assert (nearest != null);

    // Remaining rounds: optimize the full criterion.
    FiniteProgress roundProg = LOG.isVerbose() ? new FiniteProgress("Choosing initial centers", k, LOG) : null;
    // Account for the medoid chosen above.
    LOG.incrementProcessed(roundProg);
    for (int round = 1; round < k; round++) {
        double lowest = Double.POSITIVE_INFINITY;
        winner.unset();
        for (DBIDIter cand = ids.iter(); cand.valid(); cand.advance()) {
            if (!medoids.contains(cand)) {
                double total = 0.;
                for (DBIDIter other = ids.iter(); other.valid(); other.advance()) {
                    final double capped = MathUtil.min(distQ.distance(cand, other), nearest.doubleValue(other));
                    total += capped;
                    scratch.put(other, capped);
                }
                if (total < lowest) {
                    lowest = total;
                    winner.set(cand);
                    // Remember this candidate's distances: exchange roundBest and scratch.
                    WritableDoubleDataStore swap = roundBest;
                    roundBest = scratch;
                    scratch = swap;
                }
            }
        }
        if (!winner.isSet()) {
            throw new AbortException("No median found that improves the criterion function?!? Too many infinite distances.");
        }
        medoids.add(winner);
        // The winner's distances become the new baseline: exchange roundBest and nearest.
        WritableDoubleDataStore swap = roundBest;
        roundBest = nearest;
        nearest = swap;
        LOG.incrementProcessed(roundProg);
    }
    LOG.ensureCompleted(roundProg);
    nearest.destroy();
    roundBest.destroy();
    scratch.destroy();
    return medoids;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 42 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class KNNKernelDensityMinimaClustering method run.

/**
 * Run the clustering algorithm on a data relation.
 * <p>
 * The objects are sorted by the single dimension {@code dim}. In step 1 a
 * density value is computed for every object from its up to {@code k}
 * predecessors and up to {@code k} successors in that sorted order. In step 2
 * the density sequence is scanned with a ring buffer of
 * {@code 2 * minwindow + 1} values, and the sorted sequence is cut into
 * consecutive clusters wherever one buffered value is strictly smaller than all
 * other buffered values.
 *
 * @param relation Relation
 * @return Clustering result
 * @throws UnsupportedOperationException if {@code mode} is neither BALLOON nor
 *         SAMPLE
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    final int size = ids.size();
    // Sort by the sole dimension
    ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
    // Density storage, initialized to 0 for every object.
    // NOTE(review): this HINT_TEMP store is never destroy()ed in this method -
    // confirm whether that is intended.
    WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // iter walks the current object, iter2 is repositioned to visit neighbors.
    DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
    StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
    LOG.beginStep(sprog, 1, "Kernel density estimation.");
    {
        // Distances from the current object to up to k neighbors on each side.
        double[] scratch = new double[2 * k];
        iter.seek(0);
        for (int i = 0; i < size; i++, iter.advance()) {
            // Current value.
            final double curv = relation.get(iter).doubleValue(dim);
            // pre/pos: first and last neighbor offset, clamped to the array;
            // prek/posk: number of neighbors before and after position i.
            final int pre = Math.max(i - k, 0), prek = i - pre;
            final int pos = Math.min(i + k, size - 1), posk = pos - i;
            // Distances to the predecessors (curv minus their value).
            iter2.seek(pre);
            for (int j = 0; j < prek; j++, iter2.advance()) {
                scratch[j] = curv - relation.get(iter2).doubleValue(dim);
            }
            assert (iter2.getOffset() == i);
            // Skip the current object itself.
            iter2.advance();
            // Distances to the successors (their value minus curv).
            for (int j = 0; j < posk; j++, iter2.advance()) {
                scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
            }
            assert (prek + posk >= k);
            // Bandwidth for this object, taken from the collected distances.
            // NOTE(review): QuickSelect's contract is not visible here; presumably
            // it returns the element of rank k within [0, prek + posk) - confirm
            // the rank is valid when prek + posk == k.
            double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
            switch(mode) {
                case BALLOON:
                    {
                        // Sum the kernel over all collected distances, scaled by
                        // kdist, and store the result at the current object.
                        double dens = 0.;
                        if (kdist > 0.) {
                            for (int j = 0; j < prek + posk; j++) {
                                dens += kernel.density(scratch[j] / kdist);
                            }
                        } else {
                            // Division by kdist is not possible: use infinite density.
                            dens = Double.POSITIVE_INFINITY;
                        }
                        assert (iter.getOffset() == i);
                        density.putDouble(iter, dens);
                        break;
                    }
                case SAMPLE:
                    {
                        // Instead of storing at the current object, add a kernel
                        // contribution (scaled by kdist) to each neighbor's density.
                        if (kdist > 0.) {
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                        } else {
                            // kdist is not positive: neighbors whose difference to
                            // the current value is not positive get infinite density.
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                        }
                        break;
                    }
                default:
                    throw new UnsupportedOperationException("Unknown mode specified.");
            }
        }
    }
    LOG.beginStep(sprog, 2, "Local minima detection.");
    Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
    {
        // Ring buffer over the most recent 2 * minwindow + 1 density values.
        double[] scratch = new double[2 * minwindow + 1];
        // Offset (in sorted order) where the cluster currently being built starts.
        int begin = 0;
        int halfw = (minwindow + 1) >> 1;
        iter.seek(0);
        // Fill the buffer; once enough values were read, also test for minima.
        for (int i = 0; i < size; i++, iter.advance()) {
            // m: slot written in this iteration; t: slot of the value read at
            // position i - minwindow - 1. t may be negative for small i, but it is
            // only used inside the guard below.
            // NOTE(review): t refers to position i - minwindow - 1 while the cut is
            // placed at i - minwindow + 1 - confirm these offsets are intended.
            final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
            scratch[m] = density.doubleValue(iter);
            if (i > scratch.length) {
                // Smallest buffered value, excluding slot t.
                double min = Double.POSITIVE_INFINITY;
                for (int j = 0; j < scratch.length; j++) {
                    if (j != t && scratch[j] < min) {
                        min = scratch[j];
                    }
                }
                // Local minimum: slot t is strictly below every other buffered value.
                if (scratch[t] < min) {
                    int end = i - minwindow + 1;
                    {
                        // Test on which side the kNN is
                        // Compares the values halfw positions before and after the
                        // cut; the cut moves one position to the right if left < right.
                        // NOTE(review): end + halfw is not range-checked against
                        // size here - confirm it cannot exceed the last offset.
                        iter2.seek(end);
                        double curv = relation.get(iter2).doubleValue(dim);
                        iter2.seek(end - halfw);
                        double left = relation.get(iter2).doubleValue(dim) - curv;
                        iter2.seek(end + halfw);
                        double right = curv - relation.get(iter2).doubleValue(dim);
                        if (left < right) {
                            end++;
                        }
                    }
                    // Emit the objects at offsets [begin, end) as one cluster.
                    iter2.seek(begin);
                    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
                    for (int j = 0; j < end - begin; j++, iter2.advance()) {
                        cids.add(iter2);
                    }
                    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
                    begin = end;
                }
            }
        }
        // Extract last cluster: everything from the last cut to the end.
        int end = size;
        iter2.seek(begin);
        ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
        for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
        }
        clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
    }
    LOG.ensureCompleted(sprog);
    return clustering;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) VectorUtil(de.lmu.ifi.dbs.elki.data.VectorUtil) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)

Example 43 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class FarthestSumPointsInitialMeans method chooseInitialMedoids.

/**
 * Chooses {@code k} initial medoids from {@code ids} by repeatedly picking the
 * object with the largest accumulated sum of distances to the medoids chosen
 * so far.
 * <p>
 * A first object is drawn at random from {@code ids}. If {@code dropfirst} is
 * set, it only serves as the reference for the first selection round and is
 * removed from the result afterwards; otherwise it is kept as the first
 * medoid.
 *
 * @param k number of medoids to choose
 * @param ids objects to choose from
 * @param distQ distance query used for all distance computations
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    // Accumulated distance sum per object; NaN marks an object that is already
    // part of the result and must not be chosen again.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    means.add(first);
    if (!dropfirst) {
        // The first object stays in the result, so exclude it from later
        // rounds; otherwise it could be selected a second time.
        store.putDouble(first, Double.NaN);
    }
    DBIDVar prevmean = DBIDUtil.newVar(first);
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object. Only objects from ids are candidates: the
        // store above is allocated for exactly these objects.
        double maxdist = Double.NEGATIVE_INFINITY;
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double dsum = prev + distQ.distance(prevmean, it);
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, dsum);
            }
            if (dsum > maxdist) {
                maxdist = dsum;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    store.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 44 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class ExternalClustering method attachToRelation.

/**
 * Build a clustering from the file result and add it to the database
 * hierarchy below the given relation.
 * <p>
 * Entry {@code i} of {@code assignment} is the cluster label of the object at
 * array offset {@code i} of the relation's DBIDs. Labels below zero are
 * flagged as noise clusters.
 *
 * @param database Database
 * @param r Result to attach to
 * @param assignment Cluster assignment
 * @param name Name
 * @throws AbortException if the DBIDs of {@code r} are not {@link ArrayDBIDs}
 */
private void attachToRelation(Database database, Relation<?> r, IntArrayList assignment, ArrayList<String> name) {
    DBIDs ids = r.getDBIDs();
    if (!(ids instanceof ArrayDBIDs)) {
        throw new AbortException("External clusterings can only be used with static DBIDs.");
    }
    // First pass: count the members of each label to presize the id arrays.
    Int2IntOpenHashMap sizes = new Int2IntOpenHashMap();
    for (IntListIterator it = assignment.iterator(); it.hasNext(); ) {
        sizes.addTo(it.nextInt(), 1);
    }
    Int2ObjectOpenHashMap<ArrayModifiableDBIDs> cids = new Int2ObjectOpenHashMap<>(sizes.size());
    for (ObjectIterator<Int2IntMap.Entry> it = sizes.int2IntEntrySet().fastIterator(); it.hasNext(); ) {
        Int2IntMap.Entry entry = it.next();
        cids.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
    }
    // Second pass: put the object at offset i into the array of its label.
    {
        DBIDArrayIter it = ((ArrayDBIDs) ids).iter();
        for (int i = 0; i < assignment.size(); i++) {
            cids.get(assignment.getInt(i)).add(it.seek(i));
        }
    }
    String nam = FormatUtil.format(name, " ");
    // Lower-case with a fixed locale so the derived short name does not depend
    // on the default locale of the machine (e.g. Turkish dotless i).
    String snam = nam.toLowerCase(java.util.Locale.ROOT).replace(' ', '-');
    Clustering<ClusterModel> result = new Clustering<>(nam, snam);
    for (ObjectIterator<Int2ObjectMap.Entry<ArrayModifiableDBIDs>> it = cids.int2ObjectEntrySet().fastIterator(); it.hasNext(); ) {
        Int2ObjectMap.Entry<ArrayModifiableDBIDs> entry = it.next();
        // Negative labels denote noise.
        boolean noise = entry.getIntKey() < 0;
        result.addToplevelCluster(new Cluster<>(entry.getValue(), noise, ClusterModel.CLUSTER));
    }
    database.getHierarchy().add(r, result);
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IntListIterator(it.unimi.dsi.fastutil.ints.IntListIterator) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 45 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class RdKNNTree method bulkReverseKNNQueryForID.

/**
 * Performs a reverse k-nearest-neighbor query for several query objects at
 * once.
 * <p>
 * Candidates are collected per query object via {@code doBulkReverseKNN}. If
 * {@code k} equals {@code settings.k_max}, the sorted candidate lists are
 * returned directly. Otherwise a kNN query is run for all candidates, and a
 * candidate is kept for a query object only if that query object occurs in
 * the candidate's kNN list.
 * <p>
 * NOTE(review): the result lists are produced in the iteration order of a
 * {@link HashMap} keyed by query id, not in the order of {@code ids} - confirm
 * that callers do not rely on positional correspondence.
 *
 * @param ids query objects
 * @param k number of neighbors, at most {@code settings.k_max}
 * @param distanceFunction distance function, validated by
 *        {@code checkDistanceFunction}
 * @param knnQuery kNN query used to refine candidates when {@code k < k_max}
 * @return one result list per query object
 * @throws IllegalArgumentException if {@code k} exceeds {@code settings.k_max}
 */
public List<ModifiableDoubleDBIDList> bulkReverseKNNQueryForID(DBIDs ids, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
    checkDistanceFunction(distanceFunction);
    if (k > settings.k_max) {
        throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
    }
    // One (initially empty) candidate list per query object.
    Map<DBID, ModifiableDoubleDBIDList> perQuery = new HashMap<>();
    for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
        perQuery.put(DBIDUtil.deref(query), DBIDUtil.newDistanceDBIDList());
    }
    doBulkReverseKNN(getRoot(), ids, perQuery);

    List<ModifiableDoubleDBIDList> answers = new ArrayList<>();
    if (k == settings.k_max) {
        // No refinement needed; just sort each candidate list.
        for (ModifiableDoubleDBIDList list : perQuery.values()) {
            list.sort();
            answers.add(list);
        }
        return answers;
    }

    // k < k_max: refine by running a kNN query on every candidate.
    ArrayModifiableDBIDs pool = DBIDUtil.newArray();
    for (ModifiableDoubleDBIDList list : perQuery.values()) {
        pool.addDBIDs(list);
    }
    // Sorted so that candidates can be located by binary search below.
    pool.sort();
    List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(pool, k);

    // Keep candidate c for query object o only if o is among the kNN of c.
    for (Map.Entry<DBID, ModifiableDoubleDBIDList> entry : perQuery.entrySet()) {
        final DBID queryId = entry.getKey();
        ModifiableDoubleDBIDList confirmed = DBIDUtil.newDistanceDBIDList();
        for (DoubleDBIDListIter cand = entry.getValue().iter(); cand.valid(); cand.advance()) {
            final int slot = pool.binarySearch(cand);
            assert (slot >= 0);
            for (DoubleDBIDListIter nn = neighborhoods.get(slot).iter(); nn.valid(); nn.advance()) {
                if (DBIDUtil.equal(queryId, nn)) {
                    confirmed.add(nn.doubleValue(), cand);
                    break;
                }
            }
        }
        answers.add(confirmed);
    }
    return answers;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) HashMap(java.util.HashMap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)49 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)23 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)12 ArrayList (java.util.ArrayList)11 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)10 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)9 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)7 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)5 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)5 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)4 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)4 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)4 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)4 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)4 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)4