Example 56 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class NaiveMeanShiftClustering method run.

/**
 * Run the mean-shift clustering algorithm.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<V> rangeq = database.getRangeQuery(distq);
    final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
    final int dim = RelationUtil.dimensionality(relation);
    // Stopping threshold
    final double threshold = bandwidth * 1E-10;
    // Result store:
    ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
    ModifiableDBIDs noise = DBIDUtil.newArray();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        // Initial position:
        V position = relation.get(iter);
        iterations: for (int j = 1; j <= MAXITER; j++) {
            // Compute new position:
            V newvec = null;
            {
                DoubleDBIDList neigh = rangeq.getRangeForObject(position, bandwidth);
                boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
                if (okay) {
                    Centroid newpos = new Centroid(dim);
                    for (DoubleDBIDListIter niter = neigh.iter(); niter.valid(); niter.advance()) {
                        final double weight = kernel.density(niter.doubleValue() / bandwidth);
                        newpos.put(relation.get(niter), weight);
                    }
                    newvec = factory.newNumberVector(newpos.getArrayRef());
                    // TODO: detect 0 weight!
                }
                if (!okay) {
                    noise.add(iter);
                    break iterations;
                }
            }
            // Test if we are close to one of the known clusters:
            double bestd = Double.POSITIVE_INFINITY;
            Pair<V, ModifiableDBIDs> bestp = null;
            for (Pair<V, ModifiableDBIDs> pair : clusters) {
                final double merged = distq.distance(newvec, pair.first);
                if (merged < bestd) {
                    bestd = merged;
                    bestp = pair;
                }
            }
            // Check for convergence:
            double delta = distq.distance(position, newvec);
            if (bestd < 10 * threshold || bestd * 2 < delta) {
                bestp.second.add(iter);
                break iterations;
            }
            if (j == MAXITER) {
                LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
            }
            if (Double.isNaN(delta)) {
                LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
                break iterations;
            }
            if (j == MAXITER || delta < threshold) {
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
                }
                ArrayModifiableDBIDs cids = DBIDUtil.newArray();
                cids.add(iter);
                clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
                break iterations;
            }
            position = newvec;
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
    for (Pair<V, ModifiableDBIDs> pair : clusters) {
        cs.add(new Cluster<>(pair.second, new MeanModel(pair.first.toArray())));
    }
    if (noise.size() > 0) {
        cs.add(new Cluster<MeanModel>(noise, true));
    }
    Clustering<MeanModel> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
    return c;
}
Also used : ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)
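
The DoubleDBIDListIter detail to note in this example is the weighted centroid update: the iterator doubles as a DBIDRef for relation.get(), while doubleValue() returns the distance the range query already computed, so nothing is recomputed inside the loop. Below is a minimal sketch of just that step, reusing only classes that appear above; the helper name is made up here, and a plain Gaussian weight stands in for the configurable kernel of the original algorithm.

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;

// Illustrative helper: kernel-weighted mean of a range-query result.
static double[] weightedMean(Relation<? extends NumberVector> relation, DoubleDBIDList neigh,
        double bandwidth, int dim) {
    Centroid pos = new Centroid(dim);
    for (DoubleDBIDListIter it = neigh.iter(); it.valid(); it.advance()) {
        // doubleValue() is the distance already stored by the range query;
        // a Gaussian weight stands in for kernel.density(d / bandwidth) above.
        double d = it.doubleValue() / bandwidth;
        pos.put(relation.get(it), Math.exp(-0.5 * d * d));
    }
    return pos.getArrayRef();
}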

Example 57 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class RdKNNTree method bulkReverseKNNQueryForID.

public List<ModifiableDoubleDBIDList> bulkReverseKNNQueryForID(DBIDs ids, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
    checkDistanceFunction(distanceFunction);
    if (k > settings.k_max) {
        throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
    }
    // get candidates
    Map<DBID, ModifiableDoubleDBIDList> candidateMap = new HashMap<>();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        DBID id = DBIDUtil.deref(iter);
        candidateMap.put(id, DBIDUtil.newDistanceDBIDList());
    }
    doBulkReverseKNN(getRoot(), ids, candidateMap);
    if (k == settings.k_max) {
        List<ModifiableDoubleDBIDList> resultList = new ArrayList<>();
        for (ModifiableDoubleDBIDList candidates : candidateMap.values()) {
            candidates.sort();
            resultList.add(candidates);
        }
        return resultList;
    }
    // refinement of candidates, if k < k_max
    // perform a knn query for the candidates
    ArrayModifiableDBIDs candidateIDs = DBIDUtil.newArray();
    for (ModifiableDoubleDBIDList candidates : candidateMap.values()) {
        candidateIDs.addDBIDs(candidates);
    }
    candidateIDs.sort();
    List<? extends KNNList> knnLists = knnQuery.getKNNForBulkDBIDs(candidateIDs, k);
    // and add candidate c to the result if o is a knn of c
    List<ModifiableDoubleDBIDList> resultList = new ArrayList<>();
    for (DBID id : candidateMap.keySet()) {
        ModifiableDoubleDBIDList candidates = candidateMap.get(id);
        ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
        for (DoubleDBIDListIter candidate = candidates.iter(); candidate.valid(); candidate.advance()) {
            int pos = candidateIDs.binarySearch(candidate);
            assert (pos >= 0);
            for (DoubleDBIDListIter qr = knnLists.get(pos).iter(); qr.valid(); qr.advance()) {
                if (DBIDUtil.equal(id, qr)) {
                    result.add(qr.doubleValue(), candidate);
                    break;
                }
            }
        }
        resultList.add(result);
    }
    return resultList;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) HashMap(java.util.HashMap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
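
The refinement loop at the end is the interesting iterator usage: an outer DoubleDBIDListIter walks the candidate list, binarySearch on the sorted candidate ID array locates the matching bulk kNN result, and an inner DoubleDBIDListIter over that KNNList checks membership with DBIDUtil.equal. A condensed sketch of that step follows; the helper name and signature are illustrative, while the calls themselves (binarySearch, DBIDUtil.equal, add with a distance) are the ones used in the method above.

import java.util.List;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;

// Illustrative helper: keep only candidates that have 'query' among their k nearest neighbors.
static ModifiableDoubleDBIDList refine(DBIDRef query, DoubleDBIDList candidates,
        ArrayModifiableDBIDs candidateIDs, List<? extends KNNList> knnLists) {
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    for (DoubleDBIDListIter cand = candidates.iter(); cand.valid(); cand.advance()) {
        // candidateIDs must be sorted; the bulk kNN lists were computed in this order.
        int pos = candidateIDs.binarySearch(cand);
        assert (pos >= 0);
        for (DoubleDBIDListIter qr = knnLists.get(pos).iter(); qr.valid(); qr.advance()) {
            if (DBIDUtil.equal(query, qr)) {
                // 'query' is a kNN of this candidate; keep the candidate with the kNN distance.
                result.add(qr.doubleValue(), cand);
                break;
            }
        }
    }
    return result;
}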

Example 58 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class SimplifiedCoverTree method bulkConstruct.

/**
 * Bulk-load the cover tree.
 *
 * This bulk-load is slightly simpler than the one used in the original
 * cover-tree source: We do not look back into the "far" set of candidates.
 *
 * @param cur Current routing object
 * @param maxScale Maximum scale
 * @param elems Candidates
 * @return Root node of subtree
 */
protected Node bulkConstruct(DBIDRef cur, int maxScale, ModifiableDoubleDBIDList elems) {
    assert (!elems.contains(cur));
    final double max = maxDistance(elems);
    final int scale = Math.min(distToScale(max) - 1, maxScale);
    final int nextScale = scale - 1;
    // Leaf node: points coincide, we are at the bottom scale, or too few elements remain:
    if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
        return new Node(cur, max, elems);
    }
    // Find neighbors in the cover of the current object:
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
    excludeNotCovered(elems, scaleToDist(scale), candidates);
    // If no elements fell outside the cover radius, build a compact tree:
    if (candidates.size() == 0) {
        LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
        return bulkConstruct(cur, nextScale, elems);
    }
    // We will have at least one other child, so build the parent:
    Node node = new Node(cur, max);
    // Routing element now is a singleton:
    final boolean curSingleton = elems.size() == 0;
    if (!curSingleton) {
        // Add node for the routing object:
        node.children.add(bulkConstruct(cur, nextScale, elems));
    }
    final double fmax = scaleToDist(nextScale);
    // Build additional cover nodes:
    for (DoubleDBIDListIter it = candidates.iter(); it.valid(); ) {
        assert (it.getOffset() == 0);
        DBID t = DBIDUtil.deref(it);
        // Recycle.
        elems.clear();
        collectByCover(it, candidates, fmax, elems);
        assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
        if (elems.size() == 0) {
            // Singleton
            node.singletons.add(it);
        } else {
            // Build a full child node:
            node.children.add(bulkConstruct(it, nextScale, elems));
        }
        candidates.removeSwap(0);
    }
    assert (candidates.size() == 0);
    // Routing object is not yet handled:
    if (curSingleton) {
        if (node.isLeaf()) {
            // First in leaf is enough.
            node.children = null;
        } else {
            // Add as regular singleton.
            node.singletons.add(cur);
        }
    }
    // TODO: improve recycling of lists?
    return node;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID)
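
Note the unusual loop over candidates: advance() is never called. Each pass consumes the element at offset 0, and removeSwap(0) moves the last element into that slot, so the cursor sees a fresh element on the next check and valid() terminates the loop once the list is empty. A stripped-down sketch of this consume-from-the-front idiom, assuming (as the method above does) that the iterator is a positional cursor over the backing array:

import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;

// Illustrative: drain a ModifiableDoubleDBIDList from the front without advancing the iterator.
static void drain(ModifiableDoubleDBIDList candidates) {
    for (DoubleDBIDListIter it = candidates.iter(); it.valid(); /* no advance */) {
        // ... process the element currently at offset 0 (it.doubleValue(), it as a DBIDRef) ...
        // O(1) removal: the last element is swapped into position 0, so 'it' now points
        // at a different element, or becomes invalid once the list is empty.
        candidates.removeSwap(0);
    }
}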

Example 59 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class CacheDoubleDistanceRangeQueries method run.

@Override
public void run() {
    database.initialize();
    Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    RangeQuery<O> rangeQ = database.getRangeQuery(distanceQuery, radius, DatabaseQuery.HINT_HEAVY_USE);
    LOG.verbose("Performing range queries with radius " + radius);
    // open file.
    try (RandomAccessFile file = new RandomAccessFile(out, "rw");
        FileChannel channel = file.getChannel();
        // and acquire a file write lock
        FileLock lock = channel.lock()) {
        // write magic header
        file.writeInt(RANGE_CACHE_MAGIC);
        // write the query radius.
        file.writeDouble(radius);
        // Initial buffer size, enough for about 100 results.
        int bufsize = 100 * 12 * 2 + 10;
        ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing range queries", relation.size(), LOG) : null;
        ModifiableDoubleDBIDList nn = DBIDUtil.newDistanceDBIDList();
        DoubleDBIDListIter ni = nn.iter();
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            nn.clear();
            rangeQ.getRangeForDBID(it, radius, nn);
            nn.sort();
            final int nnsize = nn.size();
            // Grow the buffer when needed:
            if (nnsize * 12 + 10 > bufsize) {
                while (nnsize * 12 + 10 > bufsize) {
                    bufsize <<= 1;
                }
                LOG.verbose("Resizing buffer to " + bufsize + " to store " + nnsize + " results:");
                buffer = ByteBuffer.allocateDirect(bufsize);
            }
            buffer.clear();
            ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
            ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
            int c = 0;
            for (ni.seek(0); ni.valid(); ni.advance(), c++) {
                ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
                buffer.putDouble(ni.doubleValue());
            }
            if (c != nn.size()) {
                throw new AbortException("Sizes did not agree. Cache is invalid.");
            }
            buffer.flip();
            channel.write(buffer);
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        lock.release();
    } catch (IOException e) {
        LOG.exception(e);
    }
// FIXME: close!
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) FileChannel(java.nio.channels.FileChannel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) RandomAccessFile(java.io.RandomAccessFile) FileLock(java.nio.channels.FileLock) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)
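
Two iterator details carry this method: the result list and its iterator are allocated once and recycled for every object (clear() empties the list, seek(0) rewinds the cursor), and both the internal integer index and the distance are read directly from the iterator when serializing. A reduced sketch of the recycling pattern, with the serialization left out; the package names for Relation and RangeQuery are assumed to follow the ELKI 0.7 layout, and the helper name is illustrative.

import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;

// Illustrative: run a range query per object, reusing one list and one iterator throughout.
static <O> void scanAll(Relation<O> relation, RangeQuery<O> rangeQ, double radius) {
    ModifiableDoubleDBIDList nn = DBIDUtil.newDistanceDBIDList();
    DoubleDBIDListIter ni = nn.iter();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        nn.clear(); // recycle the list instead of allocating a new one per query
        rangeQ.getRangeForDBID(it, radius, nn);
        nn.sort();
        for (ni.seek(0); ni.valid(); ni.advance()) {
            // consume ni.doubleValue() and the neighbor DBID referenced by ni
        }
    }
}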

Example 60 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class CacheDoubleDistanceKNNLists method run.

@Override
public void run() {
    database.initialize();
    Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);
    // open file.
    try (RandomAccessFile file = new RandomAccessFile(out, "rw");
        FileChannel channel = file.getChannel();
        // and acquire a file write lock
        FileLock lock = channel.lock()) {
        // write magic header
        file.writeInt(KNN_CACHE_MAGIC);
        // Initial buffer size, enough for two kNN lists of k entries each.
        int bufsize = k * 12 * 2 + 10;
        ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            final KNNList nn = knnQ.getKNNForDBID(it, k);
            final int nnsize = nn.size();
            // Grow the buffer when needed:
            if (nnsize * 12 + 10 > bufsize) {
                while (nnsize * 12 + 10 > bufsize) {
                    bufsize <<= 1;
                }
                buffer = ByteBuffer.allocateDirect(bufsize);
            }
            buffer.clear();
            ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
            ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
            int c = 0;
            for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) {
                ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
                buffer.putDouble(ni.doubleValue());
            }
            if (c != nn.size()) {
                throw new AbortException("Sizes did not agree. Cache is invalid.");
            }
            buffer.flip();
            channel.write(buffer);
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        lock.release();
    } catch (IOException e) {
        LOG.exception(e);
    }
// FIXME: close!
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FileChannel(java.nio.channels.FileChannel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) RandomAccessFile(java.io.RandomAccessFile) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FileLock(java.nio.channels.FileLock) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)
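
Per object, the iterator loop writes a compact record: the object's internal index as an unsigned varint, the neighbor count as a varint, then one varint index plus an 8-byte double distance per neighbor. The sketch below shows a matching reader in plain java.nio; it assumes ByteArrayUtil.writeUnsignedVarint emits a standard 7-bits-per-byte varint with a continuation high bit, and both methods are illustrative rather than ELKI API.

import java.nio.ByteBuffer;

// Illustrative decoder for one record of the cache produced above:
// [varint objectIndex][varint n][n x (varint neighborIndex, 8-byte double distance)]
static void readRecord(ByteBuffer buffer) {
    int objectIndex = readUnsignedVarint(buffer);
    int n = readUnsignedVarint(buffer);
    for (int i = 0; i < n; i++) {
        int neighborIndex = readUnsignedVarint(buffer);
        double distance = buffer.getDouble();
        // ... use (objectIndex, neighborIndex, distance) ...
    }
}

// Assumed varint encoding: 7 data bits per byte, high bit set while more bytes follow.
static int readUnsignedVarint(ByteBuffer buffer) {
    int value = 0, shift = 0;
    byte b;
    do {
        b = buffer.get();
        value |= (b & 0x7F) << shift;
        shift += 7;
    } while ((b & 0x80) != 0);
    return value;
}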

Aggregations

DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 69
KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 38
DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 34
FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 20
ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 19
WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 12
DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 11
MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 11
DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 11
OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 11
OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 11
DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList): 10
ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 9
DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 7
DBID (de.lmu.ifi.dbs.elki.database.ids.DBID): 6
MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 6
ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 5
DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair): 5
ArrayList (java.util.ArrayList): 5
NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 4