Search in sources:

Example 1 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class OPTICSCut method makeOPTICSCut.

/**
 * Build a flat partitioning from a cluster order by cutting the reachability
 * values at a fixed threshold.
 *
 * Objects whose reachability is at most {@code epsilon} extend the cluster
 * currently being collected; every other object closes that cluster and is
 * collected as noise. The object directly preceding a drop to or below the
 * threshold is moved from the noise set into the cluster that starts there.
 *
 * @param co Cluster order result
 * @param epsilon Epsilon value for cut
 * @return New partitioning clustering
 */
public static <E extends ClusterOrder> Clustering<Model> makeOPTICSCut(E co, double epsilon) {
    // Result container.
    final Clustering<Model> result = new Clustering<>("OPTICS Cut Clustering", "optics-cut");
    // Objects not assigned to any cluster.
    final ModifiableDBIDs outliers = DBIDUtil.newHashSet();
    // Members of the cluster currently being collected.
    ModifiableDBIDs members = DBIDUtil.newHashSet();
    // Object visited in the previous iteration (unset during the first one).
    final DBIDVar predecessor = DBIDUtil.newVar();
    double reach = Double.MAX_VALUE;
    final DBIDIter cursor = co.iter();
    while (cursor.valid()) {
        final double previousReach = reach;
        reach = co.getReachability(cursor);
        if (reach <= epsilon) {
            // The value just fell to or below the threshold: the predecessor was
            // filed as noise, but it is the first member of this cluster.
            final boolean plotJustDropped = previousReach > epsilon && predecessor.isSet();
            if (plotJustDropped) {
                outliers.remove(predecessor);
                members.add(predecessor);
            }
            members.add(cursor);
        } else {
            // Above the threshold: close whatever cluster was open.
            if (!members.isEmpty()) {
                result.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
                members = DBIDUtil.newHashSet();
            }
            outliers.add(cursor);
        }
        // Remember this object before moving on.
        predecessor.set(cursor);
        cursor.advance();
    }
    // A cluster still open at the end of the order is emitted as well.
    if (!members.isEmpty()) {
        result.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
    }
    // The noise set is always added, flagged via the boolean constructor argument.
    result.addToplevelCluster(new Cluster<Model>(outliers, true, ClusterModel.CLUSTER));
    return result;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) Model(de.lmu.ifi.dbs.elki.data.model.Model) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class FarthestPointsInitialMeans method chooseInitialMedoids.

/**
 * Choose initial medoids by farthest-point traversal over the candidate ids.
 *
 * A random candidate is drawn first. Each round then picks the candidate
 * whose smallest distance to the previously picked objects is largest. When
 * {@code dropfirst} is set, the random object only seeds the search: one
 * extra round is run and the random object is removed from the result.
 *
 * Only objects contained in {@code ids} are considered; the temporary
 * distance store is allocated for exactly these ids.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate ids
 * @param distQ Distance query used to compare candidates
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    // Per-candidate minimum distance to the chosen objects; NaN marks "already chosen".
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    DBIDVar prevmean = DBIDUtil.newVar(first);
    means.add(first);
    DBIDVar best = DBIDUtil.newVar(first);
    // With dropfirst, round 0 is an extra round that replaces the random seed.
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object:
        double maxdist = Double.NEGATIVE_INFINITY;
        // Iterate the candidate set, not the whole relation: the store only
        // covers ids, and the result must be drawn from ids.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double val = Math.min(prev, distQ.distance(prevmean, it));
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, val);
            }
            if (val > maxdist) {
                maxdist = val;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class AbstractHDBSCAN method convertToPointerRepresentation.

/**
 * Convert spanning tree to a pointer representation.
 *
 * Each heap entry is read as one edge: the key is the edge distance and the
 * value packs the two endpoint indexes into {@code ids} (first index in the
 * bits above bit 30, second index in the low 31 bits). Edges are consumed in
 * the order the heap returns them; for each edge the current leads of both
 * endpoints are looked up in {@code pi} and the smaller lead is attached to
 * the larger one at the edge distance.
 *
 * Note: the heap must use the correct encoding of indexes.
 *
 * @param ids IDs indexed
 * @param heap Heap of edges (key: distance, value: packed index pair); it is
 *        emptied by this method
 * @param pi Parent array; overwritten with the parent pointers
 * @param lambda Distance array; receives the merge distance of each attached
 *        object
 */
protected void convertToPointerRepresentation(ArrayDBIDs ids, DoubleLongHeap heap, WritableDBIDDataStore pi, WritableDoubleDataStore lambda) {
    final Logging LOG = getLogger();
    // Initialize parent array:
    for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
        // Initialize: every object starts as its own parent.
        pi.put(iter, iter);
    }
    // p, q: the two leads being merged; n: scratch variable for the parent lookup.
    DBIDVar p = DBIDUtil.newVar(), q = DBIDUtil.newVar(), n = DBIDUtil.newVar();
    FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Converting MST to pointer representation", heap.size(), LOG) : null;
    while (!heap.isEmpty()) {
        final double dist = heap.peekKey();
        final long pair = heap.peekValue();
        // Unpack the two endpoint indexes from the long value.
        final int i = (int) (pair >>> 31), j = (int) (pair & 0x7FFFFFFFL);
        ids.assignVar(i, p);
        // Follow p to its parent.
        // The lookup result is also left in n (as used by p.set(n) below); the
        // walk stops once an object is its own parent.
        while (!DBIDUtil.equal(p, pi.assignVar(p, n))) {
            p.set(n);
        }
        // Follow q to its parent.
        ids.assignVar(j, q);
        while (!DBIDUtil.equal(q, pi.assignVar(q, n))) {
            q.set(n);
        }
        // By definition of the pointer representation, the largest element in
        // each cluster is the cluster lead.
        // The extraction methods currently rely on this!
        int c = DBIDUtil.compare(p, q);
        if (c < 0) {
            // p joins q:
            pi.put(p, q);
            lambda.put(p, dist);
        } else {
            // Equal leads would mean both endpoints are already in the same
            // cluster; only checked when assertions are enabled.
            assert (c != 0) : "This should never happen!";
            // q joins p:
            pi.put(q, p);
            lambda.put(q, dist);
        }
        heap.poll();
        LOG.incrementProcessed(pprog);
    }
    LOG.ensureCompleted(pprog);
    // Correction pass over all objects. NOTE(review): the original comment here
    // appears truncated; presumably it covers tied distances, where the merge
    // order above "does not fulfill the property that the last element has the
    // largest id" - confirm against the upstream source.
    for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
        double d = lambda.doubleValue(iter);
        // Parent:
        pi.assignVar(iter, p);
        q.set(p);
        // Follow parent while tied.
        // Continues as long as this object's distance is not smaller than the
        // candidate's, and the candidate is not its own parent.
        while (d >= lambda.doubleValue(q) && !DBIDUtil.equal(q, pi.assignVar(q, n))) {
            q.set(n);
        }
        // Rewrite the parent pointer only if the walk moved past the original parent.
        if (!DBIDUtil.equal(p, q)) {
            if (LOG.isDebuggingFinest()) {
                LOG.finest("Correcting parent: " + p + " -> " + q);
            }
            pi.put(iter, q);
        }
    }
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 4 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class SLINK method slinkstep3.

/**
 * Third step: update P (pi) and L (lambda) for the first {@code n} objects
 * with respect to the newly inserted object.
 *
 * For every processed object i, M of its current parent is lowered to
 * {@code L(i)} or {@code M(i)} (whichever applies) if that value is smaller.
 * When {@code L(i) >= M(i)}, the object is additionally re-linked to the new
 * object with {@code L(i) = M(i)}.
 *
 * @param id the id of the object to be inserted into the pointer
 *        representation
 * @param it array iterator
 * @param n Last object to process at this run
 * @param pi Pi data store
 * @param lambda Lambda data store
 * @param m Data store
 */
private void slinkstep3(DBIDRef id, DBIDArrayIter it, int n, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableDoubleDataStore m) {
    final DBIDVar parent = DBIDUtil.newVar();
    it.seek(0);
    while (it.getOffset() < n) {
        final double lambdaI = lambda.doubleValue(it);
        final double mI = m.doubleValue(it);
        // parent = P(i), read before P(i) may be overwritten below
        parent.from(pi, it);
        final double mParent = m.doubleValue(parent);
        // L(i) >= M(i): object i gets attached to the new object
        final boolean relink = lambdaI >= mI;
        // M(P(i)) = min { M(P(i)), relink ? L(i) : M(i) }
        final double candidate = relink ? lambdaI : mI;
        if (candidate < mParent) {
            m.putDouble(parent, candidate);
        }
        if (relink) {
            // L(i) = M(i)
            lambda.putDouble(it, mI);
            // P(i) = n+1
            pi.put(it, id);
        }
        it.advance();
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar)

Example 5 with DBIDVar

use of de.lmu.ifi.dbs.elki.database.ids.DBIDVar in project elki by elki-project.

the class BundleWriter method writeBundleStream.

/**
 * Serialize a bundle stream into an output channel.
 *
 * Events are pulled from the source until the end of the stream. The header
 * is written when the first object arrives; after that, a change of the meta
 * data is rejected. For each object the DBID is written first (only when a
 * serializer is present in slot 0), followed by one value per data column.
 * Bytes remaining in the buffer are flushed at the end.
 *
 * @param source Data source
 * @param output Output channel
 * @throws IOException on IO errors
 */
public void writeBundleStream(BundleStreamSource source, WritableByteChannel output) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocateDirect(INITIAL_BUFFER);
    final DBIDVar idHolder = DBIDUtil.newVar();
    // Stays null until the header has been written.
    ByteBufferSerializer<?>[] serializers = null;
    boolean streaming = true;
    while (streaming) {
        final BundleStreamSource.Event event = source.nextEvent();
        switch(event) {
            case END_OF_STREAM:
                streaming = false;
                break;
            case META_CHANGED:
                if (serializers != null) {
                    throw new AbortException("Meta changes are not supported, once the block header has been written.");
                }
                break;
            case NEXT_OBJECT:
                // First object: emit the header and obtain the serializers.
                if (serializers == null) {
                    serializers = writeHeader(source, buffer, output);
                }
                // Slot 0 is the DBID serializer; null means no DBID is written.
                if (serializers[0] != null) {
                    if (!source.assignDBID(idHolder)) {
                        throw new AbortException("An object did not have an DBID assigned.");
                    }
                    final DBID id = DBIDUtil.deref(idHolder);
                    @SuppressWarnings("unchecked") final ByteBufferSerializer<DBID> idSerializer = (ByteBufferSerializer<DBID>) serializers[0];
                    buffer = ensureBuffer(idSerializer.getByteSize(id), buffer, output);
                    idSerializer.toByteBuffer(buffer, id);
                }
                // Data column c is handled by the serializer in slot c + 1.
                for (int column = 0; column + 1 < serializers.length; column++) {
                    @SuppressWarnings("unchecked") final ByteBufferSerializer<Object> columnSerializer = (ByteBufferSerializer<Object>) serializers[column + 1];
                    buffer = ensureBuffer(columnSerializer.getByteSize(source.data(column)), buffer, output);
                    columnSerializer.toByteBuffer(buffer, source.data(column));
                }
                break;
            default:
                LOG.warning("Unknown bundle stream event. API inconsistent? " + event);
                break;
        }
    }
    // Write out whatever is still pending in the buffer.
    if (buffer.position() > 0) {
        flushBuffer(buffer, output);
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ByteBufferSerializer(de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer) ByteBuffer(java.nio.ByteBuffer) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)26 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)7 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)6 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)6 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)5 ArrayList (java.util.ArrayList)5 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)4 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)4 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)3 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)3 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)2 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)2 List (java.util.List)2 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1