Search in sources:

Example 6 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class AGNES method initializeDistanceMatrix.

/**
 * Fill the (lower triangular) distance matrix with its initial values.
 *
 * For every row offset x, the distances to all column offsets below x are
 * computed, passed through the linkage's initial transformation, and stored
 * consecutively in the flat matrix array.
 *
 * @param mat Matrix
 * @param dq Distance query
 * @param linkage Linkage method
 */
protected static void initializeDistanceMatrix(MatrixParadigm mat, DistanceQuery<?> dq, Linkage linkage) {
    final double[] dists = mat.matrix;
    final DBIDArrayIter rowIter = mat.ix, colIter = mat.iy;
    final boolean squared = dq.getDistanceFunction().isSquared();
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Distance matrix computation", dists.length, LOG);
    }
    // Write cursor into the flat array.
    int cursor = 0;
    rowIter.seek(0);
    while (rowIter.valid()) {
        final int row = rowIter.getOffset();
        // Each row must begin exactly at its triangle offset.
        assert (cursor == MatrixParadigm.triangleSize(row));
        colIter.seek(0);
        while (colIter.getOffset() < row) {
            dists[cursor] = linkage.initial(dq.distance(rowIter, colIter), squared);
            cursor++;
            colIter.advance();
        }
        if (progress != null) {
            progress.setProcessed(cursor, LOG);
        }
        rowIter.advance();
    }
    // The array may be larger than the part actually filled; report the full
    // length so that the progress is considered complete.
    if (progress != null) {
        progress.setProcessed(dists.length, LOG);
    }
    LOG.ensureCompleted(progress);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 7 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class AGNES method run.

/**
 * Execute the agglomerative clustering on the given relation.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    if (SingleLinkage.class.isInstance(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    final DBIDs ids = relation.getDBIDs();
    final int size = ids.size();
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());

    // Set up the initial (lower triangular) distance matrix.
    MatrixParadigm mat = new MatrixParadigm(ids);
    initializeDistanceMatrix(mat, dq, linkage);

    // Builder collecting the merges of the hierarchy.
    final boolean squared = dq.getDistanceFunction().isSquared();
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, squared);

    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Agglomerative clustering", size - 1, LOG);
    }

    // "active" virtually shrinks the matrix as trailing objects disappear.
    DBIDArrayIter ix = mat.ix;
    int active = size;
    // size - 1 merges are needed until everything is in one cluster.
    for (int step = 1; step < size; step++) {
        active = shrinkActiveSet(ix, builder, active, findMerge(active, mat, builder));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return builder.complete();
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 8 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class AnderbergHierarchicalClustering method run.

/**
 * Execute the clustering on the given relation, using per-row caches of the
 * best merge candidate.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    if (SingleLinkage.class.isInstance(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs ids = relation.getDBIDs();
    MatrixParadigm mat = new MatrixParadigm(ids);
    final int size = ids.size();

    // Fill the matrix with the initial distances.
    AGNES.initializeDistanceMatrix(mat, dq, linkage);

    // Caches: best distance and the index it belongs to, one slot per object.
    double[] bestd = new double[size];
    int[] besti = new int[size];
    initializeNNCache(mat.matrix, bestd, besti);

    // Builder collecting the merges of the hierarchy.
    final boolean squared = dq.getDistanceFunction().isSquared();
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, squared);

    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Agglomerative clustering", size - 1, LOG);
    }

    DBIDArrayIter ix = mat.ix;
    int active = size;
    // size - 1 merges are needed until everything is in one cluster.
    for (int step = 1; step < size; step++) {
        active = AGNES.shrinkActiveSet(ix, builder, active, findMerge(active, mat, bestd, besti, builder));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return builder.complete();
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 9 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class AbstractHDBSCAN method convertToPointerRepresentation.

/**
 * Convert spanning tree to a pointer representation.
 *
 * The edges are taken from the heap one at a time (in the order the heap
 * yields them). For each edge, both endpoints are followed along the parent
 * array until an object that is its own parent is reached; the smaller of
 * these two (by DBID comparison) is then attached to the larger one, and the
 * edge's key is stored as its distance. A second pass over all ids then
 * adjusts parent pointers in the presence of tied distances.
 *
 * Note: the heap must use the correct encoding of indexes: each value packs
 * two array offsets into one long, the first in the bits above bit 30 and the
 * second in the lowest 31 bits.
 *
 * @param ids IDs indexed
 * @param heap Heap; it is emptied by this method
 * @param pi Parent array
 * @param lambda Distance array
 */
protected void convertToPointerRepresentation(ArrayDBIDs ids, DoubleLongHeap heap, WritableDBIDDataStore pi, WritableDoubleDataStore lambda) {
    final Logging LOG = getLogger();
    // Initialize parent array:
    for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
        // Initialize: every object starts as its own parent.
        pi.put(iter, iter);
    }
    // Reusable variables: p and q are the two sides of the current edge, n is
    // scratch space for the parent lookup.
    DBIDVar p = DBIDUtil.newVar(), q = DBIDUtil.newVar(), n = DBIDUtil.newVar();
    FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Converting MST to pointer representation", heap.size(), LOG) : null;
    while (!heap.isEmpty()) {
        final double dist = heap.peekKey();
        final long pair = heap.peekValue();
        // Decode the two offsets packed into the long value.
        final int i = (int) (pair >>> 31), j = (int) (pair & 0x7FFFFFFFL);
        ids.assignVar(i, p);
        // Follow p to its parent.
        // (presumably pi.assignVar(p, n) stores the parent of p in n and
        // returns it, so the loop stops once p is its own parent - confirm)
        while (!DBIDUtil.equal(p, pi.assignVar(p, n))) {
            p.set(n);
        }
        // Follow q to its parent.
        ids.assignVar(j, q);
        while (!DBIDUtil.equal(q, pi.assignVar(q, n))) {
            q.set(n);
        }
        // By definition of the pointer representation, the largest element in
        // each cluster is the cluster lead.
        // The extraction methods currently rely on this!
        int c = DBIDUtil.compare(p, q);
        if (c < 0) {
            // p joins q:
            pi.put(p, q);
            lambda.put(p, dist);
        } else {
            // c == 0 would mean both endpoints already share the same lead.
            assert (c != 0) : "This should never happen!";
            // q joins p:
            pi.put(q, p);
            lambda.put(q, dist);
        }
        // The edge is only removed after it has been processed.
        heap.poll();
        LOG.incrementProcessed(pprog);
    }
    LOG.ensureCompleted(pprog);
    // Fix-up pass over all objects.
    // NOTE(review): the original comment here appears to be truncated; only
    // its last line survives:
    // does not fulfill the property that the last element has the largest id.
    // Presumably it refers to merges produced under tied distances - confirm.
    for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
        double d = lambda.doubleValue(iter);
        // Parent:
        pi.assignVar(iter, p);
        q.set(p);
        // Follow parent while tied.
        // (continues while this object's distance is >= the distance stored
        // for q, and q is not its own parent)
        while (d >= lambda.doubleValue(q) && !DBIDUtil.equal(q, pi.assignVar(q, n))) {
            q.set(n);
        }
        // Only rewrite the parent if the walk actually moved.
        if (!DBIDUtil.equal(p, q)) {
            if (LOG.isDebuggingFinest()) {
                LOG.finest("Correcting parent: " + p + " -> " + q);
            }
            pi.put(iter, q);
        }
    }
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 10 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class MiniMax method run.

/**
 * Execute the clustering on the given relation, using a precomputed
 * distance query.
 *
 * @param db Database
 * @param relation Relation to process.
 * @return Hierarchical result
 */
public PointerPrototypeHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = DatabaseUtil.precomputedDistanceQuery(db, relation, getDistanceFunction(), LOG);
    final DBIDs ids = relation.getDBIDs();
    final int size = ids.size();

    // Builder collecting the merges of the hierarchy.
    final boolean squared = dq.getDistanceFunction().isSquared();
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, squared);
    Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(size);

    // Working space: the distance matrix and one prototype slot per matrix cell.
    MatrixParadigm mat = new MatrixParadigm(ids);
    ArrayModifiableDBIDs prots = DBIDUtil.newArray(MatrixParadigm.triangleSize(size));
    initializeMatrices(mat, prots, dq);
    DBIDArrayMIter protiter = prots.iter();

    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("MiniMax clustering", size - 1, LOG);
    }

    DBIDArrayIter ix = mat.ix;
    int active = size;
    // size - 1 merges are performed in total.
    for (int step = 1; step < size; step++) {
        active = AGNES.shrinkActiveSet(ix, builder, active, findMerge(active, mat, protiter, builder, clusters, dq));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    return (PointerPrototypeHierarchyRepresentationResult) builder.complete();
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 145, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 78, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 34, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 33, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 29, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 25, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 25, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 23, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 23, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 23, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 21, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 20, ArrayList (java.util.ArrayList): 18, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 17, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 17, Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 16, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 16, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 14, Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration): 13, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 12