Search in sources:

Example 1 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In class NNChain, method nnChainCore.

/**
 * Uses NNChain as in "Modern hierarchical, agglomerative clustering
 * algorithms" by Daniel Müllner
 * <p>
 * Runs {@code mat.size - 1} rounds; each round ends with exactly one call to
 * {@code merge}. Within a round, a chain of matrix positions is extended by
 * repeatedly appending the closest not-yet-linked position of the current
 * chain end, until the newly appended position equals the one two steps back
 * (i.e. the last two distinct positions are each other's nearest neighbor).
 * That pair is then merged, larger index into smaller index.
 * <p>
 * The chain is kept between rounds where possible: only its last three
 * entries are dropped, and the search continues from what remains.
 *
 * @param mat Matrix view
 * @param builder Result builder
 */
private void nnChainCore(MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder) {
    final DBIDArrayIter ix = mat.ix;
    final double[] distances = mat.matrix;
    final int size = mat.size;
    // The maximum chain size = number of ids + 1
    IntegerArray chain = new IntegerArray(size + 1);
    // Progress is only tracked when verbose logging is on; otherwise it stays null.
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running NNChain", size - 1, LOG) : null;
    // k only counts the size - 1 rounds; end is the bound passed to
    // findUnlinked and updated by AGNES.shrinkActiveSet after each merge.
    for (int k = 1, end = size; k < size; k++) {
        int a = -1, b = -1;
        if (chain.size() <= 3) {
            // Too few entries to reuse (the else-branch reads four): restart the
            // chain from scratch.
            // Accessing two arbitrary not yet merged elements could be optimized to
            // work in O(1) like in Müllner;
            // however this usually does not have a huge impact (empirically just
            // about 1/5000 of total performance)
            // NOTE(review): findUnlinked presumably returns the first position in
            // [start, end) that is not linked yet - confirm against its definition.
            a = findUnlinked(0, end, ix, builder);
            b = findUnlinked(a + 1, end, ix, builder);
            chain.clear();
            // Only a goes on the chain; b serves as the initial nearest-neighbor
            // candidate below.
            chain.add(a);
        } else {
            // Chain is expected to look like (.... a, b, c, b) with b and c merged.
            int lastIndex = chain.size;
            int c = chain.get(lastIndex - 2);
            b = chain.get(lastIndex - 3);
            a = chain.get(lastIndex - 4);
            // Ensure we had a loop at the end:
            assert (chain.get(lastIndex - 1) == c || chain.get(lastIndex - 1) == b);
            // if c < b, then we merged b -> c, otherwise c -> b
            // (so b becomes the smaller of the two indexes, matching the
            // "larger into smaller" merge direction used further down)
            b = c < b ? c : b;
            // Cut the tail:
            // (drops the last three entries, so a is now the last chain entry)
            chain.size -= 3;
        }
        // For ties, always prefer the second-last element b:
        // (the searches below use a strict "<", so b is only replaced by a
        // strictly closer candidate)
        double minDist = mat.get(a, b);
        do {
            // c is the best neighbor of a found so far; defaults to b.
            int c = b;
            // Positions i < a: distance (a, i) is read at offset triangleSize(a) + i.
            final int ta = MatrixParadigm.triangleSize(a);
            for (int i = 0; i < a; i++) {
                if (i != b && !builder.isLinked(ix.seek(i))) {
                    double dist = distances[ta + i];
                    if (dist < minDist) {
                        minDist = dist;
                        c = i;
                    }
                }
            }
            // Positions i > a: distance (i, a) is read at offset triangleSize(i) + a.
            // Note that this scan runs up to size, not up to end.
            for (int i = a + 1; i < size; i++) {
                if (i != b && !builder.isLinked(ix.seek(i))) {
                    double dist = distances[MatrixParadigm.triangleSize(i) + a];
                    if (dist < minDist) {
                        minDist = dist;
                        c = i;
                    }
                }
            }
            // Advance: the old chain end becomes b, its nearest neighbor becomes the
            // new chain end a.
            b = a;
            a = c;
            chain.add(a);
            // Continue while the chain is shorter than 3, or the entry just added
            // differs from the entry two positions before it.
        } while (chain.size() < 3 || a != chain.get(chain.size - 1 - 2));
        // We always merge the larger into the smaller index:
        if (a < b) {
            int tmp = a;
            a = b;
            b = tmp;
        }
        assert (minDist == mat.get(a, b));
        assert (b < a);
        // NOTE(review): merge is defined outside this snippet; presumably it
        // records the merge in builder and updates the matrix - confirm.
        merge(size, mat, builder, minDist, a, b);
        // Shrink working set
        end = AGNES.shrinkActiveSet(ix, builder, end, a);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)

Example 2 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In class AGNES, method initializeDistanceMatrix.

/**
 * Fill the scratch matrix with the initial linkage values.
 * <p>
 * Walks all objects via {@code mat.ix}; for the object at offset {@code x},
 * the distances to all objects at offsets {@code 0 .. x - 1} (via
 * {@code mat.iy}) are passed through {@link Linkage#initial} and written to
 * consecutive matrix cells. The write position at the start of each row is
 * asserted to equal {@code MatrixParadigm.triangleSize(x)}.
 *
 * @param mat Matrix
 * @param dq Distance query
 * @param linkage Linkage method
 */
protected static void initializeDistanceMatrix(MatrixParadigm mat, DistanceQuery<?> dq, Linkage linkage) {
    final DBIDArrayIter rowIter = mat.ix;
    final DBIDArrayIter colIter = mat.iy;
    final double[] cells = mat.matrix;
    final boolean squared = dq.getDistanceFunction().isSquared();
    // Progress reporting is optional; a null progress is passed through to LOG.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Distance matrix computation", cells.length, LOG);
    }
    // Next cell to write; cells are filled strictly in sequence.
    int cursor = 0;
    rowIter.seek(0);
    while (rowIter.valid()) {
        final int row = rowIter.getOffset();
        assert (cursor == MatrixParadigm.triangleSize(row));
        // Only columns before the current row: lower triangle without diagonal.
        colIter.seek(0);
        while (colIter.getOffset() < row) {
            cells[cursor] = linkage.initial(dq.distance(rowIter, colIter), squared);
            cursor++;
            colIter.advance();
        }
        if (progress != null) {
            progress.setProcessed(cursor, LOG);
        }
        rowIter.advance();
    }
    // The matrix array may be longer than the cells actually written; report
    // the full length so that completing the progress does not log an error.
    if (progress != null) {
        progress.setProcessed(cells.length, LOG);
    }
    LOG.ensureCompleted(progress);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 3 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In class AGNES, method updateMatrix.

/**
 * Refresh the scratch matrix entries of position y after x was merged into it.
 * <p>
 * For every position j that is not linked yet (skipping x and y themselves),
 * the stored distance between j and y is replaced by
 * {@code linkage.combine(...)} of the old distances j-x and j-y. Entries
 * involving x are only read, never written. The caller must ensure
 * {@code y < x}; this is not checked here.
 *
 * @param end Active set size
 * @param mat Matrix view
 * @param builder Hierarchy builder (to get cluster sizes)
 * @param mindist Distance that was used for merging
 * @param x First matrix position
 * @param y Second matrix position
 * @param sizex Old size of first cluster
 * @param sizey Old size of second cluster
 */
protected void updateMatrix(int end, MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y, final int sizex, final int sizey) {
    final int rowX = MatrixParadigm.triangleSize(x);
    final int rowY = MatrixParadigm.triangleSize(y);
    final double[] cells = mat.matrix;
    final DBIDArrayIter iter = mat.ix;

    // Phase 1: other < y, so both pairs are addressed through the rows of y and x.
    int other = 0;
    while (other < y) {
        if (!builder.isLinked(iter.seek(other))) {
            // rowY + other is only a valid cell while other is below y.
            assert (other < y);
            final int target = rowY + other;
            cells[target] = linkage.combine(sizex, cells[rowX + other], sizey, cells[target], builder.getSize(iter), mindist);
        }
        other++;
    }
    // Step over y itself.
    other++;

    // Phase 2: y < other < x; the pair with y now sits in the row of other,
    // the pair with x is still found in the row of x.
    int rowOther = MatrixParadigm.triangleSize(other);
    while (other < x) {
        if (!builder.isLinked(iter.seek(other))) {
            final int target = rowOther + y;
            cells[target] = linkage.combine(sizex, cells[rowX + other], sizey, cells[target], builder.getSize(iter), mindist);
        }
        // Advance the row base incrementally together with the position.
        rowOther += other;
        other++;
    }
    // Step over x itself, keeping the row base in sync.
    rowOther += other;
    other++;

    // Phase 3: x < other < end; both pairs sit in the row of other.
    while (other < end) {
        if (!builder.isLinked(iter.seek(other))) {
            final int target = rowOther + y;
            cells[target] = linkage.combine(sizex, cells[rowOther + x], sizey, cells[target], builder.getSize(iter), mindist);
        }
        rowOther += other;
        other++;
    }
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 4 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In class AGNES, method run.

/**
 * Run the algorithm
 * <p>
 * Builds the initial distance matrix for the relation, then performs
 * {@code size - 1} rounds of {@code findMerge} followed by
 * {@code shrinkActiveSet}, and finally returns the completed hierarchy.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    if (linkage instanceof SingleLinkage) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    final DBIDs ids = relation.getDBIDs();
    final int size = ids.size();
    final DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());

    // Set up the initial (lower triangular) distance matrix.
    final MatrixParadigm mat = new MatrixParadigm(ids);
    initializeDistanceMatrix(mat, dq, linkage);

    // Collector for the merge hierarchy.
    final PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());

    // One merge per round; size - 1 rounds in total.
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Agglomerative clustering", size - 1, LOG);
    }
    final DBIDArrayIter ix = mat.ix;
    // end virtually shrinks the matrix as the trailing objects disappear.
    int end = size;
    for (int round = 1; round < size; round++) {
        end = shrinkActiveSet(ix, builder, end, findMerge(end, mat, builder));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return builder.complete();
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 5 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

In class AnderbergHierarchicalClustering, method run.

/**
 * Run the algorithm
 * <p>
 * Builds the initial distance matrix (shared with AGNES), initializes the
 * per-position nearest-neighbor cache arrays, then performs {@code size - 1}
 * rounds of {@code findMerge} followed by {@code AGNES.shrinkActiveSet}, and
 * finally returns the completed hierarchy.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    if (linkage instanceof SingleLinkage) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    final DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs ids = relation.getDBIDs();
    final MatrixParadigm mat = new MatrixParadigm(ids);
    final int size = ids.size();

    // Set up the initial distance matrix, using the AGNES helper.
    AGNES.initializeDistanceMatrix(mat, dq, linkage);

    // Cache arrays, one slot per matrix position, filled by initializeNNCache.
    final double[] bestd = new double[size];
    final int[] besti = new int[size];
    initializeNNCache(mat.matrix, bestd, besti);

    // Collector for the merge hierarchy.
    final PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());

    // One merge per round; size - 1 rounds in total.
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Agglomerative clustering", size - 1, LOG);
    }
    final DBIDArrayIter ix = mat.ix;
    // end is the active set bound, updated after every merge.
    int end = size;
    for (int round = 1; round < size; round++) {
        end = AGNES.shrinkActiveSet(ix, builder, end, findMerge(end, mat, bestd, besti, builder));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return builder.complete();
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 64; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 17; ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 15; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 15; ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 14; DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange): 13; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 12; AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 9; Test (org.junit.Test): 9; ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 8; WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 6; MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 5; IOException (java.io.IOException): 5; Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 4; DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar): 4; DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 4; Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 3; DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 3; SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension): 3; ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel): 3