Search in sources:

Example 41 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class NaiveAgglomerativeHierarchicalClustering4 method run.

/**
 * Run the algorithm
 *
 * The method first fills a lower triangular distance matrix, stored as a flat
 * array, with the pairwise distances of all objects. It then performs
 * {@code size - 1} merge steps. Each step scans the matrix for the smallest
 * entry among the objects whose merge height is still infinite, links the
 * object at the larger offset to the object at the smaller offset, and
 * rewrites the matrix entries of the kept object using
 * {@code linkage.combine}.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 * @throws AbortException if the relation holds more than 0x10000 objects
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    // Refuse inputs whose triangular matrix could not be indexed with an int.
    if (size > 0x10000) {
        throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
    }
    if (Linkage.SINGLE.equals(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    // Compute the initial (lower triangular) distance matrix.
    double[] scratch = new double[triangleSize(size)];
    // Three reusable iterators: ix and iy address matrix rows/columns, ij is
    // used below to walk the "other" objects while updating after a merge.
    DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter();
    // Position counter - must agree with computeOffset!
    int pos = 0;
    // Distances are squared here only for Ward linkage, and only when the
    // distance function does not already report itself as squared.
    boolean square = Linkage.WARD.equals(linkage) && !getDistanceFunction().isSquared();
    for (int x = 0; ix.valid(); x++, ix.advance()) {
        // Restart the column iterator; only entries with y < x are stored.
        iy.seek(0);
        for (int y = 0; y < x; y++, iy.advance()) {
            scratch[pos] = dq.distance(ix, iy);
            // Ward uses variances -- i.e. squared values
            if (square) {
                scratch[pos] *= scratch[pos];
            }
            pos++;
        }
    }
    // Initialize space for result:
    WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    // Initially every object is its own parent, has cluster size 1, and an
    // infinite height. An infinite height is used below as the marker for
    // "not merged into another object yet".
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        parent.put(it, it);
        height.put(it, Double.POSITIVE_INFINITY);
        csize.put(it, 1);
    }
    // Perform size - 1 merge steps, one per iteration of the loop below.
    // The progress object is only created when verbose logging is enabled.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
    for (int i = 1; i < size; i++) {
        // Search the matrix for the closest pair of still-active objects.
        double min = Double.POSITIVE_INFINITY;
        int minx = -1, miny = -1;
        for (ix.seek(0); ix.valid(); ix.advance()) {
            // A finite height means this object was already merged away.
            if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
                continue;
            }
            // Start of row ix in the flat triangular array.
            final int xbase = triangleSize(ix.getOffset());
            for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
                if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
                    continue;
                }
                final int idx = xbase + iy.getOffset();
                // "<=" means that on ties the pair found last in scan order wins.
                if (scratch[idx] <= min) {
                    min = scratch[idx];
                    minx = ix.getOffset();
                    miny = iy.getOffset();
                }
            }
        }
        assert (minx >= 0 && miny >= 0);
        // Avoid allocating memory, by reusing existing iterators:
        ix.seek(minx);
        iy.seek(miny);
        // Perform merge in data structure: x -> y
        // Since y < x, prefer keeping y, dropping x.
        // The sizes are read before csize of y is overwritten, because the
        // matrix update below needs the pre-merge sizes.
        int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
        height.put(ix, min);
        parent.put(ix, iy);
        csize.put(iy, sizex + sizey);
        // Update distance matrix. Note: miny < minx
        // Only entries involving y are rewritten; entries of x are left as
        // they are, and x is skipped from now on because its height is finite.
        final int xbase = triangleSize(minx), ybase = triangleSize(miny);
        // Write to (y, j), with j < y
        for (ij.seek(0); ij.getOffset() < miny; ij.advance()) {
            if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int sizej = csize.intValue(ij);
            // j < y < x: both old distances are found in the rows of x and y.
            scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min);
        }
        // Write to (j, y), with y < j < x
        for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
            if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int jbase = triangleSize(ij.getOffset());
            final int sizej = csize.intValue(ij);
            // y < j < x: d(x, j) is in row x, d(y, j) is in row j.
            scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min);
        }
        // Write to (j, y), with y < x < j
        for (ij.seek(minx + 1); ij.valid(); ij.advance()) {
            if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int jbase = triangleSize(ij.getOffset());
            final int sizej = csize.intValue(ij);
            // y < x < j: both old distances are found in row j.
            scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
        }
        // NOTE(review): prog is null when not verbose; presumably the logging
        // helpers accept null -- confirm against the Logging class.
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return new PointerHierarchyRepresentationResult(ids, parent, height, dq.getDistanceFunction().isSquared());
}
Also used : PointerHierarchyRepresentationResult(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 42 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class AbstractBiclustering method rowsBitsetToIDs.

/**
 * Collect the DBIDs of all rows whose bit is set in the given bitset.
 *
 * Bit positions are matched against the positions of the row ID array of
 * this instance; bits beyond the end of that array are ignored.
 *
 * @param rows Bitset of selected rows, packed into longs
 * @return the DBIDs of the selected rows
 */
protected ArrayDBIDs rowsBitsetToIDs(long[] rows) {
    final ArrayModifiableDBIDs selected = DBIDUtil.newArray(BitsUtil.cardinality(rows));
    final DBIDArrayIter cursor = this.rowIDs.iter();
    for (long word : rows) {
        // An all-zero word selects nothing: jump over its 64 rows at once.
        if (word == 0L) {
            cursor.advance(Long.SIZE);
            continue;
        }
        for (int bit = 0; bit < Long.SIZE; ++bit) {
            // Ran past the last row: nothing further can be selected.
            if (!cursor.valid()) {
                return selected;
            }
            if ((word & 1L) != 0L) {
                selected.add(cursor);
            }
            word >>>= 1;
            cursor.advance();
        }
    }
    return selected;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 43 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class AGNES method findMerge.

/**
 * Find the closest pair of unlinked objects and merge them (one AGNES step).
 *
 * @param end Active set size
 * @param mat Matrix storage
 * @param builder Pointer representation builder
 * @return the matrix index of the object that was merged away, so that the
 *         caller can shrink the working set
 */
protected int findMerge(int end, MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder) {
    assert (end > 0);
    final DBIDArrayIter rowIter = mat.ix;
    final DBIDArrayIter colIter = mat.iy;
    final double[] distances = mat.matrix;
    int bestRow = -1;
    int bestCol = -1;
    double best = Double.POSITIVE_INFINITY;
    // rowBase tracks the start of the current row in the triangular matrix.
    int rowBase = 0;
    for (int row = 0; row < end; rowBase += row, row++) {
        // Only rows of objects that have not joined a cluster are scanned.
        if (!builder.isLinked(rowIter.seek(row))) {
            assert (rowBase == MatrixParadigm.triangleSize(row));
            for (int col = 0; col < row; col++) {
                // Only columns of objects that have not joined a cluster count.
                if (!builder.isLinked(colIter.seek(col))) {
                    final double candidate = distances[rowBase + col];
                    // On ties the later pair wins, to truncate more often.
                    if (candidate <= best) {
                        best = candidate;
                        bestRow = row;
                        bestCol = col;
                    }
                }
            }
        }
    }
    assert (bestRow >= 0 && bestCol >= 0);
    // The scan only visits col < row, so no swap is needed.
    assert (bestCol < bestRow);
    merge(end, mat, builder, best, bestRow, bestCol);
    return bestRow;
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 44 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class AGNES method merge.

/**
 * Merge the object at matrix position x into the one at position y.
 *
 * @param end Active set size
 * @param mat Matrix paradigm
 * @param builder Hierarchy builder
 * @param mindist Distance that was used for merging
 * @param x First matrix position (the object that is dropped)
 * @param y Second matrix position (the object that is kept)
 */
protected void merge(int end, MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) {
    // Reposition the shared matrix iterators instead of allocating new ones.
    final DBIDArrayIter dropped = mat.ix.seek(x);
    final DBIDArrayIter kept = mat.iy.seek(y);
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("Merging: " + DBIDUtil.toString(dropped) + " -> " + DBIDUtil.toString(kept) + " " + mindist);
    }
    // The merge direction is x -> y; because y < x, y is the one retained.
    assert (y < x);
    final double mergeHeight = linkage.restore(mindist, getDistanceFunction().isSquared());
    builder.add(dropped, mergeHeight, kept);
    // The retained object now carries the combined cluster size.
    final int sizex = builder.getSize(dropped);
    final int sizey = builder.getSize(kept);
    builder.setSize(kept, sizex + sizey);
    updateMatrix(end, mat, builder, mindist, x, y, sizex, sizey);
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 45 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class AnderbergHierarchicalClustering method merge.

/**
 * Merge the object at matrix position x into the one at position y, and
 * keep the cache of best distances in step with the merge.
 *
 * @param size Data set size
 * @param mat Matrix paradigm
 * @param bestd Best distance
 * @param besti Index of best distance
 * @param builder Hierarchy builder
 * @param mindist Distance that was used for merging
 * @param x First matrix position (the object that is dropped)
 * @param y Second matrix position (the object that is kept)
 */
protected void merge(int size, MatrixParadigm mat, double[] bestd, int[] besti, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) {
    // Reposition the shared matrix iterators instead of allocating new ones.
    final DBIDArrayIter dropped = mat.ix.seek(x);
    final DBIDArrayIter kept = mat.iy.seek(y);
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("Merging: " + DBIDUtil.toString(dropped) + " -> " + DBIDUtil.toString(kept) + " " + mindist);
    }
    // The merge direction is x -> y; because y < x, y is the one retained.
    assert (y < x);
    final double mergeHeight = linkage.restore(mindist, getDistanceFunction().isSquared());
    builder.add(dropped, mergeHeight, kept);
    // The retained object now carries the combined cluster size.
    final int sizex = builder.getSize(dropped);
    final int sizey = builder.getSize(kept);
    builder.setSize(kept, sizex + sizey);
    // Mark x as inactive in the cache.
    besti[x] = -1;
    // Careful: the iterator passed here is modified by updateMatrix.
    updateMatrix(size, mat.matrix, kept, bestd, besti, builder, mindist, x, y, sizex, sizey);
    // The cached best partner of y only needs recomputing if it was x.
    if (besti[y] != x) {
        return;
    }
    findBest(size, mat.matrix, bestd, besti, y);
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)64 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)15 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)13 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)12 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)9 Test (org.junit.Test)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 IOException (java.io.IOException)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)3 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3