Search in sources :

Example 1 with PointerHierarchyRepresentationResult

use of de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult in project elki by elki-project.

The method run of the class NaiveAgglomerativeHierarchicalClustering4.

/**
 * Run the naive agglomerative clustering algorithm on a relation.
 * <p>
 * All pairwise distances are first stored in a packed lower triangular
 * matrix (squared for Ward linkage, unless the distance function already
 * reports itself as squared). Afterwards the closest pair of not yet merged
 * entries is merged {@code size - 1} times; every merge is recorded as a
 * parent pointer together with the height at which it happened.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 * @throws AbortException if the relation contains more than 0x10000 objects
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    final DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    if (size > 0x10000) {
        throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
    }
    if (Linkage.SINGLE.equals(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }

    // Packed lower triangular distance matrix: the entry for (row, col) with
    // col < row is addressed as triangleSize(row) + col everywhere below.
    final double[] matrix = new double[triangleSize(size)];
    final DBIDArrayIter rowIt = ids.iter(), colIt = ids.iter(), otherIt = ids.iter();
    // Running write position; the row-by-row fill order must agree with the
    // triangleSize(row) + col addressing used later.
    int cell = 0;
    final boolean squareDistances = Linkage.WARD.equals(linkage) && !getDistanceFunction().isSquared();
    for (int row = 0; rowIt.valid(); row++, rowIt.advance()) {
        colIt.seek(0);
        for (int col = 0; col < row; col++, colIt.advance()) {
            final double dist = dq.distance(rowIt, colIt);
            // Ward operates on variances, i.e. on squared values.
            matrix[cell] = squareDistances ? dist * dist : dist;
            cell++;
        }
    }

    // Result storage: parent pointer and merge height per object, plus a
    // temporary cluster size counter.
    final WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    final WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    final WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (DBIDIter init = ids.iter(); init.valid(); init.advance()) {
        // Every object starts as its own parent, at infinite height, with size 1.
        parent.put(init, init);
        height.put(init, Double.POSITIVE_INFINITY);
        csize.put(init, 1);
    }

    // Merging size objects into a single hierarchy takes size - 1 merges.
    final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
    for (int mergesLeft = size - 1; mergesLeft > 0; mergesLeft--) {
        // Locate the smallest entry among the rows and columns that are still
        // active; an entry is inactive once it has a finite merge height.
        double best = Double.POSITIVE_INFINITY;
        int bestRow = -1, bestCol = -1;
        for (rowIt.seek(0); rowIt.valid(); rowIt.advance()) {
            if (height.doubleValue(rowIt) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int row = rowIt.getOffset();
            final int rowBase = triangleSize(row);
            for (colIt.seek(0); colIt.getOffset() < row; colIt.advance()) {
                if (height.doubleValue(colIt) < Double.POSITIVE_INFINITY) {
                    continue;
                }
                final int col = colIt.getOffset();
                final double candidate = matrix[rowBase + col];
                // "<=" so that a pair at infinite distance can still be chosen.
                if (candidate <= best) {
                    best = candidate;
                    bestRow = row;
                    bestCol = col;
                }
            }
        }
        assert (bestRow >= 0 && bestCol >= 0);

        // Reposition the existing iterators instead of allocating new ones.
        rowIt.seek(bestRow);
        colIt.seek(bestCol);

        // Merge x into y: as bestCol < bestRow, the entry with the smaller
        // offset is kept and the one with the larger offset is retired.
        final int sizeX = csize.intValue(rowIt), sizeY = csize.intValue(colIt);
        height.put(rowIt, best);
        parent.put(rowIt, colIt);
        csize.put(colIt, sizeX + sizeY);

        // Refresh the distances of the surviving entry y to every other active
        // entry j. Where the cell lives depends on how j compares to y and x.
        final int xBase = triangleSize(bestRow), yBase = triangleSize(bestCol);

        // Case j < y: target cell is (y, j), old x-distance is in (x, j).
        for (otherIt.seek(0); otherIt.getOffset() < bestCol; otherIt.advance()) {
            if (height.doubleValue(otherIt) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int j = otherIt.getOffset();
            final int sizeJ = csize.intValue(otherIt);
            final int target = yBase + j;
            matrix[target] = linkage.combine(sizeX, matrix[xBase + j], sizeY, matrix[target], sizeJ, best);
        }

        // Case y < j < x: target cell is (j, y), old x-distance is in (x, j).
        for (otherIt.seek(bestCol + 1); otherIt.getOffset() < bestRow; otherIt.advance()) {
            if (height.doubleValue(otherIt) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int j = otherIt.getOffset();
            final int jBase = triangleSize(j);
            final int sizeJ = csize.intValue(otherIt);
            final int target = jBase + bestCol;
            matrix[target] = linkage.combine(sizeX, matrix[xBase + j], sizeY, matrix[target], sizeJ, best);
        }

        // Case y < x < j: target cell is (j, y), old x-distance is in (j, x).
        for (otherIt.seek(bestRow + 1); otherIt.valid(); otherIt.advance()) {
            if (height.doubleValue(otherIt) < Double.POSITIVE_INFINITY) {
                continue;
            }
            final int jBase = triangleSize(otherIt.getOffset());
            final int sizeJ = csize.intValue(otherIt);
            final int target = jBase + bestCol;
            matrix[target] = linkage.combine(sizeX, matrix[jBase + bestRow], sizeY, matrix[target], sizeJ, best);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return new PointerHierarchyRepresentationResult(ids, parent, height, dq.getDistanceFunction().isSquared());
}
Also used : PointerHierarchyRepresentationResult(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with PointerHierarchyRepresentationResult

use of de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult in project elki by elki-project.

The method processNewResult of the class HDBSCANHierarchyExtractionEvaluator.

/**
 * Run the inner extraction on every pointer hierarchy found for the new
 * result, and attach each extracted clustering as a child of the hierarchy
 * it was computed from.
 *
 * @param hier Result hierarchy to search
 * @param newResult Newly added result to process
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
    final ArrayList<PointerHierarchyRepresentationResult> hierarchies = ResultUtil.filterResults(hier, newResult, PointerHierarchyRepresentationResult.class);
    for (final PointerHierarchyRepresentationResult hierarchy : hierarchies) {
        final Clustering<DendrogramModel> extracted = inner.run(hierarchy);
        hierarchy.addChildResult(extracted);
    }
}
Also used : PointerHierarchyRepresentationResult(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult) DendrogramModel(de.lmu.ifi.dbs.elki.data.model.DendrogramModel)

Aggregations

PointerHierarchyRepresentationResult (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult): 2, DendrogramModel (de.lmu.ifi.dbs.elki.data.model.DendrogramModel): 1, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 1, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 1, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 1, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 1, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 1