Search in sources :

Example 6 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

The method run of the class ParallelLloydKMeans.

/**
 * Runs the k-means iteration, delegating the per-object work of each
 * iteration to a {@code KMeansProcessor} executed via {@code ParallelExecutor}.
 *
 * @param database Database handed to the initializer
 * @param relation Relation containing the vectors to cluster
 * @return Clustering with one top-level cluster per non-empty partition
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    DBIDs ids = relation.getDBIDs();
    // Starting centers come from the configured initializer.
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Per-object cluster index (-1 is passed as the store's default value,
    // presumably meaning "not assigned yet").
    final int storeHints = DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT;
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), storeHints, -1);
    double[] varsum = new double[k];
    KMeansProcessor<V> processor = new KMeansProcessor<>(relation, distanceFunction, assignment, varsum);
    IndefiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new IndefiniteProgress("K-Means iteration", LOG);
    }
    // A non-positive maxiter disables the iteration limit.
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(prog);
        processor.nextIteration(means);
        ParallelExecutor.run(ids, processor);
        // Converged: no object changed its cluster, so keep the current means.
        if (!processor.changed()) {
            break;
        }
        means = processor.getMeans();
        iteration++;
    }
    LOG.setCompleted(prog);
    // Convert the integer labels into clusters, skipping empty partitions.
    ArrayModifiableDBIDs[] partitions = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int c = 0; c < partitions.length; c++) {
        DBIDs members = partitions[c];
        if (members.size() != 0) {
            KMeansModel model = new KMeansModel(means[c], varsum[c]);
            result.addToplevelCluster(new Cluster<>(members, model));
        }
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)

Example 7 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

The method run of the class AGNES.

/**
 * Performs agglomerative hierarchical clustering on the relation: builds the
 * pairwise distance matrix, then merges clusters {@code size - 1} times.
 *
 * @param db Database used to obtain the distance query
 * @param relation Relation to cluster
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    if (SingleLinkage.class.isInstance(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    final DBIDs ids = relation.getDBIDs();
    final int size = ids.size();
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    // Fill the initial (lower triangular) distance matrix.
    MatrixParadigm mat = new MatrixParadigm(ids);
    initializeDistanceMatrix(mat, dq, linkage);
    // Builder collecting the merge hierarchy.
    final boolean squared = dq.getDistanceFunction().isSquared();
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, squared);
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Agglomerative clustering", size - 1, LOG);
    }
    DBIDArrayIter ix = mat.ix;
    // "end" virtually shrinks the matrix as trailing objects disappear.
    int end = size;
    // size - 1 merges are needed until everything is in one cluster.
    for (int remaining = size - 1; remaining > 0; remaining--) {
        end = shrinkActiveSet(ix, builder, end, findMerge(end, mat, builder));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return builder.complete();
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 8 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

The method run of the class AnderbergHierarchicalClustering.

/**
 * Performs agglomerative hierarchical clustering on the relation, passing
 * the {@code bestd}/{@code besti} cache arrays to the merge search.
 *
 * @param db Database used to obtain the distance query
 * @param relation Relation to cluster
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    if (SingleLinkage.class.isInstance(linkage)) {
        LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs ids = relation.getDBIDs();
    MatrixParadigm mat = new MatrixParadigm(ids);
    final int size = ids.size();
    // The distance matrix is filled by the shared AGNES helper.
    AGNES.initializeDistanceMatrix(mat, dq, linkage);
    // Cache arrays, one slot per object (presumably best distance / best index).
    double[] bestd = new double[size];
    int[] besti = new int[size];
    initializeNNCache(mat.matrix, bestd, besti);
    // Builder collecting the merge hierarchy.
    final boolean squared = dq.getDistanceFunction().isSquared();
    PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, squared);
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Agglomerative clustering", size - 1, LOG);
    }
    DBIDArrayIter ix = mat.ix;
    int end = size;
    // size - 1 merges are needed until everything is in one cluster.
    for (int remaining = size - 1; remaining > 0; remaining--) {
        end = AGNES.shrinkActiveSet(ix, builder, end, findMerge(end, mat, bestd, besti, builder));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return builder.complete();
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 9 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

The method run of the class LMCLUS.

/**
 * Main loop of LMCLUS (linear manifold clustering).
 *
 * <PRE>
 * While more than minsize objects are unclustered, the candidate set is
 * repeatedly narrowed: for each manifold dimensionality up to the maximum,
 * a separation is searched, and the objects whose deviation is below the
 * separation threshold are kept as the new candidate set.
 * Narrowing at one dimensionality stops when the separation is not good
 * enough or when fewer than minsize objects would remain.
 * A narrowed candidate set becomes a cluster and is removed from the
 * unclustered objects; otherwise the search ends.
 * Whatever is left over at the end is reported as a noise cluster.
 * For details see {@link LMCLUS}.
 * </PRE>
 *
 * @param database The database to operate on
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<NumberVector> relation) {
    Clustering<Model> ret = new Clustering<>("LMCLUS Clustering", "lmclus-clustering");
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Clustered objects", relation.size(), LOG);
    }
    IndefiniteProgress cprogress = null;
    if (LOG.isVerbose()) {
        cprogress = new IndefiniteProgress("Clusters found", LOG);
    }
    ModifiableDBIDs remaining = DBIDUtil.newHashSet(relation.getDBIDs());
    Random random = rnd.getSingleThreadedRandom();
    final int maxdim = Math.min(maxLMDim, RelationUtil.dimensionality(relation));
    int found = 0;
    while (remaining.size() > minsize) {
        DBIDs candidate = remaining;
        int bestDim = 1;
        for (int dim = 1; dim <= maxdim; dim++) {
            // Keep narrowing the candidate set at this dimensionality until
            // no acceptable separation is found anymore.
            for (;;) {
                Separation sep = findSeparation(relation, candidate, dim, random);
                if (sep.goodness <= sensitivityThreshold) {
                    break;
                }
                // Collect the objects whose deviation is below the threshold.
                ModifiableDBIDs inliers = DBIDUtil.newArray(candidate.size());
                DBIDIter it = candidate.iter();
                while (it.valid()) {
                    if (deviation(minusEquals(relation.get(it).toArray(), sep.originV), sep.basis) < sep.threshold) {
                        inliers.add(it);
                    }
                    it.advance();
                }
                // Too small to be a cluster: keep the previous candidate set.
                if (inliers.size() < minsize) {
                    break;
                }
                candidate = inliers;
                bestDim = dim;
            }
        }
        // Nothing was narrowed down (or the result is too small): stop searching.
        if (candidate.size() < minsize || candidate == remaining) {
            break;
        }
        // Register the new cluster.
        // TODO: annotate cluster with dimensionality
        final Cluster<Model> cluster = new Cluster<>(candidate);
        cluster.setName("Cluster_" + bestDim + "d_" + found);
        found++;
        ret.addToplevelCluster(cluster);
        // The clustered objects leave the working set.
        remaining.removeDBIDs(candidate);
        if (progress != null) {
            progress.setProcessed(relation.size() - remaining.size(), LOG);
        }
        if (cprogress != null) {
            cprogress.setProcessed(found, LOG);
        }
    }
    // Everything left over is reported as noise.
    if (remaining.size() > 0) {
        ret.addToplevelCluster(new Cluster<>(remaining, true));
    }
    if (progress != null) {
        progress.setProcessed(relation.size(), LOG);
        progress.ensureCompleted(LOG);
    }
    LOG.setCompleted(cprogress);
    return ret;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Random(java.util.Random) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 10 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

The method objectsRemoved of the class MaterializeKNNAndRKNNPreprocessor.

/**
 * Updates the materialized kNN lists and RkNN sets after objects were removed.
 * <p>
 * Step 1 drops the stored kNN list and RkNN set of every removed object,
 * step 2 recomputes the kNN lists of the affected objects and strips the
 * removed ids from the RkNN sets of their former neighbors, and step 3
 * notifies the listeners.
 *
 * @param ids the ids of the removed objects
 */
@Override
protected void objectsRemoved(DBIDs ids) {
    StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;
    // For debugging: valid DBIDs still in the database.
    final DBIDs valid = DBIDUtil.ensureSet(distanceQuery.getRelation().getDBIDs());
    ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
    // delete the materialized (old) kNNs and RkNNs
    getLogger().beginStep(stepprog, 1, "New deletions ocurred, remove their materialized kNNs and RkNNs.");
    // Temporary storage of removed lists
    List<KNNList> kNNs = new ArrayList<>(ids.size());
    List<TreeSet<DoubleDBIDPair>> rkNNs = new ArrayList<>(ids.size());
    for (DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) {
        // Remember the old kNN list before it is deleted from the storage.
        kNNs.add(storage.get(iter));
        // Sanity check: every stored neighbor should be either still valid or
        // part of this removal batch.
        for (DBIDIter it = storage.get(iter).iter(); it.valid(); it.advance()) {
            if (!valid.contains(it) && !ids.contains(it)) {
                LOG.warning("False kNN: " + it);
            }
        }
        storage.delete(iter);
        // Same procedure for the reverse kNN set: remember, check, delete.
        rkNNs.add(materialized_RkNN.get(iter));
        for (DoubleDBIDPair it : materialized_RkNN.get(iter)) {
            if (!valid.contains(it) && !ids.contains(it)) {
                LOG.warning("False RkNN: " + it);
            }
        }
        materialized_RkNN.delete(iter);
    }
    // Keep only those IDs not also removed
    // (presumably what affectedkNN / affectedRkNN do with aids - verify
    // against their implementation).
    ArrayDBIDs kNN_ids = affectedkNN(kNNs, aids);
    ArrayDBIDs rkNN_ids = affectedRkNN(rkNNs, aids);
    // update the affected kNNs and RkNNs
    getLogger().beginStep(stepprog, 2, "New deletions ocurred, update the affected kNNs and RkNNs.");
    // Recompute the kNN for affected objects (in rkNN lists)
    {
        List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
        // i indexes kNNList in lockstep with the rkNN_ids iterator.
        int i = 0;
        for (DBIDIter reknn = rkNN_ids.iter(); reknn.valid(); reknn.advance(), i++) {
            // NOTE(review): a null result for an id that IS still valid is not
            // skipped here; it only trips the assert below (when assertions are
            // enabled) - confirm whether "&&" is intended.
            if (kNNList.get(i) == null && !valid.contains(reknn)) {
                LOG.warning("BUG in online kNN/RkNN maintainance: " + DBIDUtil.toString(reknn) + " no longer in database.");
                continue;
            }
            assert (kNNList.get(i) != null);
            // Store the fresh kNN list and register reknn in the RkNN set of
            // each of its new neighbors.
            storage.put(reknn, kNNList.get(i));
            for (DoubleDBIDListIter it = kNNList.get(i).iter(); it.valid(); it.advance()) {
                materialized_RkNN.get(it).add(makePair(it, reknn));
            }
        }
    }
    // remove objects from RkNNs of objects (in kNN lists)
    {
        SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
        for (DBIDIter nn = kNN_ids.iter(); nn.valid(); nn.advance()) {
            TreeSet<DoubleDBIDPair> rkNN = materialized_RkNN.get(nn);
            // Explicit iterator so entries can be removed while iterating.
            for (Iterator<DoubleDBIDPair> it = rkNN.iterator(); it.hasNext(); ) {
                if (idsSet.contains(it.next())) {
                    it.remove();
                }
            }
        }
    }
    // inform listener
    // NOTE(review): this method mixes LOG and getLogger(); confirm both refer
    // to the intended logger.
    getLogger().beginStep(stepprog, 3, "New deletions ocurred, inform listeners.");
    fireKNNsRemoved(ids, rkNN_ids);
    getLogger().ensureCompleted(stepprog);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) ArrayList(java.util.ArrayList) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) TreeSet(java.util.TreeSet) DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 139, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 77, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 45, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 44, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 40, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 39, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 38, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 38, ArrayList (java.util.ArrayList): 35, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 34, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 29, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 25, Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 23, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 22, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 19, BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta): 18, WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore): 16, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 15, IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress): 14, DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 14