Search in sources :

Example 21 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class Leader method run.

/**
 * Run the leader clustering algorithm.
 *
 * Objects are visited in iteration order. Every object that has not been
 * assigned yet becomes the prototype of a new cluster, and that cluster
 * receives all still-unassigned objects returned by the range query around
 * the prototype. Iteration stops early once every object is assigned.
 *
 * @param relation Data set
 * @return Clustering result
 */
public Clustering<PrototypeModel<O>> run(Relation<O> relation) {
    RangeQuery<O> rangeQuery = relation.getRangeQuery(getDistanceFunction(), threshold);
    // Objects that already belong to some cluster.
    ModifiableDBIDs assigned = DBIDUtil.newHashSet(relation.size());
    Clustering<PrototypeModel<O>> result = new Clustering<>("Prototype clustering", "prototype-clustering");
    int numQueries = 0;
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Leader clustering", relation.size(), LOG);
    }
    DBIDIter leader = relation.iterDBIDs();
    while (leader.valid() && assigned.size() < relation.size()) {
        if (!assigned.contains(leader)) {
            DoubleDBIDList inRange = rangeQuery.getRangeForDBID(leader, threshold);
            numQueries++;
            ModifiableDBIDs members = DBIDUtil.newArray(inRange.size());
            for (DBIDIter neighbor = inRange.iter(); neighbor.valid(); neighbor.advance()) {
                // add() reports whether the object was newly inserted; skip
                // objects that an earlier leader has already claimed.
                if (!assigned.add(neighbor)) {
                    continue;
                }
                LOG.incrementProcessed(progress);
                members.add(neighbor);
            }
            // The leader itself must have been returned by its own range query.
            assert (members.size() > 0 && members.contains(leader));
            PrototypeModel<O> model = new SimplePrototypeModel<>(relation.get(leader));
            result.addToplevelCluster(new Cluster<>(members, model));
        }
        leader.advance();
    }
    LOG.statistics(new LongStatistic(this.getClass().getName() + ".queries", numQueries));
    LOG.ensureCompleted(progress);
    return result;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PrototypeModel(de.lmu.ifi.dbs.elki.data.model.PrototypeModel)

Example 22 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class SNNClustering method expandCluster.

/**
 * DBSCAN-function expandCluster adapted to SNN criterion.
 * <p>
 * Border-Objects become members of the first possible cluster.
 * <p>
 * If the start object has fewer than {@code minpts} SNN neighbors it is
 * recorded as noise. Otherwise a cluster is grown from its neighbors; the
 * grown set is only stored as a cluster when it contains at least
 * {@code minpts} objects, and is added to the noise set otherwise.
 *
 * @param snnInstance shared nearest neighbors
 * @param startObjectID potential seed of a new potential cluster
 * @param objprog the progress object to report about the progress of
 *        clustering; progress is only reported when both {@code objprog} and
 *        {@code clusprog} are non-null
 * @param clusprog the progress object to report the number of clusters
 *        found; progress is only reported when both {@code objprog} and
 *        {@code clusprog} are non-null
 */
protected void expandCluster(SimilarityQuery<O> snnInstance, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
    ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID);
    // startObject is no core-object
    if (seeds.size() < minpts) {
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
        if (objprog != null && clusprog != null) {
            objprog.setProcessed(processedIDs.size(), LOG);
            clusprog.setProcessed(resultList.size(), LOG);
        }
        return;
    }
    // try to expand the cluster
    ModifiableDBIDs currentCluster = DBIDUtil.newArray();
    // Claim every seed that is either unprocessed or currently marked as
    // noise; seeds that were processed and are not noise are left alone.
    for (DBIDIter seed = seeds.iter(); seed.valid(); seed.advance()) {
        if (!processedIDs.contains(seed)) {
            currentCluster.add(seed);
            processedIDs.add(seed);
        } else if (noise.contains(seed)) {
            currentCluster.add(seed);
            noise.remove(seed);
        }
    }
    DBIDVar o = DBIDUtil.newVar();
    while (seeds.size() > 0) {
        seeds.pop(o);
        ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);
        // Only objects with at least minpts neighbors extend the cluster.
        if (neighborhood.size() >= minpts) {
            for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                boolean inNoise = noise.contains(iter);
                boolean unclassified = !processedIDs.contains(iter);
                if (inNoise || unclassified) {
                    // Only previously unprocessed objects are expanded
                    // further; former noise objects join the cluster without
                    // being queued as seeds.
                    if (unclassified) {
                        seeds.add(iter);
                    }
                    currentCluster.add(iter);
                    processedIDs.add(iter);
                    if (inNoise) {
                        noise.remove(iter);
                    }
                }
            }
        }
        if (objprog != null && clusprog != null) {
            objprog.setProcessed(processedIDs.size(), LOG);
            // Count the cluster under construction as soon as it is large
            // enough to be accepted, using the same ">= minpts" test as the
            // final acceptance check after this loop.
            int numClusters = currentCluster.size() >= minpts ? resultList.size() + 1 : resultList.size();
            clusprog.setProcessed(numClusters, LOG);
        }
        // Stop early once every object is processed and none is left as noise.
        if (processedIDs.size() == snnInstance.getRelation().size() && noise.size() == 0) {
            break;
        }
    }
    if (currentCluster.size() >= minpts) {
        resultList.add(currentCluster);
    } else {
        // Too small to be a cluster: everything collected becomes noise.
        noise.addDBIDs(currentCluster);
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 23 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class DBSCAN method run.

/**
 * Performs the DBSCAN algorithm on the given database.
 *
 * When the relation holds fewer than {@code minpts} objects, all objects are
 * returned in a single cluster created with the boolean flag set to
 * {@code true} (the same flag used for the noise set below). Otherwise the
 * clustering is computed, the average neighborhood size is logged (with a
 * warning when it looks implausibly small or large), and the collected
 * clusters plus the noise set are packaged into the result.
 *
 * @param relation Relation to cluster
 * @return Clustering containing the found clusters and the noise cluster
 */
public Clustering<Model> run(Relation<O> relation) {
    final int size = relation.size();
    if (size < minpts) {
        // Too few objects to ever satisfy minpts: report everything at once.
        Clustering<Model> trivial = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
        trivial.addToplevelCluster(new Cluster<Model>(relation.getDBIDs(), true, ClusterModel.CLUSTER));
        return trivial;
    }

    RangeQuery<O> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
    resultList = new ArrayList<>();
    noise = DBIDUtil.newHashSet();
    runDBSCAN(relation, rangeQuery);

    // Average number of neighbors accumulated in ncounter per object.
    double avgNeighbors = ncounter / (double) relation.size();
    LOG.statistics(new DoubleStatistic(DBSCAN.class.getName() + ".average-neighbors", avgNeighbors));
    final boolean tooFewNeighbors = avgNeighbors < 1 + 0.1 * (minpts - 1);
    final boolean tooManyNeighbors = avgNeighbors > 100 * minpts;
    if (tooFewNeighbors) {
        LOG.warning("There are very few neighbors found. Epsilon may be too small.");
    }
    if (tooManyNeighbors) {
        LOG.warning("There are very many neighbors found. Epsilon may be too large.");
    }

    Clustering<Model> clustering = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
    for (ModifiableDBIDs members : resultList) {
        clustering.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
    }
    // The noise set is always appended last, even when it is empty.
    clustering.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
    return clustering;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 24 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class DBSCAN method expandCluster.

/**
 * DBSCAN-function expandCluster.
 *
 * Border-Objects become members of the first possible cluster.
 *
 * The start object is range-queried first. With fewer than {@code minpts}
 * neighbors it is recorded as noise and the method returns. Otherwise a new
 * cluster is started and grown by repeatedly popping a seed, range-querying
 * it, and processing its neighbors when it has at least {@code minpts} of
 * them. Every range query adds its result size to {@code ncounter}.
 *
 * @param relation Database relation to run on
 * @param rangeQuery Range query to use
 * @param startObjectID potential seed of a new potential cluster
 * @param seeds Array to store the current seeds
 * @param objprog Number of objects processed (may be {@code null})
 * @param clusprog Number of clusters found (may be {@code null})
 */
protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) {
    final DoubleDBIDList startNeighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
    ncounter += startNeighbors.size();
    final boolean startIsCore = startNeighbors.size() >= minpts;
    if (!startIsCore) {
        // Not a core object: record it as noise and stop.
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
        if (objprog != null) {
            objprog.incrementProcessed(LOG);
        }
        return;
    }

    // The start object opens a new cluster.
    ModifiableDBIDs cluster = DBIDUtil.newArray();
    cluster.add(startObjectID);
    processedIDs.add(startObjectID);

    // The seed buffer is reused across calls and is expected to arrive empty.
    assert (seeds.size() == 0);
    seeds.clear();
    processNeighbors(startNeighbors.iter(), cluster, seeds);

    DBIDVar current = DBIDUtil.newVar();
    while (!seeds.isEmpty()) {
        final DoubleDBIDList reachable = rangeQuery.getRangeForDBID(seeds.pop(current), epsilon);
        ncounter += reachable.size();
        final boolean isCore = reachable.size() >= minpts;
        if (isCore) {
            processNeighbors(reachable.iter(), cluster, seeds);
        }
        if (objprog != null) {
            objprog.incrementProcessed(LOG);
        }
    }

    resultList.add(cluster);
    if (clusprog != null) {
        clusprog.setProcessed(resultList.size(), LOG);
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 25 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class ByModelClustering method run.

/**
 * Run the actual clustering algorithm.
 *
 * Objects are grouped by the model stored for them in the relation; each
 * distinct model yields one top-level cluster. The cluster name is taken
 * from {@code GeneratorInterface#getName()} when the model implements that
 * interface, and from {@code toString()} otherwise. When a noise pattern is
 * configured and is found in the name, the cluster is flagged as noise.
 *
 * @param relation The data input we use
 * @return Clustering with one top-level cluster per distinct model
 */
public Clustering<Model> run(Relation<Model> relation) {
    // Group object ids by their model.
    HashMap<Model, ModifiableDBIDs> idsByModel = new HashMap<>();
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        Model key = relation.get(it);
        ModifiableDBIDs bucket = idsByModel.get(key);
        if (bucket == null) {
            // First object seen for this model: open a new id set.
            bucket = DBIDUtil.newHashSet();
            idsByModel.put(key, bucket);
        }
        bucket.add(it);
        it.advance();
    }

    // Turn every group into a named top-level cluster.
    Clustering<Model> clustering = new Clustering<>("By Model Clustering", "bymodel-clustering");
    for (Entry<Model, ModifiableDBIDs> group : idsByModel.entrySet()) {
        final Model model = group.getKey();
        final ModifiableDBIDs members = group.getValue();
        final String label;
        if (model instanceof GeneratorInterface) {
            label = ((GeneratorInterface) model).getName();
        } else {
            label = model.toString();
        }
        Cluster<Model> cluster = new Cluster<>(label, members, model);
        final boolean matchesNoise = noisepattern != null && noisepattern.matcher(label).find();
        if (matchesNoise) {
            cluster.setNoise(true);
        }
        clustering.addToplevelCluster(cluster);
    }
    return clustering;
}
Also used : GeneratorInterface(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorInterface) HashMap(java.util.HashMap) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)44 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)30 ArrayList (java.util.ArrayList)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)28 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)12 Model (de.lmu.ifi.dbs.elki.data.model.Model)11 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)8 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)7