Search in sources :

Example 31 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class SUBCLU method runDBSCAN.

/**
 * Applies DBSCAN to one partition of the relation, restricted to the given
 * subspace, and returns only the clusters that are not flagged as noise.
 *
 * @param relation the relation holding the objects to cluster
 * @param ids the IDs forming the partition to cluster; when {@code null},
 *        all IDs of {@code relation} are used instead
 * @param subspace the subspace whose dimensions are selected on the distance
 *        function before DBSCAN is run
 * @return the clusters produced by the DBSCAN run, without noise clusters
 */
private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
    // Restrict the distance function to the dimensions of this subspace.
    distanceFunction.setSelectedDimensions(subspace.getDimensions());
    // TODO: when no partition is given, we might want to use an index - the
    // proxy below will prevent this!
    final DBIDs partition = (ids != null) ? ids : relation.getDBIDs();
    final ProxyDatabase partitionDb = new ProxyDatabase(partition, relation);
    final DBSCAN<V> algorithm = new DBSCAN<>(distanceFunction, epsilon, minpts);
    if (LOG.isVerbose()) {
        LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
    }
    final Clustering<Model> outcome = algorithm.run(partitionDb);
    // Keep the proper clusters only; everything marked as noise is dropped.
    final List<Cluster<Model>> kept = new ArrayList<>();
    for (Cluster<Model> found : outcome.getAllClusters()) {
        if (found.isNoise()) {
            continue;
        }
        kept.add(found);
    }
    return kept;
}
Also used : SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DBSCAN(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Example 32 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class SUBCLU method run.

/**
 * Performs the SUBCLU algorithm on the given relation.
 *
 * First, DBSCAN is run on every single dimension. Then, level by level,
 * candidate subspaces are generated from the subspaces of the previous level
 * that contained clusters, and DBSCAN is re-run on the members of each
 * cluster of the candidate's "best" lower-dimensional subspace. Finally, all
 * clusters found are collected into the result, visiting subspaces in
 * descending order of the {@code Subspace.DimensionComparator}.
 *
 * The result is also stored in the {@code result} field.
 *
 * @param relation Relation to process
 * @return Clustering result, one top-level cluster per subspace cluster found
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
    // Generate all 1-dimensional clusters
    LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
    // mapping of level to the subspaces of that level containing clusters;
    // key d holds the (d+1)-dimensional subspaces
    HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
    // list of 1-dimensional subspaces containing clusters
    List<Subspace> s_1 = new ArrayList<>();
    subspaceMap.put(0, s_1);
    // mapping of subspaces to list of clusters
    TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
    for (int d = 0; d < dimensionality; d++) {
        Subspace currentSubspace = new Subspace(d);
        // null partition: DBSCAN runs on all objects of the relation
        List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
        if (LOG.isDebuggingFiner()) {
            StringBuilder msg = new StringBuilder();
            msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
            for (Cluster<Model> cluster : clusters) {
                msg.append("      ").append(cluster.getIDs()).append('\n');
            }
            LOG.debugFiner(msg.toString());
        }
        if (!clusters.isEmpty()) {
            s_1.add(currentSubspace);
            clusterMap.put(currentSubspace, clusters);
        }
    }
    // Generate (d+1)-dimensional clusters from d-dimensional clusters
    for (int d = 0; d < dimensionality - 1; d++) {
        if (stepprog != null) {
            stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
        }
        List<Subspace> subspaces = subspaceMap.get(d);
        if (subspaces == null || subspaces.isEmpty()) {
            // Nothing to extend: report the remaining steps as skipped and stop.
            if (stepprog != null) {
                for (int dim = d + 1; dim < dimensionality - 1; dim++) {
                    stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
                }
            }
            break;
        }
        List<Subspace> candidates = generateSubspaceCandidates(subspaces);
        // candidate subspaces of this level that turned out to contain clusters
        List<Subspace> s_d = new ArrayList<>();
        for (Subspace candidate : candidates) {
            Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
            if (LOG.isDebuggingFine()) {
                LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
            }
            List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
            // Re-cluster the members of each cluster of the best subspace within
            // the candidate subspace.
            List<Cluster<Model>> clusters = new ArrayList<>();
            for (Cluster<Model> cluster : bestSubspaceClusters) {
                clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
            }
            if (LOG.isDebuggingFine()) {
                StringBuilder msg = new StringBuilder();
                msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
                for (Cluster<Model> c : clusters) {
                    msg.append("      ").append(c.getIDs()).append('\n');
                }
                LOG.debugFine(msg.toString());
            }
            if (!clusters.isEmpty()) {
                s_d.add(candidate);
                clusterMap.put(candidate, clusters);
            }
        }
        if (!s_d.isEmpty()) {
            subspaceMap.put(d + 1, s_d);
        }
    }
    // build result
    int numClusters = 1;
    result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
    for (Subspace subspace : clusterMap.descendingKeySet()) {
        List<Cluster<Model>> clusters = clusterMap.get(subspace);
        for (Cluster<Model> cluster : clusters) {
            // Re-wrap each cluster with a subspace model built from its subspace
            // and the centroid of its members.
            Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
            newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
            newCluster.setName("cluster_" + numClusters++);
            result.addToplevelCluster(newCluster);
        }
    }
    LOG.setCompleted(stepprog);
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) TreeMap(java.util.TreeMap) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) List(java.util.List)

Example 33 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class XMeans method splitCluster.

/**
 * Tries to split a cluster in two and keeps the split only when the
 * information criterion prefers it.
 *
 * @param parentCluster Cluster to split
 * @param database Database
 * @param relation Data relation
 * @return A list containing only the parent cluster when it cannot be split
 *         or the split is not preferred by the information criterion;
 *         otherwise the clusters of the child clustering.
 */
protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Database database, Relation<V> relation) {
    // Wrap the parent into a one-element clustering so that it can be scored.
    ArrayList<Cluster<M>> unsplit = new ArrayList<>(1);
    unsplit.add(parentCluster);
    Clustering<M> unsplitClustering = new Clustering<>(parentCluster.getName(), parentCluster.getName(), unsplit);
    // Fewer than two members: splitting is not possible.
    if (parentCluster.size() < 2) {
        return unsplit;
    }
    // Run the inner k-means with k=2 on the members of the parent only.
    ProxyDatabase subsetDb = new ProxyDatabase(parentCluster.getIDs(), database);
    splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation));
    innerKMeans.setK(2);
    Clustering<M> splitClustering = innerKMeans.run(subsetDb);
    // Score both alternatives with the same criterion and distance function.
    double parentScore = informationCriterion.quality(unsplitClustering, getDistanceFunction(), relation);
    double childrenScore = informationCriterion.quality(splitClustering, getDistanceFunction(), relation);
    if (LOG.isDebugging()) {
        LOG.debug("parentEvaluation: " + parentScore);
        LOG.debug("childrenEvaluation: " + childrenScore);
    }
    // The parent is kept when exactly one of "children scored higher" and
    // "criterion is ascending" holds.
    final boolean keepParent = (childrenScore > parentScore) ^ informationCriterion.ascending();
    if (keepParent) {
        return unsplit;
    }
    return splitClustering.getAllClusters();
}
Also used : ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 34 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class KNNKernelDensityMinimaClustering method run.

/**
 * Run the clustering algorithm on a data relation.
 *
 * The objects are sorted by the single dimension {@code dim}; a density value
 * is estimated for every object from its up to {@code k} predecessors and
 * {@code k} successors in that order; then the sorted sequence is cut into
 * consecutive clusters at local density minima detected with a sliding window
 * derived from {@code minwindow}.
 *
 * @param relation Relation
 * @return Clustering result; each cluster is a contiguous range of the sorted
 *         objects
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    final int size = ids.size();
    // Sort by the sole dimension
    ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
    // Density storage.
    WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // iter walks the current object, iter2 is repositioned to visit neighbors.
    DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
    StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
    LOG.beginStep(sprog, 1, "Kernel density estimation.");
    {
        // Differences to up to k predecessors followed by up to k successors.
        double[] scratch = new double[2 * k];
        iter.seek(0);
        for (int i = 0; i < size; i++, iter.advance()) {
            // Current value.
            final double curv = relation.get(iter).doubleValue(dim);
            // pre/pos: first and last neighbor offsets, clamped to the array;
            // prek/posk: number of neighbors available on each side.
            final int pre = Math.max(i - k, 0), prek = i - pre;
            final int pos = Math.min(i + k, size - 1), posk = pos - i;
            iter2.seek(pre);
            for (int j = 0; j < prek; j++, iter2.advance()) {
                scratch[j] = curv - relation.get(iter2).doubleValue(dim);
            }
            assert (iter2.getOffset() == i);
            // Skip the current object itself.
            iter2.advance();
            for (int j = 0; j < posk; j++, iter2.advance()) {
                scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
            }
            // NOTE(review): this only holds when at least k neighbors exist in
            // total, i.e. presumably requires size > k - confirm callers ensure it.
            assert (prek + posk >= k);
            // Presumably the k-th smallest neighbor difference (kNN distance);
            // NOTE(review): confirm whether rank k is 0- or 1-based and valid
            // when prek + posk == k.
            double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
            switch(mode) {
                case BALLOON:
                    {
                        // Density of the current object: sum of kernel values of
                        // the neighbor differences scaled by this object's kdist.
                        double dens = 0.;
                        if (kdist > 0.) {
                            for (int j = 0; j < prek + posk; j++) {
                                dens += kernel.density(scratch[j] / kdist);
                            }
                        } else {
                            // Zero bandwidth: treat the density as infinite.
                            dens = Double.POSITIVE_INFINITY;
                        }
                        assert (iter.getOffset() == i);
                        density.putDouble(iter, dens);
                        break;
                    }
                case SAMPLE:
                    {
                        // The current object contributes to the density of each
                        // of its neighbors, using the current object's kdist.
                        if (kdist > 0.) {
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                        } else {
                            // Zero bandwidth: only neighbors whose difference is
                            // not positive are set to infinite density.
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                        }
                        break;
                    }
                default:
                    throw new UnsupportedOperationException("Unknown mode specified.");
            }
        }
    }
    LOG.beginStep(sprog, 2, "Local minima detection.");
    Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
    {
        // Ring buffer holding the most recent 2 * minwindow + 1 density values.
        double[] scratch = new double[2 * minwindow + 1];
        // Offset (in sorted order) at which the next cluster starts.
        int begin = 0;
        int halfw = (minwindow + 1) >> 1;
        iter.seek(0);
        // Fill initial buffer.
        for (int i = 0; i < size; i++, iter.advance()) {
            // m: ring-buffer slot for the current value; t: slot of the value
            // seen minwindow + 1 positions earlier, i.e. the candidate minimum.
            // t may be negative for small i, but is only used once
            // i > scratch.length.
            final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
            scratch[m] = density.doubleValue(iter);
            // NOTE(review): the buffer already holds scratch.length values when
            // i == scratch.length - 1; confirm that '>' (rather than '>=') is
            // the intended start condition.
            if (i > scratch.length) {
                // Smallest density in the window, excluding the candidate.
                double min = Double.POSITIVE_INFINITY;
                for (int j = 0; j < scratch.length; j++) {
                    if (j != t && scratch[j] < min) {
                        min = scratch[j];
                    }
                }
                // Local minimum:
                if (scratch[t] < min) {
                    int end = i - minwindow + 1;
                    {
                        // Test on which side the kNN is
                        // NOTE(review): the differences here use the opposite
                        // sign convention to the density step above - confirm
                        // the comparison direction is intended. Also confirm
                        // that end + halfw cannot reach size for small minwindow.
                        iter2.seek(end);
                        double curv = relation.get(iter2).doubleValue(dim);
                        iter2.seek(end - halfw);
                        double left = relation.get(iter2).doubleValue(dim) - curv;
                        iter2.seek(end + halfw);
                        double right = curv - relation.get(iter2).doubleValue(dim);
                        if (left < right) {
                            end++;
                        }
                    }
                    // Emit the objects at offsets [begin, end) as one cluster.
                    iter2.seek(begin);
                    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
                    for (int j = 0; j < end - begin; j++, iter2.advance()) {
                        cids.add(iter2);
                    }
                    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
                    begin = end;
                }
            }
        }
        // Extract last cluster
        int end = size;
        iter2.seek(begin);
        ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
        for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
        }
        clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
    }
    LOG.ensureCompleted(sprog);
    return clustering;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) VectorUtil(de.lmu.ifi.dbs.elki.data.VectorUtil) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)

Example 35 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class ByLabelClustering method run.

/**
 * Builds a clustering in which every label with at least two objects
 * becomes one top-level cluster.
 *
 * Objects whose label group has at most one member are collected into a
 * single cluster named "Noise", which is flagged as noise. A label cluster is
 * additionally flagged as noise when a noise pattern is configured and
 * matches its label.
 *
 * @param relation The data input we use
 * @return the clustering derived from the labels
 */
public Clustering<Model> run(Relation<?> relation) {
    final HashMap<String, DBIDs> byLabel = multiple ? multipleAssignment(relation) : singleAssignment(relation);
    final ModifiableDBIDs leftovers = DBIDUtil.newArray();
    final Clustering<Model> result = new Clustering<>("By Label Clustering", "bylabel-clustering");
    for (Entry<String, DBIDs> group : byLabel.entrySet()) {
        final DBIDs members = group.getValue();
        if (members.size() > 1) {
            // Large enough to form a cluster of its own.
            final Cluster<Model> labelCluster = new Cluster<Model>(group.getKey(), members, ClusterModel.CLUSTER);
            if (noisepattern != null && noisepattern.matcher(group.getKey()).find()) {
                labelCluster.setNoise(true);
            }
            result.addToplevelCluster(labelCluster);
        } else {
            // Too small to be a cluster: remember the objects as noise.
            leftovers.addDBIDs(members);
        }
    }
    // Emit the collected small groups as one noise cluster, if there are any.
    if (leftovers.size() > 0) {
        final Cluster<Model> noiseCluster = new Cluster<Model>("Noise", leftovers, ClusterModel.CLUSTER);
        noiseCluster.setNoise(true);
        result.addToplevelCluster(noiseCluster);
    }
    return result;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Aggregations

Cluster (de.lmu.ifi.dbs.elki.data.Cluster)38 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)21 Model (de.lmu.ifi.dbs.elki.data.model.Model)18 ArrayList (java.util.ArrayList)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)11 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)11 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)8 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)7 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)6 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)6 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)4 HashMap (java.util.HashMap)4 ByLabelOrAllInOneClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)3 ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)3 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)3 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)3