Search in sources :

Example 36 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class ByLabelHierarchicalClustering method run.

/**
 * Build a clustering from the labels found in a relation.
 *
 * <p>The label of an object is the {@code toString()} of its value in the
 * relation. Objects whose value is {@code null} are collected as noise, and
 * so is every label group that is represented by a single {@link DBID}
 * (how groups are built is up to {@code assign}, which is not shown here).
 * The remaining groups become clusters that are linked by name prefix.
 *
 * @param relation The data input to use
 * @return the resulting clustering; it contains an extra top-level cluster
 *         named "Noise" when at least one noise object was collected
 */
public Clustering<Model> run(Relation<?> relation) {
    final Clustering<Model> result = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");
    final HashMap<String, DBIDs> idsByLabel = new HashMap<>();
    final ModifiableDBIDs noise = DBIDUtil.newArray();

    // Phase 1: group all objects by their label; unlabeled objects are noise.
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final Object value = relation.get(it);
        if (value != null) {
            assign(idsByLabel, value.toString(), it);
        } else {
            noise.add(it);
        }
        it.advance();
    }

    // Phase 2: turn label groups into clusters; a group that is just a
    // single DBID is treated as noise instead.
    final ArrayList<Cluster<Model>> named = new ArrayList<>(idsByLabel.size());
    for (Entry<String, DBIDs> group : idsByLabel.entrySet()) {
        final DBIDs members = group.getValue();
        if (members instanceof DBID) {
            noise.add((DBID) members);
        } else {
            named.add(new Cluster<Model>(group.getKey(), members, ClusterModel.CLUSTER));
        }
    }

    // Phase 3: link clusters whose names extend the name of another cluster.
    // A cluster is only registered as top-level if no other cluster's name
    // starts with its name.
    // NOTE(review): the call passes (oth, cur) while the debug message calls
    // oth the child of cur - confirm the parameter order of addChildCluster.
    for (Cluster<Model> cur : named) {
        final String prefix = cur.getName();
        boolean linked = false;
        for (Cluster<Model> oth : named) {
            if (oth == cur || !oth.getName().startsWith(prefix)) {
                continue;
            }
            result.addChildCluster(oth, cur);
            if (LOG.isDebuggingFiner()) {
                LOG.debugFiner(oth.getName() + " is a child of " + prefix);
            }
            linked = true;
        }
        if (!linked) {
            result.addToplevelCluster(cur);
        }
    }

    // Phase 4: everything collected as noise goes into one top-level cluster.
    if (noise.size() > 0) {
        final Cluster<Model> noiseCluster = new Cluster<Model>("Noise", noise, ClusterModel.CLUSTER);
        noiseCluster.setNoise(true);
        result.addToplevelCluster(noiseCluster);
    }
    return result;
}
Also used : HashMap(java.util.HashMap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 37 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class LogClusterSizes method logClusterSizes.

/**
 * Log the cluster sizes of a clustering.
 *
 * <p>Emits nothing unless statistics logging is enabled. Otherwise logs the
 * long name of the clustering and the number of clusters, followed by one
 * group of entries per cluster: its name (if it has one), its size, a noise
 * marker for noise clusters, and the names of its children in the cluster
 * hierarchy (if it has any).
 *
 * @param c Clustering to analyze
 */
public static <C extends Model> void logClusterSizes(Clustering<C> c) {
    if (!LOG.isStatistics()) {
        // Statistics logging is off, so there is nothing to report.
        return;
    }
    final List<Cluster<C>> all = c.getAllClusters();
    final Hierarchy<Cluster<C>> hier = c.getClusterHierarchy();
    LOG.statistics(new StringStatistic(PREFIX + "name", c.getLongName()));
    LOG.statistics(new LongStatistic(PREFIX + "clusters", all.size()));

    int index = 0;
    for (Cluster<C> cluster : all) {
        // Key prefix for this cluster, numbered in iteration order.
        final String key = PREFIX + "cluster-" + index + ".";
        index++;

        final String name = cluster.getName();
        if (name != null) {
            LOG.statistics(new StringStatistic(key + "name", name));
        }
        LOG.statistics(new LongStatistic(key + "size", cluster.size()));
        if (cluster.isNoise()) {
            LOG.statistics(new StringStatistic(key + "noise", "true"));
        }
        if (hier.numChildren(cluster) > 0) {
            // TODO: this only works if we have cluster names! A child without
            // a name is appended as the literal text "null".
            final StringBuilder names = new StringBuilder();
            It<Cluster<C>> child = hier.iterChildren(cluster);
            while (child.valid()) {
                if (names.length() > 0) {
                    names.append(", ");
                }
                names.append(child.get().getName());
                child.advance();
            }
            LOG.statistics(new StringStatistic(key + "children", names.toString()));
        }
        // TODO: also log parents?
    }
}
Also used : StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Example 38 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

the class ClusterContingencyTable method process.

/**
 * Process two clustering results.
 *
 * <p>Fills the contingency table with the pairwise intersection sizes of
 * the clusters of both results, and records which clusters are noise.
 * With {@code size1} and {@code size2} being the number of clusters in
 * each result, the table has two extra rows and columns:
 * <ul>
 * <li>row {@code size1} / column {@code size2}: sums of the intersection
 * counts,</li>
 * <li>row {@code size1 + 1} / column {@code size2 + 1}: the cluster sizes
 * of the second / first clustering, and their totals.</li>
 * </ul>
 *
 * @param result1 First clustering
 * @param result2 Second clustering
 */
public void process(Clustering<?> result1, Clustering<?> result2) {
    final List<? extends Cluster<?>> first = result1.getAllClusters();
    final List<? extends Cluster<?>> second = result2.getAllClusters();
    final int n1 = first.size();
    final int n2 = second.size();

    // Reset the state that this method (re)computes.
    size1 = n1;
    size2 = n2;
    final int[][] table = new int[n1 + 2][n2 + 2];
    contingency = table;
    noise1 = BitsUtil.zero(n1);
    noise2 = BitsUtil.zero(n2);

    // Second clustering: noise flags and cluster sizes (extra row n1 + 1).
    int col = 0;
    for (Cluster<?> c2 : second) {
        if (c2.isNoise()) {
            BitsUtil.setI(noise2, col);
        }
        final int sz2 = c2.size();
        table[n1 + 1][col] = sz2;
        table[n1 + 1][n2] += sz2;
        col++;
    }

    // First clustering: noise flags, cluster sizes (extra column n2 + 1),
    // and the intersection counts with every cluster of the second result.
    int row = 0;
    for (Cluster<?> c1 : first) {
        if (c1.isNoise()) {
            BitsUtil.setI(noise1, row);
        }
        final DBIDs members = DBIDUtil.ensureSet(c1.getIDs());
        final int sz1 = c1.size();
        table[row][n2 + 1] = sz1;
        table[n1][n2 + 1] += sz1;

        int j = 0;
        for (Cluster<?> c2 : second) {
            final int shared = DBIDUtil.intersectionSize(members, c2.getIDs());
            table[row][j] = shared;
            // Keep the row, column and grand totals in sync.
            table[row][n2] += shared;
            table[n1][j] += shared;
            table[n1][n2] += shared;
            j++;
        }
        row++;
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Iterator(java.util.Iterator) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Aggregations

Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 38, Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 21, Model (de.lmu.ifi.dbs.elki.data.model.Model): 18, ArrayList (java.util.ArrayList): 14, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 13, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 11, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 11, SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel): 8, Subspace (de.lmu.ifi.dbs.elki.data.Subspace): 7, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 7, ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel): 6, StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress): 6, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 5, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 4, HashMap (java.util.HashMap): 4, ByLabelOrAllInOneClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering): 3, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 3, ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase): 3, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 3, HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs): 3