Search in sources:

Example 46 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

the class SUBCLU method run.

/**
 * Performs the SUBCLU algorithm on the given database.
 * <p>
 * The method works bottom-up: it first calls {@code runDBSCAN} once per single
 * dimension and remembers every 1-dimensional subspace that yielded at least
 * one cluster. It then repeatedly generates candidate subspaces of the next
 * higher dimensionality from the subspaces found in the previous round and,
 * for each candidate, calls {@code runDBSCAN} again restricted to the IDs of
 * each cluster of the subspace selected by {@code bestSubspace}. Finally, all
 * clusters collected are converted into {@link SubspaceModel} clusters and
 * added as top-level clusters of the result.
 * <p>
 * The clustering is also assigned to {@code result}, which is declared outside
 * of this method.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    // Progress is only tracked when verbose logging is enabled.
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
    // Generate all 1-dimensional clusters
    LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
    // mapping of dimensionality to set of subspaces
    // (key d holds the subspaces of dimensionality d + 1)
    HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
    // list of 1-dimensional subspaces containing clusters
    List<Subspace> s_1 = new ArrayList<>();
    subspaceMap.put(0, s_1);
    // mapping of subspaces to list of clusters
    TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
    for (int d = 0; d < dimensionality; d++) {
        Subspace currentSubspace = new Subspace(d);
        // No ID restriction (null) for the 1-dimensional runs.
        List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
        if (LOG.isDebuggingFiner()) {
            StringBuilder msg = new StringBuilder();
            msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
            for (Cluster<Model> cluster : clusters) {
                msg.append("      ").append(cluster.getIDs()).append('\n');
            }
            LOG.debugFiner(msg.toString());
        }
        // Only subspaces that actually contain clusters are kept.
        if (!clusters.isEmpty()) {
            s_1.add(currentSubspace);
            clusterMap.put(currentSubspace, clusters);
        }
    }
    // Generate (d+1)-dimensional clusters from d-dimensional clusters
    for (int d = 0; d < dimensionality - 1; d++) {
        if (stepprog != null) {
            stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
        }
        List<Subspace> subspaces = subspaceMap.get(d);
        if (subspaces == null || subspaces.isEmpty()) {
            // Nothing to build on: report the remaining steps as not
            // applicable so the progress still advances, then stop.
            if (stepprog != null) {
                for (int dim = d + 1; dim < dimensionality - 1; dim++) {
                    stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
                }
            }
            break;
        }
        List<Subspace> candidates = generateSubspaceCandidates(subspaces);
        // subspaces of the next dimensionality that contain clusters
        List<Subspace> s_d = new ArrayList<>();
        for (Subspace candidate : candidates) {
            Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
            if (LOG.isDebuggingFine()) {
                LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
            }
            List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
            List<Cluster<Model>> clusters = new ArrayList<>();
            // Re-cluster within the candidate, restricted to the members of
            // each cluster of the selected lower-dimensional subspace.
            for (Cluster<Model> cluster : bestSubspaceClusters) {
                clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
            }
            if (LOG.isDebuggingFine()) {
                StringBuilder msg = new StringBuilder();
                msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
                for (Cluster<Model> c : clusters) {
                    msg.append("      ").append(c.getIDs()).append('\n');
                }
                LOG.debugFine(msg.toString());
            }
            if (!clusters.isEmpty()) {
                s_d.add(candidate);
                clusterMap.put(candidate, clusters);
            }
        }
        if (!s_d.isEmpty()) {
            subspaceMap.put(d + 1, s_d);
        }
    }
    // build result
    int numClusters = 1;
    result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
    // Iterate the subspaces in descending order of the map's comparator.
    for (Subspace subspace : clusterMap.descendingKeySet()) {
        List<Cluster<Model>> clusters = clusterMap.get(subspace);
        for (Cluster<Model> cluster : clusters) {
            Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
            newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
            newCluster.setName("cluster_" + numClusters++);
            result.addToplevelCluster(newCluster);
        }
    }
    LOG.setCompleted(stepprog);
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) TreeMap(java.util.TreeMap) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) List(java.util.List)

Example 47 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

the class ByLabelClustering method run.

/**
 * Builds a clustering from labels: every label with more than one object
 * becomes a top-level cluster named after the label.
 * <p>
 * Labels that cover at most one object are not turned into clusters; their
 * IDs are gathered and, if any exist, emitted as one additional top-level
 * cluster named "Noise" that is flagged as noise. A label cluster is also
 * flagged as noise when {@code noisepattern} is set and matches the label.
 *
 * @param relation The data input we use
 * @return the clustering built from the labels
 */
public Clustering<Model> run(Relation<?> relation) {
    // Choose the label assignment strategy.
    final HashMap<String, DBIDs> labelMap;
    if (multiple) {
        labelMap = multipleAssignment(relation);
    } else {
        labelMap = singleAssignment(relation);
    }
    ModifiableDBIDs leftovers = DBIDUtil.newArray();
    Clustering<Model> result = new Clustering<>("By Label Clustering", "bylabel-clustering");
    for (Entry<String, DBIDs> labelled : labelMap.entrySet()) {
        final String label = labelled.getKey();
        final DBIDs members = labelled.getValue();
        if (members.size() > 1) {
            // Build a cluster
            Cluster<Model> cluster = new Cluster<Model>(label, members, ClusterModel.CLUSTER);
            final boolean looksLikeNoise = noisepattern != null && noisepattern.matcher(label).find();
            if (looksLikeNoise) {
                cluster.setNoise(true);
            }
            result.addToplevelCluster(cluster);
        } else {
            // Too small to form a cluster; remember as noise.
            leftovers.addDBIDs(members);
        }
    }
    // Collected noise IDs.
    if (leftovers.size() > 0) {
        Cluster<Model> noise = new Cluster<Model>("Noise", leftovers, ClusterModel.CLUSTER);
        noise.setNoise(true);
        result.addToplevelCluster(noise);
    }
    return result;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 48 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

the class ByLabelHierarchicalClustering method run.

/**
 * Builds a hierarchical clustering from the string form of each object's
 * value in the relation.
 * <p>
 * Objects whose value is {@code null} are collected as noise. All other
 * objects are passed to {@code assign} under the label {@code toString()}
 * of their value. Labels whose ID collection is a single {@code DBID} are
 * moved to noise as well; every other label becomes a cluster. Clusters are
 * then linked by name prefix, and clusters that take part in no such link
 * as the shorter name are added at the top level. Collected noise, if any,
 * is added as a top-level cluster named "Noise".
 *
 * @param relation The data input to use
 * @return the clustering built from the labels
 */
public Clustering<Model> run(Relation<?> relation) {
    HashMap<String, DBIDs> byLabel = new HashMap<>();
    ModifiableDBIDs noise = DBIDUtil.newArray();
    Clustering<Model> clustering = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");
    // Group object IDs by label; unlabeled objects go to noise.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final Object value = relation.get(it);
        if (value != null) {
            assign(byLabel, value.toString(), it);
        } else {
            noise.add(it);
        }
    }
    // Turn each label into a cluster, except single-object labels.
    ArrayList<Cluster<Model>> clusters = new ArrayList<>(byLabel.size());
    for (Entry<String, DBIDs> labelled : byLabel.entrySet()) {
        final DBIDs members = labelled.getValue();
        if (members instanceof DBID) {
            noise.add((DBID) members);
        } else {
            clusters.add(new Cluster<Model>(labelled.getKey(), members, ClusterModel.CLUSTER));
        }
    }
    // Link clusters whose names are in a prefix relationship.
    // NOTE(review): the debug message calls "oth" the child of "cur", yet it
    // is "cur" that is kept out of the top level below - confirm against the
    // parameter order of Clustering.addChildCluster.
    for (Cluster<Model> cur : clusters) {
        final String prefix = cur.getName();
        boolean linked = false;
        for (Cluster<Model> oth : clusters) {
            if (oth == cur || !oth.getName().startsWith(prefix)) {
                continue;
            }
            clustering.addChildCluster(oth, cur);
            if (LOG.isDebuggingFiner()) {
                LOG.debugFiner(oth.getName() + " is a child of " + cur.getName());
            }
            linked = true;
        }
        if (!linked) {
            clustering.addToplevelCluster(cur);
        }
    }
    // Collected noise IDs.
    if (noise.size() > 0) {
        Cluster<Model> noiseCluster = new Cluster<Model>("Noise", noise, ClusterModel.CLUSTER);
        noiseCluster.setNoise(true);
        clustering.addToplevelCluster(noiseCluster);
    }
    return clustering;
}
Also used : HashMap(java.util.HashMap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 49 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

the class DBSCANTest method testDBSCANOnSingleLinkDataset.

/**
 * Runs DBSCAN with fixed epsilon and minpts on the "single-link-effect"
 * unit test data set and compares the outcome with stored reference values:
 * the value handed to {@code testFMeasure} and the cluster sizes handed to
 * {@code testClusterSizes}.
 */
@Test
public void testDBSCANOnSingleLinkDataset() {
    // Fixed algorithm parameters for this regression test.
    final double epsilon = 11.5;
    final int minpts = 120;
    // Reference cluster sizes.
    final int[] expectedSizes = { 11, 200, 203, 224 };

    Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638);
    Clustering<Model> result = new ELKIBuilder<DBSCAN<DoubleVector>>(DBSCAN.class)
        .with(DBSCAN.Parameterizer.EPSILON_ID, epsilon)
        .with(DBSCAN.Parameterizer.MINPTS_ID, minpts)
        .build().run(db);
    testFMeasure(db, result, 0.954382);
    testClusterSizes(result, expectedSizes);
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) Model(de.lmu.ifi.dbs.elki.data.model.Model) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test)

Example 50 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

the class DBSCANTest method testDBSCANResults.

/**
 * Runs DBSCAN with fixed epsilon and minpts on the "3clusters-and-noise-2d"
 * unit test data set and compares the outcome with stored reference values:
 * the value handed to {@code testFMeasure} and the cluster sizes handed to
 * {@code testClusterSizes}.
 */
@Test
public void testDBSCANResults() {
    // Fixed algorithm parameters for this regression test.
    final double epsilon = 0.04;
    final int minpts = 20;
    // Reference cluster sizes.
    final int[] expectedSizes = { 29, 50, 101, 150 };

    Database db = makeSimpleDatabase(UNITTEST + "3clusters-and-noise-2d.csv", 330);
    Clustering<Model> result = new ELKIBuilder<DBSCAN<DoubleVector>>(DBSCAN.class)
        .with(DBSCAN.Parameterizer.EPSILON_ID, epsilon)
        .with(DBSCAN.Parameterizer.MINPTS_ID, minpts)
        .build().run(db);
    testFMeasure(db, result, 0.996413);
    testClusterSizes(result, expectedSizes);
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) Model(de.lmu.ifi.dbs.elki.data.model.Model) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test)

Aggregations

Model (de.lmu.ifi.dbs.elki.data.model.Model) 60, Database (de.lmu.ifi.dbs.elki.database.Database) 29, Test (org.junit.Test) 24, Clustering (de.lmu.ifi.dbs.elki.data.Clustering) 21, Cluster (de.lmu.ifi.dbs.elki.data.Cluster) 18, AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) 17, ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel) 13, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) 12, ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) 11, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 10, ArrayList (java.util.ArrayList) 9, DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector) 8, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 8, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) 8, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) 7, HashMap (java.util.HashMap) 5, ByLabelClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering) 3, SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel) 3, IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) 3, CorePredicate (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate) 2