use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class SUBCLU method run.
/**
 * Performs the SUBCLU algorithm on the given database.
 *
 * <p>
 * The method first runs DBSCAN in every single dimension, then repeatedly
 * generates candidate subspaces from the subspaces found in the previous
 * round and re-runs DBSCAN on the clusters of the "best" lower-dimensional
 * subspace, restricted to the candidate subspace. All clusters found are
 * finally wrapped into {@link SubspaceModel} clusters.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  // Progress is only tracked when verbose logging is on; may be null.
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;

  // Generate all 1-dimensional clusters
  LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");

  // mapping of (dimensionality - 1) to the subspaces of that dimensionality
  // which contain clusters
  HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();

  // list of 1-dimensional subspaces containing clusters
  List<Subspace> s_1 = new ArrayList<>();
  subspaceMap.put(0, s_1);

  // mapping of subspaces to list of clusters
  TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());

  for (int d = 0; d < dimensionality; d++) {
    Subspace currentSubspace = new Subspace(d);
    // null ids: run on the whole relation rather than on a subset
    List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);

    if (LOG.isDebuggingFiner()) {
      StringBuilder msg = new StringBuilder();
      msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
      for (Cluster<Model> cluster : clusters) {
        msg.append(" ").append(cluster.getIDs()).append("\n");
      }
      LOG.debugFiner(msg.toString());
    }

    if (!clusters.isEmpty()) {
      s_1.add(currentSubspace);
      clusterMap.put(currentSubspace, clusters);
    }
  }

  // Generate (d+1)-dimensional clusters from d-dimensional clusters
  for (int d = 0; d < dimensionality - 1; d++) {
    if (stepprog != null) {
      stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
    }

    List<Subspace> subspaces = subspaceMap.get(d);
    if (subspaces == null || subspaces.isEmpty()) {
      // Nothing to extend: mark the remaining steps as not applicable and
      // stop.
      if (stepprog != null) {
        for (int dim = d + 1; dim < dimensionality - 1; dim++) {
          stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
        }
      }
      break;
    }

    List<Subspace> candidates = generateSubspaceCandidates(subspaces);
    List<Subspace> s_d = new ArrayList<>();

    for (Subspace candidate : candidates) {
      // NOTE(review): bestSubspace is dereferenced without a null check;
      // confirm that bestSubspace(...) never returns null for a generated
      // candidate.
      Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
      if (LOG.isDebuggingFine()) {
        LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
      }

      // Re-cluster every cluster of the best subspace within the candidate
      // subspace and collect whatever survives.
      List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
      List<Cluster<Model>> clusters = new ArrayList<>();
      for (Cluster<Model> cluster : bestSubspaceClusters) {
        clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
      }

      if (LOG.isDebuggingFine()) {
        StringBuilder msg = new StringBuilder();
        msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
        for (Cluster<Model> c : clusters) {
          msg.append(" ").append(c.getIDs()).append("\n");
        }
        LOG.debugFine(msg.toString());
      }

      if (!clusters.isEmpty()) {
        s_d.add(candidate);
        clusterMap.put(candidate, clusters);
      }
    }

    if (!s_d.isEmpty()) {
      subspaceMap.put(d + 1, s_d);
    }
  }

  // build result
  int numClusters = 1;
  // "result" is not declared in this method; presumably a field of the
  // enclosing class - the assignment is kept as is.
  result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
  // Iterate the subspaces in descending order of the map's comparator.
  for (Subspace subspace : clusterMap.descendingKeySet()) {
    List<Cluster<Model>> clusters = clusterMap.get(subspace);
    for (Cluster<Model> cluster : clusters) {
      Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
      newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
      newCluster.setName("cluster_" + numClusters++);
      result.addToplevelCluster(newCluster);
    }
  }

  LOG.setCompleted(stepprog);
  return result;
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class ByLabelClustering method run.
/**
 * Run the actual clustering algorithm.
 *
 * <p>
 * Every label that is shared by more than one object becomes a top-level
 * cluster (flagged as noise if its label matches the noise pattern). Objects
 * whose label group has at most one member are pooled into one extra
 * "Noise" cluster.
 *
 * @param relation The data input we use
 * @return Clustering with one top-level cluster per label, plus an optional
 *         noise cluster
 */
public Clustering<Model> run(Relation<?> relation) {
  final HashMap<String, DBIDs> byLabel;
  if (multiple) {
    byLabel = multipleAssignment(relation);
  }
  else {
    byLabel = singleAssignment(relation);
  }

  ModifiableDBIDs leftovers = DBIDUtil.newArray();
  Clustering<Model> result = new Clustering<>("By Label Clustering", "bylabel-clustering");

  for (Entry<String, DBIDs> group : byLabel.entrySet()) {
    final DBIDs members = group.getValue();
    if (members.size() > 1) {
      // Large enough to form a cluster of its own.
      Cluster<Model> cluster = new Cluster<Model>(group.getKey(), members, ClusterModel.CLUSTER);
      if (noisepattern != null && noisepattern.matcher(group.getKey()).find()) {
        cluster.setNoise(true);
      }
      result.addToplevelCluster(cluster);
    }
    else {
      // Groups with at most one member are collected as noise.
      leftovers.addDBIDs(members);
    }
  }

  // Emit the collected noise objects, if any, as a single noise cluster.
  if (leftovers.size() > 0) {
    Cluster<Model> noise = new Cluster<Model>("Noise", leftovers, ClusterModel.CLUSTER);
    noise.setNoise(true);
    result.addToplevelCluster(noise);
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class ByLabelHierarchicalClustering method run.
/**
 * Run the actual clustering algorithm.
 *
 * <p>
 * Objects are grouped by the string form of their label; objects without a
 * label are treated as noise. Clusters are then linked into a hierarchy
 * based on label prefixes, and the collected noise objects are added as a
 * separate top-level "Noise" cluster.
 *
 * @param relation The data input to use
 * @return Clustering built from the labels
 */
public Clustering<Model> run(Relation<?> relation) {
  HashMap<String, DBIDs> groups = new HashMap<>();
  ModifiableDBIDs unlabeled = DBIDUtil.newArray();
  Clustering<Model> clustering = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");

  // Pass 1: distribute every object according to its label.
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final Object value = relation.get(it);
    if (value != null) {
      assign(groups, value.toString(), it);
    }
    else {
      unlabeled.add(it);
    }
  }

  // Pass 2: turn each label group into a cluster; a group that is just a
  // single DBID is treated as noise instead.
  ArrayList<Cluster<Model>> clusters = new ArrayList<>(groups.size());
  for (Entry<String, DBIDs> group : groups.entrySet()) {
    final DBIDs members = group.getValue();
    if (members instanceof DBID) {
      unlabeled.add((DBID) members);
    }
    else {
      clusters.add(new Cluster<Model>(group.getKey(), members, ClusterModel.CLUSTER));
    }
  }

  // Pass 3: link clusters whose names are related by prefix.
  for (Cluster<Model> current : clusters) {
    boolean topLevel = true;
    for (Cluster<Model> other : clusters) {
      if (other == current || !other.getName().startsWith(current.getName())) {
        continue;
      }
      // NOTE(review): "other" is passed first here while the debug message
      // below calls "other" the child - confirm against the parameter order
      // of Clustering.addChildCluster.
      clustering.addChildCluster(other, current);
      if (LOG.isDebuggingFiner()) {
        LOG.debugFiner(other.getName() + " is a child of " + current.getName());
      }
      topLevel = false;
    }
    if (topLevel) {
      clustering.addToplevelCluster(current);
    }
  }

  // Emit the collected noise objects, if any, as a single noise cluster.
  if (unlabeled.size() > 0) {
    Cluster<Model> noise = new Cluster<Model>("Noise", unlabeled, ClusterModel.CLUSTER);
    noise.setNoise(true);
    clustering.addToplevelCluster(noise);
  }
  return clustering;
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class DBSCANTest method testDBSCANOnSingleLinkDataset.
/**
 * Regression test: runs DBSCAN with fixed parameters on the
 * "single-link-effect" data set and checks the F-measure and the cluster
 * sizes against stored reference values.
 */
@Test
public void testDBSCANOnSingleLinkDataset() {
  final double epsilon = 11.5;
  final int minPts = 120;
  final int[] expectedSizes = { 11, 200, 203, 224 };

  Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638);
  Clustering<Model> result = new ELKIBuilder<DBSCAN<DoubleVector>>(DBSCAN.class) //
      .with(DBSCAN.Parameterizer.EPSILON_ID, epsilon) //
      .with(DBSCAN.Parameterizer.MINPTS_ID, minPts) //
      .build().run(db);

  testFMeasure(db, result, 0.954382);
  testClusterSizes(result, expectedSizes);
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class DBSCANTest method testDBSCANResults.
/**
 * Regression test: runs DBSCAN with fixed parameters on the
 * "3clusters-and-noise-2d" data set and checks the F-measure and the
 * cluster sizes against stored reference values.
 */
@Test
public void testDBSCANResults() {
  final double epsilon = 0.04;
  final int minPts = 20;
  final int[] expectedSizes = { 29, 50, 101, 150 };

  Database db = makeSimpleDatabase(UNITTEST + "3clusters-and-noise-2d.csv", 330);
  Clustering<Model> result = new ELKIBuilder<DBSCAN<DoubleVector>>(DBSCAN.class) //
      .with(DBSCAN.Parameterizer.EPSILON_ID, epsilon) //
      .with(DBSCAN.Parameterizer.MINPTS_ID, minPts) //
      .build().run(db);

  testFMeasure(db, result, 0.996413);
  testClusterSizes(result, expectedSizes);
}
Aggregations