Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
The class ByLabelHierarchicalClustering, method run.
/**
* Run the actual clustering algorithm.
*
* @param relation The data input to use
*/
public Clustering<Model> run(Relation<?> relation) {
  HashMap<String, DBIDs> labelmap = new HashMap<>();
  ModifiableDBIDs noiseids = DBIDUtil.newArray();
  Clustering<Model> clustering = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");
  // Assign every object to its label; objects without a label are collected as noise.
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final Object val = relation.get(iditer);
    if (val == null) {
      noiseids.add(iditer);
      continue;
    }
    String label = val.toString();
    assign(labelmap, label, iditer);
  }
  // Build one cluster per label; singleton labels are treated as noise.
  ArrayList<Cluster<Model>> clusters = new ArrayList<>(labelmap.size());
  for (Entry<String, DBIDs> entry : labelmap.entrySet()) {
    DBIDs ids = entry.getValue();
    if (ids instanceof DBID) {
      noiseids.add((DBID) ids);
      continue;
    }
    Cluster<Model> clus = new Cluster<Model>(entry.getKey(), ids, ClusterModel.CLUSTER);
    clusters.add(clus);
  }
  // Derive the hierarchy from label prefixes: a cluster named "a.b" becomes a child of "a".
  for (Cluster<Model> cur : clusters) {
    boolean isrootcluster = true;
    for (Cluster<Model> oth : clusters) {
      if (oth != cur && cur.getName().startsWith(oth.getName())) {
        clustering.addChildCluster(oth, cur);
        if (LOG.isDebuggingFiner()) {
          LOG.debugFiner(cur.getName() + " is a child of " + oth.getName());
        }
        isrootcluster = false;
      }
    }
    if (isrootcluster) {
      clustering.addToplevelCluster(cur);
    }
  }
  // Collected noise IDs.
  if (noiseids.size() > 0) {
    Cluster<Model> c = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
    c.setNoise(true);
    clustering.addToplevelCluster(c);
  }
  return clustering;
}
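A minimal usage sketch follows. It assumes the run(Database) convenience entry point of ByLabelHierarchicalClustering (which locates a label relation and delegates to the run(Relation) method above) and the ArrayAdapterDatabaseConnection / StaticArrayDatabase setup of ELKI 0.7.x; the data values, labels, and class name ByLabelDemo are made up for illustration.

import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelHierarchicalClustering;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
import de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection;

public class ByLabelDemo {
  public static void main(String[] args) {
    // Toy data: two coordinates per object, plus a hierarchical label per object.
    double[][] data = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 5, 5 }, { 5, 6 }, { 6, 5 } };
    String[] labels = { "a", "a", "a.x", "a.x", "a.y", "a.y" };
    Database db = new StaticArrayDatabase(new ArrayAdapterDatabaseConnection(data, labels), null);
    db.initialize();
    // "a.x" and "a.y" start with "a", so those clusters become children of cluster "a".
    Clustering<Model> result = new ByLabelHierarchicalClustering().run(db);
  }
}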
Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
The class LogClusterSizes, method logClusterSizes.
/**
* Log the cluster sizes of a clustering.
*
* @param c Clustering to analyze
*/
public static <C extends Model> void logClusterSizes(Clustering<C> c) {
  if (!LOG.isStatistics()) {
    return;
  }
  final List<Cluster<C>> clusters = c.getAllClusters();
  final int numc = clusters.size();
  LOG.statistics(new StringStatistic(PREFIX + "name", c.getLongName()));
  LOG.statistics(new LongStatistic(PREFIX + "clusters", numc));
  Hierarchy<Cluster<C>> h = c.getClusterHierarchy();
  int cnum = 0;
  for (Cluster<C> clu : clusters) {
    final String p = PREFIX + "cluster-" + cnum + ".";
    if (clu.getName() != null) {
      LOG.statistics(new StringStatistic(p + "name", clu.getName()));
    }
    LOG.statistics(new LongStatistic(p + "size", clu.size()));
    if (clu.isNoise()) {
      LOG.statistics(new StringStatistic(p + "noise", "true"));
    }
    if (h.numChildren(clu) > 0) {
      // TODO: this only works if we have cluster names!
      StringBuilder buf = new StringBuilder();
      for (It<Cluster<C>> it = h.iterChildren(clu); it.valid(); it.advance()) {
        if (buf.length() > 0) {
          buf.append(", ");
        }
        buf.append(it.get().getName());
      }
      LOG.statistics(new StringStatistic(p + "children", buf.toString()));
    }
    // TODO: also log parents?
    ++cnum;
  }
}
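A brief usage sketch, continuing the hypothetical setup from the ByLabelDemo example above: the method is static, and it only emits output when statistics-level logging is enabled for the LogClusterSizes logger (an assumption about your logging configuration; the class lives in ELKI's evaluation.clustering package in recent versions, also an assumption).

// Log the size of every cluster in the by-label result computed earlier.
// No output appears unless statistics logging is switched on for this logger.
Clustering<Model> result = new ByLabelHierarchicalClustering().run(db);
LogClusterSizes.logClusterSizes(result);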
Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
The class ClusterContingencyTable, method process.
/**
* Process two clustering results.
*
* @param result1 First clustering
* @param result2 Second clustering
*/
public void process(Clustering<?> result1, Clustering<?> result2) {
  // Get the clusters
  final List<? extends Cluster<?>> cs1 = result1.getAllClusters();
  final List<? extends Cluster<?>> cs2 = result2.getAllClusters();
  // Initialize
  size1 = cs1.size();
  size2 = cs2.size();
  contingency = new int[size1 + 2][size2 + 2];
  noise1 = BitsUtil.zero(size1);
  noise2 = BitsUtil.zero(size2);
  // Fill main part of matrix
  {
    final Iterator<? extends Cluster<?>> it2 = cs2.iterator();
    for (int i2 = 0; it2.hasNext(); i2++) {
      final Cluster<?> c2 = it2.next();
      if (c2.isNoise()) {
        BitsUtil.setI(noise2, i2);
      }
      contingency[size1 + 1][i2] = c2.size();
      contingency[size1 + 1][size2] += c2.size();
    }
  }
  final Iterator<? extends Cluster<?>> it1 = cs1.iterator();
  for (int i1 = 0; it1.hasNext(); i1++) {
    final Cluster<?> c1 = it1.next();
    if (c1.isNoise()) {
      BitsUtil.setI(noise1, i1);
    }
    final DBIDs ids = DBIDUtil.ensureSet(c1.getIDs());
    contingency[i1][size2 + 1] = c1.size();
    contingency[size1][size2 + 1] += c1.size();
    final Iterator<? extends Cluster<?>> it2 = cs2.iterator();
    for (int i2 = 0; it2.hasNext(); i2++) {
      final Cluster<?> c2 = it2.next();
      int count = DBIDUtil.intersectionSize(ids, c2.getIDs());
      contingency[i1][i2] = count;
      contingency[i1][size2] += count;
      contingency[size1][i2] += count;
      contingency[size1][size2] += count;
    }
  }
}
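A usage sketch, continuing the hypothetical setup above and assuming the two-argument constructor ClusterContingencyTable(selfPairing, breakNoiseClusters) as used by ELKI's EvaluateClustering. Comparing a clustering against itself is a simple sanity check that should show perfect agreement; in a real evaluation the second argument would be the output of the algorithm under test.

// Build the contingency table for two clusterings (here: the same one twice).
ClusterContingencyTable table = new ClusterContingencyTable(true, false);
table.process(result, result);

Derived comparison measures such as pair-counting indices can then be computed from the filled table; recent ELKI versions expose these via accessors such as getPaircount(), but treat the exact accessor names as assumptions for your version.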