use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class SUBCLU method runDBSCAN.
/**
 * Applies DBSCAN to a partition of the relation, measuring distances only in
 * the dimensions of the given subspace, and returns the non-noise clusters.
 * If {@code ids} is null, the IDs of the whole relation are used instead.
 *
 * @param relation the relation holding the objects to run DBSCAN on
 * @param ids the IDs defining the partition to run DBSCAN on, or null to use
 *        all IDs of the relation
 * @param subspace the subspace whose dimensions are selected in the distance
 *        function
 * @return the clusters of the DBSCAN run that are not flagged as noise
 */
private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
  // Restrict the (shared) distance function to the dimensions of this subspace.
  distanceFunction.setSelectedDimensions(subspace.getDimensions());

  // TODO: when no partition is given, we might want to use an index - the
  // proxy below will prevent this!
  final DBIDs partition = (ids != null) ? ids : relation.getDBIDs();
  final ProxyDatabase proxy = new ProxyDatabase(partition, relation);
  final DBSCAN<V> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts);

  if (LOG.isVerbose()) {
    LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
  }
  final Clustering<Model> dbsres = dbscan.run(proxy);

  // Keep the proper clusters only; anything flagged as noise is dropped.
  final List<Cluster<Model>> clusters = new ArrayList<>();
  for (Cluster<Model> found : dbsres.getAllClusters()) {
    if (found.isNoise()) {
      continue;
    }
    clusters.add(found);
  }
  return clusters;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class SUBCLU method run.
/**
 * Performs the SUBCLU algorithm on the given database.
 * <p>
 * DBSCAN is first run in every 1-dimensional subspace. Then, level by level,
 * candidate subspaces are generated from the subspaces of the previous level
 * that contain clusters. For each candidate, DBSCAN is re-run on the IDs of
 * every cluster of the subspace returned by {@code bestSubspace(...)}. The
 * loop stops early once a level has no subspace containing clusters.
 * <p>
 * The result contains one top-level cluster per cluster found, visiting the
 * subspaces in descending order of {@code Subspace.DimensionComparator}. The
 * clustering is also stored in the {@code result} field.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;

  // Generate all 1-dimensional clusters
  LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
  // mapping of dimensionality to set of subspaces; key d holds the subspaces
  // found at level d, i.e. the (d+1)-dimensional ones
  HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
  // list of 1-dimensional subspaces containing clusters
  List<Subspace> s_1 = new ArrayList<>();
  subspaceMap.put(0, s_1);
  // mapping of subspaces to list of clusters
  TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());

  for (int d = 0; d < dimensionality; d++) {
    Subspace currentSubspace = new Subspace(d);
    // null IDs: cluster the complete relation
    List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);

    if (LOG.isDebuggingFiner()) {
      StringBuilder msg = new StringBuilder();
      msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
      for (Cluster<Model> cluster : clusters) {
        msg.append(" ").append(cluster.getIDs()).append("\n");
      }
      LOG.debugFiner(msg.toString());
    }

    // Only subspaces that actually contain clusters are remembered.
    if (!clusters.isEmpty()) {
      s_1.add(currentSubspace);
      clusterMap.put(currentSubspace, clusters);
    }
  }

  // Generate (d+1)-dimensional clusters from d-dimensional clusters
  for (int d = 0; d < dimensionality - 1; d++) {
    if (stepprog != null) {
      stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
    }

    List<Subspace> subspaces = subspaceMap.get(d);
    if (subspaces == null || subspaces.isEmpty()) {
      // Nothing to extend: report the remaining steps as skipped and stop.
      if (stepprog != null) {
        for (int dim = d + 1; dim < dimensionality - 1; dim++) {
          // Fixed: the message previously lacked the space after "of".
          stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
        }
      }
      break;
    }

    List<Subspace> candidates = generateSubspaceCandidates(subspaces);
    List<Subspace> s_d = new ArrayList<>();

    for (Subspace candidate : candidates) {
      Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
      if (LOG.isDebuggingFine()) {
        LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
      }

      // Re-cluster only the members of each cluster of the chosen
      // lower-dimensional subspace, now in the candidate subspace.
      List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
      List<Cluster<Model>> clusters = new ArrayList<>();
      for (Cluster<Model> cluster : bestSubspaceClusters) {
        clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
      }

      if (LOG.isDebuggingFine()) {
        StringBuilder msg = new StringBuilder();
        msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
        for (Cluster<Model> c : clusters) {
          msg.append(" ").append(c.getIDs()).append("\n");
        }
        LOG.debugFine(msg.toString());
      }

      if (!clusters.isEmpty()) {
        s_d.add(candidate);
        clusterMap.put(candidate, clusters);
      }
    }

    // An absent entry for level d + 1 terminates the next iteration.
    if (!s_d.isEmpty()) {
      subspaceMap.put(d + 1, s_d);
    }
  }

  // build result
  int numClusters = 1;
  result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
  for (Subspace subspace : clusterMap.descendingKeySet()) {
    List<Cluster<Model>> clusters = clusterMap.get(subspace);
    for (Cluster<Model> cluster : clusters) {
      // Wrap each cluster with a model carrying its subspace and centroid.
      Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
      newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
      newCluster.setName("cluster_" + numClusters++);
      result.addToplevelCluster(newCluster);
    }
  }

  LOG.setCompleted(stepprog);
  return result;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class XMeans method splitCluster.
/**
 * Tries to split a cluster in two with the inner k-means and keeps whichever
 * variant (parent or the two children) the information criterion prefers.
 *
 * @param parentCluster Cluster to split
 * @param database Database
 * @param relation Data relation
 * @return A list holding only the parent cluster when it has fewer than two
 *         members or the split does not improve the criterion; otherwise the
 *         clusters produced by the split.
 */
protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Database database, Relation<V> relation) {
  // Wrap the parent into a one-element clustering so it can be scored.
  ArrayList<Cluster<M>> parentOnly = new ArrayList<>(1);
  parentOnly.add(parentCluster);
  Clustering<M> parentClustering = new Clustering<>(parentCluster.getName(), parentCluster.getName(), parentOnly);

  // A cluster with fewer than two members cannot be split.
  if (parentCluster.size() <= 1) {
    return parentOnly;
  }

  // Run 2-means on the parent's members only, seeded by splitCentroid.
  ProxyDatabase proxyDB = new ProxyDatabase(parentCluster.getIDs(), database);
  splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation));
  innerKMeans.setK(2);
  Clustering<M> childClustering = innerKMeans.run(proxyDB);

  double parentEvaluation = informationCriterion.quality(parentClustering, getDistanceFunction(), relation);
  double childrenEvaluation = informationCriterion.quality(childClustering, getDistanceFunction(), relation);
  if (LOG.isDebugging()) {
    LOG.debug("parentEvaluation: " + parentEvaluation);
    LOG.debug("childrenEvaluation: " + childrenEvaluation);
  }

  // Check if split is an improvement: the parent is kept exactly when
  // "children score higher" disagrees with the criterion's ascending() flag.
  final boolean childrenScoreHigher = childrenEvaluation > parentEvaluation;
  if (childrenScoreHigher != informationCriterion.ascending()) {
    return parentOnly;
  }
  return childClustering.getAllClusters();
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class KNNKernelDensityMinimaClustering method run.
/**
* Run the clustering algorithm on a data relation.
*
* The object IDs are sorted by the single dimension {@code dim}. In a first
* step ("Kernel density estimation"), a density value is computed per object
* from the differences to up to {@code k} predecessors and {@code k}
* successors in the sorted order. In a second step ("Local minima
* detection"), the sorted sequence is cut into consecutive clusters at
* positions whose density is strictly lower than all other densities in a
* sliding buffer of {@code 2 * minwindow + 1} values.
*
* @param relation Relation
* @return Clustering result, with one top-level cluster per consecutive
*         segment of the sorted IDs
* @throws UnsupportedOperationException if {@code mode} is neither BALLOON
*         nor SAMPLE
*/
public Clustering<ClusterModel> run(Relation<V> relation) {
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
final int size = ids.size();
// Sort by the sole dimension
ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
// Density storage.
WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
// Two cursors over the sorted IDs: iter points at the current object,
// iter2 is repositioned to visit its neighbors.
DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
LOG.beginStep(sprog, 1, "Kernel density estimation.");
{
// Buffer for the differences to at most k predecessors and k successors.
double[] scratch = new double[2 * k];
iter.seek(0);
for (int i = 0; i < size; i++, iter.advance()) {
// Current value.
final double curv = relation.get(iter).doubleValue(dim);
// Neighbor window in sorted order, clamped to [0, size - 1]; prek and posk
// are the numbers of predecessors and successors inside the window.
final int pre = Math.max(i - k, 0), prek = i - pre;
final int pos = Math.min(i + k, size - 1), posk = pos - i;
// Differences to the predecessors go into scratch[0 .. prek).
iter2.seek(pre);
for (int j = 0; j < prek; j++, iter2.advance()) {
scratch[j] = curv - relation.get(iter2).doubleValue(dim);
}
assert (iter2.getOffset() == i);
// Skip the current object itself.
iter2.advance();
// Differences to the successors go into scratch[prek .. prek + posk).
for (int j = 0; j < posk; j++, iter2.advance()) {
scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
}
assert (prek + posk >= k);
// NOTE(review): presumably the k-nearest-neighbor distance among the
// collected differences; confirm QuickSelect's rank and bounds convention.
double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
switch(mode) {
case BALLOON:
{
// The current object's density is the sum of the kernel values of all
// window differences, each scaled by this object's own kdist.
double dens = 0.;
if (kdist > 0.) {
for (int j = 0; j < prek + posk; j++) {
dens += kernel.density(scratch[j] / kdist);
}
} else {
// kdist is not positive: use an infinite density instead of dividing by it.
dens = Double.POSITIVE_INFINITY;
}
assert (iter.getOffset() == i);
density.putDouble(iter, dens);
break;
}
case SAMPLE:
{
if (kdist > 0.) {
// Add the current object's kernel contribution (scaled by its kdist) to
// the stored density of every neighbor in the window.
iter2.seek(pre);
for (int j = 0; j < prek; j++, iter2.advance()) {
double delta = curv - relation.get(iter2).doubleValue(dim);
density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
}
assert (iter2.getOffset() == i);
// Skip the current object itself.
iter2.advance();
for (int j = 0; j < posk; j++, iter2.advance()) {
double delta = relation.get(iter2).doubleValue(dim) - curv;
density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
}
} else {
// kdist is not positive: neighbors whose difference to the current value
// is not positive get an infinite density.
iter2.seek(pre);
for (int j = 0; j < prek; j++, iter2.advance()) {
double delta = curv - relation.get(iter2).doubleValue(dim);
if (!(delta > 0.)) {
density.putDouble(iter2, Double.POSITIVE_INFINITY);
}
}
assert (iter2.getOffset() == i);
// Skip the current object itself.
iter2.advance();
for (int j = 0; j < posk; j++, iter2.advance()) {
double delta = relation.get(iter2).doubleValue(dim) - curv;
if (!(delta > 0.)) {
density.putDouble(iter2, Double.POSITIVE_INFINITY);
}
}
}
break;
}
default:
throw new UnsupportedOperationException("Unknown mode specified.");
}
}
}
LOG.beginStep(sprog, 2, "Local minima detection.");
Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
{
// Circular buffer holding the most recently read 2 * minwindow + 1 densities.
double[] scratch = new double[2 * minwindow + 1];
// Offset (in sorted order) at which the currently open cluster starts.
int begin = 0;
// Half of (minwindow + 1), rounded down.
int halfw = (minwindow + 1) >> 1;
iter.seek(0);
// Fill initial buffer.
for (int i = 0; i < size; i++, iter.advance()) {
// m: slot written in this iteration; t: slot of the local-minimum
// candidate, i.e. the one written minwindow + 1 iterations earlier.
final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
scratch[m] = density.doubleValue(iter);
// Minima are only tested once i exceeds the buffer length.
if (i > scratch.length) {
// Smallest density in the buffer, excluding the candidate slot.
double min = Double.POSITIVE_INFINITY;
for (int j = 0; j < scratch.length; j++) {
if (j != t && scratch[j] < min) {
min = scratch[j];
}
}
// Local minimum:
if (scratch[t] < min) {
// Tentative exclusive end offset of the current cluster.
int end = i - minwindow + 1;
{
// Test on which side the kNN is
// NOTE(review): compares the values halfw positions before and after
// end with the value at end; end moves one further when left < right.
iter2.seek(end);
double curv = relation.get(iter2).doubleValue(dim);
iter2.seek(end - halfw);
double left = relation.get(iter2).doubleValue(dim) - curv;
iter2.seek(end + halfw);
double right = curv - relation.get(iter2).doubleValue(dim);
if (left < right) {
end++;
}
}
// Collect the objects at offsets [begin, end) into a new cluster.
iter2.seek(begin);
ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
for (int j = 0; j < end - begin; j++, iter2.advance()) {
cids.add(iter2);
}
clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
// The next cluster starts where this one ended.
begin = end;
}
}
}
// Extract last cluster
// All remaining objects at offsets [begin, size) form the final cluster.
int end = size;
iter2.seek(begin);
ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
for (int j = 0; j < end - begin; j++, iter2.advance()) {
cids.add(iter2);
}
clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
}
LOG.ensureCompleted(sprog);
return clustering;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class ByLabelClustering method run.
/**
 * Builds a clustering in which every label with more than one object becomes
 * a top-level cluster. Objects whose label has at most one object are pooled
 * into a single cluster named "Noise" that is flagged as noise. A label
 * cluster is also flagged as noise when {@code noisepattern} is set and is
 * found in the label.
 *
 * @param relation The data input we use
 * @return Clustering with one cluster per label, plus the pooled noise
 *         cluster if any IDs were collected for it
 */
public Clustering<Model> run(Relation<?> relation) {
  // Group the IDs by label, allowing multiple labels per object if requested.
  final HashMap<String, DBIDs> labelMap;
  if (multiple) {
    labelMap = multipleAssignment(relation);
  } else {
    labelMap = singleAssignment(relation);
  }

  ModifiableDBIDs noiseids = DBIDUtil.newArray();
  Clustering<Model> result = new Clustering<>("By Label Clustering", "bylabel-clustering");

  for (Entry<String, DBIDs> entry : labelMap.entrySet()) {
    final String label = entry.getKey();
    final DBIDs members = entry.getValue();
    if (members.size() > 1) {
      // Build a cluster
      Cluster<Model> labelCluster = new Cluster<Model>(label, members, ClusterModel.CLUSTER);
      if (noisepattern != null && noisepattern.matcher(label).find()) {
        labelCluster.setNoise(true);
      }
      result.addToplevelCluster(labelCluster);
    } else {
      // Labels with at most one object are pooled as noise.
      noiseids.addDBIDs(members);
    }
  }

  // Collected noise IDs.
  if (noiseids.size() > 0) {
    Cluster<Model> noiseCluster = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
    noiseCluster.setNoise(true);
    result.addToplevelCluster(noiseCluster);
  }
  return result;
}
Aggregations