use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class Leader method run.
/**
 * Run the leader clustering algorithm.
 * <p>
 * The relation is scanned in iteration order. Each object that has not been
 * assigned yet becomes the prototype of a new cluster; that cluster receives
 * every not-yet-assigned object returned by a range query with
 * {@code threshold} issued for the prototype.
 *
 * @param relation Data set
 * @return Clustering result
 */
public Clustering<PrototypeModel<O>> run(Relation<O> relation) {
  RangeQuery<O> rangeQuery = relation.getRangeQuery(getDistanceFunction(), threshold);
  // Objects that already belong to some cluster.
  ModifiableDBIDs assigned = DBIDUtil.newHashSet(relation.size());
  Clustering<PrototypeModel<O>> result = new Clustering<>("Prototype clustering", "prototype-clustering");
  int numQueries = 0;
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Leader clustering", relation.size(), LOG);
  }

  // The scan stops early as soon as every object has been assigned.
  for (DBIDIter leader = relation.iterDBIDs(); leader.valid() && assigned.size() < relation.size(); leader.advance()) {
    if (!assigned.contains(leader)) {
      DoubleDBIDList inRange = rangeQuery.getRangeForDBID(leader, threshold);
      numQueries++;

      // Collect only those results that no earlier leader has claimed.
      ModifiableDBIDs members = DBIDUtil.newArray(inRange.size());
      DBIDIter candidate = inRange.iter();
      while (candidate.valid()) {
        if (assigned.add(candidate)) {
          LOG.incrementProcessed(progress);
          members.add(candidate);
        }
        candidate.advance();
      }
      // The leader itself must be part of its own (non-empty) cluster.
      assert members.size() > 0 && members.contains(leader);

      PrototypeModel<O> prototype = new SimplePrototypeModel<>(relation.get(leader));
      Cluster<PrototypeModel<O>> cluster = new Cluster<>(members, prototype);
      result.addToplevelCluster(cluster);
    }
  }

  String statisticKey = this.getClass().getName() + ".queries";
  LOG.statistics(new LongStatistic(statisticKey, numQueries));
  LOG.ensureCompleted(progress);
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class SNNClustering method expandCluster.
/**
 * DBSCAN-function expandCluster adapted to SNN criterion.
 * <p>
 * Border-Objects become members of the first possible cluster.
 * <p>
 * If the start object has fewer than {@code minpts} SNN neighbors it is
 * recorded as noise and nothing else happens. Otherwise a candidate cluster is
 * grown from its neighbors; the candidate is appended to the result list when
 * it ends up with at least {@code minpts} members, and is moved to noise
 * otherwise.
 *
 * @param snnInstance shared nearest neighbors
 * @param startObjectID potential seed of a new potential cluster
 * @param objprog the progress object to report about the progress of
 *        clustering; progress is only reported when both {@code objprog} and
 *        {@code clusprog} are non-null
 * @param clusprog the progress object to report the number of clusters found
 *        so far; progress is only reported when both {@code objprog} and
 *        {@code clusprog} are non-null
 */
protected void expandCluster(SimilarityQuery<O> snnInstance, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
  ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID);

  // startObject is no core-object
  if (seeds.size() < minpts) {
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
    if (objprog != null && clusprog != null) {
      objprog.setProcessed(processedIDs.size(), LOG);
      clusprog.setProcessed(resultList.size(), LOG);
    }
    return;
  }

  // try to expand the cluster
  ModifiableDBIDs currentCluster = DBIDUtil.newArray();
  for (DBIDIter seed = seeds.iter(); seed.valid(); seed.advance()) {
    if (!processedIDs.contains(seed)) {
      // Unprocessed neighbor: claim it for this cluster.
      currentCluster.add(seed);
      processedIDs.add(seed);
    } else if (noise.contains(seed)) {
      // Former noise object: reassign it to this cluster.
      currentCluster.add(seed);
      noise.remove(seed);
    }
  }

  DBIDVar o = DBIDUtil.newVar();
  while (seeds.size() > 0) {
    seeds.pop(o);
    ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);

    // Only objects with at least minpts neighbors expand the cluster further.
    if (neighborhood.size() >= minpts) {
      for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
        boolean inNoise = noise.contains(iter);
        boolean unclassified = !processedIDs.contains(iter);
        if (inNoise || unclassified) {
          // Only previously unprocessed objects are queued for expansion;
          // former noise objects join the cluster but are not expanded.
          if (unclassified) {
            seeds.add(iter);
          }
          currentCluster.add(iter);
          processedIDs.add(iter);
          if (inNoise) {
            noise.remove(iter);
          }
        }
      }
    }

    if (objprog != null && clusprog != null) {
      objprog.setProcessed(processedIDs.size(), LOG);
      // Use the same ">= minpts" acceptance test as the final decision below,
      // so a candidate of exactly minpts objects is counted while it grows.
      int numClusters = currentCluster.size() >= minpts ? resultList.size() + 1 : resultList.size();
      clusprog.setProcessed(numClusters, LOG);
    }

    // Every object is processed and no noise object is left to be absorbed,
    // so the remaining seeds cannot add anything to the cluster.
    if (processedIDs.size() == snnInstance.getRelation().size() && noise.size() == 0) {
      break;
    }
  }

  if (currentCluster.size() >= minpts) {
    resultList.add(currentCluster);
  } else {
    // Candidate is too small: all of its members, and the start object, are noise.
    noise.addDBIDs(currentCluster);
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class DBSCAN method run.
/**
 * Performs the DBSCAN algorithm on the given database.
 * <p>
 * When the relation holds fewer than {@code minpts} objects, a clustering with
 * a single top-level cluster containing all objects is returned immediately.
 * Otherwise the clusters collected in {@code resultList} are returned as
 * top-level clusters, followed by one more cluster holding the {@code noise}
 * objects. The average neighbor count is logged as a statistic, and a warning
 * is emitted when it looks implausibly small or large relative to
 * {@code minpts}.
 *
 * @param relation Relation to cluster
 * @return Clustering result
 */
public Clustering<Model> run(Relation<O> relation) {
  final int size = relation.size();
  if (size < minpts) {
    // Too few objects to form any cluster: return them all in one cluster
    // (constructed with the same boolean flag as the noise cluster below).
    Clustering<Model> trivial = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
    Cluster<Model> everything = new Cluster<Model>(relation.getDBIDs(), true, ClusterModel.CLUSTER);
    trivial.addToplevelCluster(everything);
    return trivial;
  }

  RangeQuery<O> query = QueryUtil.getRangeQuery(relation, getDistanceFunction());
  resultList = new ArrayList<>();
  noise = DBIDUtil.newHashSet();
  runDBSCAN(relation, query);

  // Sanity-check epsilon using the average number of neighbors per object.
  double avgNeighbors = ncounter / (double) relation.size();
  String statisticKey = DBSCAN.class.getName() + ".average-neighbors";
  LOG.statistics(new DoubleStatistic(statisticKey, avgNeighbors));
  if (avgNeighbors < 1 + 0.1 * (minpts - 1)) {
    LOG.warning("There are very few neighbors found. Epsilon may be too small.");
  }
  if (avgNeighbors > 100 * minpts) {
    LOG.warning("There are very many neighbors found. Epsilon may be too large.");
  }

  // Assemble the output: regular clusters first, then the noise cluster.
  Clustering<Model> clustering = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
  for (ModifiableDBIDs members : resultList) {
    Cluster<Model> cluster = new Cluster<Model>(members, ClusterModel.CLUSTER);
    clustering.addToplevelCluster(cluster);
  }
  Cluster<Model> noiseCluster = new Cluster<Model>(noise, true, ClusterModel.CLUSTER);
  clustering.addToplevelCluster(noiseCluster);
  return clustering;
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class DBSCAN method expandCluster.
/**
 * DBSCAN-function expandCluster.
 * <p>
 * Border-Objects become members of the first possible cluster.
 * <p>
 * A start object with fewer than {@code minpts} objects in its
 * {@code epsilon}-range is recorded as noise. Otherwise a new cluster is
 * started from it, grown by repeatedly popping seeds and querying their
 * ranges, and finally appended to {@code resultList}.
 *
 * @param relation Database relation to run on
 * @param rangeQuery Range query to use
 * @param startObjectID potential seed of a new potential cluster
 * @param seeds Array to store the current seeds
 * @param objprog Number of objects processed (may be {@code null})
 * @param clusprog Number of clusters found (may be {@code null})
 */
protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) {
  final DoubleDBIDList startNeighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
  final int startCount = startNeighbors.size();
  ncounter += startCount;

  // startObject is no core-object
  if (startCount < minpts) {
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
    if (objprog != null) {
      objprog.incrementProcessed(LOG);
    }
    return;
  }

  // Start a new cluster with the start object as its first member.
  final ModifiableDBIDs members = DBIDUtil.newArray();
  members.add(startObjectID);
  processedIDs.add(startObjectID);

  // The seed buffer is expected to be empty on entry; clear it anyway.
  assert (seeds.size() == 0);
  seeds.clear();
  processNeighbors(startNeighbors.iter(), members, seeds);

  // try to expand the cluster
  final DBIDVar current = DBIDUtil.newVar();
  while (!seeds.isEmpty()) {
    final DBIDRef seed = seeds.pop(current);
    final DoubleDBIDList seedNeighbors = rangeQuery.getRangeForDBID(seed, epsilon);
    final int seedCount = seedNeighbors.size();
    ncounter += seedCount;

    // Only seeds with at least minpts objects in range expand the cluster.
    if (seedCount >= minpts) {
      processNeighbors(seedNeighbors.iter(), members, seeds);
    }

    if (objprog != null) {
      objprog.incrementProcessed(LOG);
    }
  }

  resultList.add(members);
  if (clusprog != null) {
    clusprog.setProcessed(resultList.size(), LOG);
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class ByModelClustering method run.
/**
 * Run the actual clustering algorithm.
 * <p>
 * Objects are grouped by the model stored for them in the relation; each
 * distinct model yields one top-level cluster. A cluster is named after
 * {@code GeneratorInterface#getName()} when its model is a
 * {@code GeneratorInterface}, and after {@code toString()} otherwise. When a
 * noise pattern is configured and is found in the name, the cluster is marked
 * as noise.
 *
 * @param relation The data input we use
 * @return Clustering with one top-level cluster per distinct model
 */
public Clustering<Model> run(Relation<Model> relation) {
  // Build model mapping
  HashMap<Model, ModifiableDBIDs> idsByModel = new HashMap<>();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    Model key = relation.get(it);
    ModifiableDBIDs existing = idsByModel.get(key);
    final ModifiableDBIDs bucket;
    if (existing != null) {
      bucket = existing;
    } else {
      // First object seen for this model: open a new id set.
      bucket = DBIDUtil.newHashSet();
      idsByModel.put(key, bucket);
    }
    bucket.add(it);
  }

  // Turn every (model, ids) group into a named top-level cluster.
  Clustering<Model> clustering = new Clustering<>("By Model Clustering", "bymodel-clustering");
  for (Entry<Model, ModifiableDBIDs> group : idsByModel.entrySet()) {
    final Model groupModel = group.getKey();
    final ModifiableDBIDs members = group.getValue();

    final String label;
    if (groupModel instanceof GeneratorInterface) {
      label = ((GeneratorInterface) groupModel).getName();
    } else {
      label = groupModel.toString();
    }

    Cluster<Model> cluster = new Cluster<>(label, members, groupModel);
    boolean matchesNoise = noisepattern != null && noisepattern.matcher(label).find();
    if (matchesNoise) {
      cluster.setNoise(true);
    }
    clustering.addToplevelCluster(cluster);
  }
  return clustering;
}
Aggregations