Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class SNNClustering, method run.
/**
 * Perform SNN clustering.
 *
 * @param database Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<O> relation) {
  SimilarityQuery<O> snnInstance = similarityFunction.instantiate(relation);
  FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null;
  IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
  resultList = new ArrayList<>();
  noise = DBIDUtil.newHashSet();
  processedIDs = DBIDUtil.newHashSet(relation.size());
  if (relation.size() >= minpts) {
    for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
      if (!processedIDs.contains(id)) {
        expandCluster(snnInstance, id, objprog, clusprog);
        // Stop early once every object is assigned and there is no noise left.
        if (processedIDs.size() == relation.size() && noise.size() == 0) {
          break;
        }
      }
      if (objprog != null && clusprog != null) {
        objprog.setProcessed(processedIDs.size(), LOG);
        clusprog.setProcessed(resultList.size(), LOG);
      }
    }
  } else {
    // Fewer objects than minpts: no cluster can form, everything is noise.
    for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
      noise.add(id);
      if (objprog != null && clusprog != null) {
        objprog.setProcessed(noise.size(), LOG);
        clusprog.setProcessed(resultList.size(), LOG);
      }
    }
  }
  // Finish progress logging
  LOG.ensureCompleted(objprog);
  LOG.setCompleted(clusprog);
  Clustering<Model> result = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
  for (ModifiableDBIDs res : resultList) {
    result.addToplevelCluster(new Cluster<Model>(res, ClusterModel.CLUSTER));
  }
  result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
  return result;
}
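For orientation, here is a minimal driver sketch for this method, in the style of the ELKI 0.7.5 unit tests using ELKIBuilder. The toy data and the option names ("snn.epsilon", "snn.minpts") are assumptions; consult SNNClustering's Parameterizer for the authoritative option IDs, and note that the shared-nearest-neighbor similarity function carries its own neighborhood-size parameter.

// Hypothetical driver; data, parameter values and option names are assumed.
double[][] data = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 10, 10 }, { 10, 11 }, { 11, 10 } };
Database db = new StaticArrayDatabase(new ArrayAdapterDatabaseConnection(data), null);
db.initialize(); // load the data source and build the relations
Clustering<Model> result = new ELKIBuilder<SNNClustering<DoubleVector>>(SNNClustering.class) //
    .with("snn.epsilon", 2) // assumed option name: shared-neighbor threshold
    .with("snn.minpts", 3) // assumed option name: core-point threshold
    .build().run(db); // dispatches to the run(Database, Relation) method above
for (Cluster<Model> c : result.getAllClusters()) {
  System.out.println(c.getNameAutomatic() + ": " + c.size() + " objects");
}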
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class LSDBC, method run.
/**
 * Run the LSDBC algorithm.
 *
 * @param database Database to process
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LSDBC", 3) : null;
  final int dim = RelationUtil.dimensionality(relation);
  final double factor = FastMath.pow(2., alpha / dim);
  final DBIDs ids = relation.getDBIDs();
  LOG.beginStep(stepprog, 1, "Materializing kNN neighborhoods");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
  LOG.beginStep(stepprog, 2, "Sorting by density");
  WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  fillDensities(knnq, ids, dens);
  ArrayModifiableDBIDs sids = DBIDUtil.newArray(ids);
  sids.sort(new DataStoreUtil.AscendingByDoubleDataStore(dens));
  LOG.beginStep(stepprog, 3, "Computing clusters");
  // Setup progress logging
  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("LSDBC Clustering", ids.size(), LOG) : null;
  final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters found", LOG) : null;
  // (Temporary) store for the cluster ID assigned to each object.
  final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
  // Cluster sizes. Note: these are not exact, as objects may be stolen from noise.
  final IntArrayList clustersizes = new IntArrayList();
  // Dummy value for the UNPROCESSED slot.
  clustersizes.add(0);
  // Noise counter.
  clustersizes.add(0);
  // First usable cluster id, after the reserved UNPROCESSED and NOISE slots.
  int clusterid = NOISE + 1;
  // Iterate over all objects, in ascending order of density.
  for (DBIDIter id = sids.iter(); id.valid(); id.advance()) {
    // Skip already processed ids.
    if (clusterids.intValue(id) != UNPROCESSED) {
      continue;
    }
    // Evaluate neighborhood predicate
    final KNNList neighbors = knnq.getKNNForDBID(id, k);
    // Evaluate core-point predicate:
    if (isLocalMaximum(neighbors.getKNNDistance(), neighbors, dens)) {
      double mindens = factor * neighbors.getKNNDistance();
      clusterids.putInt(id, clusterid);
      clustersizes.add(expandCluster(clusterid, clusterids, knnq, neighbors, mindens, progress));
      // Start the next cluster on the next iteration.
      ++clusterid;
      if (clusprogress != null) {
        clusprogress.setProcessed(clusterid, LOG);
      }
    } else {
      // Otherwise, it is a noise point.
      clusterids.putInt(id, NOISE);
      clustersizes.set(NOISE, clustersizes.getInt(NOISE) + 1);
    }
    // We've completed this element.
    LOG.incrementProcessed(progress);
  }
  // Finish progress logging.
  LOG.ensureCompleted(progress);
  LOG.setCompleted(clusprogress);
  LOG.setCompleted(stepprog);
  // Transform the cluster ID mapping into a clustering result:
  ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<>(clusterid);
  // Add storage containers for the clusters.
  for (int i = 0; i < clustersizes.size(); i++) {
    clusterlists.add(DBIDUtil.newArray(clustersizes.getInt(i)));
  }
  // Do the actual inversion.
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    // Negative values mark non-core (border) points:
    int cid = clusterids.intValue(id);
    int cluster = Math.abs(cid);
    clusterlists.get(cluster).add(id);
  }
  clusterids.destroy();
  Clustering<Model> result = new Clustering<>("LSDBC", "lsdbc-clustering");
  for (int cid = NOISE; cid < clusterlists.size(); cid++) {
    boolean isNoise = (cid == NOISE);
    Cluster<Model> c = new Cluster<Model>(clusterlists.get(cid), isNoise, ClusterModel.CLUSTER);
    result.addToplevelCluster(c);
  }
  return result;
}
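The same driver pattern fits LSDBC; again a sketch only, with option names ("lsdbc.k", "lsdbc.alpha") assumed rather than taken from LSDBC's Parameterizer. k is the kNN neighborhood size used above, and alpha feeds the 2^(alpha/dim) density factor.

// Hypothetical driver; database setup as in the SNN sketch above.
Clustering<Model> result = new ELKIBuilder<LSDBC<DoubleVector>>(LSDBC.class) //
    .with("lsdbc.k", 10) // assumed option name: neighborhood size k
    .with("lsdbc.alpha", 0.2) // assumed option name: density tolerance alpha
    .build().run(db);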
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class ORCLUS, method run.
/**
 * Performs the ORCLUS algorithm on the given database.
 *
 * @param database Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<V> relation) {
  // current dimensionality associated with each seed
  int dim_c = RelationUtil.dimensionality(relation);
  if (dim_c < l) {
    throw new IllegalStateException("Dimensionality of data < parameter l! (" + dim_c + " < " + l + ")");
  }
  // current number of seeds
  int k_c = Math.min(relation.size(), k_i * k);
  // pick k_c > k initial seed points from the database
  List<ORCLUSCluster> clusters = initialSeeds(relation, k_c);
  // Choose beta such that the dimensionality shrinks from dim_c to l in the
  // same number of iterations in which the seed count shrinks from k_c to k:
  // seeds are reduced by the factor alpha and dimensions by beta per round.
  double beta = FastMath.exp(-FastMath.log(dim_c / (double) l) * FastMath.log(1 / alpha) / FastMath.log(k_c / (double) k));
  IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
  while (k_c > k) {
    // find the partitioning induced by the seeds of the clusters
    assign(relation, clusters);
    // determine the current subspace associated with each cluster
    for (ORCLUSCluster cluster : clusters) {
      if (cluster.objectIDs.size() > 0) {
        cluster.basis = findBasis(relation, cluster, dim_c);
      }
    }
    // reduce the number of seeds and the dimensionality associated with each seed
    k_c = (int) Math.max(k, k_c * alpha);
    dim_c = (int) Math.max(l, dim_c * beta);
    merge(relation, clusters, k_c, dim_c, cprogress);
    if (cprogress != null) {
      cprogress.setProcessed(clusters.size(), LOG);
    }
  }
  assign(relation, clusters);
  LOG.setCompleted(cprogress);
  // build the result
  Clustering<Model> r = new Clustering<>("ORCLUS clustering", "orclus-clustering");
  for (ORCLUSCluster c : clusters) {
    r.addToplevelCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
  }
  return r;
}
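A corresponding driver sketch for ORCLUS; the option names below are assumptions (ORCLUS inherits k, k_i and l from its projected-clustering base class), so check ORCLUS.Parameterizer before use. k is the final cluster count, l the target subspace dimensionality, and k_i the oversampling factor for the initial seeds.

// Hypothetical driver; database setup as in the SNN sketch above.
Clustering<Model> result = new ELKIBuilder<ORCLUS<DoubleVector>>(ORCLUS.class) //
    .with("projectedclustering.k", 2) // assumed option name: final cluster count
    .with("projectedclustering.l", 1) // assumed option name: target dimensionality
    .with("projectedclustering.k_i", 15) // assumed option name: seed oversampling
    .build().run(db);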
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class COPAC, method run.
/**
 * Run the COPAC algorithm.
 *
 * @param database Database
 * @param relation Vector field relation
 * @return COPAC clustering
 */
public Clustering<DimensionModel> run(Database database, Relation<V> relation) {
  COPACNeighborPredicate.Instance npred = new COPACNeighborPredicate<V>(settings).instantiate(database, relation);
  CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
  Clustering<Model> dclusters = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();
  // Re-wrap the detected clusters for COPAC:
  Clustering<DimensionModel> result = new Clustering<>("COPAC clustering", "copac-clustering");
  // Generalized DBSCAN clusterings will be flat.
  for (It<Cluster<Model>> iter = dclusters.iterToplevelClusters(); iter.valid(); iter.advance()) {
    Cluster<Model> clus = iter.get();
    if (clus.size() > 0) {
      int dim = npred.dimensionality(clus.getIDs().iter());
      DimensionModel model = new DimensionModel(dim);
      result.addToplevelCluster(new Cluster<>(clus.getIDs(), model));
    }
  }
  return result;
}
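For completeness, a hedged driver sketch for COPAC as well. The option names are assumptions to verify against COPAC's Parameterizer: a neighborhood size for the local dimensionality estimate, plus the epsilon/minpts that the settings object passes through to Generalized DBSCAN.

// Hypothetical driver; database setup as in the SNN sketch above.
Clustering<DimensionModel> result = new ELKIBuilder<COPAC<DoubleVector>>(COPAC.class) //
    .with("copac.knn", 20) // assumed option name: kNN size for dimensionality
    .with("dbscan.epsilon", 0.5) // assumed option name: per-partition radius
    .with("dbscan.minpts", 5) // assumed option name: core-point threshold
    .build().run(db);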
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class SUBCLU, method runDBSCAN.
/**
 * Runs the DBSCAN algorithm on the specified partition of the database in
 * the given subspace. If parameter {@code ids} is null, DBSCAN will be
 * applied to the whole database.
 *
 * @param relation the database holding the objects to run DBSCAN on
 * @param ids the IDs of the database defining the partition to run DBSCAN on
 *        - if this parameter is null, DBSCAN will be applied to the whole
 *        database
 * @param subspace the subspace to run DBSCAN on
 * @return the clustering result of the DBSCAN run
 */
private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
  // distance function
  distanceFunction.setSelectedDimensions(subspace.getDimensions());
  if (ids == null) {
    // TODO: in this case, we might want to use an index - the proxy below
    // will prevent this!
    ids = relation.getDBIDs();
  }
  ProxyDatabase proxy = new ProxyDatabase(ids, relation);
  DBSCAN<V> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts);
  // run DBSCAN
  if (LOG.isVerbose()) {
    LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
  }
  Clustering<Model> dbsres = dbscan.run(proxy);
  // separate clusters and noise
  List<Cluster<Model>> clusterAndNoise = dbsres.getAllClusters();
  List<Cluster<Model>> clusters = new ArrayList<>();
  for (Cluster<Model> c : clusterAndNoise) {
    if (!c.isNoise()) {
      clusters.add(c);
    }
  }
  return clusters;
}
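Since runDBSCAN is a private helper, a usage sketch targets the enclosing SUBCLU algorithm, whose public run method invokes it bottom-up for each candidate subspace. The option names are assumed; SUBCLU is generally parameterized with the DBSCAN epsilon and minpts it applies in every subspace.

// Hypothetical driver for the enclosing SUBCLU algorithm; setup as above.
Clustering<SubspaceModel> result = new ELKIBuilder<SUBCLU<DoubleVector>>(SUBCLU.class) //
    .with("dbscan.epsilon", 0.3) // assumed option name: per-subspace radius
    .with("dbscan.minpts", 5) // assumed option name: core-point threshold
    .build().run(db);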