use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class ParallelLloydKMeans method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  DBIDs ids = relation.getDBIDs();
  // Choose initial means
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Store for current cluster assignment.
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  KMeansProcessor<V> kmm = new KMeansProcessor<>(relation, distanceFunction, assignment, varsum);
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    kmm.nextIteration(means);
    ParallelExecutor.run(ids, kmm);
    // Stop if no cluster assignment changed.
    if(!kmm.changed()) {
      break;
    }
    means = kmm.getMeans();
  }
  LOG.setCompleted(prog);
  // Wrap result
  ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for(int i = 0; i < clusters.length; i++) {
    DBIDs cids = clusters[i];
    if(cids.size() == 0) {
      continue;
    }
    KMeansModel model = new KMeansModel(means[i], varsum[i]);
    result.addToplevelCluster(new Cluster<>(cids, model));
  }
  return result;
}
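For readers who want the logic without ELKI's parallel machinery, here is a minimal, self-contained sketch of one Lloyd iteration (assignment plus mean update) over a raw double[][] data set. The class and method names are hypothetical; only the varsum bookkeeping mirrors the per-cluster variance sums that feed KMeansModel above.

import java.util.Arrays;

// Hypothetical standalone sketch of one Lloyd iteration (assign + update).
public class LloydStepSketch {
  /** Assigns each point to its nearest mean; returns true if any assignment changed. */
  static boolean iterate(double[][] data, double[][] means, int[] assignment, double[] varsum) {
    Arrays.fill(varsum, 0.);
    boolean changed = false;
    for(int i = 0; i < data.length; i++) {
      int best = 0;
      double bestd = Double.POSITIVE_INFINITY;
      for(int j = 0; j < means.length; j++) {
        double d = squaredDistance(data[i], means[j]);
        if(d < bestd) {
          bestd = d;
          best = j;
        }
      }
      varsum[best] += bestd; // Sum of squared deviations per cluster.
      changed |= (assignment[i] != best);
      assignment[i] = best;
    }
    // Update step: recompute each mean from its assigned points.
    double[][] sums = new double[means.length][data[0].length];
    int[] sizes = new int[means.length];
    for(int i = 0; i < data.length; i++) {
      sizes[assignment[i]]++;
      for(int d = 0; d < data[i].length; d++) {
        sums[assignment[i]][d] += data[i][d];
      }
    }
    for(int j = 0; j < means.length; j++) {
      if(sizes[j] > 0) { // Keep the old mean for empty clusters.
        for(int d = 0; d < sums[j].length; d++) {
          means[j][d] = sums[j][d] / sizes[j];
        }
      }
    }
    return changed;
  }

  static double squaredDistance(double[] a, double[] b) {
    double sum = 0.;
    for(int d = 0; d < a.length; d++) {
      double diff = a[d] - b[d];
      sum += diff * diff;
    }
    return sum;
  }
}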
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class AGNES method run.
/**
* Run the algorithm
*
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
*/
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  if(SingleLinkage.class.isInstance(linkage)) {
    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  }
  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  // Compute the initial (lower triangular) distance matrix.
  MatrixParadigm mat = new MatrixParadigm(ids);
  initializeDistanceMatrix(mat, dq, linkage);
  // Initialize space for result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  // Repeat until everything merged into 1 cluster
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  // Use end to shrink the matrix virtually as the trailing objects disappear
  DBIDArrayIter ix = mat.ix;
  for(int i = 1, end = size; i < size; i++) {
    end = shrinkActiveSet(ix, builder, end, findMerge(end, mat, builder));
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return builder.complete();
}
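The merge loop above repeatedly asks findMerge for the closest remaining pair in the condensed lower-triangular distance matrix. The following is a minimal standalone sketch of that scan, assuming the common condensed layout where entry (x, y) with x > y sits at offset x * (x - 1) / 2 + y; the class and method names are illustrative, not ELKI API.

// Hypothetical sketch of the closest-pair scan over a condensed
// lower-triangular distance matrix among the first 'end' objects.
public class TriangularMinSketch {
  /** Returns {x, y} of the closest pair; matrix[x*(x-1)/2 + y] holds d(x, y). */
  static int[] findClosestPair(double[] matrix, int end) {
    double best = Double.POSITIVE_INFINITY;
    int bx = -1, by = -1;
    // 'pos' walks the condensed matrix linearly, row by row.
    for(int x = 1, pos = 0; x < end; x++) {
      for(int y = 0; y < x; y++, pos++) {
        if(matrix[pos] < best) {
          best = matrix[pos];
          bx = x;
          by = y;
        }
      }
    }
    return new int[] { bx, by };
  }
}

This scan is O(n^2) per merge, which is what makes plain AGNES O(n^3) overall and motivates the caching in the Anderberg variant below.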
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class AnderbergHierarchicalClustering method run.
/**
* Run the algorithm
*
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
*/
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  if(SingleLinkage.class.isInstance(linkage)) {
    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  }
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs ids = relation.getDBIDs();
  MatrixParadigm mat = new MatrixParadigm(ids);
  final int size = ids.size();
  // Position counter - must agree with computeOffset!
  AGNES.initializeDistanceMatrix(mat, dq, linkage);
  // Arrays used for caching:
  double[] bestd = new double[size];
  int[] besti = new int[size];
  initializeNNCache(mat.matrix, bestd, besti);
  // Initialize space for result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  // Repeat until everything merged into 1 cluster
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  DBIDArrayIter ix = mat.ix;
  for(int i = 1, end = size; i < size; i++) {
    end = AGNES.shrinkActiveSet(ix, builder, end, findMerge(end, mat, bestd, besti, builder));
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return builder.complete();
}
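The bestd/besti arrays are the heart of the Anderberg acceleration: each row caches its nearest neighbor, so finding the next merge is a linear scan over the cache rather than a full pass over the triangular matrix. A minimal sketch of that idea, under the same condensed-layout assumption as above; names are illustrative, not the ELKI implementation.

// Hypothetical sketch of the Anderberg nearest-neighbor cache: for each row x,
// bestd[x] holds the smallest distance in that row and besti[x] the column
// where it occurs.
public class NNCacheSketch {
  /** Fills bestd/besti from a condensed lower-triangular matrix. */
  static void initializeNNCache(double[] matrix, double[] bestd, int[] besti) {
    java.util.Arrays.fill(bestd, Double.POSITIVE_INFINITY);
    java.util.Arrays.fill(besti, -1);
    for(int x = 1, pos = 0; x < bestd.length; x++) {
      for(int y = 0; y < x; y++, pos++) {
        if(matrix[pos] < bestd[x]) {
          bestd[x] = matrix[pos];
          besti[x] = y;
        }
      }
    }
  }

  /** Returns the row whose cached nearest neighbor is globally closest. */
  static int findMergeCandidate(double[] bestd, int end) {
    int best = 1; // Row 0 has no entries in the lower triangle.
    for(int x = 2; x < end; x++) {
      if(bestd[x] < bestd[best]) {
        best = x;
      }
    }
    return best;
  }
}

After a merge, only the rows whose cached neighbor was one of the merged objects need their cache entries recomputed, which is what keeps the common case well below quadratic work per iteration.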
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class LMCLUS method run.
/**
 * The main LMCLUS (linear manifold clustering) algorithm is processed in this
 * method.
 *
 * <PRE>
 * The algorithm samples random linear manifolds and tries to find clusters in them.
 * It calculates a distance histogram, searches it for a threshold, and partitions the
 * points into two groups: the ones in the cluster and everything else.
 * Then the best-fitting linear manifold is searched and registered as a cluster.
 * The process is repeated until all points are clustered.
 * The last cluster should contain all the outliers (or the whole data set, if no clusters have been found).
 * For details see {@link LMCLUS}.
 * </PRE>
 *
 * @param database The database to operate on
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<NumberVector> relation) {
  Clustering<Model> ret = new Clustering<>("LMCLUS Clustering", "lmclus-clustering");
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
  IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
  ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
  Random r = rnd.getSingleThreadedRandom();
  final int maxdim = Math.min(maxLMDim, RelationUtil.dimensionality(relation));
  int cnum = 0;
  while(unclustered.size() > minsize) {
    DBIDs current = unclustered;
    int lmDim = 1;
    for(int k = 1; k <= maxdim; k++) {
      // Keep refining the partition at dimensionality k until the separation
      // goodness drops below the sensitivity threshold.
      while(true) {
        Separation separation = findSeparation(relation, current, k, r);
        // logger.verbose("goodness: " + separation.goodness + " threshold: " + separation.threshold);
        if(separation.goodness <= sensitivityThreshold) {
          break;
        }
        ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
        for(DBIDIter iter = current.iter(); iter.valid(); iter.advance()) {
          if(deviation(minusEquals(relation.get(iter).toArray(), separation.originV), separation.basis) < separation.threshold) {
            subset.add(iter);
          }
        }
        // logger.verbose("size:" + subset.size());
        if(subset.size() < minsize) {
          break;
        }
        current = subset;
        lmDim = k;
        // System.out.println("Partition: " + subset.size());
      }
    }
    // No more clusters found
    if(current.size() < minsize || current == unclustered) {
      break;
    }
    // New cluster found
    // TODO: annotate cluster with dimensionality
    final Cluster<Model> cluster = new Cluster<>(current);
    cluster.setName("Cluster_" + lmDim + "d_" + cnum);
    cnum++;
    ret.addToplevelCluster(cluster);
    // Remove from main working set.
    unclustered.removeDBIDs(current);
    if(progress != null) {
      progress.setProcessed(relation.size() - unclustered.size(), LOG);
    }
    if(cprogress != null) {
      cprogress.setProcessed(cnum, LOG);
    }
  }
  // Remaining objects are noise
  if(unclustered.size() > 0) {
    ret.addToplevelCluster(new Cluster<>(unclustered, true));
  }
  if(progress != null) {
    progress.setProcessed(relation.size(), LOG);
    progress.ensureCompleted(LOG);
  }
  LOG.setCompleted(cprogress);
  return ret;
}
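The membership test in the inner loop measures how far a point lies from the sampled linear manifold: subtract the manifold origin, then take the norm of the component orthogonal to the manifold's basis. A minimal sketch of that computation, assuming the basis vectors are orthonormal and represented as rows of a double[][] (ELKI stores the basis as a matrix; the row layout here is an assumption of this sketch).

// Hypothetical sketch of the distance-to-manifold test used above.
public class ManifoldDeviationSketch {
  /** Distance of an (already origin-translated) vector v from span(basis). */
  static double deviation(double[] v, double[][] basis) {
    double[] residual = v.clone();
    for(double[] b : basis) {
      // Subtract the projection of v onto each orthonormal basis vector.
      double dot = 0.;
      for(int d = 0; d < v.length; d++) {
        dot += v[d] * b[d];
      }
      for(int d = 0; d < v.length; d++) {
        residual[d] -= dot * b[d];
      }
    }
    // Length of what remains: the orthogonal component.
    double sum = 0.;
    for(double x : residual) {
      sum += x * x;
    }
    return Math.sqrt(sum);
  }
}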
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class MaterializeKNNAndRKNNPreprocessor method objectsRemoved.
@Override
protected void objectsRemoved(DBIDs ids) {
  StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;
  // For debugging: valid DBIDs still in the database.
  final DBIDs valid = DBIDUtil.ensureSet(distanceQuery.getRelation().getDBIDs());
  ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
  // delete the materialized (old) kNNs and RkNNs
  getLogger().beginStep(stepprog, 1, "New deletions occurred, remove their materialized kNNs and RkNNs.");
  // Temporary storage of removed lists
  List<KNNList> kNNs = new ArrayList<>(ids.size());
  List<TreeSet<DoubleDBIDPair>> rkNNs = new ArrayList<>(ids.size());
  for(DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) {
    kNNs.add(storage.get(iter));
    for(DBIDIter it = storage.get(iter).iter(); it.valid(); it.advance()) {
      if(!valid.contains(it) && !ids.contains(it)) {
        LOG.warning("False kNN: " + it);
      }
    }
    storage.delete(iter);
    rkNNs.add(materialized_RkNN.get(iter));
    for(DoubleDBIDPair it : materialized_RkNN.get(iter)) {
      if(!valid.contains(it) && !ids.contains(it)) {
        LOG.warning("False RkNN: " + it);
      }
    }
    materialized_RkNN.delete(iter);
  }
  // Keep only those IDs not also removed
  ArrayDBIDs kNN_ids = affectedkNN(kNNs, aids);
  ArrayDBIDs rkNN_ids = affectedRkNN(rkNNs, aids);
  // update the affected kNNs and RkNNs
  getLogger().beginStep(stepprog, 2, "New deletions occurred, update the affected kNNs and RkNNs.");
  // Recompute the kNN for affected objects (in rkNN lists)
  {
    List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
    int i = 0;
    for(DBIDIter reknn = rkNN_ids.iter(); reknn.valid(); reknn.advance(), i++) {
      if(kNNList.get(i) == null && !valid.contains(reknn)) {
        LOG.warning("BUG in online kNN/RkNN maintenance: " + DBIDUtil.toString(reknn) + " no longer in database.");
        continue;
      }
      assert (kNNList.get(i) != null);
      storage.put(reknn, kNNList.get(i));
      for(DoubleDBIDListIter it = kNNList.get(i).iter(); it.valid(); it.advance()) {
        materialized_RkNN.get(it).add(makePair(it, reknn));
      }
    }
  }
  // remove objects from RkNNs of objects (in kNN lists)
  {
    SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
    for(DBIDIter nn = kNN_ids.iter(); nn.valid(); nn.advance()) {
      TreeSet<DoubleDBIDPair> rkNN = materialized_RkNN.get(nn);
      for(Iterator<DoubleDBIDPair> it = rkNN.iterator(); it.hasNext();) {
        if(idsSet.contains(it.next())) {
          it.remove();
        }
      }
    }
  }
  // inform listeners
  getLogger().beginStep(stepprog, 3, "New deletions occurred, inform listeners.");
  fireKNNsRemoved(ids, rkNN_ids);
  getLogger().ensureCompleted(stepprog);
}
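The invariant this method maintains is that the kNN store and the reverse-kNN index stay mutually consistent under deletion: removing an object invalidates the kNN lists of exactly those objects that had it as a neighbor. A stripped-down plain-Java sketch of that bookkeeping, with ELKI's DBID machinery replaced by ints and all names purely illustrative:

import java.util.*;

// Hypothetical sketch: keep a kNN map and its reverse index consistent on delete.
public class KnnDeleteSketch {
  Map<Integer, List<Integer>> knn = new HashMap<>(); // object -> its k nearest neighbors
  Map<Integer, Set<Integer>> rknn = new HashMap<>(); // object -> objects listing it as a kNN

  /** Removes 'deleted' and returns the objects whose kNN lists must be recomputed. */
  Set<Integer> objectsRemoved(Set<Integer> deleted) {
    Set<Integer> affected = new HashSet<>();
    for(int id : deleted) {
      // Everyone who had 'id' in their kNN list now has a stale list.
      Set<Integer> r = rknn.remove(id);
      if(r != null) {
        affected.addAll(r);
      }
      // Drop the reverse links pointing back from id's own neighbors.
      List<Integer> nns = knn.remove(id);
      if(nns != null) {
        for(int nn : nns) {
          Set<Integer> back = rknn.get(nn);
          if(back != null) {
            back.remove(id);
          }
        }
      }
    }
    affected.removeAll(deleted); // Deleted objects need no recomputation.
    return affected; // Caller recomputes kNN for these and re-adds reverse links.
  }
}

The ELKI method above does the same two passes, plus bulk kNN recomputation for the affected set and listener notification, with the extra warnings serving as consistency checks during maintenance.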