use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
the class KNNJoin method run.
/**
* Inner run method. This returns the kNN result as a data store, and is used by
* {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
*
* @param relation Data relation
* @param ids Object IDs
* @return Data store
*/
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
    throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
  if (indexes.size() != 1) {
    throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
  }
  // FIXME: Ensure we're looking at the right relation!
  SpatialIndexTree<N, E> index = indexes.iterator().next();
  SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
  // data pages
  List<E> ps_candidates = new ArrayList<>(index.getLeaves());
  // knn heaps
  List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
  // Initialize with the page self-pairing
  for (int i = 0; i < ps_candidates.size(); i++) {
    E pr_entry = ps_candidates.get(i);
    N pr = index.getNode(pr_entry);
    heaps.add(initHeaps(distFunction, pr));
  }
  // Build priority queue
  final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
  ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
  }
  FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
  for (int i = 0; i < ps_candidates.size(); i++) {
    E pr_entry = ps_candidates.get(i);
    N pr = index.getNode(pr_entry);
    List<KNNHeap> pr_heaps = heaps.get(i);
    double pr_knn_distance = computeStopDistance(pr_heaps);
    for (int j = i + 1; j < ps_candidates.size(); j++) {
      E ps_entry = ps_candidates.get(j);
      N ps = index.getNode(ps_entry);
      List<KNNHeap> ps_heaps = heaps.get(j);
      double ps_knn_distance = computeStopDistance(ps_heaps);
      double minDist = distFunction.minDist(pr_entry, ps_entry);
      // Resolve immediately:
      if (minDist <= 0.) {
        processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
      } else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
        pq.add(new Task(minDist, i, j));
      }
      LOG.incrementProcessed(mprogress);
    }
  }
  LOG.ensureCompleted(mprogress);
  // Process the queue
  FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
  IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
  while (!pq.isEmpty()) {
    Task task = pq.poll();
    List<KNNHeap> pr_heaps = heaps.get(task.i);
    List<KNNHeap> ps_heaps = heaps.get(task.j);
    double pr_knn_distance = computeStopDistance(pr_heaps);
    double ps_knn_distance = computeStopDistance(ps_heaps);
    boolean dor = task.mindist <= pr_knn_distance;
    boolean dos = task.mindist <= ps_knn_distance;
    if (dor || dos) {
      N pr = index.getNode(ps_candidates.get(task.i));
      N ps = index.getNode(ps_candidates.get(task.j));
      if (dor && dos) {
        processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
      } else {
        if (dor) {
          processDataPages(distFunction, pr_heaps, null, pr, ps);
        } else /* dos */ {
          processDataPages(distFunction, ps_heaps, null, ps, pr);
        }
      }
      LOG.incrementProcessed(fprogress);
    }
    LOG.incrementProcessed(qprogress);
  }
  LOG.ensureCompleted(qprogress);
  LOG.setCompleted(fprogress);
  WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
  FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
  for (int i = 0; i < ps_candidates.size(); i++) {
    N pr = index.getNode(ps_candidates.get(i));
    List<KNNHeap> pr_heaps = heaps.get(i);
    // Finalize lists
    for (int j = 0; j < pr.getNumEntries(); j++) {
      knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
    }
    // Forget heaps and pq
    heaps.set(i, null);
    LOG.incrementProcessed(pageprog);
  }
  LOG.ensureCompleted(pageprog);
  return knnLists;
}
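
The pair-processing logic above prunes whole leaf pages: a page pair (i, j) only needs the expensive all-pairs distance computation if the minimum distance between their bounding boxes is still at most one of the pages' current k-th nearest neighbor distance, and pairs are drawn from a min-heap ordered by that minimum distance so the most promising pairs tighten the kNN bounds first. Below is a self-contained sketch of just that pruning rule; it is not ELKI code, and the Task class, kthDistance array, and mindist matrix are hypothetical stand-ins for the kNN heaps and MBR distances used above.

import java.util.PriorityQueue;

public class PagePairPruningSketch {
  /** Hypothetical stand-in for a pending leaf-page pair, ordered by mindist. */
  static class Task implements Comparable<Task> {
    final double mindist;
    final int i, j;

    Task(double mindist, int i, int j) {
      this.mindist = mindist;
      this.i = i;
      this.j = j;
    }

    @Override
    public int compareTo(Task o) {
      return Double.compare(mindist, o.mindist);
    }
  }

  /**
   * @param kthDistance current k-th nearest neighbor distance per page (shrinks over time)
   * @param mindist     minimum distance between the bounding boxes of two pages
   */
  static void process(double[] kthDistance, double[][] mindist) {
    final int n = kthDistance.length;
    PriorityQueue<Task> pq = new PriorityQueue<>();
    for (int i = 0; i < n; i++) {
      for (int j = i + 1; j < n; j++) {
        // Enqueue only pairs that could still improve at least one side.
        if (mindist[i][j] <= kthDistance[i] || mindist[i][j] <= kthDistance[j]) {
          pq.add(new Task(mindist[i][j], i, j));
        }
      }
    }
    while (!pq.isEmpty()) {
      Task t = pq.poll();
      // Re-check against the current k-th distances, which only get smaller.
      if (t.mindist > kthDistance[t.i] && t.mindist > kthDistance[t.j]) {
        continue; // pruned: this pair can no longer improve either page
      }
      // Here the expensive page-to-page join would run and tighten
      // kthDistance[t.i] and/or kthDistance[t.j].
    }
  }
}

Because the k-th nearest neighbor distances only shrink as pages are joined, a pair that fails the re-check can never become relevant again, which is why checking once per poll is sufficient.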
use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
the class AffinityPropagationClusteringAlgorithm method run.
/**
* Perform affinity propagation clustering.
*
* @param db Database
* @param relation Relation
* @return Clustering result
*/
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  int[] assignment = new int[size];
  double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
  double[][] r = new double[size][size];
  double[][] a = new double[size][size];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
  MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
  int inactive = 0;
  for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
    // Update responsibility matrix:
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i], si = s[i];
      // Find the two largest values (as initially maxk == i)
      double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
      int maxk = -1;
      for (int k = 0; k < size; k++) {
        double val = ai[k] + si[k];
        if (val > max1) {
          max2 = max1;
          max1 = val;
          maxk = k;
        } else if (val > max2) {
          max2 = val;
        }
      }
      // With the maximum value known, update r:
      for (int k = 0; k < size; k++) {
        double val = si[k] - ((k != maxk) ? max1 : max2);
        ri[k] = ri[k] * lambda + val * (1. - lambda);
      }
    }
    // Update availability matrix
    for (int k = 0; k < size; k++) {
      // Compute sum of max(0, r_ik) for all i.
      // For r_kk, don't apply the max.
      double colposum = 0.;
      for (int i = 0; i < size; i++) {
        if (i == k || r[i][k] > 0.) {
          colposum += r[i][k];
        }
      }
      for (int i = 0; i < size; i++) {
        double val = colposum;
        // Adjust column sum by the one extra term.
        if (i == k || r[i][k] > 0.) {
          val -= r[i][k];
        }
        if (i != k && val > 0.) {
          // min
          val = 0.;
        }
        a[i][k] = a[i][k] * lambda + val * (1 - lambda);
      }
    }
    int changed = 0;
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i];
      double max = Double.NEGATIVE_INFINITY;
      int maxj = -1;
      for (int j = 0; j < size; j++) {
        double v = ai[j] + ri[j];
        if (v > max || (i == j && v >= max)) {
          max = v;
          maxj = j;
        }
      }
      if (assignment[i] != maxj) {
        changed += 1;
        assignment[i] = maxj;
      }
    }
    inactive = (changed > 0) ? 0 : (inactive + 1);
    LOG.incrementProcessed(prog);
    if (aprog != null) {
      aprog.setProcessed(size - changed, LOG);
    }
  }
  if (aprog != null) {
    aprog.setProcessed(aprog.getTotal(), LOG);
  }
  LOG.setCompleted(prog);
  // Cluster map, by lead object
  Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
  DBIDArrayIter i1 = ids.iter();
  for (int i = 0; i1.valid(); i1.advance(), i++) {
    int c = assignment[i];
    // Add to cluster members:
    ModifiableDBIDs cids = map.get(c);
    if (cids == null) {
      cids = DBIDUtil.newArray();
      map.put(c, cids);
    }
    cids.add(i1);
  }
  // If we stopped early, the cluster lead might be in a different cluster.
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    final int key = entry.getIntKey();
    int targetkey = key;
    ModifiableDBIDs tids = null;
    // Chase arrows:
    while (tids == null && assignment[targetkey] != targetkey) {
      targetkey = assignment[targetkey];
      tids = map.get(targetkey);
    }
    if (tids != null && targetkey != key) {
      tids.addDBIDs(entry.getValue());
      iter.remove();
    }
  }
  Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
  ModifiableDBIDs noise = DBIDUtil.newArray();
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    i1.seek(entry.getIntKey());
    if (entry.getValue().size() > 1) {
      MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
      clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
    } else {
      noise.add(i1);
    }
  }
  if (noise.size() > 0) {
    MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
    clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
  }
  return clustering;
}
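
The inner loops implement the standard affinity propagation message updates with damping factor lambda: responsibilities r(i,k) = s(i,k) - max over k' != k of (a(i,k') + s(i,k')), and availabilities a(i,k) = min(0, r(k,k) + sum over i' not in {i,k} of max(0, r(i',k))), with the self-availability a(k,k) = sum over i' != k of max(0, r(i',k)). The following sketch isolates one such iteration on plain arrays; the class and method names are hypothetical and the code is illustrative, not ELKI API.

public class AffinityPropagationStepSketch {
  /** One damped message-passing iteration on a similarity matrix s, updating r and a in place. */
  static void iterate(double[][] s, double[][] r, double[][] a, double lambda) {
    final int n = s.length;
    // Responsibility: r(i,k) = s(i,k) - max over k' != k of (a(i,k') + s(i,k'))
    for (int i = 0; i < n; i++) {
      double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
      int maxk = -1;
      for (int k = 0; k < n; k++) {
        double v = a[i][k] + s[i][k];
        if (v > max1) {
          max2 = max1;
          max1 = v;
          maxk = k;
        } else if (v > max2) {
          max2 = v;
        }
      }
      for (int k = 0; k < n; k++) {
        double val = s[i][k] - (k != maxk ? max1 : max2);
        r[i][k] = lambda * r[i][k] + (1 - lambda) * val; // damping
      }
    }
    // Availability: a(i,k) = min(0, r(k,k) + sum over i' not in {i,k} of max(0, r(i',k)))
    // and a(k,k) = sum over i' != k of max(0, r(i',k)).
    for (int k = 0; k < n; k++) {
      double colposum = 0.;
      for (int i = 0; i < n; i++) {
        colposum += (i == k) ? r[i][k] : Math.max(0., r[i][k]);
      }
      for (int i = 0; i < n; i++) {
        double val = colposum - ((i == k) ? r[i][k] : Math.max(0., r[i][k]));
        if (i != k && val > 0.) {
          val = 0.; // apply the min(0, ...) for off-diagonal entries
        }
        a[i][k] = lambda * a[i][k] + (1 - lambda) * val;
      }
    }
  }
}

After the messages stabilize, point i is assigned to the exemplar k that maximizes a(i,k) + r(i,k), which is exactly what the assignment loop in the method above computes.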
use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
the class UKMeans method run.
/**
* Run the clustering.
*
* @param database the Database
* @param relation the Relation
* @return Clustering result
*/
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  }
  // Choose initial means randomly
  DBIDs sampleids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
  List<double[]> means = new ArrayList<>(k);
  for (DBIDIter iter = sampleids.iter(); iter.valid(); iter.advance()) {
    means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(iter).getCenterOfMass()));
  }
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
  DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
    // Recompute means.
    means = means(clusters, means, relation);
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    DBIDs ids = clusters.get(i);
    if (ids.isEmpty()) {
      continue;
    }
    result.addToplevelCluster(new Cluster<>(ids, new KMeansModel(means.get(i), varsum[i])));
  }
  return result;
}
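
UK-means differs from plain k-means in that each object is an uncertain, discretely sampled object, and the assignment step (assignToNearestCluster, not shown here) compares an object's expected distance to each candidate mean rather than a single point-to-point distance. A minimal sketch of that assignment rule, assuming squared Euclidean distance and hypothetical class and method names (this is not the ELKI implementation used above):

public class UKMeansAssignSketch {
  /** Expected squared Euclidean distance between one object's samples and a mean. */
  static double expectedSquaredDistance(double[][] samples, double[] mean) {
    double sum = 0.;
    for (double[] sample : samples) {
      for (int d = 0; d < mean.length; d++) {
        double diff = sample[d] - mean[d];
        sum += diff * diff;
      }
    }
    return sum / samples.length;
  }

  /** Assign each uncertain object (given as its sample set) to the mean minimizing the expected distance. */
  static int[] assign(double[][][] objects, double[][] means) {
    int[] assignment = new int[objects.length];
    for (int i = 0; i < objects.length; i++) {
      double best = Double.POSITIVE_INFINITY;
      for (int k = 0; k < means.length; k++) {
        double d = expectedSquaredDistance(objects[i], means[k]);
        if (d < best) {
          best = d;
          assignment[i] = k;
        }
      }
    }
    return assignment;
  }
}

For squared Euclidean distance, the expected distance decomposes into the distance to the object's center of mass plus a variance term that does not depend on the mean, which is why the initial means above can simply be seeded from getCenterOfMass().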
use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
the class DBSCAN method runDBSCAN.
/**
* Run the DBSCAN algorithm
*
* @param relation Data relation
* @param rangeQuery Range query class
*/
protected void runDBSCAN(Relation<O> relation, RangeQuery<O> rangeQuery) {
  final int size = relation.size();
  FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("Processing objects", size, LOG) : null;
  IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
  processedIDs = DBIDUtil.newHashSet(size);
  ArrayModifiableDBIDs seeds = DBIDUtil.newArray();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    if (!processedIDs.contains(iditer)) {
      expandCluster(relation, rangeQuery, iditer, seeds, objprog, clusprog);
    }
    if (objprog != null && clusprog != null) {
      objprog.setProcessed(processedIDs.size(), LOG);
      clusprog.setProcessed(resultList.size(), LOG);
    }
    if (processedIDs.size() == size) {
      break;
    }
  }
  // Finish progress logging
  LOG.ensureCompleted(objprog);
  LOG.setCompleted(clusprog);
}
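
The actual cluster growing happens in expandCluster, which is not shown in this snippet. For orientation, here is a textbook sketch of that expansion step: the epsilon-neighborhood of a starting point is fetched, and if the point is a core point (at least minpts neighbors), its neighbors are pushed onto a seed queue and expanded transitively. The RangeQuery interface, neighbors method, and minpts parameter below are hypothetical stand-ins, not the ELKI API used above.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;

public class DBSCANExpandSketch {
  interface RangeQuery {
    List<Integer> neighbors(int id); // all points within radius epsilon of the given point
  }

  static List<Integer> expandCluster(int start, RangeQuery rq, boolean[] processed, int minpts) {
    List<Integer> cluster = new ArrayList<>();
    List<Integer> seeds = rq.neighbors(start);
    processed[start] = true;
    if (seeds.size() < minpts) {
      return cluster; // not a core point; the caller treats it as noise for now
    }
    ArrayDeque<Integer> queue = new ArrayDeque<>(seeds);
    cluster.add(start);
    while (!queue.isEmpty()) {
      int next = queue.poll();
      if (processed[next]) {
        continue;
      }
      processed[next] = true;
      cluster.add(next);
      List<Integer> nn = rq.neighbors(next);
      if (nn.size() >= minpts) {
        queue.addAll(nn); // next is a core point: its neighbors are density-reachable too
      }
    }
    return cluster;
  }
}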
use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
the class DOC method run.
/**
* Performs the DOC or FastDOC (as configured) algorithm on the given
* Database.
*
* This will run exhaustively, i.e. it repeats DOC until no more clusters are
* found, or the remaining data set has shrunk below the minimum cluster size
* threshold.
*
* @param database Database
* @param relation Data relation
* @return Clustering result
*/
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  // Dimensionality of our set.
  final int d = RelationUtil.dimensionality(relation);
  // Get available DBIDs as a set we can remove items from.
  ArrayModifiableDBIDs S = DBIDUtil.newArray(relation.getDBIDs());
  // Precompute values as described in Figure 2.
  double r = Math.abs(FastMath.log(d + d) / FastMath.log(beta * .5));
  // Outer loop count.
  int n = (int) (2. / alpha);
  // Inner loop count.
  int m = (int) (FastMath.pow(2. / alpha, r) * FastMath.log(4));
  // TODO: This should only apply for FastDOC.
  m = Math.min(m, Math.min(1000000, d * d));
  // Minimum size for a cluster for it to be accepted.
  int minClusterSize = (int) (alpha * S.size());
  // List of all clusters we found.
  Clustering<SubspaceModel> result = new Clustering<>("DOC Clusters", "DOC");
  // Inform the user about the number of actual clusters found so far.
  IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
  // Keep searching until the remaining set of points is too small to form another cluster.
  while (S.size() > minClusterSize) {
    Cluster<SubspaceModel> C = runDOC(database, relation, S, d, n, m, (int) r, minClusterSize);
    if (C == null) {
      // Stop trying if we couldn't find a cluster.
      break;
    }
    // Found a cluster, remember it, remove its points from the set.
    result.addToplevelCluster(C);
    // Remove all points of the cluster from the set and continue.
    S.removeDBIDs(C.getIDs());
    if (cprogress != null) {
      cprogress.setProcessed(result.getAllClusters().size(), LOG);
    }
  }
  // Add the remainder as noise.
  if (S.size() > 0) {
    long[] alldims = BitsUtil.ones(d);
    result.addToplevelCluster(new Cluster<>(S, true, new SubspaceModel(new Subspace(alldims), Centroid.make(relation, S).getArrayRef())));
  }
  LOG.setCompleted(cprogress);
  return result;
}
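
The constants computed at the top of the method follow the "Figure 2" reference in the code comment: r is rounded and passed to runDOC, where it serves as the size of the randomly drawn discriminating sets, n is the number of candidate medoids (outer trials), m the number of discriminating sets per medoid (inner trials), and minClusterSize requires a cluster to cover an alpha-fraction of the remaining data. A small worked example, evaluating the same expressions for hypothetical parameters alpha = 0.2, beta = 0.8, d = 10, and 1000 points (standalone Java, not ELKI code):

public class DOCParameterSketch {
  public static void main(String[] args) {
    double alpha = 0.2, beta = 0.8;
    int d = 10, datasetSize = 1000;
    // r: later rounded to int and used as the discriminating set size.
    double r = Math.abs(Math.log(d + d) / Math.log(beta * .5));
    // Outer loop: number of candidate medoids to try.
    int n = (int) (2. / alpha);
    // Inner loop: number of discriminating sets per medoid.
    int m = (int) (Math.pow(2. / alpha, r) * Math.log(4));
    // A cluster must contain at least an alpha-fraction of the data to be accepted.
    int minClusterSize = (int) (alpha * datasetSize);
    System.out.println("r=" + r + " n=" + n + " m=" + m + " minClusterSize=" + minClusterSize);
  }
}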