use of de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction in project elki by elki-project.
the class DeLiClu method run.
public ClusterOrder run(Database database, Relation<NV> relation) {
Collection<DeLiCluTreeIndex<NV>> indexes = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
if (indexes.size() != 1) {
throw new MissingPrerequisitesException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
}
DeLiCluTreeIndex<NV> index = indexes.iterator().next();
if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
}
@SuppressWarnings("unchecked") SpatialPrimitiveDistanceFunction<NV> distFunction = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();
// first do the knn-Join
if (LOG.isVerbose()) {
LOG.verbose("knnJoin...");
}
Relation<KNNList> knns = knnJoin.run(relation);
DBIDs ids = relation.getDBIDs();
final int size = ids.size();
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", size, LOG) : null;
ClusterOrder clusterOrder = new ClusterOrder(ids, "DeLiClu Clustering", "deliclu-clustering");
heap = new UpdatableHeap<>();
// add start object to cluster order and (root, root) to priority queue
DBID startID = DBIDUtil.deref(ids.iter());
clusterOrder.add(startID, Double.POSITIVE_INFINITY, null);
int numHandled = 1;
index.setHandled(startID, relation.get(startID));
SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) index.getRootEntry();
SpatialObjectPair spatialObjectPair = new SpatialObjectPair(0., rootEntry, rootEntry, true);
heap.add(spatialObjectPair);
while (numHandled < size) {
if (heap.isEmpty()) {
throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
}
SpatialObjectPair dataPair = heap.poll();
// pair of nodes
if (dataPair.isExpandable) {
expandNodes(index, distFunction, dataPair, knns);
} else // pair of objects
{
// set handled
LeafEntry e1 = (LeafEntry) dataPair.entry1;
LeafEntry e2 = (LeafEntry) dataPair.entry2;
final DBID e1id = e1.getDBID();
IndexTreePath<DeLiCluEntry> path = index.setHandled(e1id, relation.get(e1id));
if (path == null) {
throw new RuntimeException("snh: parent(" + e1id + ") = null!!!");
}
// add to cluster order
clusterOrder.add(e1id, dataPair.distance, e2.getDBID());
numHandled++;
// reinsert expanded leafs
reinsertExpanded(distFunction, index, path, knns);
if (progress != null) {
progress.setProcessed(numHandled, LOG);
}
}
}
LOG.ensureCompleted(progress);
return clusterOrder;
}
use of de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction in project elki by elki-project.
the class KNNJoin method run.
/**
* Inner run method. This returns a double store, and is used by
* {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
*
* @param relation Data relation
* @param ids Object IDs
* @return Data store
*/
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
}
Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
if (indexes.size() != 1) {
throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
}
// FIXME: Ensure were looking at the right relation!
SpatialIndexTree<N, E> index = indexes.iterator().next();
SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
// data pages
List<E> ps_candidates = new ArrayList<>(index.getLeaves());
// knn heaps
List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
// Initialize with the page self-pairing
for (int i = 0; i < ps_candidates.size(); i++) {
E pr_entry = ps_candidates.get(i);
N pr = index.getNode(pr_entry);
heaps.add(initHeaps(distFunction, pr));
}
// Build priority queue
final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
if (LOG.isDebuggingFine()) {
LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
}
FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
for (int i = 0; i < ps_candidates.size(); i++) {
E pr_entry = ps_candidates.get(i);
N pr = index.getNode(pr_entry);
List<KNNHeap> pr_heaps = heaps.get(i);
double pr_knn_distance = computeStopDistance(pr_heaps);
for (int j = i + 1; j < ps_candidates.size(); j++) {
E ps_entry = ps_candidates.get(j);
N ps = index.getNode(ps_entry);
List<KNNHeap> ps_heaps = heaps.get(j);
double ps_knn_distance = computeStopDistance(ps_heaps);
double minDist = distFunction.minDist(pr_entry, ps_entry);
// Resolve immediately:
if (minDist <= 0.) {
processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
} else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
pq.add(new Task(minDist, i, j));
}
LOG.incrementProcessed(mprogress);
}
}
LOG.ensureCompleted(mprogress);
// Process the queue
FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
while (!pq.isEmpty()) {
Task task = pq.poll();
List<KNNHeap> pr_heaps = heaps.get(task.i);
List<KNNHeap> ps_heaps = heaps.get(task.j);
double pr_knn_distance = computeStopDistance(pr_heaps);
double ps_knn_distance = computeStopDistance(ps_heaps);
boolean dor = task.mindist <= pr_knn_distance;
boolean dos = task.mindist <= ps_knn_distance;
if (dor || dos) {
N pr = index.getNode(ps_candidates.get(task.i));
N ps = index.getNode(ps_candidates.get(task.j));
if (dor && dos) {
processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
} else {
if (dor) {
processDataPages(distFunction, pr_heaps, null, pr, ps);
} else /* dos */
{
processDataPages(distFunction, ps_heaps, null, ps, pr);
}
}
LOG.incrementProcessed(fprogress);
}
LOG.incrementProcessed(qprogress);
}
LOG.ensureCompleted(qprogress);
LOG.setCompleted(fprogress);
WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
for (int i = 0; i < ps_candidates.size(); i++) {
N pr = index.getNode(ps_candidates.get(i));
List<KNNHeap> pr_heaps = heaps.get(i);
// Finalize lists
for (int j = 0; j < pr.getNumEntries(); j++) {
knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
}
// Forget heaps and pq
heaps.set(i, null);
LOG.incrementProcessed(pageprog);
}
LOG.ensureCompleted(pageprog);
return knnLists;
}
Aggregations