Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.
The class MetricalIndexKNNQuery, method getKNNForObject.
@Override
public KNNList getKNNForObject(O q, int k) {
  if (k < 1) {
    throw new IllegalArgumentException("At least one object has to be requested!");
  }
  index.statistics.countKNNQuery();
  KNNHeap knnList = DBIDUtil.newHeap(k);
  double d_k = Double.POSITIVE_INFINITY;
  final ComparableMinHeap<MTreeSearchCandidate> pq = new ComparableMinHeap<>();
  // Push the root node
  pq.add(new MTreeSearchCandidate(0., index.getRootID(), null, 0.));
  // Search the tree, best-first by minimum distance
  while (!pq.isEmpty()) {
    MTreeSearchCandidate pqNode = pq.poll();
    if (knnList.size() >= k && pqNode.mindist > d_k) {
      break; // No remaining candidate can improve the result
    }
    AbstractMTreeNode<?, ?, ?> node = index.getNode(pqNode.nodeID);
    DBID id_p = pqNode.routingObjectID;
    double d1 = pqNode.routingDistance;
    if (!node.isLeaf()) {
      // Directory node
      for (int i = 0; i < node.getNumEntries(); i++) {
        MTreeEntry entry = node.getEntry(i);
        DBID o_r = entry.getRoutingObjectID();
        double r_or = entry.getCoveringRadius();
        double d2 = id_p != null ? entry.getParentDistance() : 0.;
        double diff = Math.abs(d1 - d2);
        double sum = d_k + r_or;
        // Triangle inequality filter, to avoid a distance computation:
        if (diff <= sum) {
          double d3 = distanceQuery.distance(o_r, q);
          index.statistics.countDistanceCalculation();
          double d_min = Math.max(d3 - r_or, 0.);
          if (d_min <= d_k) {
            pq.add(new MTreeSearchCandidate(d_min, ((DirectoryEntry) entry).getPageID(), o_r, d3));
          }
        }
      }
    } else {
      // Data node
      for (int i = 0; i < node.getNumEntries(); i++) {
        MTreeEntry entry = node.getEntry(i);
        DBID o_j = entry.getRoutingObjectID();
        double d2 = id_p != null ? entry.getParentDistance() : 0.;
        double diff = Math.abs(d1 - d2);
        if (diff <= d_k) {
          double d3 = distanceQuery.distance(o_j, q);
          index.statistics.countDistanceCalculation();
          if (d3 <= d_k) {
            knnList.insert(d3, o_j);
            d_k = knnList.getKNNDistance();
          }
        }
      }
    }
  }
  return knnList.toKNNList();
}
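The method above is the classic best-first kNN traversal: candidates are expanded in ascending order of their distance lower bound, and the loop terminates as soon as that bound exceeds the current k-th nearest distance d_k. The following is a minimal, self-contained sketch of the same pattern, using java.util.PriorityQueue in place of ELKI's ComparableMinHeap and a hypothetical ball-tree Node in place of the M-tree classes; it illustrates the traversal, not ELKI's API.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

// Standalone sketch of best-first kNN search with lower-bound pruning.
// Node and the ball-tree layout are hypothetical stand-ins for ELKI's
// M-tree classes; only the traversal pattern mirrors getKNNForObject.
public class BestFirstKnnSketch {
  static class Node {
    double[] center; // routing object
    double radius; // covering radius
    List<Node> children = new ArrayList<>(); // empty in leaf nodes
    List<double[]> points = new ArrayList<>(); // non-empty only in leaves
  }

  record Candidate(double mindist, Node node) implements Comparable<Candidate> {
    @Override
    public int compareTo(Candidate o) {
      return Double.compare(mindist, o.mindist);
    }
  }

  static double dist(double[] a, double[] b) {
    double s = 0.;
    for (int i = 0; i < a.length; i++) {
      double d = a[i] - b[i];
      s += d * d;
    }
    return Math.sqrt(s);
  }

  static List<Double> kNearestDistances(Node root, double[] q, int k) {
    PriorityQueue<Candidate> pq = new PriorityQueue<>(); // min-heap by mindist
    // Max-heap of the k best distances found so far:
    PriorityQueue<Double> knn = new PriorityQueue<>((a, b) -> Double.compare(b, a));
    pq.add(new Candidate(0., root));
    while (!pq.isEmpty()) {
      Candidate c = pq.poll();
      double dk = knn.size() >= k ? knn.peek() : Double.POSITIVE_INFINITY;
      if (c.mindist() > dk) {
        break; // no remaining subtree can contain a closer point
      }
      for (Node child : c.node().children) {
        // Lower bound on the distance from q to anything inside the child:
        double mindist = Math.max(dist(child.center, q) - child.radius, 0.);
        if (mindist <= dk) {
          pq.add(new Candidate(mindist, child));
        }
      }
      for (double[] p : c.node().points) {
        double d = dist(p, q);
        if (d <= dk) {
          knn.add(d);
          if (knn.size() > k) {
            knn.poll(); // drop the current worst
          }
          dk = knn.size() >= k ? knn.peek() : Double.POSITIVE_INFINITY;
        }
      }
    }
    List<Double> out = new ArrayList<>(knn);
    out.sort(null); // ascending
    return out;
  }
}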
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.
The class KNNJoin, method run.
/**
 * Inner run method. This returns a data store of kNN lists, and is used by
 * {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
 *
 * @param relation Data relation
 * @param ids Object IDs
 * @return Data store
 */
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
    throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
  if (indexes.size() != 1) {
    throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
  }
  // FIXME: Ensure we're looking at the right relation!
  SpatialIndexTree<N, E> index = indexes.iterator().next();
  SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
  // Data pages
  List<E> ps_candidates = new ArrayList<>(index.getLeaves());
  // kNN heaps
  List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
  // Initialize with the page self-pairing
  for (int i = 0; i < ps_candidates.size(); i++) {
    E pr_entry = ps_candidates.get(i);
    N pr = index.getNode(pr_entry);
    heaps.add(initHeaps(distFunction, pr));
  }
  // Build the priority queue of page pairs
  final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
  ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
  }
  FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
  for (int i = 0; i < ps_candidates.size(); i++) {
    E pr_entry = ps_candidates.get(i);
    N pr = index.getNode(pr_entry);
    List<KNNHeap> pr_heaps = heaps.get(i);
    double pr_knn_distance = computeStopDistance(pr_heaps);
    for (int j = i + 1; j < ps_candidates.size(); j++) {
      E ps_entry = ps_candidates.get(j);
      N ps = index.getNode(ps_entry);
      List<KNNHeap> ps_heaps = heaps.get(j);
      double ps_knn_distance = computeStopDistance(ps_heaps);
      double minDist = distFunction.minDist(pr_entry, ps_entry);
      // Resolve overlapping pages immediately:
      if (minDist <= 0.) {
        processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
      } else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
        pq.add(new Task(minDist, i, j));
      }
      LOG.incrementProcessed(mprogress);
    }
  }
  LOG.ensureCompleted(mprogress);
  // Process the queue
  FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
  IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
  while (!pq.isEmpty()) {
    Task task = pq.poll();
    List<KNNHeap> pr_heaps = heaps.get(task.i);
    List<KNNHeap> ps_heaps = heaps.get(task.j);
    // Stop distances may have shrunk since the task was queued:
    double pr_knn_distance = computeStopDistance(pr_heaps);
    double ps_knn_distance = computeStopDistance(ps_heaps);
    boolean dor = task.mindist <= pr_knn_distance;
    boolean dos = task.mindist <= ps_knn_distance;
    if (dor || dos) {
      N pr = index.getNode(ps_candidates.get(task.i));
      N ps = index.getNode(ps_candidates.get(task.j));
      if (dor && dos) {
        processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
      } else if (dor) {
        processDataPages(distFunction, pr_heaps, null, pr, ps);
      } else /* dos */ {
        processDataPages(distFunction, ps_heaps, null, ps, pr);
      }
      LOG.incrementProcessed(fprogress);
    }
    LOG.incrementProcessed(qprogress);
  }
  LOG.ensureCompleted(qprogress);
  LOG.setCompleted(fprogress);
  WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
  FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
  for (int i = 0; i < ps_candidates.size(); i++) {
    N pr = index.getNode(ps_candidates.get(i));
    List<KNNHeap> pr_heaps = heaps.get(i);
    // Finalize lists
    for (int j = 0; j < pr.getNumEntries(); j++) {
      knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
    }
    // Forget heaps, to free memory
    heaps.set(i, null);
    LOG.incrementProcessed(pageprog);
  }
  LOG.ensureCompleted(pageprog);
  return knnLists;
}
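Here the heap holds page pairs rather than tree nodes: every pair of leaf pages is queued by the minimum distance between their bounding boxes, and a pair is skipped once that distance exceeds the current kNN stop distance of both pages, which only shrinks as the heaps fill. Below is a reduced sketch of this queue discipline, with a hypothetical Page type whose stopDistance field stands in for computeStopDistance over the page's heaps, and with the one-sided processing of the original simplified away.

import java.util.List;
import java.util.PriorityQueue;

// Sketch of KNNJoin's pairwise page queue. Page, MinDist and process()
// are hypothetical stand-ins for the spatial index and heap machinery.
class PageJoinSketch {
  static class Page {
    double stopDistance = Double.POSITIVE_INFINITY; // shrinks as heaps fill
  }

  interface MinDist {
    double of(int i, int j); // minimum distance between the pages' MBRs
  }

  record Task(double mindist, int i, int j) implements Comparable<Task> {
    @Override
    public int compareTo(Task o) {
      return Double.compare(mindist, o.mindist);
    }
  }

  static void join(List<Page> pages, MinDist minDist) {
    int n = pages.size();
    PriorityQueue<Task> pq = new PriorityQueue<>(Math.max(1, n * (n - 1) >>> 1));
    for (int i = 0; i < n; i++) {
      for (int j = i + 1; j < n; j++) {
        double d = minDist.of(i, j);
        if (d <= 0.) {
          process(pages.get(i), pages.get(j)); // overlapping pages: resolve now
        } else if (d <= pages.get(i).stopDistance || d <= pages.get(j).stopDistance) {
          pq.add(new Task(d, i, j));
        }
      }
    }
    while (!pq.isEmpty()) {
      Task t = pq.poll();
      // Re-test: both stop distances may have shrunk since the task was queued.
      if (t.mindist() <= pages.get(t.i()).stopDistance || t.mindist() <= pages.get(t.j()).stopDistance) {
        process(pages.get(t.i()), pages.get(t.j()));
      }
    }
  }

  static void process(Page a, Page b) {
    // Compare the pages' points pairwise, update the kNN heaps, and
    // recompute both pages' stop distances (omitted in this sketch).
  }
}

Because each processed pair tightens both pages' bounds, tasks polled later are increasingly likely to be discarded without any distance computations.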
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.
The class MkCoPTree, method doReverseKNNQuery.
/**
 * Performs a reverse kNN query.
 *
 * @param k the parameter k of the RkNN query
 * @param q the id of the query object
 * @param result holds the confirmed results (they do not need refinement)
 * @param candidates holds possible candidates for the result (they need
 *        refinement)
 */
private void doReverseKNNQuery(int k, DBIDRef q, ModifiableDoubleDBIDList result, ModifiableDBIDs candidates) {
  final ComparableMinHeap<MTreeSearchCandidate> pq = new ComparableMinHeap<>();
  // Push the root node
  pq.add(new MTreeSearchCandidate(0., getRootID(), null, Double.NaN));
  // Search in the tree
  while (!pq.isEmpty()) {
    MTreeSearchCandidate pqNode = pq.poll();
    // FIXME: cache the distance to the routing object in the queue node!
    MkCoPTreeNode<O> node = getNode(pqNode.nodeID);
    if (!node.isLeaf()) {
      // Directory node
      for (int i = 0; i < node.getNumEntries(); i++) {
        MkCoPEntry entry = node.getEntry(i);
        double distance = distance(entry.getRoutingObjectID(), q);
        double minDist = entry.getCoveringRadius() > distance ? 0. : distance - entry.getCoveringRadius();
        double approximatedKnnDist_cons = entry.approximateConservativeKnnDistance(k);
        // Descend only if the subtree may contain reverse neighbors:
        if (minDist <= approximatedKnnDist_cons) {
          pq.add(new MTreeSearchCandidate(minDist, getPageID(entry), entry.getRoutingObjectID(), Double.NaN));
        }
      }
    } else {
      // Data node
      for (int i = 0; i < node.getNumEntries(); i++) {
        MkCoPLeafEntry entry = (MkCoPLeafEntry) node.getEntry(i);
        double distance = distance(entry.getRoutingObjectID(), q);
        double approximatedKnnDist_prog = entry.approximateProgressiveKnnDistance(k);
        if (distance <= approximatedKnnDist_prog) {
          // Certain result: no refinement needed
          result.add(distance, entry.getRoutingObjectID());
        } else {
          double approximatedKnnDist_cons = entry.approximateConservativeKnnDistance(k);
          double diff = distance - approximatedKnnDist_cons;
          if (diff <= 1E-10) {
            // Possible result: needs refinement
            candidates.add(entry.getRoutingObjectID());
          }
        }
      }
    }
  }
}
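The pruning here relies on the two precomputed bounds that give MkCoP its name: the progressive approximation is a lower bound on an entry's true kNN distance (distances at or below it are certain results), while the conservative approximation is an upper bound (distances above it, beyond a small tolerance, can be pruned; everything in between needs refinement). A condensed sketch of that three-way leaf test, with a hypothetical Entry interface in place of MkCoPLeafEntry:

// Sketch of the leaf-level three-way test in doReverseKNNQuery.
// Entry and its two bounds are hypothetical stand-ins for MkCoPLeafEntry.
class RknnClassifySketch {
  static final double EPS = 1E-10; // tolerance, as in the code above

  interface Entry {
    double progressiveKnnDist(int k); // lower bound on the true kNN distance

    double conservativeKnnDist(int k); // upper bound on the true kNN distance
  }

  enum Outcome {
    RESULT, CANDIDATE, PRUNED
  }

  static Outcome classify(Entry e, double distToQuery, int k) {
    if (distToQuery <= e.progressiveKnnDist(k)) {
      // q lies within even the smallest possible kNN radius of e, so e
      // certainly has q among its k nearest: e is a reverse neighbor of q.
      return Outcome.RESULT;
    }
    if (distToQuery - e.conservativeKnnDist(k) <= EPS) {
      // The upper bound still reaches q: the exact kNN distance of e
      // must be computed to decide (refinement).
      return Outcome.CANDIDATE;
    }
    // q is farther than e's kNN distance can possibly be: safe to discard.
    return Outcome.PRUNED;
  }
}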
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.
The class CASH, method doRun.
/**
 * Runs the CASH algorithm on the specified database. This method is called
 * recursively until only noise is left.
 *
 * @param relation the relation to run the CASH algorithm on
 * @param progress the progress object for verbose messages
 * @return a mapping of subspace dimensionalities to clusters
 */
private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
  Clustering<Model> res = new Clustering<>("CASH clustering", "cash-clustering");
  final int dim = dimensionality(relation);
  // Initialize the heap
  ObjectHeap<IntegerPriorityObject<CASHInterval>> heap = new ComparableMinHeap<>();
  ModifiableDBIDs noiseIDs = DBIDUtil.newHashSet(relation.getDBIDs());
  initHeap(heap, relation, dim, noiseIDs);
  if (LOG.isVerbose()) {
    LOG.verbose(new StringBuilder().append("dim ").append(dim).append(" database.size ").append(relation.size()).toString());
  }
  // Get the "best" d-dimensional intervals at the maximum level
  while (!heap.isEmpty()) {
    CASHInterval interval = determineNextIntervalAtMaxLevel(heap);
    if (LOG.isVerbose()) {
      LOG.verbose("next interval in dim " + dim + ": " + interval);
    }
    // Only noise left
    if (interval == null) {
      break;
    }
    // Do a (dim - 1)-dimensional run
    ModifiableDBIDs clusterIDs = DBIDUtil.newHashSet();
    if (dim > minDim + 1) {
      ModifiableDBIDs ids;
      double[][] basis_dim_minus_1;
      if (adjust) {
        ids = DBIDUtil.newHashSet();
        basis_dim_minus_1 = runDerivator(relation, dim, interval, ids);
      } else {
        ids = interval.getIDs();
        basis_dim_minus_1 = determineBasis(SpatialUtil.centroid(interval));
      }
      if (ids.size() != 0) {
        MaterializedRelation<ParameterizationFunction> db = buildDB(dim, basis_dim_minus_1, ids, relation);
        // Add the results of the (dim - 1)-dimensional run to this result
        Clustering<Model> res_dim_minus_1 = doRun(db, progress);
        for (Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
          res.addToplevelCluster(cluster);
          noiseIDs.removeDBIDs(cluster.getIDs());
          clusterIDs.addDBIDs(cluster.getIDs());
          processedIDs.addDBIDs(cluster.getIDs());
        }
      }
    } else {
      // dim == minDim
      LinearEquationSystem les = runDerivator(relation, dim - 1, interval.getIDs());
      Cluster<Model> c = new Cluster<Model>(interval.getIDs(), new LinearEquationModel(les));
      res.addToplevelCluster(c);
      noiseIDs.removeDBIDs(interval.getIDs());
      clusterIDs.addDBIDs(interval.getIDs());
      processedIDs.addDBIDs(interval.getIDs());
    }
    // Rebuild the heap: remove the clustered IDs from the remaining intervals
    ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<>(heap.size());
    for (ObjectHeap.UnsortedIter<IntegerPriorityObject<CASHInterval>> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
      heapVector.add(iter.get());
    }
    heap.clear();
    for (IntegerPriorityObject<CASHInterval> pair : heapVector) {
      CASHInterval currentInterval = pair.getObject();
      currentInterval.removeIDs(clusterIDs);
      if (currentInterval.getIDs().size() >= minPts) {
        heap.add(new IntegerPriorityObject<>(currentInterval.priority(), currentInterval));
      }
    }
    if (progress != null) {
      progress.setProcessed(processedIDs.size(), LOG);
    }
  }
  // Add the remaining noise to the clustering
  if (!noiseIDs.isEmpty()) {
    if (dim == noiseDim) {
      res.addToplevelCluster(new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER));
      processedIDs.addDBIDs(noiseIDs);
    } else if (noiseIDs.size() >= minPts) {
      LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
      res.addToplevelCluster(new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les)));
      processedIDs.addDBIDs(noiseIDs);
    }
  }
  if (LOG.isDebugging()) {
    StringBuilder msg = new StringBuilder();
    msg.append("noise for dim ").append(dim).append(": ").append(noiseIDs.size());
    for (Cluster<Model> c : res.getAllClusters()) {
      if (c.getModel() instanceof LinearEquationModel) {
        msg.append("\n Cluster: Dim: ").append(((LinearEquationModel) c.getModel()).getLes().subspacedim());
      } else {
        msg.append("\n Cluster: ").append(c.getModel().getClass().getName());
      }
      msg.append(" size: ").append(c.size());
    }
    LOG.debugFine(msg.toString());
  }
  if (progress != null) {
    progress.setProcessed(processedIDs.size(), LOG);
  }
  return res;
}
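Note the rebuild step in the middle of the loop: ComparableMinHeap offers no way to update queued elements in place, so after each extracted cluster the heap is drained through its unsorted iterator, the clustered IDs are removed from every remaining interval, and only intervals that still hold at least minPts points are re-inserted with a freshly computed priority. The same drain-filter-reinsert pattern in isolation, with a hypothetical Interval type and java.util.PriorityQueue standing in for CASHInterval and the ELKI heap (the ordering direction is illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;

// Sketch of CASH's heap rebuild. Interval and its priority() are
// hypothetical stand-ins for CASHInterval and IntegerPriorityObject.
class HeapRebuildSketch {
  static class Interval {
    final Set<Integer> ids;

    Interval(Set<Integer> ids) {
      this.ids = ids;
    }

    int priority() {
      return ids.size(); // e.g. a support-based priority
    }
  }

  record Entry(int priority, Interval interval) implements Comparable<Entry> {
    @Override
    public int compareTo(Entry o) {
      return Integer.compare(priority, o.priority);
    }
  }

  static void rebuild(PriorityQueue<Entry> heap, Set<Integer> clusteredIds, int minPts) {
    // Drain: iteration order does not matter here, only membership.
    List<Entry> drained = new ArrayList<>(heap);
    heap.clear();
    for (Entry e : drained) {
      Interval iv = e.interval();
      iv.ids.removeAll(clusteredIds); // points already assigned to a cluster
      if (iv.ids.size() >= minPts) {
        // Re-insert with a recomputed priority; the queued one is stale.
        heap.add(new Entry(iv.priority(), iv));
      }
    }
  }
}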