Search in sources :

Example 1 with ComparableMinHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.

In class MetricalIndexKNNQuery, method getKNNForObject:

/**
 * Answers a k-nearest-neighbor query for {@code q} by traversing the M-tree
 * starting at its root node.
 * <p>
 * Pending subtrees are kept in a {@link ComparableMinHeap}. The traversal
 * stops as soon as {@code k} results have been collected and the candidate
 * just polled has a minimum distance above the current k-distance. For every
 * entry, the stored parent distance is checked first, so that the exact
 * distance to the query is only computed for entries that pass this test.
 *
 * @param q query object
 * @param k number of neighbors requested, must be at least 1
 * @return the collected neighbors as a {@link KNNList}
 * @throws IllegalArgumentException if {@code k < 1}
 */
@Override
public KNNList getKNNForObject(O q, int k) {
    if (k < 1) {
        throw new IllegalArgumentException("At least one object has to be requested!");
    }
    index.statistics.countKNNQuery();

    final KNNHeap neighbors = DBIDUtil.newHeap(k);
    final ComparableMinHeap<MTreeSearchCandidate> open = new ComparableMinHeap<>();
    // Seed the traversal with the root node; it has no routing object.
    open.add(new MTreeSearchCandidate(0., index.getRootID(), null, 0.));
    // Pruning radius; refreshed from the result heap after every insertion.
    double kDist = Double.POSITIVE_INFINITY;

    while (!open.isEmpty()) {
        final MTreeSearchCandidate current = open.poll();
        if (neighbors.size() >= k && current.mindist > kDist) {
            break;
        }
        final AbstractMTreeNode<?, ?, ?> node = index.getNode(current.nodeID);
        final DBID parentId = current.routingObjectID;
        final double parentToQuery = current.routingDistance;

        if (node.isLeaf()) {
            // Data node: test every stored object against the query.
            for (int pos = 0; pos < node.getNumEntries(); pos++) {
                final MTreeEntry leafEntry = node.getEntry(pos);
                final DBID objectId = leafEntry.getRoutingObjectID();
                final double toParent;
                if (parentId != null) {
                    toParent = leafEntry.getParentDistance();
                } else {
                    toParent = 0.;
                }
                // Written as !(a <= b) so that NaN is rejected exactly like a
                // failed "a <= b" test.
                if (!(Math.abs(parentToQuery - toParent) <= kDist)) {
                    continue;
                }
                final double exact = distanceQuery.distance(objectId, q);
                index.statistics.countDistanceCalculation();
                if (exact <= kDist) {
                    neighbors.insert(exact, objectId);
                    kDist = neighbors.getKNNDistance();
                }
            }
            continue;
        }

        // Directory node: enqueue every child that may still hold a result.
        for (int pos = 0; pos < node.getNumEntries(); pos++) {
            final MTreeEntry dirEntry = node.getEntry(pos);
            final DBID routingId = dirEntry.getRoutingObjectID();
            final double radius = dirEntry.getCoveringRadius();
            final double toParent;
            if (parentId != null) {
                toParent = dirEntry.getParentDistance();
            } else {
                toParent = 0.;
            }
            if (!(Math.abs(parentToQuery - toParent) <= kDist + radius)) {
                continue;
            }
            final double toRouting = distanceQuery.distance(routingId, q);
            index.statistics.countDistanceCalculation();
            final double lowerBound = Math.max(toRouting - radius, 0.);
            if (lowerBound <= kDist) {
                open.add(new MTreeSearchCandidate(lowerBound, ((DirectoryEntry) dirEntry).getPageID(), routingId, toRouting));
            }
        }
    }
    return neighbors.toKNNList();
}
Also used : ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) MTreeEntry(de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry) DirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap)

Example 2 with ComparableMinHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.

In class KNNJoin, method run:

/**
 * Inner run method. This returns a data store of kNN lists, and is used by
 * {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
 * <p>
 * The method works in four phases: (1) one list of kNN heaps is initialized
 * per leaf page of the spatial index, (2) every unordered pair of leaf pages
 * is compared by the minimum distance of their entries and is either
 * processed immediately, queued as a {@code Task}, or dropped, (3) the queued
 * tasks are polled and re-checked against the current stop distances, and (4)
 * the heaps are converted to {@link KNNList}s and stored per object.
 *
 * @param relation Data relation
 * @param ids Object IDs
 * @return Data store
 * @throws IllegalStateException if the distance function is not a
 *         {@link SpatialPrimitiveDistanceFunction}
 * @throws MissingPrerequisitesException if the relation does not have
 *         exactly one spatial index attached
 */
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
    if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
        throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
    }
    Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
    if (indexes.size() != 1) {
        throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
    }
    // FIXME: Ensure we're looking at the right relation!
    SpatialIndexTree<N, E> index = indexes.iterator().next();
    // Safe cast: guarded by the instanceof check at the top of the method.
    SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
    // data pages
    List<E> ps_candidates = new ArrayList<>(index.getLeaves());
    // knn heaps: heaps.get(i) belongs to the leaf page ps_candidates.get(i)
    List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
    // Initialize with the page self-pairing
    for (int i = 0; i < ps_candidates.size(); i++) {
        E pr_entry = ps_candidates.get(i);
        N pr = index.getNode(pr_entry);
        heaps.add(initHeaps(distFunction, pr));
    }
    // Build priority queue
    // Number of unordered leaf pairs, n * (n - 1) / 2; used as the initial
    // heap size and as the progress total.
    final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
    ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
    }
    FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
    for (int i = 0; i < ps_candidates.size(); i++) {
        E pr_entry = ps_candidates.get(i);
        N pr = index.getNode(pr_entry);
        List<KNNHeap> pr_heaps = heaps.get(i);
        // Computed once per outer iteration; it is not refreshed while the
        // inner loop processes pages against pr.
        double pr_knn_distance = computeStopDistance(pr_heaps);
        // Only pairs with j > i are visited, so each pair is seen once.
        for (int j = i + 1; j < ps_candidates.size(); j++) {
            E ps_entry = ps_candidates.get(j);
            N ps = index.getNode(ps_entry);
            List<KNNHeap> ps_heaps = heaps.get(j);
            double ps_knn_distance = computeStopDistance(ps_heaps);
            double minDist = distFunction.minDist(pr_entry, ps_entry);
            // Resolve immediately: pairs with a minimum distance of zero (or
            // below) are compared right away instead of being queued.
            if (minDist <= 0.) {
                processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
            } else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
                // Queue the pair for later; pairs beyond both stop distances
                // are dropped here.
                pq.add(new Task(minDist, i, j));
            }
            LOG.incrementProcessed(mprogress);
        }
    }
    LOG.ensureCompleted(mprogress);
    // Process the queue
    FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
    IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
    while (!pq.isEmpty()) {
        Task task = pq.poll();
        List<KNNHeap> pr_heaps = heaps.get(task.i);
        List<KNNHeap> ps_heaps = heaps.get(task.j);
        // Stop distances are recomputed because the heaps may have changed
        // since the task was queued.
        double pr_knn_distance = computeStopDistance(pr_heaps);
        double ps_knn_distance = computeStopDistance(ps_heaps);
        // dor / dos: is the pair still within the stop distance of page
        // task.i / page task.j?
        boolean dor = task.mindist <= pr_knn_distance;
        boolean dos = task.mindist <= ps_knn_distance;
        if (dor || dos) {
            N pr = index.getNode(ps_candidates.get(task.i));
            N ps = index.getNode(ps_candidates.get(task.j));
            if (dor && dos) {
                processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
            } else {
                // NOTE(review): passing null as the second heap list
                // presumably restricts the update to the first page's heaps
                // - confirm in processDataPages.
                if (dor) {
                    processDataPages(distFunction, pr_heaps, null, pr, ps);
                } else /* dos */
                {
                    processDataPages(distFunction, ps_heaps, null, ps, pr);
                }
            }
            // Counts only the tasks that led to an actual page comparison.
            LOG.incrementProcessed(fprogress);
        }
        // Counts every polled task, whether it was compared or skipped.
        LOG.incrementProcessed(qprogress);
    }
    LOG.ensureCompleted(qprogress);
    LOG.setCompleted(fprogress);
    WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
    FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
    for (int i = 0; i < ps_candidates.size(); i++) {
        N pr = index.getNode(ps_candidates.get(i));
        List<KNNHeap> pr_heaps = heaps.get(i);
        // Finalize lists: the heap at position j belongs to the j-th entry of
        // the page.
        for (int j = 0; j < pr.getNumEntries(); j++) {
            knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
        }
        // Release the heaps of this page; the queue is already empty here.
        heaps.set(i, null);
        LOG.incrementProcessed(pageprog);
    }
    LOG.ensureCompleted(pageprog);
    return knnLists;
}
Also used : ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) ArrayList(java.util.ArrayList) SpatialIndexTree(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree) MissingPrerequisitesException(de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) SpatialPrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 3 with ComparableMinHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.

In class MkCoPTree, method doReverseKNNQuery:

/**
 * Traverses the tree for a reverse k-nearest-neighbor query and sorts the
 * visited objects into confirmed results and candidates.
 * <p>
 * A subtree is followed when the lower bound of its distance to {@code q}
 * does not exceed the conservative kNN distance approximation of its entry.
 * An object is reported as a result when its distance to {@code q} is within
 * its progressive approximation; otherwise it becomes a candidate when its
 * distance exceeds the conservative approximation by at most 1E-10.
 *
 * @param k the parameter k of the rknn query
 * @param q the id of the query object
 * @param result receives the true results (they need no refinement)
 * @param candidates receives possible results (they need a refinement)
 */
private void doReverseKNNQuery(int k, DBIDRef q, ModifiableDoubleDBIDList result, ModifiableDBIDs candidates) {
    final ComparableMinHeap<MTreeSearchCandidate> open = new ComparableMinHeap<>();
    // Start at the root node.
    open.add(new MTreeSearchCandidate(0., getRootID(), null, Double.NaN));

    while (!open.isEmpty()) {
        // FIXME: cache the distance to the routing object in the queue node!
        final MkCoPTreeNode<O> node = getNode(open.poll().nodeID);

        if (node.isLeaf()) {
            // Data node: classify every object as result, candidate or miss.
            for (int pos = 0; pos < node.getNumEntries(); pos++) {
                final MkCoPLeafEntry leaf = (MkCoPLeafEntry) node.getEntry(pos);
                final double dist = distance(leaf.getRoutingObjectID(), q);
                final double progressive = leaf.approximateProgressiveKnnDistance(k);
                if (dist <= progressive) {
                    result.add(dist, leaf.getRoutingObjectID());
                    continue;
                }
                final double conservative = leaf.approximateConservativeKnnDistance(k);
                if (dist - conservative <= 1E-10) {
                    candidates.add(leaf.getRoutingObjectID());
                }
            }
            continue;
        }

        // Directory node: follow the children that can still contain hits.
        for (int pos = 0; pos < node.getNumEntries(); pos++) {
            final MkCoPEntry child = node.getEntry(pos);
            final double dist = distance(child.getRoutingObjectID(), q);
            final double lowerBound;
            if (child.getCoveringRadius() > dist) {
                lowerBound = 0.;
            } else {
                lowerBound = dist - child.getCoveringRadius();
            }
            final double conservative = child.approximateConservativeKnnDistance(k);
            if (lowerBound <= conservative) {
                open.add(new MTreeSearchCandidate(lowerBound, getPageID(child), child.getRoutingObjectID(), Double.NaN));
            }
        }
    }
}
Also used : ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) MTreeSearchCandidate(de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeSearchCandidate)

Example 4 with ComparableMinHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap in project elki by elki-project.

In class CASH, method doRun:

/**
 * Runs the CASH algorithm on the specified database, this method is
 * recursively called until only noise is left.
 * <p>
 * Each iteration takes the next interval from the heap, turns it into a
 * cluster (either directly or through a recursive run on a derived relation),
 * removes the clustered IDs from the remaining intervals and rebuilds the
 * heap. IDs that never end up in a cluster are handled as noise at the end.
 *
 * @param relation the Relation to run the CASH algorithm on
 * @param progress the progress object for verbose messages, may be
 *        {@code null}
 * @return the clustering found for this relation, including the clusters
 *         returned by the recursive runs
 */
private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
    Clustering<Model> res = new Clustering<>("CASH clustering", "cash-clustering");
    final int dim = dimensionality(relation);
    // init heap
    ObjectHeap<IntegerPriorityObject<CASHInterval>> heap = new ComparableMinHeap<>();
    // Every ID starts out as noise and is removed once it joins a cluster.
    ModifiableDBIDs noiseIDs = DBIDUtil.newHashSet(relation.getDBIDs());
    initHeap(heap, relation, dim, noiseIDs);
    if (LOG.isVerbose()) {
        LOG.verbose(new StringBuilder().append("dim ").append(dim).append(" database.size ").append(relation.size()).toString());
    }
    // get the ''best'' d-dimensional intervals at max level
    while (!heap.isEmpty()) {
        CASHInterval interval = determineNextIntervalAtMaxLevel(heap);
        if (LOG.isVerbose()) {
            LOG.verbose("next interval in dim " + dim + ": " + interval);
        }
        // only noise left
        if (interval == null) {
            break;
        }
        // do a dim-1 dimensional run
        // IDs clustered in this iteration; removed from the remaining
        // intervals when the heap is rebuilt below.
        ModifiableDBIDs clusterIDs = DBIDUtil.newHashSet();
        if (dim > minDim + 1) {
            ModifiableDBIDs ids;
            double[][] basis_dim_minus_1;
            if (adjust) {
                // runDerivator receives the empty set 'ids'; presumably it
                // fills it with the selected objects - confirm in
                // runDerivator.
                ids = DBIDUtil.newHashSet();
                basis_dim_minus_1 = runDerivator(relation, dim, interval, ids);
            } else {
                ids = interval.getIDs();
                basis_dim_minus_1 = determineBasis(SpatialUtil.centroid(interval));
            }
            if (ids.size() != 0) {
                MaterializedRelation<ParameterizationFunction> db = buildDB(dim, basis_dim_minus_1, ids, relation);
                // add result of dim-1 to this result
                Clustering<Model> res_dim_minus_1 = doRun(db, progress);
                for (Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
                    res.addToplevelCluster(cluster);
                    noiseIDs.removeDBIDs(cluster.getIDs());
                    clusterIDs.addDBIDs(cluster.getIDs());
                    processedIDs.addDBIDs(cluster.getIDs());
                }
            }
        } else // dim <= minDim + 1: no further recursion, the interval itself becomes a cluster
        {
            LinearEquationSystem les = runDerivator(relation, dim - 1, interval.getIDs());
            Cluster<Model> c = new Cluster<Model>(interval.getIDs(), new LinearEquationModel(les));
            res.addToplevelCluster(c);
            noiseIDs.removeDBIDs(interval.getIDs());
            clusterIDs.addDBIDs(interval.getIDs());
            processedIDs.addDBIDs(interval.getIDs());
        }
        // Rebuild heap
        // The entries are copied out first, then the heap is cleared and
        // refilled with freshly created priority objects.
        ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<>(heap.size());
        for (ObjectHeap.UnsortedIter<IntegerPriorityObject<CASHInterval>> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
            heapVector.add(iter.get());
        }
        heap.clear();
        for (IntegerPriorityObject<CASHInterval> pair : heapVector) {
            CASHInterval currentInterval = pair.getObject();
            currentInterval.removeIDs(clusterIDs);
            // Intervals left with fewer than minPts IDs are dropped.
            if (currentInterval.getIDs().size() >= minPts) {
                heap.add(new IntegerPriorityObject<>(currentInterval.priority(), currentInterval));
            }
        }
        if (progress != null) {
            progress.setProcessed(processedIDs.size(), LOG);
        }
    }
    // put noise to clusters
    if (!noiseIDs.isEmpty()) {
        if (dim == noiseDim) {
            res.addToplevelCluster(new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER));
            processedIDs.addDBIDs(noiseIDs);
        } else if (noiseIDs.size() >= minPts) {
            // NOTE(review): this call passes 'fulldatabase' where the other
            // runDerivator calls pass 'relation' - confirm this is intended.
            LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
            res.addToplevelCluster(new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les)));
            processedIDs.addDBIDs(noiseIDs);
        }
    }
    if (LOG.isDebugging()) {
        // Debug summary: noise count plus one line per cluster found.
        StringBuilder msg = new StringBuilder();
        msg.append("noise fuer dim ").append(dim).append(": ").append(noiseIDs.size());
        for (Cluster<Model> c : res.getAllClusters()) {
            if (c.getModel() instanceof LinearEquationModel) {
                msg.append("\n Cluster: Dim: ").append(((LinearEquationModel) c.getModel()).getLes().subspacedim());
            } else {
                msg.append("\n Cluster: ").append(c.getModel().getClass().getName());
            }
            msg.append(" size: ").append(c.size());
        }
        LOG.debugFine(msg.toString());
    }
    if (progress != null) {
        progress.setProcessed(processedIDs.size(), LOG);
    }
    return res;
}
Also used : CASHInterval(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHInterval) ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) ArrayList(java.util.ArrayList) ObjectHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap) LinearEquationModel(de.lmu.ifi.dbs.elki.data.model.LinearEquationModel) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) IntegerPriorityObject(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.IntegerPriorityObject) ParameterizationFunction(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) LinearEquationSystem(de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem) LinearEquationModel(de.lmu.ifi.dbs.elki.data.model.LinearEquationModel)

Aggregations

ComparableMinHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap): 4, KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap): 2, ArrayList (java.util.ArrayList): 2, CASHInterval (de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHInterval): 1, ParameterizationFunction (de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction): 1, ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel): 1, LinearEquationModel (de.lmu.ifi.dbs.elki.data.model.LinearEquationModel): 1, Model (de.lmu.ifi.dbs.elki.data.model.Model): 1, DBID (de.lmu.ifi.dbs.elki.database.ids.DBID): 1, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 1, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 1, SpatialPrimitiveDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction): 1, DirectoryEntry (de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry): 1, MTreeEntry (de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry): 1, MTreeSearchCandidate (de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeSearchCandidate): 1, SpatialIndexTree (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree): 1, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 1, IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress): 1, LinearEquationSystem (de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem): 1, IntegerPriorityObject (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.IntegerPriorityObject): 1