Search in sources :

Example 6 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class KNNJoin method computeStopDistance.

/**
 * Determine the stop (pruning) distance for a list of kNN heaps, i.e. the
 * largest k-nearest-neighbor distance reported by any heap in the list.
 *
 * @param heaps Heaps list
 * @return the largest kNN distance among the heaps, or
 *         {@link Double#POSITIVE_INFINITY} if the result is still NaN after
 *         the scan (e.g. because the list was empty)
 */
private double computeStopDistance(List<KNNHeap> heaps) {
    // NaN serves as the "nothing seen yet" marker: every comparison against
    // NaN is false, so the first heap always overwrites it.
    double stopDistance = Double.NaN;
    for (KNNHeap heap : heaps) {
        final double kdist = heap.getKNNDistance();
        // Keep the running value only when it is strictly larger than kdist.
        if (!(kdist < stopDistance)) {
            stopDistance = kdist;
        }
    }
    // No usable value: do not prune at all.
    return Double.isNaN(stopDistance) ? Double.POSITIVE_INFINITY : stopDistance;
}
Also used : KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap)

Example 7 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class KNNJoin method run.

/**
 * Inner run method. Computes one kNN list per entry of every leaf page of the
 * single spatial index found for the relation, and returns them in a data
 * store. It is used by
 * {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
 *
 * <p>
 * The method works in four phases:
 * <ol>
 * <li>create the kNN heaps of every leaf page via {@code initHeaps},</li>
 * <li>compare all pairs of leaf pages by the minimum distance of their
 * entries; pairs with distance zero (or less) are joined at once, pairs that
 * may still contribute are queued, all others are dropped,</li>
 * <li>work off the queue, joining only those pairs that are still within the
 * current stop distance of at least one of the two pages,</li>
 * <li>convert the heaps to kNN lists and store them by DBID.</li>
 * </ol>
 *
 * @param relation Data relation; its hierarchy is searched for the spatial
 *        index
 * @param ids Object IDs for which the result storage is allocated
 * @return Data store holding the kNN list of each leaf entry, keyed by its
 *         DBID
 * @throws IllegalStateException if the distance function is not a
 *         {@code SpatialPrimitiveDistanceFunction}
 * @throws MissingPrerequisitesException if the number of spatial indexes
 *         found is not exactly one
 */
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
    // The join needs minDist between index entries, so only spatial
    // primitive distance functions are accepted.
    if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
        throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
    }
    Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
    if (indexes.size() != 1) {
        throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
    }
    // FIXME: Ensure we're looking at the right relation!
    SpatialIndexTree<N, E> index = indexes.iterator().next();
    SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
    // data pages: the leaf entries of the index, copied into an indexable list
    List<E> ps_candidates = new ArrayList<>(index.getLeaves());
    // knn heaps: heaps.get(i) holds the heaps of leaf page i
    List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
    // Initialize with the page self-pairing
    // NOTE(review): initHeaps is not visible here; presumably it creates one
    // heap per page entry (the finalization loop below indexes heaps by entry
    // position) - confirm.
    for (int i = 0; i < ps_candidates.size(); i++) {
        E pr_entry = ps_candidates.get(i);
        N pr = index.getNode(pr_entry);
        heaps.add(initHeaps(distFunction, pr));
    }
    // Build priority queue
    // Number of unordered leaf pairs, n * (n - 1) / 2: '*' binds tighter than
    // '>>>', and the unsigned shift keeps the value non-negative.
    final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
    // NOTE(review): the polling order depends on Task's comparison, which is
    // not visible here; presumably ascending by mindist - confirm.
    ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
    }
    // Progress objects only exist in verbose mode; otherwise null is handed
    // to the logger.
    FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
    for (int i = 0; i < ps_candidates.size(); i++) {
        E pr_entry = ps_candidates.get(i);
        N pr = index.getNode(pr_entry);
        List<KNNHeap> pr_heaps = heaps.get(i);
        // Computed once per page i, so joins performed in the inner loop
        // below are not reflected in this value.
        // NOTE(review): presumably the stale value can only be too large
        // (conservative) - confirm.
        double pr_knn_distance = computeStopDistance(pr_heaps);
        // Only pairs with j > i: each unordered pair is visited once.
        for (int j = i + 1; j < ps_candidates.size(); j++) {
            E ps_entry = ps_candidates.get(j);
            N ps = index.getNode(ps_entry);
            List<KNNHeap> ps_heaps = heaps.get(j);
            double ps_knn_distance = computeStopDistance(ps_heaps);
            double minDist = distFunction.minDist(pr_entry, ps_entry);
            // Resolve immediately:
            if (minDist <= 0.) {
                // Join in both directions right away.
                processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
            } else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
                // Within the stop distance of at least one page: defer.
                pq.add(new Task(minDist, i, j));
            }
            // Pairs beyond both stop distances are dropped without a task.
            LOG.incrementProcessed(mprogress);
        }
    }
    LOG.ensureCompleted(mprogress);
    // Process the queue
    FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
    IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
    while (!pq.isEmpty()) {
        Task task = pq.poll();
        List<KNNHeap> pr_heaps = heaps.get(task.i);
        List<KNNHeap> ps_heaps = heaps.get(task.j);
        // Recompute the stop distances: the heaps may have been updated since
        // the task was queued.
        double pr_knn_distance = computeStopDistance(pr_heaps);
        double ps_knn_distance = computeStopDistance(ps_heaps);
        // dor: page i still needs to be compared against page j;
        // dos: page j still needs to be compared against page i.
        boolean dor = task.mindist <= pr_knn_distance;
        boolean dos = task.mindist <= ps_knn_distance;
        if (dor || dos) {
            // Nodes are only fetched when at least one direction is needed.
            N pr = index.getNode(ps_candidates.get(task.i));
            N ps = index.getNode(ps_candidates.get(task.j));
            if (dor && dos) {
                // Update the heaps of both pages.
                processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
            } else {
                if (dor) {
                    // Update only the heaps of page i (null: skip page j's heaps).
                    processDataPages(distFunction, pr_heaps, null, pr, ps);
                } else /* dos */
                {
                    // Update only the heaps of page j, with the roles swapped.
                    processDataPages(distFunction, ps_heaps, null, ps, pr);
                }
            }
            // Counts the pairs that were actually joined.
            LOG.incrementProcessed(fprogress);
        }
        // Counts every polled task, whether joined or skipped.
        LOG.incrementProcessed(qprogress);
    }
    LOG.ensureCompleted(qprogress);
    LOG.setCompleted(fprogress);
    WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
    FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
    for (int i = 0; i < ps_candidates.size(); i++) {
        N pr = index.getNode(ps_candidates.get(i));
        List<KNNHeap> pr_heaps = heaps.get(i);
        // Finalize lists: heap j belongs to entry j of the page.
        for (int j = 0; j < pr.getNumEntries(); j++) {
            knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
        }
        // Release the reference to this page's heaps; they are not needed
        // after conversion.
        heaps.set(i, null);
        LOG.incrementProcessed(pageprog);
    }
    LOG.ensureCompleted(pageprog);
    return knnLists;
}
Also used : ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) ArrayList(java.util.ArrayList) SpatialIndexTree(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree) MissingPrerequisitesException(de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) SpatialPrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 8 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class KNNJoin method processDataPages.

/**
 * Joins two data pages: every entry of {@code ps} is offered as a neighbor
 * candidate to the heap of every entry of {@code pr}. If heaps for
 * {@code ps} are supplied as well, the same distance is also used to offer
 * the {@code pr} entry to the heap of the {@code ps} entry.
 *
 * @param df the distance function to use
 * @param pr_heaps the knn heaps of the entries in pr, indexed like the
 *        entries of pr
 * @param ps_heaps the knn heaps of the entries in ps, indexed like the
 *        entries of ps; may be {@code null} to update pr only
 * @param pr the first data page
 * @param ps the second data page
 */
private void processDataPages(SpatialPrimitiveDistanceFunction<? super V> df, List<KNNHeap> pr_heaps, List<KNNHeap> ps_heaps, N pr, N ps) {
    // All pairs (s, r), with s from ps in the outer and r from pr in the
    // inner loop.
    for (int sPos = 0; sPos < ps.getNumEntries(); sPos++) {
        final SpatialPointLeafEntry sEntry = (SpatialPointLeafEntry) ps.getEntry(sPos);
        // Heap of s, or null when only pr is to be updated.
        final KNNHeap sHeap;
        if (ps_heaps == null) {
            sHeap = null;
        } else {
            sHeap = ps_heaps.get(sPos);
        }
        final DBID sId = sEntry.getDBID();
        for (int rPos = 0; rPos < pr.getNumEntries(); rPos++) {
            final SpatialPointLeafEntry rEntry = (SpatialPointLeafEntry) pr.getEntry(rPos);
            final double dist = df.minDist(sEntry, rEntry);
            // s is a neighbor candidate of r ...
            pr_heaps.get(rPos).insert(dist, sId);
            if (sHeap == null) {
                continue;
            }
            // ... and, when requested, r is one of s.
            sHeap.insert(dist, rEntry.getDBID());
        }
    }
}
Also used : SpatialPointLeafEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap)

Example 9 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class LinearScanPrimitiveDistanceKNNQuery method linearScanBatchKNN.

/**
 * Batch kNN by linear scan for primitive distance functions: each object of
 * the relation is fetched once and compared against all query objects.
 *
 * @param objs Query objects list
 * @param heaps Heaps list; heaps.get(i) collects the neighbors of objs.get(i)
 */
protected void linearScanBatchKNN(List<O> objs, List<KNNHeap> heaps) {
    final PrimitiveDistanceFunction<? super O> distf = this.rawdist;
    final Relation<? extends O> rel = getRelation();
    final int numQueries = objs.size();
    // Outer loop over the database, inner loop over the queries.
    DBIDIter it = rel.getDBIDs().iter();
    while (it.valid()) {
        final O cand = rel.get(it);
        for (int q = 0; q < numQueries; q++) {
            final KNNHeap heap = heaps.get(q);
            final double d = distf.distance(objs.get(q), cand);
            // Skip candidates that are not within the current kNN distance.
            if (!(d <= heap.getKNNDistance())) {
                continue;
            }
            heap.insert(d, it);
        }
        it.advance();
    }
}
Also used : KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 10 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class LinearScanDistanceKNNQuery method getKNNForDBID.

/**
 * Linear scan kNN query: computes the distance from the query object to every
 * object of the relation and collects the results in a heap of size k.
 *
 * @param id Query object reference
 * @param k Number of neighbors requested
 * @return the contents of the heap as kNN list
 */
@Override
public KNNList getKNNForDBID(DBIDRef id, int k) {
    final DistanceQuery<O> query = distanceQuery;
    KNNHeap knn = DBIDUtil.newHeap(k);
    // Current acceptance threshold; initially everything is accepted.
    double threshold = Double.POSITIVE_INFINITY;
    DBIDIter it = getRelation().getDBIDs().iter();
    while (it.valid()) {
        final double d = query.distance(id, it);
        if (d <= threshold) {
            // The value returned by insert becomes the new threshold.
            threshold = knn.insert(d, it);
        }
        it.advance();
    }
    return knn.toKNNList();
}
Also used : KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)17 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)5 ArrayList (java.util.ArrayList)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)4 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)3 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)3 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)3 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)3 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)3 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)3 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)3 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)3 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)3 DBIDPair (de.lmu.ifi.dbs.elki.database.ids.DBIDPair)2 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)2 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)2