Search in sources :

Example 1 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class PartitionApproximationMaterializeKNNPreprocessor method preprocess.

@Override
protected void preprocess() {
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    MeanVariance ksize = new MeanVariance();
    if (LOG.isVerbose()) {
        LOG.verbose("Approximating nearest neighbor lists to database objects");
    }
    // Produce a random shuffling of the IDs:
    ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions", partitions, LOG) : null;
    for (int part = 0; part < partitions; part++) {
        final ArrayDBIDs ids = parts[part];
        final int size = ids.size();
        Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 3);
        cache.defaultReturnValue(Double.NaN);
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            KNNHeap kNN = DBIDUtil.newHeap(k);
            for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
                DBIDPair key = DBIDUtil.newPair(iter, iter2);
                double d = cache.removeDouble(key);
                if (d == d) {
                    // Not NaN
                    // consume the previous result.
                    kNN.insert(d, iter2);
                } else {
                    // compute new and store the previous result.
                    d = distanceQuery.distance(iter, iter2);
                    kNN.insert(d, iter2);
                    // put it into the cache, but with the keys reversed
                    key = DBIDUtil.newPair(iter2, iter);
                    cache.put(key, d);
                }
            }
            ksize.put(kNN.size());
            storage.put(iter, kNN.toKNNList());
        }
        if (LOG.isDebugging() && cache.size() > 0) {
            LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isVerbose()) {
        LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
}
Also used : Object2DoubleOpenHashMap(it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDPair(de.lmu.ifi.dbs.elki.database.ids.DBIDPair) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Example 2 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class RandomSampleKNNPreprocessor method preprocess.

@Override
protected void preprocess() {
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int samplesize = (int) (ids.size() * share);
    Random random = rnd.getSingleThreadedRandom();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        KNNHeap kNN = DBIDUtil.newHeap(k);
        DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, random);
        for (DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) {
            double dist = distanceQuery.distance(iter, iter2);
            kNN.insert(dist, iter2);
        }
        storage.put(iter, kNN.toKNNList());
        getLogger().incrementProcessed(progress);
    }
    getLogger().ensureCompleted(progress);
}
Also used : Random(java.util.Random) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class MetricalIndexKNNQuery method getKNNForObject.

@Override
public KNNList getKNNForObject(O q, int k) {
    if (k < 1) {
        throw new IllegalArgumentException("At least one object has to be requested!");
    }
    index.statistics.countKNNQuery();
    KNNHeap knnList = DBIDUtil.newHeap(k);
    double d_k = Double.POSITIVE_INFINITY;
    final ComparableMinHeap<MTreeSearchCandidate> pq = new ComparableMinHeap<>();
    // Push the root node
    pq.add(new MTreeSearchCandidate(0., index.getRootID(), null, 0.));
    // search in tree
    while (!pq.isEmpty()) {
        MTreeSearchCandidate pqNode = pq.poll();
        if (knnList.size() >= k && pqNode.mindist > d_k) {
            break;
        }
        AbstractMTreeNode<?, ?, ?> node = index.getNode(pqNode.nodeID);
        DBID id_p = pqNode.routingObjectID;
        double d1 = pqNode.routingDistance;
        // directory node
        if (!node.isLeaf()) {
            for (int i = 0; i < node.getNumEntries(); i++) {
                MTreeEntry entry = node.getEntry(i);
                DBID o_r = entry.getRoutingObjectID();
                double r_or = entry.getCoveringRadius();
                double d2 = id_p != null ? entry.getParentDistance() : 0.;
                double diff = Math.abs(d1 - d2);
                double sum = d_k + r_or;
                if (diff <= sum) {
                    double d3 = distanceQuery.distance(o_r, q);
                    index.statistics.countDistanceCalculation();
                    double d_min = Math.max(d3 - r_or, 0.);
                    if (d_min <= d_k) {
                        pq.add(new MTreeSearchCandidate(d_min, ((DirectoryEntry) entry).getPageID(), o_r, d3));
                    }
                }
            }
        } else // data node
        {
            for (int i = 0; i < node.getNumEntries(); i++) {
                MTreeEntry entry = node.getEntry(i);
                DBID o_j = entry.getRoutingObjectID();
                double d2 = id_p != null ? entry.getParentDistance() : 0.;
                double diff = Math.abs(d1 - d2);
                if (diff <= d_k) {
                    double d3 = distanceQuery.distance(o_j, q);
                    index.statistics.countDistanceCalculation();
                    if (d3 <= d_k) {
                        knnList.insert(d3, o_j);
                        d_k = knnList.getKNNDistance();
                    }
                }
            }
        }
    }
    return knnList.toKNNList();
}
Also used : ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) MTreeEntry(de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry) DirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap)

Example 4 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class LBABOD method run.

/**
 * Run LB-ABOD on the data set.
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    DBIDArrayIter pB = ids.iter(), pC = ids.iter();
    SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
    KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
    // Output storage.
    WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmaxabod = new DoubleMinMax();
    double max = 0.;
    // Storage for squared distances (will be reused!)
    WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // Nearest neighbor heap (will be reused!)
    KNNHeap nn = DBIDUtil.newHeap(k);
    // Priority queue for candidates
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(relation.size());
    // get Candidate Ranking
    for (DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
        // Compute nearest neighbors and distances.
        nn.clear();
        double simAA = kernelMatrix.getSimilarity(pA, pA);
        // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
        double sumid = 0., sumisqd = 0.;
        for (pB.seek(0); pB.valid(); pB.advance()) {
            if (DBIDUtil.equal(pB, pA)) {
                continue;
            }
            double simBB = kernelMatrix.getSimilarity(pB, pB);
            double simAB = kernelMatrix.getSimilarity(pA, pB);
            double sqdAB = simAA + simBB - simAB - simAB;
            sqDists.putDouble(pB, sqdAB);
            final double isqdAB = 1. / sqdAB;
            sumid += FastMath.sqrt(isqdAB);
            sumisqd += isqdAB;
            // Update heap
            nn.insert(sqdAB, pB);
        }
        // Compute FastABOD approximation, adjust for lower bound.
        // LB-ABOF is defined via a numerically unstable formula.
        // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
        // TODO: ensure numerical precision!
        double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
        KNNList nl = nn.toKNNList();
        DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
        for (; iB.valid(); iB.advance()) {
            double sqdAB = iB.doubleValue();
            double simAB = kernelMatrix.getSimilarity(pA, iB);
            if (!(sqdAB > 0.)) {
                continue;
            }
            for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
                double sqdAC = iC.doubleValue();
                double simAC = kernelMatrix.getSimilarity(pA, iC);
                if (!(sqdAC > 0.)) {
                    continue;
                }
                // Exploit bilinearity of scalar product:
                // <B-A, C-A> = <B, C-A> - <A,C-A>
                // = <B,C> - <B,A> - <A,C> + <A,A>
                double simBC = kernelMatrix.getSimilarity(iB, iC);
                double numerator = simBC - simAB - simAC + simAA;
                double sqweight = 1. / (sqdAB * sqdAC);
                double weight = FastMath.sqrt(sqweight);
                double val = numerator * sqweight;
                nnsum += val * weight;
                nnsumsq += val * val * weight;
                nnsumisqd += sqweight;
            }
        }
        // Remaining weight, term R2:
        double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
        double tmp = (2. * nnsum + r2) / (sumid * sumid);
        double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
        // Track maximum?
        if (lbabof > max) {
            max = lbabof;
        }
        abodvalues.putDouble(pA, lbabof);
        candidates.add(lbabof, pA);
    }
    // Put maximum from approximate values.
    minmaxabod.put(max);
    candidates.sort();
    // refine Candidates
    int refinements = 0;
    DoubleMinHeap topscores = new DoubleMinHeap(l);
    MeanVariance s = new MeanVariance();
    for (DoubleDBIDListIter pA = candidates.iter(); pA.valid(); pA.advance()) {
        // Stop refining
        if (topscores.size() >= k && pA.doubleValue() > topscores.peek()) {
            break;
        }
        final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
        // Store refined score:
        abodvalues.putDouble(pA, abof);
        minmaxabod.put(abof);
        // Update the heap tracking the top scores.
        if (topscores.size() < k) {
            topscores.add(abof);
        } else {
            if (topscores.peek() > abof) {
                topscores.replaceTopElement(abof);
            }
        }
        refinements += 1;
    }
    if (LOG.isStatistics()) {
        LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
        LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) DoubleMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KernelMatrix(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 5 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class RdKNNTree method preInsert.

/**
 * Performs necessary operations before inserting the specified entry.
 *
 * @param entry the entry to be inserted
 */
@Override
protected void preInsert(RdKNNEntry entry) {
    KNNHeap knns_o = DBIDUtil.newHeap(settings.k_max);
    preInsert(entry, getRootEntry(), knns_o);
}
Also used : KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap)

Aggregations

KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)17 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)5 ArrayList (java.util.ArrayList)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)4 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)3 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)3 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)3 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)3 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)3 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)3 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)3 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)3 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)3 DBIDPair (de.lmu.ifi.dbs.elki.database.ids.DBIDPair)2 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)2 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)2