Search in sources:

Example 1 with ModifiableDoubleDBIDList

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

From the class MkCoPTree, method reverseKNNQuery:

/**
 * Performs a reverse k-nearest neighbor query for the given object ID. The
 * query result is in ascending order of the distance to the query object.
 *
 * @param id the query object id
 * @param k the number of nearest neighbors to be returned
 * @return a list of the query results, sorted once all hits are collected
 * @throws IllegalArgumentException if {@code k} is larger than
 *         {@code settings.kmax}
 */
@Override
public DoubleDBIDList reverseKNNQuery(DBIDRef id, int k) {
    if (k > settings.kmax) {
        throw new IllegalArgumentException("Parameter k has to be less or equal than parameter kmax of the MkCoP-Tree: " + k + " > " + settings.kmax);
    }
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    ModifiableDBIDs candidates = DBIDUtil.newArray();
    // Both collections are passed as output arguments; presumably the
    // traversal puts certain hits into result and undecided objects into
    // candidates -- verify against doReverseKNNQuery.
    doReverseKNNQuery(k, id, result, candidates);
    // Refinement of candidates: compute their actual kNN lists in one batch.
    Map<DBID, KNNList> knnLists = batchNN(getRoot(), candidates, k);
    for (DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) {
        DBID cid = DBIDUtil.deref(iter);
        KNNList cands = knnLists.get(cid);
        // A candidate is a true hit iff the query object occurs in its kNN list.
        for (DoubleDBIDListIter iter2 = cands.iter(); iter2.valid(); iter2.advance()) {
            if (DBIDUtil.equal(id, iter2)) {
                result.add(iter2.doubleValue(), cid);
                break;
            }
        }
    }
    // One sort after all additions is sufficient; sorting before the
    // refinement loop would be undone by the appended entries anyway.
    result.sort();
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with ModifiableDoubleDBIDList

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

From the class LBABOD, method run:

/**
 * Run LB-ABOD on the data set.
 *
 * The method works in two phases: first a lower-bound score (LB-ABOF) is
 * computed for every object and stored as its preliminary score; then the
 * objects are visited in the order of the sorted candidate list and their
 * score is replaced by the value returned from {@code computeABOF}, until
 * the stop condition on the heap of top scores is met.
 *
 * @param db Database, used to obtain the similarity query for the kernel
 *        function
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    // Two array iterators over all ids; pB is reused for the inner loop of
    // phase one, and both are handed to computeABOF in phase two.
    DBIDArrayIter pB = ids.iter(), pC = ids.iter();
    SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
    KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
    // Output storage.
    WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmaxabod = new DoubleMinMax();
    // Largest lower-bound score seen in phase one (starts at 0, so negative
    // lower bounds never become the maximum).
    double max = 0.;
    // Storage for squared distances (will be reused!)
    // NOTE(review): sqDists is written in the loop below but never read
    // within this method -- confirm whether it is still needed.
    WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // Nearest neighbor heap (will be reused!)
    KNNHeap nn = DBIDUtil.newHeap(k);
    // Priority queue for candidates
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(relation.size());
    // get Candidate Ranking
    for (DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
        // Compute nearest neighbors and distances.
        nn.clear();
        double simAA = kernelMatrix.getSimilarity(pA, pA);
        // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
        double sumid = 0., sumisqd = 0.;
        for (pB.seek(0); pB.valid(); pB.advance()) {
            // Skip the object itself.
            if (DBIDUtil.equal(pB, pA)) {
                continue;
            }
            double simBB = kernelMatrix.getSimilarity(pB, pB);
            double simAB = kernelMatrix.getSimilarity(pA, pB);
            // Squared distance in kernel space: <A,A> + <B,B> - 2<A,B>
            double sqdAB = simAA + simBB - simAB - simAB;
            sqDists.putDouble(pB, sqdAB);
            // NOTE(review): unlike the neighbor loop below, there is no
            // sqdAB > 0 guard here; a zero squared distance makes this term
            // (and both sums) infinite -- confirm duplicates are handled.
            final double isqdAB = 1. / sqdAB;
            sumid += FastMath.sqrt(isqdAB);
            sumisqd += isqdAB;
            // Update heap
            nn.insert(sqdAB, pB);
        }
        // Compute FastABOD approximation, adjust for lower bound.
        // LB-ABOF is defined via a numerically unstable formula.
        // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
        // TODO: ensure numerical precision!
        double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
        KNNList nl = nn.toKNNList();
        // Two iterators over the same neighbor list, used to enumerate
        // pairs (B, C) with C positioned after B.
        DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
        for (; iB.valid(); iB.advance()) {
            double sqdAB = iB.doubleValue();
            double simAB = kernelMatrix.getSimilarity(pA, iB);
            // Negated comparison: skips zero, negative and NaN distances.
            if (!(sqdAB > 0.)) {
                continue;
            }
            // Start right after iB, so each unordered pair is visited once.
            for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
                double sqdAC = iC.doubleValue();
                double simAC = kernelMatrix.getSimilarity(pA, iC);
                // Negated comparison: skips zero, negative and NaN distances.
                if (!(sqdAC > 0.)) {
                    continue;
                }
                // Exploit bilinearity of scalar product:
                // <B-A, C-A> = <B, C-A> - <A,C-A>
                // = <B,C> - <B,A> - <A,C> + <A,A>
                double simBC = kernelMatrix.getSimilarity(iB, iC);
                double numerator = simBC - simAB - simAC + simAA;
                // sqweight = 1/(|AB|^2 |AC|^2), weight = 1/(|AB| |AC|)
                double sqweight = 1. / (sqdAB * sqdAC);
                double weight = FastMath.sqrt(sqweight);
                double val = numerator * sqweight;
                nnsum += val * weight;
                nnsumsq += val * val * weight;
                nnsumisqd += sqweight;
            }
        }
        // Remaining weight, term R2:
        // (the factors of 2 presumably compensate for visiting each
        // neighbor pair only once above -- verify against the paper)
        double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
        double tmp = (2. * nnsum + r2) / (sumid * sumid);
        double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
        // Track maximum?
        if (lbabof > max) {
            max = lbabof;
        }
        // Store the lower bound as preliminary score; it stays in place for
        // objects that are not refined in phase two.
        abodvalues.putDouble(pA, lbabof);
        candidates.add(lbabof, pA);
    }
    // Put maximum from approximate values.
    minmaxabod.put(max);
    candidates.sort();
    // refine Candidates
    int refinements = 0;
    // NOTE(review): the heap is constructed with the field l, while every
    // size check below compares against k -- confirm which one is intended.
    DoubleMinHeap topscores = new DoubleMinHeap(l);
    // Accumulator object passed to computeABOF for every candidate.
    MeanVariance s = new MeanVariance();
    for (DoubleDBIDListIter pA = candidates.iter(); pA.valid(); pA.advance()) {
        // Stop refining
        // (once the heap holds k scores and the current candidate's lower
        // bound exceeds the heap top; relies on the sort() call above)
        if (topscores.size() >= k && pA.doubleValue() > topscores.peek()) {
            break;
        }
        final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
        // Store refined score:
        abodvalues.putDouble(pA, abof);
        minmaxabod.put(abof);
        // Update the heap tracking the top scores.
        if (topscores.size() < k) {
            topscores.add(abof);
        } else {
            if (topscores.peek() > abof) {
                topscores.replaceTopElement(abof);
            }
        }
        refinements += 1;
    }
    if (LOG.isStatistics()) {
        // NOTE(review): this call alters the logging configuration as a side
        // effect of reporting a statistic -- confirm this is intentional.
        LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
        LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) DoubleMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KernelMatrix(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 3 with ModifiableDoubleDBIDList

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

From the class RdKNNTree, method postDelete:

/**
 * Follow-up work after an object has been deleted: collects the reverse
 * neighbors of the deleted object, recomputes their kNN lists for
 * {@code settings.k_max}, and passes them to {@code adjustKNNDistance}.
 *
 * @param entry the deleted entry; it is cast to {@code RdKNNLeafEntry}
 */
@Override
protected void postDelete(RdKNNEntry entry) {
    // Step 1: reverse kNN of the deleted object.
    final ModifiableDoubleDBIDList affected = DBIDUtil.newDistanceDBIDList();
    doReverseKNN(getRoot(), ((RdKNNLeafEntry) entry).getDBID(), affected);
    // Step 2: kNN lists of those reverse neighbors, queried in sorted order.
    final ArrayModifiableDBIDs affectedIds = DBIDUtil.newArray(affected);
    affectedIds.sort();
    final List<? extends KNNList> refreshed = knnQuery.getKNNForBulkDBIDs(affectedIds, settings.k_max);
    // Step 3: adjust the kNN distances, starting at the root entry.
    adjustKNNDistance(getRootEntry(), affectedIds, refreshed);
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Example 4 with ModifiableDoubleDBIDList

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

From the class RdKNNTree, method reverseKNNQuery:

/**
 * Reverse k-nearest neighbor query for the given object.
 *
 * The tree is first asked for reverse neighbor candidates. If {@code k}
 * equals {@code settings.k_max}, these candidates are sorted and returned
 * as they are. Otherwise the kNN lists of all candidates are computed with
 * the supplied query, and only candidates whose list contains {@code oid}
 * are kept, with the distance taken from that list.
 *
 * @param oid the query object id
 * @param k the number of nearest neighbors to consider
 * @param distanceFunction the distance function, validated via
 *        {@code checkDistanceFunction}
 * @param knnQuery the kNN query used to refine the candidates
 * @return the sorted list of reverse neighbors
 * @throws IllegalArgumentException if {@code k} exceeds
 *         {@code settings.k_max}
 */
public DoubleDBIDList reverseKNNQuery(DBID oid, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
    checkDistanceFunction(distanceFunction);
    if (k > settings.k_max) {
        throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
    }
    // Candidate set as delivered by the index.
    final ModifiableDoubleDBIDList found = DBIDUtil.newDistanceDBIDList();
    doReverseKNN(getRoot(), oid, found);
    if (k == settings.k_max) {
        // No refinement in this case; the candidates are the answer.
        found.sort();
        return found;
    }
    // k < k_max: verify each candidate against its actual kNN list.
    final ArrayModifiableDBIDs sortedIds = DBIDUtil.newArray(found);
    sortedIds.sort();
    final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(sortedIds, k);
    final ModifiableDoubleDBIDList confirmed = DBIDUtil.newDistanceDBIDList();
    int pos = 0;
    for (DBIDIter candidate = sortedIds.iter(); candidate.valid(); candidate.advance(), pos++) {
        // Scan the candidate's kNN list until the query object shows up.
        DoubleDBIDListIter neighbor = neighborhoods.get(pos).iter();
        while (neighbor.valid() && !DBIDUtil.equal(oid, neighbor)) {
            neighbor.advance();
        }
        // Still valid means the scan stopped on the query object.
        if (neighbor.valid()) {
            confirmed.add(neighbor.doubleValue(), candidate);
        }
    }
    confirmed.sort();
    return confirmed;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with ModifiableDoubleDBIDList

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

From the class AutotuningPCA, method processIds:

/**
 * Runs the PCA on the given objects by ranking them by their Euclidean
 * distance to their centroid and delegating to {@code processQueryResult}.
 *
 * @param ids the objects to process
 * @param database the relation holding the vectors
 * @return the result of {@code processQueryResult} on the sorted list
 */
@Override
public PCAResult processIds(DBIDs ids, Relation<? extends NumberVector> database) {
    // Euclidean distance is assumed: for PCA the neighborhood should be
    // L2-spherical so that it does not bias the result.
    final Centroid mean = Centroid.make(database, ids);
    final ModifiableDoubleDBIDList byDistance = DBIDUtil.newDistanceDBIDList(ids.size());
    DBIDIter it = ids.iter();
    while (it.valid()) {
        // Distance of the current object to the centroid.
        byDistance.add(EuclideanDistanceFunction.STATIC.distance(mean, database.get(it)), it);
        it.advance();
    }
    byDistance.sort();
    return processQueryResult(byDistance, database);
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)53 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)16 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)9 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 ArrayList (java.util.ArrayList)5 Test (org.junit.Test)5 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 DoubleDBIDListMIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter)2