Search in sources :

Example 66 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class LBABOD method run.

/**
 * Run LB-ABOD on the data set.
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    DBIDArrayIter pB = ids.iter(), pC = ids.iter();
    SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
    KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
    // Output storage.
    WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmaxabod = new DoubleMinMax();
    double max = 0.;
    // Storage for squared distances (will be reused!)
    WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // Nearest neighbor heap (will be reused!)
    KNNHeap nn = DBIDUtil.newHeap(k);
    // Priority queue for candidates
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(relation.size());
    // get Candidate Ranking
    for (DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
        // Compute nearest neighbors and distances.
        nn.clear();
        double simAA = kernelMatrix.getSimilarity(pA, pA);
        // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
        double sumid = 0., sumisqd = 0.;
        for (pB.seek(0); pB.valid(); pB.advance()) {
            if (DBIDUtil.equal(pB, pA)) {
                continue;
            }
            double simBB = kernelMatrix.getSimilarity(pB, pB);
            double simAB = kernelMatrix.getSimilarity(pA, pB);
            double sqdAB = simAA + simBB - simAB - simAB;
            sqDists.putDouble(pB, sqdAB);
            final double isqdAB = 1. / sqdAB;
            sumid += FastMath.sqrt(isqdAB);
            sumisqd += isqdAB;
            // Update heap
            nn.insert(sqdAB, pB);
        }
        // Compute FastABOD approximation, adjust for lower bound.
        // LB-ABOF is defined via a numerically unstable formula.
        // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
        // TODO: ensure numerical precision!
        double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
        KNNList nl = nn.toKNNList();
        DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
        for (; iB.valid(); iB.advance()) {
            double sqdAB = iB.doubleValue();
            double simAB = kernelMatrix.getSimilarity(pA, iB);
            if (!(sqdAB > 0.)) {
                continue;
            }
            for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
                double sqdAC = iC.doubleValue();
                double simAC = kernelMatrix.getSimilarity(pA, iC);
                if (!(sqdAC > 0.)) {
                    continue;
                }
                // Exploit bilinearity of scalar product:
                // <B-A, C-A> = <B, C-A> - <A,C-A>
                // = <B,C> - <B,A> - <A,C> + <A,A>
                double simBC = kernelMatrix.getSimilarity(iB, iC);
                double numerator = simBC - simAB - simAC + simAA;
                double sqweight = 1. / (sqdAB * sqdAC);
                double weight = FastMath.sqrt(sqweight);
                double val = numerator * sqweight;
                nnsum += val * weight;
                nnsumsq += val * val * weight;
                nnsumisqd += sqweight;
            }
        }
        // Remaining weight, term R2:
        double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
        double tmp = (2. * nnsum + r2) / (sumid * sumid);
        double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
        // Track maximum?
        if (lbabof > max) {
            max = lbabof;
        }
        abodvalues.putDouble(pA, lbabof);
        candidates.add(lbabof, pA);
    }
    // Put maximum from approximate values.
    minmaxabod.put(max);
    candidates.sort();
    // refine Candidates
    int refinements = 0;
    DoubleMinHeap topscores = new DoubleMinHeap(l);
    MeanVariance s = new MeanVariance();
    for (DoubleDBIDListIter pA = candidates.iter(); pA.valid(); pA.advance()) {
        // Stop refining
        if (topscores.size() >= k && pA.doubleValue() > topscores.peek()) {
            break;
        }
        final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
        // Store refined score:
        abodvalues.putDouble(pA, abof);
        minmaxabod.put(abof);
        // Update the heap tracking the top scores.
        if (topscores.size() < k) {
            topscores.add(abof);
        } else {
            if (topscores.peek() > abof) {
                topscores.replaceTopElement(abof);
            }
        }
        refinements += 1;
    }
    if (LOG.isStatistics()) {
        LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
        LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) DoubleMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KernelMatrix(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 67 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class DBOutlierDetection method computeOutlierScores.

@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    // Prefer kNN query if available, as this will usually stop earlier.
    KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    RangeQuery<O> rangeQuery = knnQuery == null ? database.getRangeQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY, d) : null;
    // maximum number of objects in the D-neighborhood of an outlier
    int m = (int) Math.floor((distFunc.getRelation().size()) * (1 - p));
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("DBOutlier detection", distFunc.getRelation().size(), LOG) : null;
    // is more than d -> object is outlier
    if (knnQuery != null) {
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            KNNList knns = knnQuery.getKNNForDBID(iditer, m);
            scores.putDouble(iditer, (knns.getKNNDistance() > d) ? 1. : 0.);
            LOG.incrementProcessed(prog);
        }
    } else if (rangeQuery != null) {
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, d);
            scores.putDouble(iditer, (neighbors.size() < m) ? 1. : 0.);
            LOG.incrementProcessed(prog);
        }
    } else {
        // Linear scan neighbors for each object, but stop early.
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            int count = 0;
            for (DBIDIter iterator = relation.iterDBIDs(); iterator.valid(); iterator.advance()) {
                double currentDistance = distFunc.distance(iditer, iterator);
                if (currentDistance <= d) {
                    if (++count >= m) {
                        break;
                    }
                }
            }
            scores.putDouble(iditer, (count < m) ? 1.0 : 0);
            LOG.incrementProcessed(prog);
        }
    }
    LOG.ensureCompleted(prog);
    return scores;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 68 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class LocalIsolationCoefficient method run.

/**
 * Runs the algorithm in the timed evaluation part.
 *
 * @param database Database context
 * @param relation Data relation
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // + query point
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute Local Isolation Coefficients", relation.size(), LOG) : null;
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore lic_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // + query point
        final KNNList knn = knnQuery.getKNNForDBID(iditer, k + 1);
        // sum of the distances to the k nearest neighbors
        double skn = 0;
        // number of neighbors so far
        int i = 0;
        for (DoubleDBIDListIter neighbor = knn.iter(); i < k && neighbor.valid(); neighbor.advance()) {
            if (DBIDUtil.equal(iditer, neighbor)) {
                continue;
            }
            skn += neighbor.doubleValue();
            ++i;
        }
        double lic = knn.getKNNDistance() + (i > 0 ? skn / i : 0);
        lic_score.putDouble(iditer, lic);
        minmax.put(skn);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    DoubleRelation res = new MaterializedDoubleRelation("Local Isolation Coefficient", "lic-outlier", lic_score, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(meta, res);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 69 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class ReferenceBasedOutlierDetection method run.

/**
 * Run the algorithm on the given relation.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
    @SuppressWarnings("unchecked") PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
    Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
    if (refPoints.isEmpty()) {
        throw new AbortException("Cannot compute ROS without reference points!");
    }
    DBIDs ids = relation.getDBIDs();
    if (k >= ids.size()) {
        throw new AbortException("k must not be chosen larger than the database size!");
    }
    // storage of distance/score values.
    WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
    // Compute density estimation:
    for (NumberVector refPoint : refPoints) {
        DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
        updateDensities(rbod_score, referenceDists);
    }
    // compute maximum density
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        mm.put(rbod_score.doubleValue(iditer));
    }
    // compute ROS
    double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
    // Reuse
    mm.reset();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = 1 - (rbod_score.doubleValue(iditer) * scale);
        mm.put(score);
        rbod_score.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // adds reference points to the result. header information for the
    // visualizer to find the reference points in the result
    result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) PrimitiveDistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 70 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SimplifiedLOF method computeSimplifiedLRDs.

/**
 * Compute the simplified reachability densities.
 *
 * @param ids IDs to process
 * @param knnq kNN query class
 * @param lrds Density output
 */
private void computeSimplifiedLRDs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore lrds) {
    FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(iter, k);
        double sum = 0.0;
        int count = 0;
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            if (DBIDUtil.equal(neighbor, iter)) {
                continue;
            }
            sum += neighbor.doubleValue();
            count++;
        }
        // Avoid division by 0
        final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
        lrds.putDouble(iter, lrd);
        LOG.incrementProcessed(lrdsProgress);
    }
    LOG.ensureCompleted(lrdsProgress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21