Search in sources :

Example 86 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class LOF method computeLOFScores.

/**
 * Computes the local outlier factor of every given object.
 * <p>
 * Each score is obtained from {@code computeLOFScore}, written into
 * {@code lofs}, and reported to {@code lofminmax}.
 *
 * @param knnq KNN query
 * @param ids IDs to process
 * @param lrds Local reachability distances
 * @param lofs Local outlier factor storage
 * @param lofminmax Score minimum/maximum tracker
 */
private void computeLOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
    // Progress is only tracked in verbose mode; otherwise it stays null.
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Local Outlier Factor (LOF) scores", ids.size(), LOG);
    }
    DBIDIter cursor = ids.iter();
    while (cursor.valid()) {
        final double score = computeLOFScore(knnq, cursor, lrds);
        lofs.putDouble(cursor, score);
        // Keep the running minimum and maximum up to date.
        lofminmax.put(score);
        LOG.incrementProcessed(prog);
        cursor.advance();
    }
    LOG.ensureCompleted(prog);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 87 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class COF method computeAverageChainingDistances.

/**
 * Computes the average chaining distance, the average length of a path
 * through the given set of points to each target. The authors of COF decided
 * to approximate this value using a weighted mean that assumes every object
 * is reached from the previous point (but actually every point could be best
 * reachable from the first, in which case this does not make much sense.)
 * <p>
 * Implementation outline, per object: the neighbor distances are copied into
 * a work array in which {@code NaN} marks entries that are already consumed
 * (the query object itself starts out as {@code NaN}). Then the currently
 * closest unconsumed neighbor is picked repeatedly, its distance is added to
 * a weighted sum with linearly decreasing weights, and the remaining entries
 * are lowered to their distance from the picked neighbor where that is
 * smaller. The sum is finally divided by {@code r * (r - 1) / 2}, with
 * {@code r} the size of the neighbor list.
 *
 * TODO: can we accelerate this by using the kNN of the neighbors?
 *
 * @param knnq KNN query
 * @param dq Distance query
 * @param ids IDs to process
 * @param acds Storage for average chaining distances
 */
protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Computing average chaining distances", ids.size(), LOG);
    }
    // The chaining order itself is never materialized.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(iter, k);
        final int r = neighbors.size();
        DoubleDBIDListIter picked = neighbors.iter(), other = neighbors.iter();
        // Current lowest reachability per neighbor; NaN means "done".
        final double[] reach = new double[r];
        int slot = 0;
        while (picked.valid()) {
            reach[slot] = DBIDUtil.equal(picked, iter) ? Double.NaN : picked.doubleValue();
            picked.advance();
            ++slot;
        }
        double weightedSum = 0.;
        int weight = Math.min(r, k) - 1;
        while (weight > 0) {
            // Locate the smallest unconsumed entry; on ties the later one wins.
            int bestPos = -1;
            double best = Double.NaN;
            for (int i = 0; i < reach.length; ++i) {
                final double cand = reach[i];
                // A NaN "best" never compares greater, so the first real value is accepted.
                if (Double.isNaN(cand) || cand > best) {
                    continue;
                }
                bestPos = i;
                best = cand;
            }
            // Weighted sum, with weights decreasing by one per step.
            weightedSum += best * weight;
            reach[bestPos] = Double.NaN;
            picked.seek(bestPos);
            // Relax the remaining entries against the neighbor just picked.
            int pos = 0;
            for (other.seek(0); other.valid(); other.advance(), ++pos) {
                final double known = reach[pos];
                if (!Double.isNaN(known)) {
                    final double viaPicked = dq.distance(picked, other);
                    if (viaPicked < known) {
                        reach[pos] = viaPicked;
                    }
                }
            }
            --weight;
        }
        final double norm = r * 0.5 * (r - 1.);
        acds.putDouble(iter, weightedSum / norm);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 88 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class INFLO method computeINFLO.

/**
 * Compute the final INFLO scores.
 * <p>
 * Objects contained in {@code pruned}, and objects whose kNN distance is
 * zero, receive the score 1. For all others, the score is the object's kNN
 * distance multiplied by the mean of {@code 1 / kNN distance} over the union
 * of its kNN list and its entry in {@code rNNminuskNNs} (excluding the object
 * itself). A member with a non-positive kNN distance makes the score
 * infinite.
 *
 * @param relation Data relation
 * @param pruned Pruned objects
 * @param knnq kNN query
 * @param rNNminuskNNs reverse kNN storage
 * @param inflos INFLO score storage
 * @param inflominmax Output of minimum and maximum
 */
protected void computeINFLO(Relation<O> relation, ModifiableDBIDs pruned, KNNQuery<O> knnq, WritableDataStore<ModifiableDBIDs> rNNminuskNNs, WritableDoubleDataStore inflos, DoubleMinMax inflominmax) {
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Computing INFLOs", relation.size(), LOG);
    }
    // Scratch set, reused across objects.
    HashSetModifiableDBIDs influence = DBIDUtil.newHashSet();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        // Default score for pruned objects and objects with zero kNN distance.
        double inflo = 1.;
        if (!pruned.contains(iter)) {
            final KNNList knn = knnq.getKNNForDBID(iter, kplus1);
            if (knn.getKNNDistance() != 0.) {
                influence.clear();
                influence.addDBIDs(knn);
                influence.addDBIDs(rNNminuskNNs.get(iter));
                // Compute mean density of NN \cup RNN
                double density = 0.;
                int members = 0;
                for (DBIDIter niter = influence.iter(); niter.valid(); niter.advance()) {
                    if (DBIDUtil.equal(iter, niter)) {
                        continue;
                    }
                    final double kdist = knnq.getKNNForDBID(niter, kplus1).getKNNDistance();
                    members++;
                    if (kdist <= 0) {
                        // Infinite density; no need to look at further members.
                        density = Double.POSITIVE_INFINITY;
                        break;
                    }
                    density += 1. / kdist;
                }
                density *= knn.getKNNDistance();
                if (density == 0) {
                    inflo = 1.;
                } else {
                    inflo = density / members;
                }
            }
        }
        inflos.putDouble(iter, inflo);
        inflominmax.put(inflo);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)

Example 89 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class LOCI method precomputeInterestingRadii.

/**
 * Preprocessing step: determine the radii of interest for each point.
 * <p>
 * For every object, the neighbors within {@code rmax} are fetched and a
 * sorted list of (radius, neighbor count) pairs is stored in
 * {@code interestingDistances}. Besides each distinct neighbor distance, the
 * distance divided by {@code alpha} is added as well (if it does not exceed
 * {@code rmax}); these extra radii inherit the count of the preceding entry
 * in sorted order.
 *
 * @param ids IDs to process
 * @param rangeQuery Range query
 * @param interestingDistances Distances of interest
 */
protected void precomputeInterestingRadii(DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) {
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("LOCI preprocessing", ids.size(), LOG);
    }
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        final DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
        // Collect the critical distances; at most two entries per neighbor.
        final DoubleIntArrayList radii = new DoubleIntArrayList(neighbors.size() * 2);
        int seen = 0;
        for (DoubleDBIDListIter ni = neighbors.iter(); ni.valid();) {
            final double dist = ni.doubleValue();
            ++seen;
            ni.advance();
            // Only the last object of a run of equal distances produces entries.
            final boolean tiedWithNext = ni.valid() && dist == ni.doubleValue();
            if (!tiedWithNext) {
                radii.append(dist, seen);
                // Scale radius, and reinsert with a placeholder count.
                if (alpha != 1.) {
                    final double scaled = dist / alpha;
                    if (scaled <= rmax) {
                        radii.append(scaled, Integer.MIN_VALUE);
                    }
                }
            }
        }
        radii.sort();
        // Replace the placeholder counts by the most recent real count, to
        // have fast lookups of the number of neighbors at a given distance.
        final int entries = radii.size();
        int carried = 0;
        for (int pos = 0; pos < entries; pos++) {
            final int count = radii.getInt(pos);
            if (count != Integer.MIN_VALUE) {
                carried = count;
            } else {
                radii.setValue(pos, carried);
            }
        }
        // TODO: shrink the list, removing duplicate radii?
        interestingDistances.put(iditer, radii);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 90 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class LoOP method computePLOFs.

/**
 * Compute the PLOF values, using the precomputed pdist distances.
 * <p>
 * The PLOF of an object is its own pdist divided by the mean pdist of up to
 * {@code kcomp} neighbors (the object itself is skipped), bounded below by 1;
 * NaN and infinite results are replaced by 1.
 *
 * @param relation Data relation
 * @param knn kNN query
 * @param pdists Precomputed distances
 * @param plofs Storage for PLOFs.
 * @return Normalization factor: {@code lambda} times the square root of the
 *         mean squared deviation of the PLOFs from 1, or 1 if that value is
 *         not positive.
 */
protected double computePLOFs(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists, WritableDoubleDataStore plofs) {
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("PLOFs for objects", relation.size(), LOG);
    }
    double squaredDeviations = 0.;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Ask for kcomp + 1 neighbors: the query point is skipped below.
        final KNNList neighbors = knn.getKNNForDBID(iditer, kcomp + 1);
        // Use the first kcomp neighbors as comparison set.
        int ks = 0;
        double sum = 0.;
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid() && ks < kcomp; neighbor.advance()) {
            if (!DBIDUtil.equal(neighbor, iditer)) {
                sum += pdists.doubleValue(neighbor);
                ks++;
            }
        }
        double plof = MathUtil.max(pdists.doubleValue(iditer) * ks / sum, 1.0);
        // Degenerate ratios (e.g. a zero sum) fall back to the neutral value.
        if (Double.isNaN(plof) || Double.isInfinite(plof)) {
            plof = 1.0;
        }
        plofs.putDouble(iditer, plof);
        squaredDeviations += (plof - 1.0) * (plof - 1.0);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    final double nplof = lambda * FastMath.sqrt(squaredDeviations / relation.size());
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("nplof normalization factor is " + nplof);
    }
    if (nplof > 0.) {
        return nplof;
    }
    return 1.;
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12