Search in sources :

Example 21 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class LoOP method computePDists.

/**
 * Fill {@code pdists} with the probabilistic distance of every object in
 * the relation.
 * <p>
 * For each object, the value stored is the square root of the mean squared
 * distance to at most {@code kreach} neighbors, where the query object
 * itself is left out. An object without any usable neighbor gets 0.
 *
 * @param relation Data relation to iterate over
 * @param knn kNN query used to retrieve the neighbors
 * @param pdists Output storage, written once per object
 */
protected void computePDists(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists) {
    // Progress is only tracked in verbose mode; the LOG helpers accept null.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("pdists", relation.size(), LOG);
    }
    for (DBIDIter query = relation.iterDBIDs(); query.valid(); query.advance()) {
        // Ask for one extra neighbor, since the result may contain the query
        // point itself, which is skipped below.
        final KNNList knnList = knn.getKNNForDBID(query, kreach + 1);
        double squaredSum = 0.;
        int used = 0;
        DoubleDBIDListIter cand = knnList.iter();
        while (cand.valid() && used < kreach) {
            if (!DBIDUtil.equal(cand, query)) {
                final double dist = cand.doubleValue();
                squaredSum += dist * dist;
                used++;
            }
            cand.advance();
        }
        final double pdist;
        if (used > 0) {
            // Quadratic mean of the neighbor distances.
            pdist = FastMath.sqrt(squaredSum / used);
        } else {
            pdist = 0.;
        }
        pdists.putDouble(query, pdist);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 22 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class HilOut method run.

/**
 * Run HilOut on the given relation and return the resulting outlier scores.
 * <p>
 * Visible steps: the bounding box of the data is enlarged to a cube, then up
 * to {@code d + 1} scan iterations are performed with a different shift each
 * time, optionally followed by one additional scan. Finally, the
 * {@code ubound} value of each feature is written out as its score.
 * <p>
 * NOTE(review): {@code distq}, {@code d}, {@code k}, {@code n},
 * {@code n_star}, {@code capital_n}, {@code capital_n_star} and {@code tn}
 * are not declared in this method - presumably instance fields, with
 * {@code n_star} updated by {@code trueOutliers}; confirm against the class.
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to process
 * @return Outlier result with the "HilOut weight" scores
 */
public OutlierResult run(Database database, Relation<O> relation) {
    distq = database.getDistanceQuery(relation, getDistanceFunction());
    d = RelationUtil.dimensionality(relation);
    WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Compute extent of dataset.
    double[] min;
    // Actually "length of edge": the largest per-dimension extent.
    double diameter = 0;
    {
        double[][] hbbs = RelationUtil.computeMinMax(relation);
        min = hbbs[0];
        double[] max = hbbs[1];
        for (int i = 0; i < d; i++) {
            diameter = Math.max(diameter, max[i] - min[i]);
        }
        // Enlarge bounding box to have equal lengths: pad each dimension
        // symmetrically until its extent equals the diameter.
        for (int i = 0; i < d; i++) {
            double diff = (diameter - (max[i] - min[i])) * .5;
            min[i] -= diff;
            max[i] += diff;
        }
        if (LOG.isVerbose()) {
            LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
        }
    }
    // Initialization part
    capital_n_star = capital_n = relation.size();
    HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
    // Two progress trackers (verbose mode only): loop iterations, and the
    // number of true outliers found so far.
    FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
    FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
    // Main part: 1. Phase max. d+1 loops; stops early once n_star reaches n.
    for (int j = 0; j <= d && n_star < n; j++) {
        // initialize (clear) out and wlb - not 100% clear in the paper
        h.out.clear();
        h.wlb.clear();
        // Initialize Hilbert values in pf according to current shift
        h.initialize(.5 * j / (d + 1));
        // scan the Data according to the current shift; build out and wlb
        scan(h, (int) (k * capital_n / (double) capital_n_star));
        // determine the true outliers (n_star)
        trueOutliers(h);
        if (progressTrueOut != null) {
            progressTrueOut.setProcessed(n_star, LOG);
        }
        // Build the top Set as out + wlb
        h.top.clear();
        // IDs already taken from out, so wlb entries with the same ID are
        // not added a second time.
        HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature entry = iter.get();
            top_keys.add(entry.id);
            h.top.add(entry);
        }
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature entry = iter.get();
            if (!top_keys.contains(entry.id)) {
                // No need to update top_keys - discarded
                h.top.add(entry);
            }
        }
        LOG.incrementProcessed(progressHilOut);
    }
    // 2. Phase: Additional Scan if less than n true outliers determined
    if (n_star < n) {
        h.out.clear();
        h.wlb.clear();
        // TODO: reinitialize shift to 0?
        scan(h, capital_n);
    }
    // Force both progress trackers to their final state, since the loop
    // above may have ended early.
    if (progressHilOut != null) {
        progressHilOut.setProcessed(d, LOG);
        progressHilOut.ensureCompleted(LOG);
    }
    if (progressTrueOut != null) {
        progressTrueOut.setProcessed(n, LOG);
        progressTrueOut.ensureCompleted(LOG);
    }
    DoubleMinMax minmax = new DoubleMinMax();
    // Return weights in out
    if (tn == ScoreType.TopN) {
        // All objects default to 0; only the entries in out get their ubound.
        minmax.put(0.0);
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            hilout_weight.putDouble(iditer, 0.0);
        }
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature ent = iter.get();
            minmax.put(ent.ubound);
            hilout_weight.putDouble(ent.id, ent.ubound);
        }
    } else // Return all weights in pf
    {
        for (HilFeature ent : h.pf) {
            minmax.put(ent.ubound);
            hilout_weight.putDouble(ent.id, ent.ubound);
        }
    }
    // Wrap the scores and their observed range into the result objects.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("HilOut weight", "hilout-weight", hilout_weight, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ObjectHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 23 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class KNNWeightOutlier method run.

/**
 * Score every object by the sum of the distances to its {@code k} nearest
 * neighbors, not counting the object itself.
 * <p>
 * Objects for which fewer than {@code k} such neighbors are returned get a
 * score of positive infinity.
 *
 * @param database Database context used to obtain the queries
 * @param relation Data relation to process
 * @return Outlier result with the "kNN weight Outlier Score" scores
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // k + 1, because the query point may be part of its own result.
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Compute kNN weights", relation.size(), LOG);
    }
    DoubleMinMax range = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter query = relation.iterDBIDs(); query.valid(); query.advance()) {
        final KNNList neighbors = knnQuery.getKNNForDBID(query, k + 1);
        double distSum = 0;
        int found = 0;
        DoubleDBIDListIter cand = neighbors.iter();
        while (found < k && cand.valid()) {
            // The query point itself does not count as a neighbor.
            if (!DBIDUtil.equal(query, cand)) {
                distSum += cand.doubleValue();
                found++;
            }
            cand.advance();
        }
        // Too few neighbors (approximative index, or k > data set size):
        // report an infinite weight.
        final double weight = found < k ? Double.POSITIVE_INFINITY : distSum;
        scores.putDouble(query, weight);
        range.put(weight);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 24 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class IDOS method computeIDOS.

/**
 * Computes all IDOS scores.
 * <p>
 * The score of an object is its own intrinsic dimensionality multiplied by
 * the mean of the reciprocal intrinsic dimensionalities of up to
 * {@code k_r} neighbors, where the object itself is skipped. Neighbors with
 * a non-positive intrinsic dimensionality contribute 0 to the sum but are
 * still counted. The score is 0 if the object's own intrinsic
 * dimensionality is not positive, or if no neighbor other than the object
 * itself was returned.
 *
 * @param ids the DBIDs to process
 * @param knnQ the KNN query
 * @param intDims Precomputed intrinsic dimensionalities
 * @param idosminmax Output of minimum and maximum, for metadata
 * @return IDOS scores
 */
protected DoubleDataStore computeIDOS(DBIDs ids, KNNQuery<O> knnQ, DoubleDataStore intDims, DoubleMinMax idosminmax) {
    WritableDoubleDataStore ldms = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    // Progress is only tracked in verbose mode; the LOG helpers accept null.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("ID Outlier Scores for objects", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnQ.getKNNForDBID(iter, k_r);
        double sum = 0.;
        int cnt = 0;
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // The query point itself is not a reference object.
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            final double id = intDims.doubleValue(neighbor);
            sum += id > 0 ? 1.0 / id : 0.;
            if (++cnt == k_r) {
                // Always stop after at most k_r elements.
                break;
            }
        }
        final double id_q = intDims.doubleValue(iter);
        // Guard cnt > 0: without any neighbor, sum / cnt would be 0.0 / 0,
        // i.e. NaN, which must not end up in the scores or the min/max.
        final double idos = (id_q > 0 && cnt > 0) ? id_q * sum / cnt : 0.;
        ldms.putDouble(iter, idos);
        idosminmax.put(idos);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return ldms;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) GreaterEqualConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 25 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class IntrinsicDimensionalityOutlier method run.

/**
 * Score every object with the value returned by the configured estimator
 * for that object.
 * <p>
 * If the estimator throws an {@link ArithmeticException} for an object,
 * that object is given the score 0 instead.
 *
 * @param database Database used to obtain the queries
 * @param relation Data relation to process
 * @return Outlier result with the "Intrinsic dimensionality" scores
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("kNN distance for objects", relation.size(), LOG);
    }
    DoubleMinMax range = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DBIDIter query = relation.iterDBIDs();
    while (query.valid()) {
        double estimate;
        try {
            estimate = estimator.estimate(knnQuery, query, k + 1);
        } catch (ArithmeticException ignored) {
            // Deliberate fallback: an estimate that fails arithmetically is
            // scored as 0 rather than aborting the whole run.
            estimate = 0.;
        }
        scores.putDouble(query, estimate);
        range.put(estimate);
        LOG.incrementProcessed(progress);
        query.advance();
    }
    LOG.ensureCompleted(progress);
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("Intrinsic dimensionality", "id-score", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12