Search in sources :

Example 1 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class BubbleVisualization method makeVisualization.

/**
 * Build the visualization instance for the given task.
 *
 * When the configured scaling function is an {@link OutlierScalingFunction},
 * it is first prepared with the outlier result attached to the task.
 *
 * @param context Visualizer context
 * @param task Visualization task; its result is used to prepare the scaling
 * @param plot Plot to draw into
 * @param width Width passed on to the instance
 * @param height Height passed on to the instance
 * @param proj Projection passed on to the instance
 * @return the newly created visualization instance
 */
@Override
public Visualization makeVisualization(VisualizerContext context, VisualizationTask task, VisualizationPlot plot, double width, double height, Projection proj) {
    // instanceof already evaluates to false for null, so it doubles as the null check.
    if (settings.scaling instanceof OutlierScalingFunction) {
        final OutlierResult scores = task.getResult();
        final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) settings.scaling;
        outlierScaling.prepare(scores);
    }
    return new Instance(context, task, plot, width, height, proj);
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)

Example 2 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class ODIN method run.

/**
 * Run the ODIN algorithm.
 *
 * Every object is scored by its in-degree in the kNN graph, i.e. by how often
 * it appears among the {@code k} nearest neighbors of the other objects.
 *
 * Tutorial note: the <em>signature</em> of this method depends on the types
 * that we requested in the {@link #getInputTypeRestriction} method. Here we
 * requested a single relation of type {@code O}, the data type of our
 * distance function.
 *
 * @param database Database to run on.
 * @param relation Relation to process.
 * @return ODIN outlier result.
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Query objects: a distance query, and a kNN query built on top of it.
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k);
    // Objects to process, and the per-object in-degree counter (initially 0).
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore inDegree = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);
    // Each object "votes" for the members of its own kNN list.
    for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
        // Find the nearest neighbors (using an index, if available!)
        KNNList knn = knnQuery.getKNNForDBID(query, k);
        for (DBIDIter member = knn.iter(); member.valid(); member.advance()) {
            // The query object itself does not count towards any in-degree.
            if (!DBIDUtil.equal(query, member)) {
                inDegree.put(member, inDegree.doubleValue(member) + 1);
            }
        }
    }
    // Observed minimum and maximum in-degree.
    double min = Double.POSITIVE_INFINITY, max = 0.0;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double degree = inDegree.doubleValue(it);
        min = Math.min(min, degree);
        max = Math.max(max, degree);
    }
    // Wrap the result and add metadata.
    // By actually specifying theoretical min, max and baseline, we get a better
    // visualization (try it out - or see the screenshots in the tutorial)!
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("ODIN In-Degree", "odin", inDegree, ids);
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k);
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 3 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class LDF method run.

/**
 * Run the LDF (local density factor) algorithm.
 *
 * Works in three logged steps: obtain a precomputed kNN query, compute a
 * kernel-based local density estimate (LDE) for every object, and finally
 * relate the mean LDE of each object's neighbors to the object's own LDE.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LDF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();

    // Step 1: neighborhoods.
    LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

    // Step 2: local density estimates.
    LOG.beginStep(stepprog, 2, "Computing LDEs.");
    WritableDoubleDataStore densities = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress ldeProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
    for (DBIDIter cur = ids.iter(); cur.valid(); cur.advance()) {
        final KNNList knn = knnq.getKNNForDBID(cur, k);
        double acc = 0.0;
        int used = 0;
        for (DoubleDBIDListIter nb = knn.iter(); nb.valid(); nb.advance()) {
            // The object itself does not contribute to its own estimate.
            if (DBIDUtil.equal(nb, cur)) {
                continue;
            }
            final double nkdist = knnq.getKNNForDBID(nb, k).getKNNDistance();
            // The neighbor is counted whether or not its k-distance is usable.
            used++;
            if (!(nkdist > 0.) || nkdist == Double.POSITIVE_INFINITY) {
                // Non-positive, NaN or infinite k-distance: the estimate becomes infinite.
                acc = Double.POSITIVE_INFINITY;
                break;
            }
            final double bandwidth = h * nkdist;
            final double v = MathUtil.max(nkdist, nb.doubleValue()) / bandwidth;
            acc += kernel.density(v) / MathUtil.powi(bandwidth, dim);
        }
        densities.putDouble(cur, acc / used);
        LOG.incrementProcessed(ldeProgress);
    }
    LOG.ensureCompleted(ldeProgress);

    // Step 3: local density factors.
    LOG.beginStep(stepprog, 3, "Computing LDFs.");
    WritableDoubleDataStore factors = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    // Observed score range, reported in the result metadata.
    DoubleMinMax range = new DoubleMinMax();
    FiniteProgress ldfProgress = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
    for (DBIDIter cur = ids.iter(); cur.valid(); cur.advance()) {
        final double ownDensity = densities.doubleValue(cur);
        final KNNList knn = knnq.getKNNForDBID(cur, k);
        double neighborSum = 0.0;
        int used = 0;
        for (DBIDIter nb = knn.iter(); nb.valid(); nb.advance()) {
            // Again, leave out the object itself.
            if (!DBIDUtil.equal(nb, cur)) {
                neighborSum += densities.doubleValue(nb);
                used++;
            }
        }
        final double neighborMean = neighborSum / used;
        final double div = ownDensity + c * neighborMean;
        final double ldf;
        if (div == Double.POSITIVE_INFINITY) {
            // Infinite denominator: 0 when the neighbor mean is finite, 1 otherwise.
            ldf = (neighborMean < Double.POSITIVE_INFINITY) ? 0. : 1;
        } else if (div > 0) {
            ldf = neighborMean / div;
        } else {
            ldf = 0;
        }
        factors.putDouble(cur, ldf);
        range.put(ldf);
        LOG.incrementProcessed(ldfProgress);
    }
    LOG.ensureCompleted(ldfProgress);
    LOG.setCompleted(stepprog);

    // Package scores and metadata into the result.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Density Factor", "ldf-outlier", factors, ids);
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, 1. / c, 1 / (1 + c));
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 4 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class LOCI method run.

/**
 * Run the LOCI algorithm on the given relation.
 *
 * For every object, the normalized MDEF score is evaluated at each of its
 * precomputed "interesting" radii that satisfies the {@code nmin} threshold.
 * The largest score is stored, together with the radius at which it occurred.
 * Objects whose last list entry has a count below {@code nmin} are assigned a
 * score of positive infinity instead (see the FIXME below).
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result holding the normalized MDEF scores; the radius
 *         belonging to each score is attached as a child result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
    DBIDs ids = relation.getDBIDs();
    // LOCI preprocessing step: fill one (double, int) list per object.
    // Presumably each entry is (radius, neighbor count at that radius) - see
    // precomputeInterestingRadii to confirm.
    WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
    // LOCI main step
    FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
    // Per-object outputs: best normalized MDEF, and the radius where it occurred.
    WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Observed score range, used for the result metadata.
    DoubleMinMax minmax = new DoubleMinMax();
    // Shared instance, to save allocations. It is reset before every use below.
    MeanVariance mv_n_r_alpha = new MeanVariance();
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        final DoubleIntArrayList cdist = interestingDistances.get(iditer);
        // The last list entry supplies the largest radius and its count.
        final double maxdist = cdist.getDouble(cdist.size() - 1);
        final int maxneig = cdist.getInt(cdist.size() - 1);
        // Best score found so far for this object, and its radius.
        double maxmdefnorm = 0.0;
        double maxnormr = 0;
        if (maxneig >= nmin) {
            // Compute the largest neighborhood we will need.
            DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
            // For any critical distance, compute the normalized MDEF score.
            for (int i = 0, size = cdist.size(); i < size; i++) {
                // Only start when minimum size is fulfilled
                if (cdist.getInt(i) < nmin) {
                    continue;
                }
                final double r = cdist.getDouble(i);
                final double alpha_r = alpha * r;
                // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
                final int n_alphar = cdist.getInt(cdist.find(alpha_r));
                // compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
                mv_n_r_alpha.reset();
                for (DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    // Stop at radius r
                    // (assumes the range result is ordered by ascending distance - TODO confirm)
                    if (neighbor.doubleValue() > r) {
                        break;
                    }
                    // Look up the neighbor's own count at radius alpha * r.
                    DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
                    int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
                    mv_n_r_alpha.put(rn_alphar);
                }
                // We only use the average and standard deviation
                final double nhat_r_alpha = mv_n_r_alpha.getMean();
                final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
                // Redundant divisions by nhat_r_alpha removed.
                final double mdef = nhat_r_alpha - n_alphar;
                final double sigmamdef = sigma_nhat_r_alpha;
                // No guard against sigmamdef == 0 here; a NaN quotient fails the
                // comparison below and is thus never selected.
                final double mdefnorm = mdef / sigmamdef;
                if (mdefnorm > maxmdefnorm) {
                    maxmdefnorm = mdefnorm;
                    maxnormr = r;
                }
            }
        } else {
            // FIXME: when nmin was not fulfilled - what is the proper value then?
            maxmdefnorm = Double.POSITIVE_INFINITY;
            maxnormr = maxdist;
        }
        mdef_norm.putDouble(iditer, maxmdefnorm);
        mdef_radius.putDouble(iditer, maxnormr);
        minmax.put(maxmdefnorm);
        LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    // Wrap the scores, and attach the radii as a child result.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 5 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class HilOut method run.

/**
 * Run the HilOut algorithm on the given relation.
 *
 * The bounding box of the data is enlarged to equal edge lengths, then up to
 * {@code d + 1} scans with different shifts are performed while fewer than
 * {@code n} true outliers have been found. If that is still the case
 * afterwards, one additional scan is run. Finally the {@code ubound} values of
 * the features are exported as scores, either for the entries of {@code h.out}
 * only (score type {@code TopN}) or for all entries of {@code h.pf}.
 *
 * Note: this method overwrites the instance fields {@code distq}, {@code d},
 * {@code capital_n} and {@code capital_n_star}.
 *
 * @param database Database providing the distance query
 * @param relation Relation to process
 * @return Outlier result with the HilOut weights
 */
public OutlierResult run(Database database, Relation<O> relation) {
    distq = database.getDistanceQuery(relation, getDistanceFunction());
    d = RelationUtil.dimensionality(relation);
    WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Compute extent of dataset.
    double[] min;
    // Actually "length of edge"
    double diameter = 0;
    {
        // hbbs[0] holds the per-dimension minima, hbbs[1] the maxima.
        double[][] hbbs = RelationUtil.computeMinMax(relation);
        min = hbbs[0];
        double[] max = hbbs[1];
        // Longest edge of the bounding box.
        for (int i = 0; i < d; i++) {
            diameter = Math.max(diameter, max[i] - min[i]);
        }
        // Enlarge bounding box to have equal lengths.
        // Each dimension is padded symmetrically (modifies min and max in place).
        for (int i = 0; i < d; i++) {
            double diff = (diameter - (max[i] - min[i])) * .5;
            min[i] -= diff;
            max[i] += diff;
        }
        if (LOG.isVerbose()) {
            LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
        }
    }
    // Initialization part
    capital_n_star = capital_n = relation.size();
    HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
    FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
    FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
    // Main part: 1. Phase max. d+1 loops
    // (n and n_star are fields; n_star is presumably updated by trueOutliers - verify there)
    for (int j = 0; j <= d && n_star < n; j++) {
        // initialize (clear) out and wlb - not 100% clear in the paper
        h.out.clear();
        h.wlb.clear();
        // Initialize Hilbert values in pf according to current shift
        h.initialize(.5 * j / (d + 1));
        // scan the Data according to the current shift; build out and wlb
        // NOTE(review): k * capital_n is multiplied before the conversion to
        // double; if both fields are int this may overflow - confirm field types.
        scan(h, (int) (k * capital_n / (double) capital_n_star));
        // determine the true outliers (n_star)
        trueOutliers(h);
        if (progressTrueOut != null) {
            progressTrueOut.setProcessed(n_star, LOG);
        }
        // Build the top Set as out + wlb
        h.top.clear();
        // Ids already taken from out, so that wlb entries are not added twice.
        HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature entry = iter.get();
            top_keys.add(entry.id);
            h.top.add(entry);
        }
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature entry = iter.get();
            if (!top_keys.contains(entry.id)) {
                // No need to update top_keys - discarded
                h.top.add(entry);
            }
        }
        LOG.incrementProcessed(progressHilOut);
    }
    // 2. Phase: Additional Scan if less than n true outliers determined
    if (n_star < n) {
        h.out.clear();
        h.wlb.clear();
        // TODO: reinitialize shift to 0?
        scan(h, capital_n);
    }
    // Force both progress loggers to their completed state (phase 1 may have stopped early).
    if (progressHilOut != null) {
        // NOTE(review): the progress was created with d + 1 steps but is set to d
        // here, right before ensureCompleted - confirm this is intentional.
        progressHilOut.setProcessed(d, LOG);
        progressHilOut.ensureCompleted(LOG);
    }
    if (progressTrueOut != null) {
        progressTrueOut.setProcessed(n, LOG);
        progressTrueOut.ensureCompleted(LOG);
    }
    DoubleMinMax minmax = new DoubleMinMax();
    // Return weights in out
    if (tn == ScoreType.TopN) {
        // Every object defaults to weight 0; only the entries of out get their ubound.
        minmax.put(0.0);
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            hilout_weight.putDouble(iditer, 0.0);
        }
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature ent = iter.get();
            minmax.put(ent.ubound);
            hilout_weight.putDouble(ent.id, ent.ubound);
        }
    } else // Return all weights in pf
    {
        for (HilFeature ent : h.pf) {
            minmax.put(ent.ubound);
            hilout_weight.putDouble(ent.id, ent.ubound);
        }
    }
    // Wrap the weights and the observed score range into the result.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("HilOut weight", "hilout-weight", hilout_weight, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ObjectHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)144 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)72 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)71 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)71 Database (de.lmu.ifi.dbs.elki.database.Database)69 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)62 Test (org.junit.Test)58 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)57 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)45 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)26 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)23 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)22 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)18 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)13 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11