Search in sources :

Example 91 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class SimpleKernelDensityLOF method run.

/**
 * Run the naive kernel density LOF algorithm.
 *
 * The method proceeds in three logged steps: it obtains a precomputed kNN
 * query, estimates a kernel density for every object from its neighbors, and
 * finally relates each object's density to the average density of its
 * neighbors.
 *
 * A neighbor whose own k-distance is 0 makes the density estimate of the
 * current object infinite, independent of where that neighbor appears in the
 * neighbor list.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();
    LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
    // Compute the kernel density estimate of each object
    LOG.beginStep(stepprog, 2, "Computing densities.");
    WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(it, k);
        int count = 0;
        double sum = 0.0;
        // Set when some neighbor has a k-distance of 0 (zero bandwidth).
        boolean zeroBandwidth = false;
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // skip the point itself
            if (DBIDUtil.equal(neighbor, it)) {
                continue;
            }
            // The k-distance of the neighbor is used as kernel bandwidth.
            double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
            if (max == 0) {
                // Dividing by a zero bandwidth is not possible; remember this
                // explicitly so that the result does not depend on how many
                // neighbors were already counted before this one.
                zeroBandwidth = true;
                break;
            }
            final double v = neighbor.doubleValue() / max;
            sum += kernel.density(v) / MathUtil.powi(max, dim);
            count++;
        }
        final double density;
        if (zeroBandwidth) {
            density = Double.POSITIVE_INFINITY;
        } else {
            // Average over the neighbors used; 0 if there were none.
            density = count > 0 ? sum / count : 0.;
        }
        dens.putDouble(it, density);
        LOG.incrementProcessed(densProgress);
    }
    LOG.ensureCompleted(densProgress);
    // compute LOF_SCORE of each db object
    LOG.beginStep(stepprog, 3, "Computing KLOFs.");
    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    // track the minimum and maximum value for the score meta data.
    DoubleMinMax lofminmax = new DoubleMinMax();
    FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double lrdp = dens.doubleValue(it);
        final double lof;
        if (lrdp > 0) {
            final KNNList neighbors = knnq.getKNNForDBID(it, k);
            double sum = 0.0;
            int count = 0;
            for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                // skip the point itself
                if (DBIDUtil.equal(neighbor, it)) {
                    continue;
                }
                sum += dens.doubleValue(neighbor);
                count++;
            }
            // Infinite own density: score 1 if the neighbors are infinitely
            // dense as well, otherwise 0. Else: mean neighbor density divided
            // by the own density.
            lof = (lrdp == Double.POSITIVE_INFINITY) ? (sum == Double.POSITIVE_INFINITY ? 1 : 0.) : sum / (count * lrdp);
        } else {
            // Density is not positive: fall back to a score of 1.
            lof = 1.0;
        }
        lofs.putDouble(it, lof);
        // update minimum and maximum
        lofminmax.put(lof);
        LOG.incrementProcessed(progressLOFs);
    }
    LOG.ensureCompleted(progressLOFs);
    LOG.setCompleted(stepprog);
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 92 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class TrimmedMeanApproach method run.

/**
 * Run the algorithm.
 *
 * For every object, the attribute values of its neighbors are sorted and the
 * mean of the values between the indexes {@code floor(p * (num - 1))} and
 * {@code floor((1 - p) * (num - 1))} is taken as trimmed mean; the error of
 * the object is its own value minus this trimmed mean. The final score is the
 * absolute error, scaled by 0.6745 and divided by the median absolute
 * deviation of all errors from their median.
 *
 * Objects with an error of exactly 0 always receive the score 0, also when
 * the median absolute deviation is 0 (where the division would yield NaN).
 *
 * The one-dimensionality of the data is only verified by an assertion.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data Relation (1 dimensional!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        int num = 0;
        double[] values = new double[neighbors.size()];
        // collect the attribute values of all neighbors
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            values[num] = relation.get(iter).doubleValue(0);
            num++;
        }
        // calculate local trimmed Mean and error term
        final double tm;
        if (num > 0) {
            // Index range of the sorted values that is kept.
            int left = (int) Math.floor(p * (num - 1));
            int right = (int) Math.floor((1 - p) * (num - 1));
            Arrays.sort(values, 0, num);
            Mean mean = new Mean();
            for (int i = left; i <= right; i++) {
                mean.put(values[i]);
            }
            tm = mean.getMean();
        } else {
            // No neighbors: use the own value, which yields an error of 0.
            tm = relation.get(iditer).doubleValue(0);
        }
        // Error: deviation from trimmed mean
        errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isVerbose()) {
        LOG.verbose("Computing median error.");
    }
    double median_dev_from_median;
    {
        // calculate the median error
        double[] ei = new double[relation.size()];
        {
            int i = 0;
            for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                ei[i] = errors.doubleValue(iditer);
                i++;
            }
        }
        double median_i = QuickSelect.median(ei);
        // Update to deviation from median
        for (int i = 0; i < ei.length; i++) {
            ei[i] = Math.abs(ei[i] - median_i);
        }
        // Again, extract median
        median_dev_from_median = QuickSelect.median(ei);
    }
    if (LOG.isVerbose()) {
        LOG.verbose("Normalizing scores.");
    }
    // calculate score
    // NOTE(review): 0.6745 presumably rescales the median absolute deviation
    // to be comparable to a standard deviation - confirm against the reference.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final double abserr = Math.abs(errors.doubleValue(iditer));
        // An error of exactly 0 is scored 0 directly: with a median absolute
        // deviation of 0 the division below would be 0 / 0 = NaN.
        final double score = (abserr == 0) ? 0. : abserr * 0.6745 / median_dev_from_median;
        scores.putDouble(iditer, score);
        minmax.put(score);
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    // Attach the neighborhood predicate to the result.
    or.addChildResult(npred);
    return or;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 93 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class AggarwalYuNaive method run.

/**
 * Run the algorithm on the given relation.
 *
 * Enumerates every combination of k (dimension, range) pairs that uses each
 * dimension at most once, computes the sparsity coefficient of each such
 * combination, and assigns every object the smallest negative coefficient of
 * the combinations it is contained in. Scores that are still NaN afterwards
 * are set to 0.
 *
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Relation<V> relation) {
    final int dims = RelationUtil.dimensionality(relation);
    final int total = relation.size();
    ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);

    // All dim*phi (dimension, range) pairs; each one also forms an initial
    // one-element combination.
    ArrayList<IntIntPair> atoms = new ArrayList<>();
    ArrayList<ArrayList<IntIntPair>> combinations = new ArrayList<>();
    for (int d = 0; d < dims; d++) {
        for (int r = 0; r < phi; r++) {
            IntIntPair atom = new IntIntPair(d, r);
            atoms.add(atom);
            ArrayList<IntIntPair> single = new ArrayList<>();
            single.add(atom);
            combinations.add(single);
        }
    }

    // Grow the combinations one pair at a time until they have k members.
    for (int level = 2; level <= k; level++) {
        ArrayList<ArrayList<IntIntPair>> grown = new ArrayList<>();
        for (ArrayList<IntIntPair> base : combinations) {
            candidates: for (IntIntPair cand : atoms) {
                // A dimension may occur only once per combination.
                for (IntIntPair used : base) {
                    if (used.first == cand.first) {
                        continue candidates;
                    }
                }
                ArrayList<IntIntPair> extended = new ArrayList<>(base);
                extended.add(cand);
                grown.add(extended);
            }
        }
        combinations = grown;
    }

    WritableDoubleDataStore coefficients = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    // Keep, per object, the lowest negative sparsity coefficient seen.
    for (ArrayList<IntIntPair> combination : combinations) {
        DBIDs members = computeSubspace(combination, ranges);
        final double coeff = sparsity(members.size(), total, k, phi);
        if (!(coeff < 0)) {
            continue;
        }
        for (DBIDIter member = members.iter(); member.valid(); member.advance()) {
            final double current = coefficients.doubleValue(member);
            if (Double.isNaN(current) || coeff < current) {
                coefficients.putDouble(member, coeff);
            }
        }
    }

    // Replace remaining NaN entries by 0 and track the score range.
    DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        double score = coefficients.doubleValue(it);
        if (Double.isNaN(score)) {
            score = 0.0;
            coefficients.putDouble(it, 0.0);
        }
        range.put(score);
    }

    DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuNaive", "aggarwal-yu-outlier", coefficients, relation.getDBIDs());
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), Double.NEGATIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) IntIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair)

Example 94 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class ByLabelOutlier method run.

/**
 * Run the algorithm.
 *
 * Every object whose string representation is fully matched by the
 * configured pattern is scored 1, every other object 0.
 *
 * @param relation Relation to process.
 * @return Result
 */
public OutlierResult run(Relation<?> relation) {
    WritableDoubleDataStore labelScores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final String text = relation.get(it).toString();
        if (pattern.matcher(text).matches()) {
            labelScores.putDouble(it, 1.0);
        } else {
            labelScores.putDouble(it, 0.0);
        }
        it.advance();
    }
    // Wrap the scores into the result structure.
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("By label outlier scores", "label-outlier", labelScores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 95 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class TrivialAllOutlier method run.

/**
 * Run the actual algorithm.
 *
 * Assigns the score 1.0 to every object of the relation.
 *
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Relation<?> relation) {
    // Score given to every object.
    final double outlierScore = 1.0;
    WritableDoubleDataStore allScores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        allScores.putDouble(it, outlierScore);
        it.advance();
    }
    // Wrap the scores into the result structure.
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("Trivial all-outlier score", "all-outlier", allScores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)144 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)72 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)71 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)71 Database (de.lmu.ifi.dbs.elki.database.Database)69 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)62 Test (org.junit.Test)58 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)57 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)45 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)26 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)23 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)22 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)18 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)13 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11