Search in sources :

Example 26 with BasicOutlierScoreMeta

use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.

the class FeatureBagging method run.

/**
 * Execute feature bagging on a data set.
 * <p>
 * {@code num} LOF instances are run, each with a
 * {@link SubspaceEuclideanDistanceFunction} restricted to a subspace obtained
 * from {@code randomSubspace}. The individual results are then merged: in
 * breadth-first mode the orderings of all results are walked in lock-step and
 * the first score encountered for an object is kept; otherwise the non-NaN
 * scores of each object are summed up.
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
    final int dbdim = RelationUtil.dimensionality(relation);
    // Bounds handed to randomSubspace: half the dimensionality up to dbdim - 1.
    final int lowerDim = dbdim >> 1;
    final int upperDim = dbdim - 1;
    final Random random = rnd.getSingleThreadedRandom();

    // Phase 1: one LOF run per random subspace.
    ArrayList<OutlierResult> ensemble = new ArrayList<>(num);
    FiniteProgress lofProgress = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
    for (int round = 0; round < num; round++) {
        long[] subspace = randomSubspace(dbdim, lowerDim, upperDim, random);
        LOF<NumberVector> detector = new LOF<>(k, new SubspaceEuclideanDistanceFunction(subspace));
        ensemble.add(detector.run(database, relation));
        LOG.incrementProcessed(lofProgress);
    }
    LOG.ensureCompleted(lofProgress);

    // Phase 2: combine the individual results into a single score per object.
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    // Progress counts database objects only; the cursor setup below is not included.
    FiniteProgress combineProgress = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    if (breadth) {
        // One cursor per result: an iterator over the result's ordering of the
        // DBIDs, paired with the score relation it belongs to. All cursors are
        // created up front so they can be advanced "in parallel".
        final int members = ensemble.size();
        @SuppressWarnings("unchecked")
        Pair<DBIDIter, DoubleRelation>[] cursors = (Pair<DBIDIter, DoubleRelation>[]) new Pair[members];
        for (int m = 0; m < members; m++) {
            OutlierResult member = ensemble.get(m);
            cursors[m] = new Pair<DBIDIter, DoubleRelation>(member.getOrdering().order(relation.getDBIDs()).iter(), member.getScores());
        }
        // Outer loop: rank position; inner loop: every result at that rank.
        for (int rank = 0; rank < relation.size(); rank++) {
            for (Pair<DBIDIter, DoubleRelation> cursor : cursors) {
                DBIDIter pos = cursor.first;
                if (!pos.valid()) {
                    LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
                    continue;
                }
                double score = cursor.second.doubleValue(pos);
                // Only the first score seen for an object is stored.
                // NOTE(review): this presumes unassigned entries of the store
                // read as NaN - confirm against the data store implementation.
                if (Double.isNaN(scores.doubleValue(pos))) {
                    scores.putDouble(pos, score);
                    minmax.put(score);
                }
                pos.advance();
            }
            LOG.incrementProcessed(combineProgress);
        }
    } else {
        // Cumulative sum: add up all defined scores of each object.
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            double total = 0.0;
            for (OutlierResult member : ensemble) {
                final double s = member.getScores().doubleValue(it);
                if (Double.isNaN(s)) {
                    continue;
                }
                total += s;
            }
            scores.putDouble(it, total);
            minmax.put(total);
            LOG.incrementProcessed(combineProgress);
        }
    }
    LOG.ensureCompleted(combineProgress);

    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    DoubleRelation combined = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, combined);
}
Also used : LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 27 with BasicOutlierScoreMeta

use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.

the class RescaleMetaOutlierAlgorithm method run.

/**
 * Run the wrapped algorithm and rescale the outlier scores it produced.
 * <p>
 * Every score of the inner outlier result is passed through
 * {@code scaling.getScaled}; if the scaling is an
 * {@link OutlierScalingFunction} it is prepared on the inner result first.
 * The inner result is attached to the returned result as a child.
 *
 * @param database Database to run the wrapped algorithm on
 * @return Outlier result holding the scaled scores
 */
@Override
public OutlierResult run(Database database) {
    final Result inner = algorithm.run(database);
    final OutlierResult innerOutlier = getOutlierResult(database.getHierarchy(), inner);
    final DoubleRelation rawScores = innerOutlier.getScores();
    if (scaling instanceof OutlierScalingFunction) {
        // Outlier-specific scalings get to inspect the unscaled result first.
        OutlierScalingFunction adaptive = (OutlierScalingFunction) scaling;
        adaptive.prepare(innerOutlier);
    }

    final WritableDoubleDataStore scaled = DataStoreUtil.makeDoubleStorage(rawScores.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    final DoubleMinMax observed = new DoubleMinMax();
    DBIDIter it = rawScores.iterDBIDs();
    while (it.valid()) {
        final double value = scaling.getScaled(rawScores.doubleValue(it));
        scaled.putDouble(it, value);
        observed.put(value);
        it.advance();
    }

    // Observed range of the scaled values plus the bounds reported by the scaling.
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(observed.getMin(), observed.getMax(), scaling.getMin(), scaling.getMax());
    final DoubleRelation scaledRelation = new MaterializedDoubleRelation("Scaled Outlier", "scaled-outlier", scaled, rawScores.getDBIDs());
    final OutlierResult result = new OutlierResult(meta, scaledRelation);
    result.addChildResult(inner);
    return result;
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Result(de.lmu.ifi.dbs.elki.result.Result) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 28 with BasicOutlierScoreMeta

use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.

the class TrimmedMeanApproach method run.

/**
 * Run the algorithm.
 * <p>
 * For every object, the trimmed mean of the attribute values found in its
 * neighborhood is computed, and the deviation of the object's own value from
 * that trimmed mean is kept as its error. The absolute errors are then
 * divided by the median absolute deviation of the errors from their median
 * (times a constant) to obtain the outlier scores.
 * <p>
 * If both the error of an object and the median absolute deviation are zero,
 * the score is defined as 0 instead of the NaN that {@code 0.0 / 0.0} would
 * yield.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data Relation (1 dimensional!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        int num = 0;
        double[] values = new double[neighbors.size()];
        // Collect the attribute values of the neighborhood.
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            values[num] = relation.get(iter).doubleValue(0);
            num++;
        }
        // calculate local trimmed Mean and error term
        final double tm;
        if (num > 0) {
            // Index range of the sorted values that survives the trimming.
            int left = (int) Math.floor(p * (num - 1));
            int right = (int) Math.floor((1 - p) * (num - 1));
            Arrays.sort(values, 0, num);
            Mean mean = new Mean();
            for (int i = left; i <= right; i++) {
                mean.put(values[i]);
            }
            tm = mean.getMean();
        } else {
            // Empty neighborhood: use the object's own value, i.e. error 0.
            tm = relation.get(iditer).doubleValue(0);
        }
        // Error: deviation from trimmed mean
        errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isVerbose()) {
        LOG.verbose("Computing median error.");
    }
    double median_dev_from_median;
    {
        // calculate the median error
        double[] ei = new double[relation.size()];
        {
            int i = 0;
            for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                ei[i] = errors.doubleValue(iditer);
                i++;
            }
        }
        double median_i = QuickSelect.median(ei);
        // Update to deviation from median
        for (int i = 0; i < ei.length; i++) {
            ei[i] = Math.abs(ei[i] - median_i);
        }
        // Again, extract median
        median_dev_from_median = QuickSelect.median(ei);
    }
    if (LOG.isVerbose()) {
        LOG.verbose("Normalizing scores.");
    }
    // calculate score
    // NOTE(review): 0.6745 is presumably the normal-consistency constant for
    // the median absolute deviation - confirm against the reference paper.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final double absError = Math.abs(errors.doubleValue(iditer));
        final double score;
        if (absError == 0.0 && median_dev_from_median == 0.0) {
            // 0.0 / 0.0 would be NaN; an object that does not deviate at all
            // gets the minimum score instead.
            score = 0.0;
        } else {
            score = absError * 0.6745 / median_dev_from_median;
        }
        scores.putDouble(iditer, score);
        minmax.put(score);
    }
    // Wrap the scores into the result, with the neighborhood attached as child.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 29 with BasicOutlierScoreMeta

use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.

the class SLOM method run.

/**
 * Run the SLOM computation.
 * <p>
 * A first pass stores, for every object, a modified distance ("D-Tilde"): the
 * average non-spatial distance to its neighbors (the object itself excluded)
 * with the largest of these distances removed. A second pass derives the SLOM
 * score from the modified distances of the object and its neighbors.
 *
 * @param database Database to process
 * @param spatial Spatial Relation to use.
 * @param relation Relation to use.
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
    DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
    WritableDoubleDataStore dTilde = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);

    // Pass 1: D-Tilde for every object.
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
        final DBIDs neighbors = npred.getNeighborDBIDs(obj);
        double total = 0;
        double farthest = 0;
        int others = 0;
        for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
            if (!DBIDUtil.equal(obj, nb)) {
                final double d = distFunc.distance(obj, nb);
                total += d;
                others++;
                farthest = Math.max(farthest, d);
            }
        }
        // With fewer than two other neighbors the trimmed average is
        // undefined; fall back to the plain maximum (0 without neighbors).
        final double trimmed = others > 1 ? (total - farthest) / (others - 1) : farthest;
        dTilde.putDouble(obj, trimmed);
    }

    // Pass 2: the actual SLOM values.
    DoubleMinMax slomminmax = new DoubleMinMax();
    WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
        final DBIDs neighbors = npred.getNeighborDBIDs(obj);
        double total = 0;
        int others = 0;
        for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
            if (!DBIDUtil.equal(obj, nb)) {
                total += dTilde.doubleValue(nb);
                others++;
            }
        }
        final double slom;
        if (others == 0) {
            // Nothing to compare against - no score.
            slom = 0.0;
        } else {
            final double own = dTilde.doubleValue(obj);
            // Averages with and without the object itself.
            final double avgPlus = (total + own) / (others + 1);
            final double avg = total / others;
            // Count values above avgPlus minus values below it.
            double beta = 0;
            for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
                final double d = dTilde.doubleValue(nb);
                beta += d > avgPlus ? 1 : (d < avgPlus ? -1 : 0);
            }
            // The object itself takes part exactly once: add it here only if
            // the neighbor loop above did not already visit it.
            if (!neighbors.contains(obj)) {
                beta += own > avgPlus ? 1 : (own < avgPlus ? -1 : 0);
            }
            beta = Math.abs(beta);
            // "others" is the size of N(x), not N+(x). For a single neighbor
            // the division would be by zero, so beta is fixed to 1 instead.
            beta = others > 1 ? Math.max(beta, 1.0) / (others - 1) : 1.0;
            slom = beta / (1 + avg) * own;
        }
        sloms.putDouble(obj, slom);
        slomminmax.put(slom);
    }

    DoubleRelation scoreResult = new MaterializedDoubleRelation("SLOM", "slom-outlier", sloms, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 30 with BasicOutlierScoreMeta

use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.

the class TrivialAverageCoordinateOutlier method run.

/**
 * Score every object by the mean of its coordinates.
 * <p>
 * For each vector of the relation, all coordinate values are averaged and the
 * average is stored as the object's score; the observed minimum and maximum
 * are recorded in the score meta data.
 *
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Relation<? extends NumberVector> relation) {
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
    final DoubleMinMax range = new DoubleMinMax();
    // A single accumulator, reset before each object.
    final Mean accumulator = new Mean();
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        accumulator.reset();
        final NumberVector vec = relation.get(it);
        for (int d = 0; d < vec.getDimensionality(); d++) {
            accumulator.put(vec.doubleValue(d));
        }
        final double average = accumulator.getMean();
        scores.putDouble(it, average);
        range.put(average);
        it.advance();
    }
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
    final DoubleRelation result = new MaterializedDoubleRelation("Trivial mean score", "mean-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, result);
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)35 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)35 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)35 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)35 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)34 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)12 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)9 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)8 Mean (de.lmu.ifi.dbs.elki.math.Mean)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)4 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)4 CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)4 DoubleMinMaxProcessor (de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor)4 KNNProcessor (de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor)4 WriteDoubleDataStoreProcessor (de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor)4