Search in sources :

Example 36 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class KMeansOutlierDetection method run.

/**
 * Run the outlier detection algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceFunction<? super O> df = clusterer.getDistanceFunction();
    DistanceQuery<O> dq = database.getDistanceQuery(relation, df);
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    Clustering<?> c = clusterer.run(database, relation);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    DoubleMinMax mm = new DoubleMinMax();
    @SuppressWarnings("unchecked") NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    for (Cluster<?> cluster : clusters) {
        // FIXME: use a primitive distance function on number vectors instead.
        O mean = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
            double dist = dq.distance(mean, iter);
            scores.put(iter, dist);
            mm.put(dist);
        }
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DataStoreFactory(de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 37 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class DistanceStddevOutlier method run.

/**
 * Run the outlier detection algorithm
 *
 * @param database Database to use
 * @param relation Relation to analyze
 * @return Outlier score result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Get a nearest neighbor query on the relation.
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
    // Output data storage
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    // Track minimum and maximum scores
    DoubleMinMax minmax = new DoubleMinMax();
    // Iterate over all objects
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        KNNList neighbors = knnq.getKNNForDBID(iter, k);
        // Aggregate distances
        MeanVariance mv = new MeanVariance();
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // Skip the object itself. The 0 is not very informative.
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            mv.put(neighbor.doubleValue());
        }
        // Store score
        scores.putDouble(iter, mv.getSampleStddev());
    }
    // Wrap the result in the standard containers
    // Actual min-max, theoretical min-max!
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
    return new OutlierResult(meta, rel);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 38 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class AggarwalYuEvolutionary method run.

/**
 * Performs the evolutionary algorithm on the given database.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Database database, Relation<V> relation) {
    final int dbsize = relation.size();
    ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
    Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getSingleThreadedRandom())).run();
    WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    for (; individuums.valid(); individuums.advance()) {
        DBIDs ids = computeSubspaceForGene(individuums.get().getGene(), ranges);
        double sparsityC = sparsity(ids.size(), dbsize, k, phi);
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            double prev = outlierScore.doubleValue(iter);
            if (Double.isNaN(prev) || sparsityC < prev) {
                outlierScore.putDouble(iter, sparsityC);
            }
        }
    }
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double val = outlierScore.doubleValue(iditer);
        if (Double.isNaN(val)) {
            outlierScore.putDouble(iditer, 0.0);
            val = 0.0;
        }
        minmax.put(val);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", outlierScore, relation.getDBIDs());
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 39 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class OUTRES method run.

/**
 * Main loop for OUTRES
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Relation<V> relation) {
    WritableDoubleDataStore ranks = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    KernelDensityEstimator kernel = new KernelDensityEstimator(relation);
    long[] subspace = BitsUtil.zero(kernel.dim);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        BitsUtil.zeroI(subspace);
        double score = outresScore(0, subspace, iditer, kernel);
        ranks.putDouble(iditer, score);
        minmax.put(score);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
    OutlierResult outresResult = new OutlierResult(meta, new MaterializedDoubleRelation("OUTRES", "outres-score", ranks, relation.getDBIDs()));
    return outresResult;
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 40 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class SOD method run.

/**
 * Performs the SOD algorithm on the given database.
 *
 * @param relation Data relation to process
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    SimilarityQuery<V> snnInstance = similarityFunction.instantiate(relation);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
    final WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDataStore<SODModel> sod_models = null;
    if (models) {
        // Models requested
        sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
    }
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        LOG.incrementProcessed(progress);
        DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, iter);
        double[] center;
        long[] weightVector;
        double sod;
        if (neighborhood.size() > 0) {
            center = Centroid.make(relation, neighborhood).getArrayRef();
            // Note: per-dimension variances; no covariances.
            double[] variances = computePerDimensionVariances(relation, center, neighborhood);
            double expectationOfVariance = Mean.of(variances);
            weightVector = BitsUtil.zero(variances.length);
            for (int d = 0; d < variances.length; d++) {
                if (variances[d] < alpha * expectationOfVariance) {
                    BitsUtil.setI(weightVector, d);
                }
            }
            sod = subspaceOutlierDegree(relation.get(iter), center, weightVector);
        } else {
            center = relation.get(iter).toArray();
            weightVector = null;
            sod = 0.;
        }
        if (sod_models != null) {
            sod_models.put(iter, new SODModel(center, weightVector));
        }
        sod_scores.putDouble(iter, sod);
        minmax.put(sod);
    }
    LOG.ensureCompleted(progress);
    // combine results.
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    OutlierResult sodResult = new OutlierResult(meta, new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", sod_scores, relation.getDBIDs()));
    if (sod_models != null) {
        Relation<SODModel> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), sod_models, relation.getDBIDs());
        sodResult.addChildResult(models);
    }
    return sodResult;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)89 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)65 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)62 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)62 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)62 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)62 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)54 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)15 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)15 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)11 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)7 Mean (de.lmu.ifi.dbs.elki.math.Mean)6 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)6