Search in sources :

Example 16 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class CTLuMedianAlgorithm method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance mv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        final double median;
        {
            double[] fi = new double[neighbors.size()];
            // calculate and store Median of neighborhood
            int c = 0;
            for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
                if (DBIDUtil.equal(iditer, iter)) {
                    continue;
                }
                fi[c] = relation.get(iter).doubleValue(0);
                c++;
            }
            if (c > 0) {
                median = QuickSelect.median(fi, 0, c);
            } else {
                median = relation.get(iditer).doubleValue(0);
            }
        }
        double h = relation.get(iditer).doubleValue(0) - median;
        scores.putDouble(iditer, h);
        mv.put(h);
    }
    // Normalize scores
    final double mean = mv.getMean();
    final double stddev = mv.getNaiveStddev();
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
        minmax.put(score);
        scores.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 17 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class DWOF method run.

/**
 * Performs the Generalized DWOF_SCORE algorithm on the given database by
 * calling all the other methods in the proper order.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return new OutlierResult instance
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DBIDs ids = relation.getDBIDs();
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    // Get k nearest neighbor and range query on the relation.
    KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
    RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
    StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
    // DWOF output score storage.
    WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
    if (stepProg != null) {
        stepProg.beginStep(1, "Initializing objects' Radii", LOG);
    }
    WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
    // Find an initial radius for each object:
    initializeRadii(ids, knnq, distFunc, radii);
    WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
    WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
    int countUnmerged = relation.size();
    if (stepProg != null) {
        stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
    }
    IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
    while (countUnmerged > 0) {
        LOG.incrementProcessed(clusEvalProgress);
        // Increase radii
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            radii.putDouble(iter, radii.doubleValue(iter) * delta);
        }
        // stores the clustering label for each object
        WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
        // Cluster objects based on the current radius
        clusterData(ids, rnnQuery, radii, labels);
        // simple reference swap
        WritableIntegerDataStore temp = newSizes;
        newSizes = oldSizes;
        oldSizes = temp;
        // Update the cluster size count for each object.
        countUnmerged = updateSizes(ids, labels, newSizes);
        labels.destroy();
        // Update DWOF scores.
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            double newScore = (newSizes.intValue(iter) > 0) ? ((double) (oldSizes.intValue(iter) - 1) / (double) newSizes.intValue(iter)) : 0.0;
            dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
        }
    }
    LOG.setCompleted(clusEvalProgress);
    LOG.setCompleted(stepProg);
    // Build result representation.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        minmax.put(dwofs.doubleValue(iter));
    }
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
    return new OutlierResult(meta, rel);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 18 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class GaussianUniformMixture method run.

/**
 * Run the algorithm
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    // Use an array list of object IDs for fast random access by an offset
    ArrayDBIDs objids = DBIDUtil.ensureArray(relation.getDBIDs());
    // A bit set to flag objects as anomalous, none at the beginning
    long[] bits = BitsUtil.zero(objids.size());
    // Positive masked collection
    DBIDs normalObjs = new MaskedDBIDs(objids, bits, true);
    // Positive masked collection
    DBIDs anomalousObjs = new MaskedDBIDs(objids, bits, false);
    // resulting scores
    WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // compute loglikelihood
    double logLike = relation.size() * logml + loglikelihoodNormal(normalObjs, relation);
    // LOG.debugFine("normalsize " + normalObjs.size() + " anormalsize " +
    // anomalousObjs.size() + " all " + (anomalousObjs.size() +
    // normalObjs.size()));
    // LOG.debugFine(logLike + " loglike beginning" +
    // loglikelihoodNormal(normalObjs, database));
    DoubleMinMax minmax = new DoubleMinMax();
    DBIDIter iter = objids.iter();
    for (int i = 0; i < objids.size(); i++, iter.advance()) {
        // LOG.debugFine("i " + i);
        // Change mask to make the current object anomalous
        BitsUtil.setI(bits, i);
        // Compute new likelihoods
        double currentLogLike = normalObjs.size() * logml + loglikelihoodNormal(normalObjs, relation) + anomalousObjs.size() * logl + loglikelihoodAnomalous(anomalousObjs);
        // if the loglike increases more than a threshold, object stays in
        // anomalous set and is flagged as outlier
        final double loglikeGain = currentLogLike - logLike;
        oscores.putDouble(iter, loglikeGain);
        minmax.put(loglikeGain);
        if (loglikeGain > c) {
            // flag as outlier
            // LOG.debugFine("Outlier: " + curid + " " + (currentLogLike -
            // logLike));
            // Update best logLike
            logLike = currentLogLike;
        } else {
            // LOG.debugFine("Inlier: " + curid + " " + (currentLogLike - logLike));
            // undo bit set
            BitsUtil.clearI(bits, i);
        }
    }
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
    DoubleRelation res = new MaterializedDoubleRelation("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", oscores, relation.getDBIDs());
    return new OutlierResult(meta, res);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) MaskedDBIDs(de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaskedDBIDs(de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs)

Example 19 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class ReferenceBasedOutlierDetection method run.

/**
 * Run the algorithm on the given relation.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
    @SuppressWarnings("unchecked") PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
    Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
    if (refPoints.isEmpty()) {
        throw new AbortException("Cannot compute ROS without reference points!");
    }
    DBIDs ids = relation.getDBIDs();
    if (k >= ids.size()) {
        throw new AbortException("k must not be chosen larger than the database size!");
    }
    // storage of distance/score values.
    WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
    // Compute density estimation:
    for (NumberVector refPoint : refPoints) {
        DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
        updateDensities(rbod_score, referenceDists);
    }
    // compute maximum density
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        mm.put(rbod_score.doubleValue(iditer));
    }
    // compute ROS
    double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
    // Reuse
    mm.reset();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = 1 - (rbod_score.doubleValue(iditer) * scale);
        mm.put(score);
        rbod_score.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // adds reference points to the result. header information for the
    // visualizer to find the reference points in the result
    result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) PrimitiveDistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 20 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class ParallelKNNOutlier method run.

public OutlierResult run(Database database, Relation<O> relation) {
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
    // Compute the kNN
    KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
    SharedObject<KNNList> knnv = new SharedObject<>();
    knnm.connectKNNOutput(knnv);
    // Extract the k-distance
    KDistanceProcessor kdistm = new KDistanceProcessor(k + 1);
    SharedDouble kdistv = new SharedDouble();
    kdistm.connectKNNInput(knnv);
    kdistm.connectOutput(kdistv);
    // Store in outlier scores
    WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store);
    storem.connectInput(kdistv);
    // Gather statistics
    DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
    mmm.connectInput(kdistv);
    ParallelExecutor.run(ids, knnm, kdistm, storem, mmm);
    DoubleMinMax minmax = mmm.getMinMax();
    DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", store, ids);
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KDistanceProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor) SharedDouble(de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) WriteDoubleDataStoreProcessor(de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) SharedObject(de.lmu.ifi.dbs.elki.parallel.variables.SharedObject) DoubleMinMaxProcessor(de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14