Search in sources :

Example 21 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class ReferenceBasedOutlierDetection method run.

/**
 * Run the algorithm on the given relation.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
    @SuppressWarnings("unchecked") PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
    Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
    if (refPoints.isEmpty()) {
        throw new AbortException("Cannot compute ROS without reference points!");
    }
    DBIDs ids = relation.getDBIDs();
    if (k >= ids.size()) {
        throw new AbortException("k must not be chosen larger than the database size!");
    }
    // storage of distance/score values.
    WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
    // Compute density estimation:
    for (NumberVector refPoint : refPoints) {
        DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
        updateDensities(rbod_score, referenceDists);
    }
    // compute maximum density
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        mm.put(rbod_score.doubleValue(iditer));
    }
    // compute ROS
    double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
    // Reuse
    mm.reset();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = 1 - (rbod_score.doubleValue(iditer) * scale);
        mm.put(score);
        rbod_score.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // adds reference points to the result. header information for the
    // visualizer to find the reference points in the result
    result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) PrimitiveDistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 22 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class ParallelKNNOutlier method run.

public OutlierResult run(Database database, Relation<O> relation) {
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
    // Compute the kNN
    KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
    SharedObject<KNNList> knnv = new SharedObject<>();
    knnm.connectKNNOutput(knnv);
    // Extract the k-distance
    KDistanceProcessor kdistm = new KDistanceProcessor(k + 1);
    SharedDouble kdistv = new SharedDouble();
    kdistm.connectKNNInput(knnv);
    kdistm.connectOutput(kdistv);
    // Store in outlier scores
    WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store);
    storem.connectInput(kdistv);
    // Gather statistics
    DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
    mmm.connectInput(kdistv);
    ParallelExecutor.run(ids, knnm, kdistm, storem, mmm);
    DoubleMinMax minmax = mmm.getMinMax();
    DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", store, ids);
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KDistanceProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor) SharedDouble(de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) WriteDoubleDataStoreProcessor(de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) SharedObject(de.lmu.ifi.dbs.elki.parallel.variables.SharedObject) DoubleMinMaxProcessor(de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 23 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class SimplifiedLOF method run.

/**
 * Run the Simple LOF algorithm.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null;
    DBIDs ids = relation.getDBIDs();
    LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
    // Compute LRDs
    LOG.beginStep(stepprog, 2, "Computing densities.");
    WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    computeSimplifiedLRDs(ids, knnq, dens);
    // compute LOF_SCORE of each db object
    LOG.beginStep(stepprog, 3, "Computing SLOFs.");
    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax lofminmax = new DoubleMinMax();
    computeSimplifiedLOFs(ids, knnq, dens, lofs, lofminmax);
    LOG.setCompleted(stepprog);
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids);
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)

Example 24 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class VarianceOfVolume method run.

/**
 * Runs the VOV algorithm on the given database.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return VOV outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("VOV", 3) : null;
    DBIDs ids = relation.getDBIDs();
    int dim = RelationUtil.dimensionality(relation);
    LOG.beginStep(stepprog, 1, "Materializing nearest-neighbor sets.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
    // Compute Volumes
    LOG.beginStep(stepprog, 2, "Computing Volumes.");
    WritableDoubleDataStore vols = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    computeVolumes(knnq, dim, ids, vols);
    // compute VOV of each object
    LOG.beginStep(stepprog, 3, "Computing Variance of Volumes (VOV).");
    WritableDoubleDataStore vovs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    // track the maximum value for normalization.
    DoubleMinMax vovminmax = new DoubleMinMax();
    computeVOVs(knnq, ids, vols, vovs, vovminmax);
    LOG.setCompleted(stepprog);
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Variance of Volume", "vov-outlier", vovs, ids);
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(vovminmax.getMin(), vovminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)

Example 25 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class CTLuScatterplotOutlier method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
    // Calculate average of neighborhood for each object and perform a linear
    // regression using the covariance matrix
    CovarianceMatrix covm = new CovarianceMatrix(2);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final double local = relation.get(iditer).doubleValue(0);
        // Compute mean of neighbors
        Mean mean = new Mean();
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(iditer, iter)) {
                continue;
            }
            mean.put(relation.get(iter).doubleValue(0));
        }
        final double m;
        if (mean.getCount() > 0) {
            m = mean.getMean();
        } else {
            // if object id has no neighbors ==> avg = non-spatial attribute of id
            m = local;
        }
        // Store the mean for the score calculation
        means.putDouble(iditer, m);
        covm.put(new double[] { local, m });
    }
    // Finalize covariance matrix, compute linear regression
    final double slope, inter;
    {
        double[] meanv = covm.getMeanVector();
        double[][] fmat = covm.destroyToSampleMatrix();
        final double covxx = fmat[0][0], covxy = fmat[0][1];
        slope = covxy / covxx;
        inter = meanv[1] - slope * meanv[0];
    }
    // calculate mean and variance for error
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance mv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Compute the error from the linear regression
        double y_i = relation.get(iditer).doubleValue(0);
        double e = means.doubleValue(iditer) - (slope * y_i + inter);
        scores.putDouble(iditer, e);
        mv.put(e);
    }
    // Normalize scores
    DoubleMinMax minmax = new DoubleMinMax();
    {
        final double mean = mv.getMean();
        final double variance = mv.getNaiveStddev();
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            double score = Math.abs((scores.doubleValue(iditer) - mean) / variance);
            minmax.put(score);
            scores.putDouble(iditer, score);
        }
    }
    // build representation
    DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)144 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)72 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)71 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)71 Database (de.lmu.ifi.dbs.elki.database.Database)69 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)62 Test (org.junit.Test)58 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)57 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)45 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)26 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)23 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)22 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)18 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)13 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11