Search in sources :

Example 71 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class ComputeKNNOutlierScores method runForEachK.

/**
 * Iterate over the k range.
 *
 * @param prefix Prefix string
 * @param startk Start k
 * @param stepk Step k
 * @param maxk Max k
 * @param runner Runner to run
 * @param out Output function
 */
private void runForEachK(String prefix, int startk, int stepk, int maxk, IntFunction<OutlierResult> runner, BiConsumer<String, OutlierResult> out) {
    if (isDisabled(prefix)) {
        LOG.verbose("Skipping (disabled): " + prefix);
        // Disabled
        return;
    }
    LOG.verbose("Running " + prefix);
    final int digits = (int) FastMath.ceil(FastMath.log10(maxk + 1));
    final String format = "%s-%0" + digits + "d";
    for (int k = startk; k <= maxk; k += stepk) {
        Duration time = LOG.newDuration(this.getClass().getCanonicalName() + "." + prefix + ".k" + k + ".runtime").begin();
        OutlierResult result = runner.apply(k);
        LOG.statistics(time.end());
        if (result != null) {
            out.accept(String.format(Locale.ROOT, format, prefix, k), result);
            result.getHierarchy().removeSubtree(result);
        }
    }
}
Also used : OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Example 72 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class AutomaticEvaluation method autoEvaluateOutliers.

protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
    Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
    if (LOG.isDebugging()) {
        LOG.debug("Number of new outlier results: " + outliers.size());
    }
    if (!outliers.isEmpty()) {
        Database db = ResultUtil.findDatabase(hier);
        ensureClusteringResult(db, db);
        Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class);
        if (clusterings.isEmpty()) {
            LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
            return;
        }
        Clustering<?> basec = clusterings.iterator().next();
        // Find minority class label
        int min = Integer.MAX_VALUE;
        int total = 0;
        String label = null;
        if (basec.getAllClusters().size() > 1) {
            for (Cluster<?> c : basec.getAllClusters()) {
                final int csize = c.getIDs().size();
                total += csize;
                if (csize < min) {
                    min = csize;
                    label = c.getName();
                }
            }
        }
        if (label == null) {
            LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
            return;
        }
        if (min == 1) {
            LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
        }
        if (min > 0.05 * total) {
            LOG.warning("The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!");
        }
        LOG.verbose("Evaluating using minority class: " + label);
        Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
        // Evaluate rankings.
        new OutlierRankingEvaluation(pat).processNewResult(hier, newResult);
        // Compute ROC curve
        new OutlierROCCurve(pat).processNewResult(hier, newResult);
        // Compute Precision at k
        new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult);
        // Compute ROC curve
        new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult);
        // Compute outlier histogram
        new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false).processNewResult(hier, newResult);
    }
}
Also used : Pattern(java.util.regex.Pattern) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) EvaluateClustering(de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ByLabelClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) Database(de.lmu.ifi.dbs.elki.database.Database)

Example 73 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class OPTICSOF method run.

/**
 * Perform OPTICS-based outlier detection.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    DBIDs ids = relation.getDBIDs();
    // FIXME: implicit preprocessor.
    WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
    WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
        double d = minptsNeighbours.getKNNDistance();
        nMinPts.put(iditer, minptsNeighbours);
        coreDistance.putDouble(iditer, d);
        minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
    }
    // Pass 2
    WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        List<Double> core = new ArrayList<>();
        double lrd = 0;
        // TODO: optimize for double distances
        for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double coreDist = coreDistance.doubleValue(neighbor);
            double dist = distQuery.distance(iditer, neighbor);
            double rd = MathUtil.max(coreDist, dist);
            lrd = rd + lrd;
            core.add(rd);
        }
        lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
        reachDistance.put(iditer, core);
        lrds.putDouble(iditer, lrd);
    }
    // Pass 3
    DoubleMinMax ofminmax = new DoubleMinMax();
    WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double of = 0;
        for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double lrd = lrds.doubleValue(iditer);
            double lrdN = lrds.doubleValue(neighbor);
            of = of + lrdN / lrd;
        }
        of = of / minPtsNeighborhoodSize.intValue(iditer);
        ofs.putDouble(iditer, of);
        // update minimum and maximum
        ofminmax.put(of);
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 74 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class CBLOF method run.

/**
 * Runs the CBLOF algorithm on the given database.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return CBLOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("CBLOF", 3) : null;
    DBIDs ids = relation.getDBIDs();
    LOG.beginStep(stepprog, 1, "Computing clustering.");
    Clustering<MeanModel> clustering = clusteringAlgorithm.run(database);
    LOG.beginStep(stepprog, 2, "Computing boundary between large and small clusters.");
    List<? extends Cluster<MeanModel>> clusters = clustering.getAllClusters();
    Collections.sort(clusters, new Comparator<Cluster<MeanModel>>() {

        @Override
        public int compare(Cluster<MeanModel> o1, Cluster<MeanModel> o2) {
            // Sort in descending order by size
            return Integer.compare(o2.size(), o1.size());
        }
    });
    int clusterBoundary = getClusterBoundary(relation, clusters);
    List<? extends Cluster<MeanModel>> largeClusters = clusters.subList(0, clusterBoundary + 1);
    List<? extends Cluster<MeanModel>> smallClusters = clusters.subList(clusterBoundary + 1, clusters.size());
    LOG.beginStep(stepprog, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
    WritableDoubleDataStore cblofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    DoubleMinMax cblofMinMax = new DoubleMinMax();
    computeCBLOFs(relation, distance, cblofs, cblofMinMax, largeClusters, smallClusters);
    LOG.setCompleted(stepprog);
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", "cblof-outlier", cblofs, ids);
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(cblofMinMax.getMin(), cblofMinMax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 75 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class ParallelKNNWeightOutlier method run.

/**
 * Run the parallel kNN weight outlier detector.
 *
 * @param database Database to process
 * @param relation Relation to analyze
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
    // Find kNN
    KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
    SharedObject<KNNList> knnv = new SharedObject<>();
    knnm.connectKNNOutput(knnv);
    // Extract outlier score
    KNNWeightProcessor kdistm = new KNNWeightProcessor(k + 1);
    SharedDouble kdistv = new SharedDouble();
    kdistm.connectKNNInput(knnv);
    kdistm.connectOutput(kdistv);
    // Store in output result
    WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store);
    storem.connectInput(kdistv);
    // And gather statistics for metadata
    DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
    mmm.connectInput(kdistv);
    ParallelExecutor.run(ids, knnm, kdistm, storem, mmm);
    DoubleMinMax minmax = mmm.getMinMax();
    DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", store, ids);
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) SharedDouble(de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) WriteDoubleDataStoreProcessor(de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) SharedObject(de.lmu.ifi.dbs.elki.parallel.variables.SharedObject) DoubleMinMaxProcessor(de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)144 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)72 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)71 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)71 Database (de.lmu.ifi.dbs.elki.database.Database)69 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)62 Test (org.junit.Test)58 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)57 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)45 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)26 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)23 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)22 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)18 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)13 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11