Search in sources :

Example 96 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

The class SimpleOutlierEnsemble, method run.

/**
 * Run every inner algorithm on the database and merge the outlier scores
 * they produce into one ensemble score per object.
 *
 * All outlier results found in the output of each inner algorithm are
 * collected, and the DBIDs of their score relations are unioned. For each
 * DBID, the non-NaN scores of all collected results are handed to
 * {@code voting.combine}; a NaN score is skipped with a warning, and a DBID
 * without any usable score gets no entry written (also with a warning).
 *
 * @param database Database to run the inner algorithms on
 * @return Ensemble outlier result over the union of all scored DBIDs
 * @throws IllegalStateException declared by the signature; not thrown
 *         directly by the code in this method
 */
@Override
public OutlierResult run(Database database) throws IllegalStateException {
    int num = algorithms.size();
    // Run inner outlier algorithms
    ModifiableDBIDs ids = DBIDUtil.newHashSet();
    ArrayList<OutlierResult> results = new ArrayList<>(num);
    {
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
        for (Algorithm alg : algorithms) {
            Result res = alg.run(database);
            // One algorithm may contribute any number of outlier results.
            List<OutlierResult> ors = OutlierResult.getOutlierResults(res);
            for (OutlierResult or : ors) {
                results.add(or);
                ids.addDBIDs(or.getScores().getDBIDs());
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    // Combine
    // The score buffer must hold one slot per collected result, not per
    // algorithm: an algorithm that yields several outlier results would
    // otherwise overflow a buffer sized by the number of algorithms.
    final int numResults = results.size();
    WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    {
        FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            // Fresh array per object: it is handed over to voting.combine.
            double[] scores = new double[numResults];
            int i = 0;
            for (OutlierResult r : results) {
                double score = r.getScores().doubleValue(id);
                if (!Double.isNaN(score)) {
                    scores[i] = score;
                    i++;
                } else {
                    LOG.warning("DBID " + id + " was not given a score by result " + r);
                }
            }
            if (i > 0) {
                // Shrink array if necessary, so only real scores are combined.
                if (i < scores.length) {
                    scores = Arrays.copyOf(scores, i);
                }
                double combined = voting.combine(scores);
                sumscore.putDouble(id, combined);
                minmax.put(combined);
            } else {
                LOG.warning("DBID " + id + " was not given any score at all.");
            }
            LOG.incrementProcessed(cprog);
        }
        LOG.ensureCompleted(cprog);
    }
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids);
    return new OutlierResult(meta, scores);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) Algorithm(de.lmu.ifi.dbs.elki.algorithm.Algorithm) OutlierAlgorithm(de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm) AbstractAlgorithm(de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) Result(de.lmu.ifi.dbs.elki.result.Result) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 97 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

The class SimplifiedLOF, method computeSimplifiedLOFs.

/**
 * Derive the simplified LOF score of every object from the given densities.
 *
 * For an object with a finite density, the score is the sum of the densities
 * of its kNN (excluding the object itself) divided by the object's own
 * density times the number of neighbors visited. Summation stops right after
 * the first neighbor with an infinite density. An object whose own density
 * is infinite receives the score 1.
 *
 * @param ids IDs to compute for
 * @param knnq kNN query class
 * @param slrds Object densities
 * @param lofs SLOF output storage
 * @param lofminmax Minimum and maximum scores
 */
private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
    FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores", ids.size(), LOG) : null;
    DBIDIter iter = ids.iter();
    while (iter.valid()) {
        final double ownDensity = slrds.doubleValue(iter);
        // The neighborhood is queried for every object, finite density or not.
        final KNNList knn = knnq.getKNNForDBID(iter, k);
        // Default score for objects with infinite own density.
        double score = 1.0;
        if (!Double.isInfinite(ownDensity)) {
            double densitySum = 0.;
            int visited = 0;
            for (DBIDIter nb = knn.iter(); nb.valid(); nb.advance()) {
                // Only neighbors other than the query point contribute.
                if (!DBIDUtil.equal(nb, iter)) {
                    final double nbDensity = slrds.doubleValue(nb);
                    densitySum += nbDensity;
                    visited++;
                    // The sum is infinite from here on; no need to continue.
                    if (Double.isInfinite(nbDensity)) {
                        break;
                    }
                }
            }
            score = densitySum / (ownDensity * visited);
        }
        lofs.putDouble(iter, score);
        // Track the observed score range.
        lofminmax.put(score);
        LOG.incrementProcessed(progressLOFs);
        iter.advance();
    }
    LOG.ensureCompleted(progressLOFs);
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 98 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

The class FeatureBagging, method run.

/**
 * Run the algorithm on a data set.
 *
 * LOF is executed {@code num} times, each time with a Euclidean distance
 * restricted to a freshly drawn random subspace. The individual results are
 * then merged either breadth-first (walking all result orderings in lockstep
 * and keeping the first score seen per object) or by summing the non-NaN
 * scores of each object.
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
    final int dbdim = RelationUtil.dimensionality(relation);
    // Bounds handed to randomSubspace: half the dimensionality and one below it.
    final int mindim = dbdim >> 1;
    final int maxdim = dbdim - 1;
    final Random rand = rnd.getSingleThreadedRandom();
    ArrayList<OutlierResult> results = new ArrayList<>(num);

    // Phase 1: one LOF run per random subspace.
    FiniteProgress lofprog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
    int round = 0;
    while (round < num) {
        long[] subspace = randomSubspace(dbdim, mindim, maxdim, rand);
        SubspaceEuclideanDistanceFunction subspaceDistance = new SubspaceEuclideanDistanceFunction(subspace);
        LOF<NumberVector> detector = new LOF<>(k, subspaceDistance);
        results.add(detector.run(database, relation));
        LOG.incrementProcessed(lofprog);
        round++;
    }
    LOG.ensureCompleted(lofprog);

    // Phase 2: merge the individual results into one score per object.
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    if (breadth) {
        // Pair each result's ordered DBID iterator with its score relation.
        // The iterators are all created up front so that they can be
        // advanced "in parallel" below.
        @SuppressWarnings("unchecked") Pair<DBIDIter, DoubleRelation>[] cursors = (Pair<DBIDIter, DoubleRelation>[]) new Pair[results.size()];
        for (int idx = 0; idx < cursors.length; idx++) {
            OutlierResult res = results.get(idx);
            cursors[idx] = new Pair<DBIDIter, DoubleRelation>(res.getOrdering().order(relation.getDBIDs()).iter(), res.getScores());
        }
        // One pass per rank position; progress counts these passes only,
        // not the iterator setup above.
        for (int rank = 0; rank < relation.size(); rank++) {
            // Visit the current position of every result before moving on.
            for (Pair<DBIDIter, DoubleRelation> cursor : cursors) {
                DBIDIter pos = cursor.first;
                if (!pos.valid()) {
                    LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
                    continue;
                }
                double score = cursor.second.doubleValue(pos);
                // Keep only the first score encountered for an object.
                // NOTE(review): relies on untouched entries of the store
                // reading as NaN - confirm against the data store default.
                if (Double.isNaN(scores.doubleValue(pos))) {
                    scores.putDouble(pos, score);
                    minmax.put(score);
                }
                pos.advance();
            }
            LOG.incrementProcessed(cprog);
        }
    } else {
        // Cumulative sum: add up every non-NaN score of each object.
        DBIDIter obj = relation.iterDBIDs();
        while (obj.valid()) {
            double total = 0.0;
            for (OutlierResult res : results) {
                final double s = res.getScores().doubleValue(obj);
                if (Double.isNaN(s)) {
                    continue;
                }
                total += s;
            }
            scores.putDouble(obj, total);
            minmax.put(total);
            LOG.incrementProcessed(cprog);
            obj.advance();
        }
    }
    LOG.ensureCompleted(cprog);

    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, scoreres);
}
Also used : LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 99 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

The class SimpleKernelDensityLOF, method run.

/**
 * Run the naive kernel density LOF algorithm.
 *
 * Works in three reported steps: obtain a precomputed kNN query, estimate a
 * kernel density for every object from its neighbors, and finally relate
 * each object's density to the densities of its neighbors to get the score.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();

    // Step 1: neighborhoods.
    LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

    // Step 2: per-object density estimates.
    LOG.beginStep(stepprog, 2, "Computing densities.");
    WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
    DBIDIter cur = ids.iter();
    while (cur.valid()) {
        final KNNList knn = knnq.getKNNForDBID(cur, k);
        int contributors = 0;
        double acc = 0.0;
        DoubleDBIDListIter nb = knn.iter();
        while (nb.valid()) {
            // The query point itself does not contribute.
            if (!DBIDUtil.equal(nb, cur)) {
                // Bandwidth: the kNN distance of the neighbor.
                double bandwidth = knnq.getKNNForDBID(nb, k).getKNNDistance();
                if (bandwidth == 0) {
                    // NOTE(review): if this happens before any neighbor was
                    // counted, contributors is still 0 and the density
                    // stored below is 0, not infinity - confirm intended.
                    acc = Double.POSITIVE_INFINITY;
                    break;
                }
                final double scaled = nb.doubleValue() / bandwidth;
                acc += kernel.density(scaled) / MathUtil.powi(bandwidth, dim);
                contributors++;
            }
            nb.advance();
        }
        double density = 0.;
        if (contributors > 0) {
            density = acc / contributors;
        }
        dens.putDouble(cur, density);
        LOG.incrementProcessed(densProgress);
        cur.advance();
    }
    LOG.ensureCompleted(densProgress);

    // Step 3: score each object against the densities of its neighbors.
    LOG.beginStep(stepprog, 3, "Computing KLOFs.");
    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    // Observed score range, needed for the result meta data.
    DoubleMinMax lofminmax = new DoubleMinMax();
    FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
    DBIDIter obj = ids.iter();
    while (obj.valid()) {
        final double ownDensity = dens.doubleValue(obj);
        // Objects without positive density get the score 1.
        double score = 1.0;
        if (ownDensity > 0) {
            final KNNList knn = knnq.getKNNForDBID(obj, k);
            double densitySum = 0.0;
            int visited = 0;
            for (DBIDIter nb = knn.iter(); nb.valid(); nb.advance()) {
                // Only neighbors other than the object itself are summed.
                if (!DBIDUtil.equal(nb, obj)) {
                    densitySum += dens.doubleValue(nb);
                    visited++;
                }
            }
            if (ownDensity == Double.POSITIVE_INFINITY) {
                // Infinite own density: 1 if the neighbors sum to infinity
                // as well, 0 otherwise.
                if (densitySum == Double.POSITIVE_INFINITY) {
                    score = 1;
                } else {
                    score = 0.;
                }
            } else {
                score = densitySum / (visited * ownDensity);
            }
        }
        lofs.putDouble(obj, score);
        lofminmax.put(score);
        LOG.incrementProcessed(progressLOFs);
        obj.advance();
    }
    LOG.ensureCompleted(progressLOFs);
    LOG.setCompleted(stepprog);

    // Wrap scores and meta data into the result object.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 100 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

The class VarianceOfVolume, method computeVolumes.

/**
 * Compute volumes
 *
 * For every object, the kNN distance is multiplied by a constant that
 * depends only on the dimensionality, and the product is raised to the
 * power {@code dim}. A kNN distance that is not positive yields a volume
 * of 0. A warning is logged at most once if a volume becomes infinite.
 *
 * @param knnq KNN query
 * @param dim Data dimensionality
 * @param ids IDs to process
 * @param vols Volume storage
 */
private void computeVolumes(KNNQuery<O> knnq, int dim, DBIDs ids, WritableDoubleDataStore vols) {
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Volume", ids.size(), LOG) : null;
    // Dimension-dependent factor, identical for all objects.
    final double gammaTerm = GammaDistribution.gamma(1 + dim * .5);
    final double scale = MathUtil.SQRTPI * FastMath.pow(gammaTerm, -1. / dim);
    boolean overflowReported = false;
    DBIDIter iter = ids.iter();
    while (iter.valid()) {
        final double kdist = knnq.getKNNForDBID(iter, k).getKNNDistance();
        double volume = 0.;
        if (kdist > 0) {
            volume = MathUtil.powi(kdist * scale, dim);
        }
        // Report a double overflow only once per call.
        if (!overflowReported && volume == Double.POSITIVE_INFINITY) {
            LOG.warning("Variance of Volumes has hit double precision limits, results are not reliable.");
            overflowReported = true;
        }
        vols.putDouble(iter, volume);
        LOG.incrementProcessed(prog);
        iter.advance();
    }
    LOG.ensureCompleted(prog);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12