Search in sources :

Example 6 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

the class OnlineLOF method run.

/**
 * Runs the LOF computation of the superclass on the given relation and keeps
 * the result up to date afterwards.
 * <p>
 * The kNN and reverse-kNN queries are obtained from
 * {@code getKNNAndRkNNQueries}, the scores are computed by
 * {@code super.doRunInTime}, and a {@link LOFKNNListener} is then registered
 * on the preprocessors behind both kNN queries of the result.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return the outlier result held by the computed LOF result
 */
@Override
public OutlierResult run(Database database, Relation<O> relation) {
    // Step-wise progress is only tracked when verbose logging is enabled.
    final StepProgress progress;
    if (LOG.isVerbose()) {
        progress = new StepProgress("OnlineLOF", 3);
    } else {
        progress = null;
    }

    // Unpack the (reference, reachability) query pairs.
    final Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> allQueries = getKNNAndRkNNQueries(database, relation, progress);
    final Pair<KNNQuery<O>, KNNQuery<O>> knnPair = allQueries.getFirst();
    final Pair<RKNNQuery<O>, RKNNQuery<O>> rknnPair = allQueries.getSecond();
    final KNNQuery<O> referKNN = knnPair.getFirst();
    final KNNQuery<O> reachKNN = knnPair.getSecond();
    final RKNNQuery<O> referRKNN = rknnPair.getFirst();
    final RKNNQuery<O> reachRKNN = rknnPair.getSecond();

    // Compute the scores, then attach the reverse-kNN queries to the result.
    final LOFResult<O> lof = super.doRunInTime(relation.getDBIDs(), referKNN, reachKNN, progress);
    lof.setRkNNRefer(referRKNN);
    lof.setRkNNReach(reachRKNN);

    // One shared listener is registered on both preprocessors; the reference
    // query is handled completely before the reachability query is touched.
    final KNNListener listener = new LOFKNNListener(lof);
    final PreprocessorKNNQuery<O> referQuery = (PreprocessorKNNQuery<O>) lof.getKNNRefer();
    final MaterializeKNNPreprocessor<O> referPreprocessor = (MaterializeKNNPreprocessor<O>) referQuery.getPreprocessor();
    referPreprocessor.addKNNListener(listener);
    final PreprocessorKNNQuery<O> reachQuery = (PreprocessorKNNQuery<O>) lof.getKNNReach();
    final MaterializeKNNPreprocessor<O> reachPreprocessor = (MaterializeKNNPreprocessor<O>) reachQuery.getPreprocessor();
    reachPreprocessor.addKNNListener(listener);

    return lof.getResult();
}
Also used : StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) AbstractMaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) KNNListener(de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNListener) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 7 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

the class DWOF method run.

/**
 * Computes DWOF outlier scores for all objects of the relation.
 * <p>
 * Every object first receives an initial radius. Then, in each round, all
 * radii are multiplied by {@code delta}, the objects are clustered with the
 * current radii, the per-object cluster sizes are refreshed and the score
 * contribution of the round is added to the running DWOF score. Rounds are
 * repeated while the count returned by {@code updateSizes} is positive.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return new OutlierResult instance
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DBIDs ids = relation.getDBIDs();
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // k-nearest-neighbor and range queries over the relation.
    final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_HEAVY_USE);
    final RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery, DatabaseQuery.HINT_HEAVY_USE);

    final StepProgress steps;
    if (LOG.isVerbose()) {
        steps = new StepProgress("DWOF", 2);
    } else {
        steps = null;
    }

    // Accumulated DWOF score per object.
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);

    if (steps != null) {
        steps.beginStep(1, "Initializing objects' Radii", LOG);
    }
    final WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
    // Assign the starting radius of each object.
    initializeRadii(ids, knnQuery, distQuery, radii);

    // Cluster sizes of the previous and the current round.
    WritableIntegerDataStore previousSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
    WritableIntegerDataStore currentSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
    int remaining = relation.size();

    if (steps != null) {
        steps.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
    }
    final IndefiniteProgress rounds;
    if (LOG.isVerbose()) {
        rounds = new IndefiniteProgress("Evaluating DWOFs", LOG);
    } else {
        rounds = null;
    }

    while (remaining > 0) {
        LOG.incrementProcessed(rounds);

        // Grow every radius by the factor delta.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double grown = radii.doubleValue(it) * delta;
            radii.putDouble(it, grown);
        }

        // Per-object cluster label storage, valid for this round only.
        final WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
        // Cluster the objects using the enlarged radii.
        clusterData(ids, rangeQuery, radii, labels);

        // Exchange the two size stores: the sizes of the last round become
        // the "previous" ones, the other store is overwritten below.
        final WritableIntegerDataStore swap = currentSizes;
        currentSizes = previousSizes;
        previousSizes = swap;

        // Refresh the cluster sizes and obtain the new loop condition value.
        remaining = updateSizes(ids, labels, currentSizes);
        labels.destroy();

        // Add this round's contribution to each object's score.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final int sizeNow = currentSizes.intValue(it);
            double gain = 0.0;
            if (sizeNow > 0) {
                gain = (double) (previousSizes.intValue(it) - 1) / (double) sizeNow;
            }
            scores.putDouble(it, scores.doubleValue(it) + gain);
        }
    }
    LOG.setCompleted(rounds);
    LOG.setCompleted(steps);

    // Determine the observed score range for the result meta data.
    final DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        range.put(scores.doubleValue(it));
    }
    final OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY);
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", scores, ids);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 8 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

the class SimplifiedLOF method run.

/**
 * Runs Simplified LOF in three steps: obtain a precomputed kNN query, compute
 * the simplified densities, and compute the SLOF score of each object while
 * tracking the minimum and maximum score.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Step-wise progress is only tracked when verbose logging is enabled.
    final StepProgress steps;
    if (LOG.isVerbose()) {
        steps = new StepProgress("Simplified LOF", 3);
    } else {
        steps = null;
    }
    final DBIDs objects = relation.getDBIDs();

    // Step 1: neighborhoods.
    LOG.beginStep(steps, 1, "Materializing neighborhoods w.r.t. distance function.");
    final KNNQuery<O> neighbors = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

    // Step 2: densities, kept in temporary storage.
    LOG.beginStep(steps, 2, "Computing densities.");
    final WritableDoubleDataStore densities = DataStoreUtil.makeDoubleStorage(objects, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    computeSimplifiedLRDs(objects, neighbors, densities);

    // Step 3: final scores together with their observed range.
    LOG.beginStep(steps, 3, "Computing SLOFs.");
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(objects, DataStoreFactory.HINT_STATIC);
    final DoubleMinMax scoreRange = new DoubleMinMax();
    computeSimplifiedLOFs(objects, neighbors, densities, scores, scoreRange);

    LOG.setCompleted(steps);

    // Wrap scores and meta data into the result object.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", scores, objects);
    final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
    return new OutlierResult(meta, scoreRelation);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)

Example 9 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

the class VarianceOfVolume method run.

/**
 * Runs VOV in three steps: obtain a precomputed kNN query, compute a volume
 * per object, and compute the variance-of-volume score of each object while
 * tracking the minimum and maximum score.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return VOV outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Step-wise progress is only tracked when verbose logging is enabled.
    final StepProgress steps;
    if (LOG.isVerbose()) {
        steps = new StepProgress("VOV", 3);
    } else {
        steps = null;
    }
    final DBIDs objects = relation.getDBIDs();
    final int dimensionality = RelationUtil.dimensionality(relation);

    // Step 1: nearest-neighbor sets.
    LOG.beginStep(steps, 1, "Materializing nearest-neighbor sets.");
    final KNNQuery<O> neighbors = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

    // Step 2: volumes, kept in temporary storage.
    LOG.beginStep(steps, 2, "Computing Volumes.");
    final WritableDoubleDataStore volumes = DataStoreUtil.makeDoubleStorage(objects, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    computeVolumes(neighbors, dimensionality, objects, volumes);

    // Step 3: final scores together with their observed range.
    LOG.beginStep(steps, 3, "Computing Variance of Volumes (VOV).");
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(objects, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    final DoubleMinMax scoreRange = new DoubleMinMax();
    computeVOVs(neighbors, objects, volumes, scores, scoreRange);

    LOG.setCompleted(steps);

    // Wrap scores and meta data into the result object.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Variance of Volume", "vov-outlier", scores, objects);
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreRelation);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)

Example 10 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

the class DistanceStatisticsWithClasses method run.

/**
 * Builds a histogram of pairwise distances in which in-cluster distances and
 * other-cluster distances are counted separately, and reports summary
 * statistics about both groups as result headers.
 * <p>
 * The clusters are taken from {@link ByLabelOrAllInOneClustering}. Each
 * histogram bin holds a {@code long[2]}: index 0 counts in-cluster
 * distances, index 1 counts other-cluster distances. Each row of the result
 * is {@code {bin center, in-cluster only frequency, in-cluster all
 * frequency, other-cluster only frequency, other-cluster all frequency}}.
 * A frequency is reported as 0 when its denominator (the matching sample
 * count) is 0.
 *
 * @param database Database to process
 * @return histogram result with one row per bin and descriptive headers
 */
@Override
public HistogramResult run(Database database) {
    final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
    final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    final StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
    // determine binning ranges.
    DoubleMinMax gminmax = new DoubleMinMax();
    // Cluster by labels
    Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
    // global in-cluster min/max
    DoubleMinMax giminmax = new DoubleMinMax();
    // global other-cluster min/max
    DoubleMinMax gominmax = new DoubleMinMax();
    // in-cluster distances
    MeanVariance mimin = new MeanVariance();
    MeanVariance mimax = new MeanVariance();
    MeanVariance midif = new MeanVariance();
    // other-cluster distances
    MeanVariance momin = new MeanVariance();
    MeanVariance momax = new MeanVariance();
    MeanVariance modif = new MeanVariance();
    // Histogram
    final ObjHistogram<long[]> histogram;
    LOG.beginStep(stepprog, 1, "Prepare histogram.");
    if (exact || sampling) {
        // Both modes use a static histogram over a precomputed distance range;
        // they only differ in how that range is obtained ("exact" wins if both
        // flags are set, as before).
        gminmax = exact ? exactMinMax(relation, distFunc) : sampleMinMax(relation, distFunc);
        histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
    } else {
        // No range known up front: use a dynamic histogram of long[2] bins.
        histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {

            @Override
            protected long[] downsample(Object[] data, int start, int end, int size) {
                // Sum both counters over the non-null entries in [start, end).
                long[] ret = new long[2];
                for (int i = start; i < end; i++) {
                    long[] existing = (long[]) data[i];
                    if (existing != null) {
                        for (int c = 0; c < 2; c++) {
                            ret[c] += existing[c];
                        }
                    }
                }
                return ret;
            }

            @Override
            protected long[] aggregate(long[] first, long[] second) {
                // Adds second into first in place and returns first.
                for (int c = 0; c < 2; c++) {
                    first[c] += second[c];
                }
                return first;
            }

            @Override
            protected long[] cloneForCache(long[] data) {
                return data.clone();
            }

            @Override
            protected long[] makeObject() {
                return new long[2];
            }
        };
    }
    LOG.beginStep(stepprog, 2, "Build histogram.");
    final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
    // iterate per cluster
    // Increment vectors: incFirst bumps the in-cluster counter, incSecond the
    // other-cluster counter.
    final long[] incFirst = new long[] { 1L, 0L };
    final long[] incSecond = new long[] { 0L, 1L };
    for (Cluster<?> c1 : split) {
        for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
            // in-cluster distances
            DoubleMinMax iminmax = new DoubleMinMax();
            for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
                // skip the point itself.
                if (DBIDUtil.equal(id1, iter2)) {
                    continue;
                }
                double d = distFunc.distance(id1, iter2);
                histogram.putData(d, incFirst);
                iminmax.put(d);
            }
            // aggregate
            // NOTE(review): if c1 has a single member, no distance was put into
            // iminmax, yet its min/max/diff are still aggregated below — confirm
            // that the empty-state values of DoubleMinMax are harmless here.
            mimin.put(iminmax.getMin());
            mimax.put(iminmax.getMax());
            midif.put(iminmax.getDiff());
            // min/max
            giminmax.put(iminmax.getMin());
            giminmax.put(iminmax.getMax());
            // other-cluster distances
            DoubleMinMax ominmax = new DoubleMinMax();
            for (Cluster<?> c2 : split) {
                if (c2 == c1) {
                    continue;
                }
                for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
                    // skip the point itself (shouldn't happen though)
                    if (DBIDUtil.equal(id1, iter2)) {
                        continue;
                    }
                    double d = distFunc.distance(id1, iter2);
                    histogram.putData(d, incSecond);
                    ominmax.put(d);
                }
            }
            // aggregate
            // NOTE(review): same concern as above when there is only one cluster
            // (ominmax then never receives a distance).
            momin.put(ominmax.getMin());
            momax.put(ominmax.getMax());
            modif.put(ominmax.getDiff());
            // min/max
            gominmax.put(ominmax.getMin());
            gominmax.put(ominmax.getMax());
            LOG.incrementProcessed(progress);
        }
    }
    LOG.ensureCompleted(progress);
    // Update values (only needed for sampling case).
    // NOTE(review): only the other-cluster range is merged into the global
    // range; the in-cluster range (giminmax) is not — confirm this is intended.
    gminmax.put(gominmax);
    LOG.setCompleted(stepprog);
    // count the number of samples we have in the data
    long inum = 0;
    long onum = 0;
    for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
        inum += iter.getValue()[0];
        onum += iter.getValue()[1];
    }
    long bnum = inum + onum;
    Collection<double[]> binstat = new ArrayList<>(numbin);
    for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
        final long[] value = iter.getValue();
        // "only" frequencies are relative to the samples of the same kind,
        // "all" frequencies are relative to all samples. Every denominator is
        // guarded so that an empty sample set yields 0 instead of NaN (0.0 / 0).
        final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
        final double icaf = (bnum == 0) ? 0 : ((double) value[0]) / bnum / histogram.getBinsize();
        final double ocof = (onum == 0) ? 0 : ((double) value[1]) / onum / histogram.getBinsize();
        final double ocaf = (bnum == 0) ? 0 : ((double) value[1]) / bnum / histogram.getBinsize();
        binstat.add(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
    }
    HistogramResult result = new HistogramResult("Distance Histogram", "distance-histogram", binstat);
    result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
    result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
    result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + mimin.getSampleStddev());
    result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + mimax.getSampleStddev());
    result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + momin.getSampleStddev());
    result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + momax.getSampleStddev());
    result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
    result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
    return result;
}
Also used : ObjHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) AbstractObjDynamicHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.AbstractObjDynamicHistogram) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) LongArrayStaticHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.LongArrayStaticHistogram) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax)

Aggregations

StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)26 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)13 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)12 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)11 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)11 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)11 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)11 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)11 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)6 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)6 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)6 ArrayList (java.util.ArrayList)6 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)5 List (java.util.List)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)3 Model (de.lmu.ifi.dbs.elki.data.model.Model)3 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)3