Search in sources :

Example 91 with FiniteProgress

Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In class LoOP, method run:

/**
 * Computes a Local Outlier Probability (LoOP) score for every object of the
 * given relation.
 * <p>
 * This method reports steps 3 to 5 of a five-step progress: probabilistic
 * distances, PLOF values, and the final LoOP scores. The step progress is
 * also passed to {@code getKNNQueries} (presumably for the first two steps;
 * that helper is not visible here).
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result holding one LoOP score per object
 * @throws AbortException if either of the two kNN queries is unavailable
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Step reporting is only set up when verbose logging is on.
    StepProgress steps = null;
    if (LOG.isVerbose()) {
        steps = new StepProgress(5);
    }
    final Pair<KNNQuery<O>, KNNQuery<O>> queries = getKNNQueries(database, relation, steps);
    final KNNQuery<O> comparisonQuery = queries.getFirst();
    final KNNQuery<O> reachabilityQuery = queries.getSecond();
    // Both queries are mandatory; fail early if one is missing.
    if (comparisonQuery == null) {
        throw new AbortException("No kNN queries supported by database for comparison distance function.");
    }
    if (reachabilityQuery == null) {
        throw new AbortException("No kNN queries supported by database for density estimation distance function.");
    }
    // FIXME: tie handling!

    // Step 3: probabilistic distances, computed with the reachability query.
    final WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    LOG.beginStep(steps, 3, "Computing pdists");
    computePDists(relation, reachabilityQuery, pdists);

    // Step 4: PLOF values; the returned factor is used for normalization below.
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    LOG.beginStep(steps, 4, "Computing PLOF");
    final double nplof = computePLOFs(relation, comparisonQuery, pdists, scores);

    // Step 5: replace each PLOF by its LoOP score in place, tracking min/max.
    final DoubleMinMax range = new DoubleMinMax();
    LOG.beginStep(steps, 5, "Computing LoOP scores");
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("LoOP for objects", relation.size(), LOG);
    }
    final double scale = 1. / (nplof * MathUtil.SQRT2);
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final double plof = scores.doubleValue(it);
        final double loop = NormalDistribution.erf((plof - 1.) * scale);
        scores.putDouble(it, loop);
        range.put(loop);
        LOG.incrementProcessed(progress);
        it.advance();
    }
    LOG.ensureCompleted(progress);
    LOG.setCompleted(steps);

    // Package the scores together with their observed value range.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", scores, relation.getDBIDs());
    final OutlierScoreMeta meta = new ProbabilisticOutlierScore(range.getMin(), range.getMax(), 0.);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 92 with FiniteProgress

Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In class KNNOutlier, method run:

/**
 * Scores every object of the relation by its k-nearest-neighbor distance.
 *
 * @param relation Data relation
 * @return Outlier result whose score for each object is the kNN distance
 *         reported by the kNN query, together with the observed minimum
 *         and maximum
 */
public OutlierResult run(Relation<O> relation) {
    final DistanceQuery<O> dq = relation.getDistanceQuery(getDistanceFunction());
    final KNNQuery<O> knnq = relation.getKNNQuery(dq, k);
    // Progress reporting is only set up when verbose logging is on.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("kNN distance for objects", relation.size(), LOG);
    }
    final DoubleMinMax range = new DoubleMinMax();
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DBIDIter iter = relation.iterDBIDs();
    while (iter.valid()) {
        // The score is the distance to the k-th nearest neighbor
        // (assuming the query point is always included, with distance 0).
        final double kdist = knnq.getKNNForDBID(iter, k).getKNNDistance();
        scores.putDouble(iter, kdist);
        range.put(kdist);
        LOG.incrementProcessed(progress);
        iter.advance();
    }
    LOG.ensureCompleted(progress);
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", scores, relation.getDBIDs());
    final OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 93 with FiniteProgress

Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In class SOS, method run:

/**
 * Run the Stochastic Outlier Selection algorithm.
 * <p>
 * For each object, the distances to all other objects are collected and
 * sorted, affinities are computed by {@code computePi}, and, if their sum is
 * positive, {@code nominateNeighbors} updates the score store with the
 * normalized affinities. All scores start at 1. An empty relation yields a
 * result without any scores instead of failing on a negative buffer size.
 *
 * @param relation data relation
 * @return outlier detection result
 */
public OutlierResult run(Relation<O> relation) {
    DistanceQuery<O> dq = relation.getDistanceQuery(getDistanceFunction());
    final double logPerp = FastMath.log(perplexity);
    // Each object has (size - 1) other objects as neighbors. Clamp at zero so
    // that an empty relation does not request a negative capacity.
    final int numNeighbors = Math.max(relation.size() - 1, 0);
    ModifiableDoubleDBIDList dlist = DBIDUtil.newDistanceDBIDList(numNeighbors);
    // The list, its iterator and the affinity buffer are reused for every object.
    DoubleDBIDListMIter di = dlist.iter();
    double[] p = new double[numNeighbors];
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("SOS scores", relation.size(), LOG) : null;
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1.);
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        // Build sorted neighbors list (every object except the current one).
        dlist.clear();
        for (DBIDIter i2 = relation.iterDBIDs(); i2.valid(); i2.advance()) {
            if (DBIDUtil.equal(it, i2)) {
                continue;
            }
            dlist.add(dq.distance(it, i2), i2);
        }
        dlist.sort();
        // Compute affinities
        computePi(it, di, p, perplexity, logPerp);
        // Normalization factor:
        double s = sumOfProbabilities(it, di, p);
        // Skip objects whose affinities sum to zero (would divide by zero).
        if (s > 0) {
            nominateNeighbors(it, di, p, 1. / s, scores);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Find minimum and maximum.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter it2 = relation.iterDBIDs(); it2.valid(); it2.advance()) {
        minmax.put(scores.doubleValue(it2));
    }
    // Display name spelled correctly; the short name "sos-outlier" is unchanged.
    DoubleRelation scoreres = new MaterializedDoubleRelation("Stochastic Outlier Selection", "sos-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax(), 0.);
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 94 with FiniteProgress

Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In class HiCS, method run:

/**
 * Runs HiCS on a relation: selects high-contrast subspaces, runs the
 * configured outlier algorithm on each subspace projection, and sums the
 * per-subspace scores for every object.
 *
 * @param relation the data relation to process
 * @return The aggregated (summed) scores assigned by the given outlier
 *         detection algorithm; NaN scores are left out of the sum
 */
public OutlierResult run(Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    ArrayList<ArrayDBIDs> oneDimIndexes = buildOneDimIndexes(relation);
    Set<HiCSSubspace> subspaces = calculateSubspaces(relation, oneDimIndexes, rnd.getSingleThreadedRandom());
    if (LOG.isVerbose()) {
        LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
    }
    List<DoubleRelation> perSubspaceScores = new ArrayList<>();
    // Progress reporting is only set up when verbose logging is on.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG);
    }
    // NOTE(review): the original comment here is truncated
    // ("... relation instead of SubspaceEuclideanDistanceFunction?)");
    // restore the full TODO from upstream.
    for (HiCSSubspace subspace : subspaces) {
        if (LOG.isVerbose()) {
            LOG.verbose("Performing outlier detection in subspace " + subspace);
        }
        // Expose only the subspace's dimensions through a proxy database.
        ProxyDatabase proxy = new ProxyDatabase(ids);
        NumericalFeatureSelection<V> projection = new NumericalFeatureSelection<V>(subspace);
        proxy.addRelation(new ProjectedView<>(relation, projection));
        // Run the configured outlier algorithm and keep its scores.
        perSubspaceScores.add(outlierAlgorithm.run(proxy).getScores());
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Aggregate: the final score of an object is the sum over all subspaces.
    WritableDoubleDataStore totals = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax range = new DoubleMinMax();
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        double total = 0.0;
        for (DoubleRelation subspaceScores : perSubspaceScores) {
            final double score = subspaceScores.doubleValue(it);
            if (Double.isNaN(score)) {
                // Ignore subspaces that produced no usable score.
                continue;
            }
            total += score;
        }
        totals.putDouble(it, total);
        range.put(total);
        it.advance();
    }
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", totals, relation.getDBIDs());
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumericalFeatureSelection(de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 95 with FiniteProgress

Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In class HiCS, method calculateContrast:

/**
 * Calculates the actual contrast of a given subspace.
 * <p>
 * Runs {@code m} Monte-Carlo iterations. Each iteration picks one random
 * dimension of the subspace, builds a conditional sample by intersecting a
 * random window of ids taken from the index of every other dimension, and
 * passes the chosen dimension's values over the full index and over the
 * conditional sample to {@code statTest.deviation}. The sum of the resulting
 * deviations divided by {@code m} is stored in {@code subspace.contrast}.
 *
 * @param relation Relation to process
 * @param subspace Subspace; its {@code contrast} field is overwritten
 * @param subspaceIndex Subspace indexes, looked up by dimension number
 *        (NOTE(review): presumably each is sorted by that dimension's value,
 *        judging by the local name {@code sortedIndices} - confirm)
 * @param random Random generator used to choose the dimension and the
 *        window offsets
 */
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
    final int card = subspace.cardinality();
    // Per-dimension window fraction: the card-th root of alpha.
    final double alpha1 = FastMath.pow(alpha, (1.0 / card));
    final int windowsize = (int) (relation.size() * alpha1);
    // Progress is only tracked when debug logging is enabled.
    final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
    // Counts small-sample retries over the whole call; never reset between iterations.
    int retries = 0;
    double deviationSum = 0.0;
    for (int i = 0; i < m; i++) {
        // Choose a random set bit: step through the set bits a random
        // number of times (between 1 and card).
        int chosen = -1;
        for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
            chosen = subspace.nextSetBit(chosen + 1);
        }
        // initialize sample: start from all ids and narrow it down by
        // every dimension of the subspace except the chosen one.
        DBIDs conditionalSample = relation.getDBIDs();
        for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
            if (j == chosen) {
                continue;
            }
            ArrayDBIDs sortedIndices = subspaceIndex.get(j);
            ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
            // initialize index block at a random start offset.
            // NOTE(review): nextInt presumably needs a positive bound, i.e.
            // windowsize < relation.size() - confirm this always holds.
            DBIDArrayIter iter = sortedIndices.iter();
            iter.seek(random.nextInt(relation.size() - windowsize));
            for (int k = 0; k < windowsize; k++, iter.advance()) {
                // select index block
                indexBlock.add(iter);
            }
            conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
        }
        if (conditionalSample.size() < 10) {
            retries++;
            if (LOG.isDebugging()) {
                LOG.debug("Sample size very small. Retry no. " + retries);
            }
            if (retries >= MAX_RETRIES) {
                // Retry budget exhausted: warn and fall through, using the
                // small sample anyway.
                LOG.warning("Too many retries, for small samples: " + retries);
            } else {
                // Repeat this iteration without counting it.
                i--;
                continue;
            }
        }
        // Project conditional set onto the chosen dimension
        double[] sampleValues = new double[conditionalSample.size()];
        {
            int l = 0;
            for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
                sampleValues[l] = relation.get(iter).doubleValue(chosen);
                l++;
            }
        }
        // Project full set onto the chosen dimension, in the order of that
        // dimension's index
        double[] fullValues = new double[relation.size()];
        {
            int l = 0;
            for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
                fullValues[l] = relation.get(iter).doubleValue(chosen);
                l++;
            }
        }
        double contrast = statTest.deviation(fullValues, sampleValues);
        if (Double.isNaN(contrast)) {
            // Repeat this iteration without counting it.
            // NOTE(review): unlike the small-sample case, this retry is not
            // bounded by MAX_RETRIES - confirm it cannot loop forever.
            i--;
            LOG.warning("Contrast was NaN");
            continue;
        }
        deviationSum += contrast;
        // Only iterations that contributed a deviation count as processed.
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Average deviation over the m iterations.
    subspace.contrast = deviationSum / m;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12