Search in sources :

Example 31 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class LocalIsolationCoefficient method run.

/**
 * Runs the algorithm in the timed evaluation part.
 *
 * For every object, a (k + 1)-nearest-neighbor query is issued (the extra
 * slot accounts for the query point itself). The score stored for the object
 * is the kNN distance reported by the neighbor list plus the mean distance to
 * the (at most k) neighbors that are not the object itself.
 *
 * @param database Database context
 * @param relation Data relation
 * @return Outlier result holding the per-object scores and the observed
 *         minimum and maximum score
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // + query point
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute Local Isolation Coefficients", relation.size(), LOG) : null;
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore lic_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // + query point
        final KNNList knn = knnQuery.getKNNForDBID(iditer, k + 1);
        // sum of the distances to the k nearest neighbors
        double skn = 0;
        // number of neighbors so far
        int i = 0;
        for (DoubleDBIDListIter neighbor = knn.iter(); i < k && neighbor.valid(); neighbor.advance()) {
            // The query point is part of its own neighborhood; do not count it.
            if (DBIDUtil.equal(iditer, neighbor)) {
                continue;
            }
            skn += neighbor.doubleValue();
            ++i;
        }
        // Guard against an empty neighborhood (i == 0) to avoid 0 / 0.
        double lic = knn.getKNNDistance() + (i > 0 ? skn / i : 0);
        lic_score.putDouble(iditer, lic);
        // Track the range of the score that is actually stored, so that the
        // score meta data below matches the contents of the result relation.
        minmax.put(lic);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    DoubleRelation res = new MaterializedDoubleRelation("Local Isolation Coefficient", "lic-outlier", lic_score, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(meta, res);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 32 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class SimplifiedLOF method computeSimplifiedLRDs.

/**
 * Compute the simplified reachability densities.
 *
 * The density of an object is the number of its k nearest neighbors (not
 * counting the object itself) divided by the sum of their distances. When
 * that sum is not positive, the density is set to positive infinity.
 *
 * @param ids IDs to process
 * @param knnq kNN query class
 * @param lrds Density output
 */
private void computeSimplifiedLRDs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore lrds) {
    final FiniteProgress progress;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Densities", ids.size(), LOG);
    } else {
        progress = null;
    }
    DBIDIter current = ids.iter();
    while (current.valid()) {
        final KNNList knn = knnq.getKNNForDBID(current, k);
        double distanceTotal = 0.0;
        int used = 0;
        DoubleDBIDListIter nb = knn.iter();
        while (nb.valid()) {
            // Only neighbors other than the object itself contribute.
            if (!DBIDUtil.equal(nb, current)) {
                distanceTotal += nb.doubleValue();
                used++;
            }
            nb.advance();
        }
        // Default to infinity; this avoids a division by 0.
        double density = Double.POSITIVE_INFINITY;
        if (distanceTotal > 0) {
            density = used / distanceTotal;
        }
        lrds.putDouble(current, density);
        LOG.incrementProcessed(progress);
        current.advance();
    }
    LOG.ensureCompleted(progress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 33 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class VarianceOfVolume method computeVOVs.

/**
 * Compute variance of volumes.
 *
 * For each object, the volumes of its k nearest neighbors are averaged and
 * the sample variance (divisor: neighborhood size minus one) around that
 * average is stored. If the sum of squares is not finite, or the
 * neighborhood has at most one member, positive infinity is stored instead.
 * A warning is logged at most once when the sum of squares is not finite.
 *
 * @param knnq KNN query
 * @param ids IDs to process
 * @param vols Volumes
 * @param vovs Variance of Volume storage
 * @param vovminmax Score minimum/maximum tracker
 */
private void computeVOVs(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore vols, WritableDoubleDataStore vovs, DoubleMinMax vovminmax) {
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Variance of Volume", ids.size(), LOG);
    }
    boolean precisionWarningIssued = false;
    DBIDIter current = ids.iter();
    while (current.valid()) {
        final KNNList neighborhood = knnq.getKNNForDBID(current, k);
        final int members = neighborhood.size();
        final DoubleDBIDListIter nb = neighborhood.iter();
        // First pass: mean volume of the neighborhood.
        double volumeSum = 0.;
        while (nb.valid()) {
            volumeSum += vols.doubleValue(nb);
            nb.advance();
        }
        final double mean = volumeSum / members;
        // Second pass: sum of squared deviations from the mean.
        double squares = 0.;
        nb.seek(0);
        while (nb.valid()) {
            final double deviation = vols.doubleValue(nb) - mean;
            squares += deviation * deviation;
            nb.advance();
        }
        // False for infinite and for NaN sums.
        final boolean finite = squares < Double.POSITIVE_INFINITY;
        if (!precisionWarningIssued && !finite) {
            LOG.warning("Variance of Volumes has hit double precision limits, results are not reliable.");
            precisionWarningIssued = true;
        }
        final double variance;
        if (members > 1 && finite) {
            variance = squares / (members - 1);
        } else {
            variance = Double.POSITIVE_INFINITY;
        }
        vovs.putDouble(current, variance);
        // update minimum and maximum
        vovminmax.put(variance);
        LOG.incrementProcessed(progress);
        current.advance();
    }
    LOG.ensureCompleted(progress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 34 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class KNNSOS method run.

/**
 * Run the algorithm.
 *
 * Queries the k + 1 nearest neighbors of every object, computes affinities
 * with a perplexity of k / 3, lets each object nominate its neighbors into
 * the score storage, and finally transforms the accumulated scores.
 *
 * @param relation data relation
 * @return outlier detection result
 */
public OutlierResult run(Relation<O> relation) {
    // Query size
    final int querySize = k + 1;
    final double perplexity = k / 3.;
    KNNQuery<O> query = relation.getKNNQuery(getDistanceFunction(), querySize);
    final double logPerplexity = FastMath.log(perplexity);
    // Scratch buffer for the affinities; regrown below when too small.
    double[] affinities = new double[k + 10];
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("KNNSOS scores", relation.size(), LOG);
    }
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1.);
    DBIDIter current = relation.iterDBIDs();
    while (current.valid()) {
        final KNNList neighborhood = query.getKNNForDBID(current, querySize);
        final int members = neighborhood.size();
        if (affinities.length < members + 1) {
            affinities = new double[members + 10];
        }
        final DoubleDBIDListIter nb = neighborhood.iter();
        // Compute affinities
        SOS.computePi(current, nb, affinities, perplexity, logPerplexity);
        // Normalization factor:
        final double total = SOS.sumOfProbabilities(current, nb, affinities);
        if (total > 0) {
            ISOS.nominateNeighbors(current, nb, affinities, 1. / total, scores);
        }
        LOG.incrementProcessed(progress);
        current.advance();
    }
    LOG.ensureCompleted(progress);
    // Find minimum and maximum.
    final DoubleMinMax range = ISOS.transformScores(scores, relation.getDBIDs(), logPerplexity, phi);
    final DoubleRelation result = new MaterializedDoubleRelation("kNN Stoachastic Outlier Selection", "knnsos-outlier", scores, relation.getDBIDs());
    return new OutlierResult(new ProbabilisticOutlierScore(range.getMin(), range.getMax(), 0.), result);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 35 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class COP method run.

/**
 * Process a single relation.
 *
 * For every object, the k + 1 nearest neighbors are queried, the object
 * itself is removed from that set, and a PCA is computed on the remaining
 * neighbors. The object's offset from the neighbor centroid is expressed in
 * the eigenvector basis and, dimension by dimension, a score is derived from
 * the cumulative eigenvalue-scaled squared deviation (using either a
 * chi-squared or an estimated gamma distribution, depending on
 * <code>dist</code>). The minimum score over all prefixes is normalized with
 * <code>expect</code> and stored. When <code>models</code> is set, an error
 * vector and a dimensionality value are stored per object as child results.
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    // k + 1, because the query object is removed from its neighborhood below.
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
    final int dim = RelationUtil.dimensionality(relation);
    if (k <= dim + 1) {
        LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
    }
    WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    // Optional model storage; stays null (and is never dereferenced) unless
    // models is set.
    WritableDataStore<double[]> cop_err_v = null;
    WritableIntegerDataStore cop_dim = null;
    if (models) {
        cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
        cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
    }
    // compute neighbors of each db object
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
        // Do not use query object
        nids.remove(id);
        // Centroid of the remaining neighbors.
        double[] centroid = Centroid.make(relation, nids).getArrayRef();
        // Offset of the object from the centroid (presumably computed in
        // place on the array returned by toArray() - confirm against the
        // minusEquals helper).
        double[] relative = minusEquals(relation.get(id).toArray(), centroid);
        PCAResult pcares = pca.processIds(nids, relation);
        double[][] evecs = pcares.getEigenvectors();
        // Offset expressed in the eigenvector basis (presumably evecs^T *
        // relative - confirm against the transposeTimes helper).
        double[] projected = transposeTimes(evecs, relative);
        double[] evs = pcares.getEigenvalues();
        // Smallest score seen so far, and the number of leading dimensions
        // (1-based) at which it was attained.
        // NOTE(review): if no case below matches dist, or dim is 0, min stays
        // at +infinity and the normalization further down is evaluated with
        // that value - confirm this cannot happen.
        double min = Double.POSITIVE_INFINITY;
        int vdim = dim;
        switch(dist) {
            case CHISQUARED:
                {
                    // Cumulative squared deviation over the first d + 1
                    // dimensions, each scaled by its eigenvalue.
                    double sqdevs = 0;
                    for (int d = 0; d < dim; d++) {
                        // Scale with Stddev
                        double dev = projected[d];
                        // Accumulate
                        sqdevs += dev * dev / evs[d];
                        // Evaluate
                        double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
                        if (score < min) {
                            min = score;
                            vdim = d + 1;
                        }
                    }
                    break;
                }
            case GAMMA:
                {
                    // dists[d][j]: cumulative scaled squared distance of
                    // neighbor j over the first d + 1 dimensions.
                    double[][] dists = new double[dim][nids.size()];
                    int j = 0;
                    // Scratch vector, reused for every neighbor.
                    double[] srel = new double[dim];
                    for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
                        V vec = relation.get(s);
                        // Offset of the neighbor from the centroid.
                        for (int d = 0; d < dim; d++) {
                            srel[d] = vec.doubleValue(d) - centroid[d];
                        }
                        double[] serr = transposeTimes(evecs, srel);
                        double sqdist = 0.0;
                        for (int d = 0; d < dim; d++) {
                            double serrd = serr[d];
                            sqdist += serrd * serrd / evs[d];
                            dists[d][j] = sqdist;
                        }
                        j++;
                    }
                    // Same cumulative deviation as in the chi-squared case,
                    // but evaluated against a distribution estimated from the
                    // neighbors' values for the same dimension prefix.
                    double sqdevs = 0;
                    for (int d = 0; d < dim; d++) {
                        // Scale with Stddev
                        final double dev = projected[d];
                        // Accumulate
                        sqdevs += dev * dev / evs[d];
                        // Sort, so we can trim the top 15% below.
                        Arrays.sort(dists[d]);
                        // Evaluate
                        double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
                        if (score < min) {
                            min = score;
                            vdim = d + 1;
                        }
                    }
                    break;
                }
        }
        // Normalize the value
        final double prob = expect * (1 - min) / (expect + min);
        // Construct the error vector:
        // Keep only the first vdim components of the projection, then map
        // through evecs and scale by -prob.
        for (int d = vdim; d < dim; d++) {
            projected[d] = 0.;
        }
        double[] ev = timesEquals(times(evecs, projected), -1 * prob);
        cop_score.putDouble(id, prob);
        if (models) {
            cop_err_v.put(id, ev);
            // Stored value is dim + 1 - vdim, i.e. it shrinks as more leading
            // dimensions were used for the minimum score.
            cop_dim.putInt(id, dim + 1 - vdim);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // combine results.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    if (models) {
        result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
        result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) GreaterConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12