Search in sources :

Example 11 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class RandomSampleKNNPreprocessor method preprocess.

@Override
protected void preprocess() {
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int samplesize = (int) (ids.size() * share);
    Random random = rnd.getSingleThreadedRandom();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        KNNHeap kNN = DBIDUtil.newHeap(k);
        DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, random);
        for (DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) {
            double dist = distanceQuery.distance(iter, iter2);
            kNN.insert(dist, iter2);
        }
        storage.put(iter, kNN.toKNNList());
        getLogger().incrementProcessed(progress);
    }
    getLogger().ensureCompleted(progress);
}
Also used : Random(java.util.Random) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 12 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class LDF method run.

/**
 * Run the naive kernel density LOF algorithm.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();
    LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
    // Compute LDEs
    LOG.beginStep(stepprog, 2, "Computing LDEs.");
    WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(it, k);
        double sum = 0.0;
        int count = 0;
        // Fast version for double distances
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            if (DBIDUtil.equal(neighbor, it)) {
                continue;
            }
            final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
            if (!(nkdist > 0.) || nkdist == Double.POSITIVE_INFINITY) {
                sum = Double.POSITIVE_INFINITY;
                count++;
                break;
            }
            final double v = MathUtil.max(nkdist, neighbor.doubleValue()) / (h * nkdist);
            sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
            count++;
        }
        ldes.putDouble(it, sum / count);
        LOG.incrementProcessed(densProgress);
    }
    LOG.ensureCompleted(densProgress);
    // Compute local density factors.
    LOG.beginStep(stepprog, 3, "Computing LDFs.");
    WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    // track the maximum value for normalization.
    DoubleMinMax lofminmax = new DoubleMinMax();
    FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double lrdp = ldes.doubleValue(it);
        final KNNList neighbors = knnq.getKNNForDBID(it, k);
        double sum = 0.0;
        int count = 0;
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // skip the point itself
            if (DBIDUtil.equal(neighbor, it)) {
                continue;
            }
            sum += ldes.doubleValue(neighbor);
            count++;
        }
        sum /= count;
        final double div = lrdp + c * sum;
        double ldf = div == Double.POSITIVE_INFINITY ? (sum < Double.POSITIVE_INFINITY ? 0. : 1) : (div > 0) ? sum / div : 0;
        ldfs.putDouble(it, ldf);
        // update minimum and maximum
        lofminmax.put(ldf);
        LOG.incrementProcessed(progressLOFs);
    }
    LOG.ensureCompleted(progressLOFs);
    LOG.setCompleted(stepprog);
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Density Factor", "ldf-outlier", ldfs, ids);
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 13 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class LOCI method run.

/**
 * Run the algorithm
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
    DBIDs ids = relation.getDBIDs();
    // LOCI preprocessing step
    WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
    // LOCI main step
    FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
    WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    // Shared instance, to save allocations.
    MeanVariance mv_n_r_alpha = new MeanVariance();
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        final DoubleIntArrayList cdist = interestingDistances.get(iditer);
        final double maxdist = cdist.getDouble(cdist.size() - 1);
        final int maxneig = cdist.getInt(cdist.size() - 1);
        double maxmdefnorm = 0.0;
        double maxnormr = 0;
        if (maxneig >= nmin) {
            // Compute the largest neighborhood we will need.
            DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
            // For any critical distance, compute the normalized MDEF score.
            for (int i = 0, size = cdist.size(); i < size; i++) {
                // Only start when minimum size is fulfilled
                if (cdist.getInt(i) < nmin) {
                    continue;
                }
                final double r = cdist.getDouble(i);
                final double alpha_r = alpha * r;
                // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
                final int n_alphar = cdist.getInt(cdist.find(alpha_r));
                // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
                mv_n_r_alpha.reset();
                for (DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    // Stop at radius r
                    if (neighbor.doubleValue() > r) {
                        break;
                    }
                    DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
                    int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
                    mv_n_r_alpha.put(rn_alphar);
                }
                // We only use the average and standard deviation
                final double nhat_r_alpha = mv_n_r_alpha.getMean();
                final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
                // Redundant divisions by nhat_r_alpha removed.
                final double mdef = nhat_r_alpha - n_alphar;
                final double sigmamdef = sigma_nhat_r_alpha;
                final double mdefnorm = mdef / sigmamdef;
                if (mdefnorm > maxmdefnorm) {
                    maxmdefnorm = mdefnorm;
                    maxnormr = r;
                }
            }
        } else {
            // FIXME: when nmin was not fulfilled - what is the proper value then?
            maxmdefnorm = Double.POSITIVE_INFINITY;
            maxnormr = maxdist;
        }
        mdef_norm.putDouble(iditer, maxmdefnorm);
        mdef_radius.putDouble(iditer, maxnormr);
        minmax.put(maxmdefnorm);
        LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 14 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class ODIN method run.

/**
 * Run the ODIN algorithm
 *
 * @param database Database to run on.
 * @param relation Relation to process.
 * @return ODIN outlier result.
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Get the query functions:
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(dq, k);
    // Get the objects to process, and a data storage for counting and output:
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);
    double inc = 1. / (k - 1);
    double min = Double.POSITIVE_INFINITY, max = 0.0;
    // Process all objects
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        // Find the nearest neighbors (using an index, if available!)
        DBIDs neighbors = knnq.getKNNForDBID(iter, k);
        // For each neighbor, except ourselves, increase the in-degree:
        for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
            if (DBIDUtil.equal(iter, nei)) {
                continue;
            }
            final double value = scores.doubleValue(nei) + inc;
            if (value < min) {
                min = value;
            }
            if (value > max) {
                max = value;
            }
            scores.put(nei, value);
        }
    }
    // Wrap the result and add metadata.
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1);
    DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids);
    return new OutlierResult(meta, rel);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 15 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class KDEOS method run.

/**
 * Run the KDEOS outlier detection algorithm.
 *
 * @param database Database to query
 * @param rel Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> rel) {
    final DBIDs ids = rel.getDBIDs();
    LOG.verbose("Running kNN preprocessor.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, rel, getDistanceFunction(), kmax + 1);
    // Initialize store for densities
    WritableDataStore<double[]> densities = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, double[].class);
    estimateDensities(rel, knnq, ids, densities);
    // Compute scores:
    WritableDoubleDataStore kofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    DoubleMinMax minmax = new DoubleMinMax();
    computeOutlierScores(knnq, ids, densities, kofs, minmax);
    DoubleRelation scoreres = new MaterializedDoubleRelation("Kernel Density Estimation Outlier Scores", "kdeos-outlier", kofs, ids);
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax());
    return new OutlierResult(meta, scoreres);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14