Search in sources :

Example 56 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

The class ParallelKNNWeightOutlier, method run.

/**
 * Execute the kNN weight outlier detection as a pipeline of parallel
 * processors: kNN search, weight extraction, score storage and min/max
 * tracking.
 *
 * @param database Database supplying the distance and kNN queries
 * @param relation Relation whose objects are scored
 * @return Outlier result with the stored scores and their score metadata
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DBIDs ids = relation.getDBIDs();
    final int kplus = k + 1;
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, kplus);

    // Shared variables that pass intermediate values between the stages.
    final SharedObject<KNNList> knnChannel = new SharedObject<>();
    final SharedDouble weightChannel = new SharedDouble();

    // Stage 1: nearest neighbor search, requesting k + 1 neighbors.
    final KNNProcessor<O> knnStage = new KNNProcessor<>(kplus, knnQuery);
    knnStage.connectKNNOutput(knnChannel);

    // Stage 2: turn each neighbor list into a single weight value.
    final KNNWeightProcessor weightStage = new KNNWeightProcessor(kplus);
    weightStage.connectKNNInput(knnChannel);
    weightStage.connectOutput(weightChannel);

    // Stage 3: write the weight into the score storage.
    final WriteDoubleDataStoreProcessor writeStage = new WriteDoubleDataStoreProcessor(scores);
    writeStage.connectInput(weightChannel);

    // Stage 4: track minimum and maximum for the score metadata.
    final DoubleMinMaxProcessor rangeStage = new DoubleMinMaxProcessor();
    rangeStage.connectInput(weightChannel);

    ParallelExecutor.run(ids, knnStage, weightStage, writeStage, rangeStage);

    final DoubleMinMax range = rangeStage.getMinMax();
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", scores, ids);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) SharedDouble(de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) WriteDoubleDataStoreProcessor(de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) SharedObject(de.lmu.ifi.dbs.elki.parallel.variables.SharedObject) DoubleMinMaxProcessor(de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 57 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

The class KDEOS, method estimateDensities.

/**
 * Kernel density estimation step: for every object and every neighborhood
 * size from kmin to kmax, add a kernel contribution to the density arrays of
 * the object's neighbors.
 *
 * @param rel Relation to query (used to obtain the dimensionality)
 * @param knnq kNN query
 * @param ids IDs to process
 * @param densities Density storage; receives one array of length
 *        kmax + 1 - kmin per object
 */
protected void estimateDensities(Relation<O> rel, KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities) {
    final int dims = dimensionality(rel);
    final int slots = kmax + 1 - kmin;
    // Give every object a fresh, zero-filled density array.
    DBIDIter init = ids.iter();
    while (init.valid()) {
        densities.put(init, new double[slots]);
        init.advance();
    }
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing densities", ids.size(), LOG) : null;
    // Upper bound for the inverse bandwidth; unbounded without a minimum
    // bandwidth.
    final double invMinBandwidth;
    if (minBandwidth > 0.) {
        invMinBandwidth = 1. / (minBandwidth * scale);
    } else {
        invMinBandwidth = Double.POSITIVE_INFINITY;
    }
    for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
        final KNNList knn = knnq.getKNNForDBID(obj, kmax + 1);
        int rank = 1;
        int slot = 0;
        double total = 0.;
        DoubleDBIDListIter outer = knn.iter();
        while (rank <= kmax && outer.valid()) {
            total += outer.doubleValue();
            // Ranks below kmin only contribute to the running sum.
            if (rank >= kmin) {
                final double invBandwidth = Math.min(rank / (total * scale), invMinBandwidth);
                final double factor = MathUtil.powi(invBandwidth, dims);
                // False for an infinite and for a NaN factor alike.
                final boolean usable = factor < Double.POSITIVE_INFINITY;
                DoubleDBIDListIter inner = knn.iter();
                while (inner.valid()) {
                    final double contribution;
                    if (usable) {
                        // NaNs with duplicate points!
                        contribution = factor * kernel.density(inner.doubleValue() * invBandwidth);
                    } else if (inner.doubleValue() == 0.) {
                        contribution = 1.;
                    } else {
                        contribution = 0.;
                    }
                    densities.get(inner)[slot] += contribution;
                    // Stop once the contribution drops below the cutoff.
                    if (contribution < CUTOFF) {
                        break;
                    }
                    inner.advance();
                }
                // Next output slot, only advanced for rank >= kmin.
                ++slot;
            }
            outer.advance();
            rank++;
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 58 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

The class KDEOS, method computeOutlierScores.

/**
 * Derive the final KDEOS score of every object from the density estimates
 * of its neighbors.
 *
 * For each neighborhood size, the object's density is standardized against
 * the mean and sample standard deviation of its neighbors' densities. The
 * standardized values are averaged and mapped through the standard normal
 * CDF.
 *
 * @param knnq kNN query
 * @param ids IDs to process
 * @param densities Density estimates (one array per object)
 * @param kdeos Score outputs
 * @param minmax Minimum and maximum scores, updated with every score
 */
protected void computeOutlierScores(KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) {
    final int slots = kmax + 1 - kmin;
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores", ids.size(), LOG) : null;
    // Reusable buffer: one row per neighborhood size, one column per
    // neighbor.
    double[][] buffer = new double[slots][kmax + 5];
    final MeanVariance stats = new MeanVariance();
    DBIDIter obj = ids.iter();
    while (obj.valid()) {
        final double[] own = densities.get(obj);
        final KNNList knn = knnq.getKNNForDBID(obj, kmax + 1);
        final int count = knn.size();
        if (buffer[0].length < count) {
            // Grow the buffer, again with some spare columns.
            buffer = new double[slots][count + 5];
        }
        // Copy the neighbors' densities into the buffer, column by column.
        int col = 0;
        for (DoubleDBIDListIter nb = knn.iter(); nb.valid(); nb.advance()) {
            final double[] nbDensities = densities.get(nb);
            for (int row = 0; row < slots; row++) {
                buffer[row][col] = nbDensities[row];
            }
            col++;
        }
        assert (col == knn.size());
        // Standardize the own density per neighborhood size and sum up.
        double total = 0.;
        for (int row = 0; row < slots; row++) {
            stats.reset();
            final double[] values = buffer[row];
            for (int c = 0; c < count; c++) {
                stats.put(values[c]);
            }
            final double mean = stats.getMean();
            final double stddev = stats.getSampleStddev();
            // Rows without spread contribute nothing.
            if (stddev > 0.) {
                total += (mean - own[row]) / stddev;
            }
        }
        // Average over the neighborhood sizes, then map into [0, 1].
        final double score = NormalDistribution.standardNormalCDF(total / slots);
        minmax.put(score);
        kdeos.put(obj, score);
        LOG.incrementProcessed(progress);
        obj.advance();
    }
    LOG.ensureCompleted(progress);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 59 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

The class LOF, method computeLOFScore.

/**
 * Compute the LOF score of one object: the sum of its neighbors' stored
 * reachability densities, divided by the object's own density times the
 * number of neighbors used.
 *
 * @param knnq kNN query
 * @param cur Current object
 * @param lrds Stored reachability densities
 * @return LOF score; 1.0 if the object's own density is infinite
 */
protected double computeLOFScore(KNNQuery<O> knnq, DBIDRef cur, DoubleDataStore lrds) {
    final double ownDensity = lrds.doubleValue(cur);
    if (Double.isInfinite(ownDensity)) {
        return 1.0;
    }
    final KNNList knn = knnq.getKNNForDBID(cur, k);
    double densityTotal = 0.;
    int used = 0;
    DBIDIter nb = knn.iter();
    while (nb.valid()) {
        // The query object itself is not counted as a neighbor.
        if (!DBIDUtil.equal(cur, nb)) {
            densityTotal += lrds.doubleValue(nb);
            used++;
        }
        nb.advance();
    }
    return densityTotal / (ownDensity * used);
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 60 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

The class COF, method computeAverageChainingDistances.

/**
 * Computes the average chaining distance, the average length of a path
 * through the given set of points to each target. The authors of COF decided
 * to approximate this value using a weighted mean that assumes every object
 * is reached from the previous point (but actually every point could be best
 * reachable from the first, in which case this does not make much sense.)
 *
 * Implementation outline, per object: the neighbor list is copied into an
 * array of "current lowest distance" values. In each round, the neighbor
 * with the lowest value is selected, its value is added to a weighted sum
 * (weights decrease by one per round), and the values of the remaining
 * neighbors are lowered to their distance from the selected neighbor where
 * that is smaller. NaN marks entries that are excluded from selection: the
 * query object itself and neighbors that were already selected.
 *
 * TODO: can we accelerate this by using the kNN of the neighbors?
 *
 * @param knnq KNN query
 * @param dq Distance query
 * @param ids IDs to process
 * @param acds Storage for average chaining distances; receives one value
 *        per processed object
 */
protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
    FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Computing average chaining distances", ids.size(), LOG) : null;
    // We do <i>not</i> bother to materialize the chaining order.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(iter, k);
        // Actual size of the returned neighbor list.
        final int r = neighbors.size();
        // it1 first walks the list once, later it points at the selected
        // neighbor; it2 scans the list in the update step.
        DoubleDBIDListIter it1 = neighbors.iter(), it2 = neighbors.iter();
        // Store the current lowest reachability.
        final double[] mindists = new double[r];
        for (int i = 0; it1.valid(); it1.advance(), ++i) {
            // The query object itself is excluded by marking it as NaN.
            mindists[i] = DBIDUtil.equal(it1, iter) ? Double.NaN : it1.doubleValue();
        }
        double acsum = 0.;
        // min(r, k) - 1 rounds, with weight j counting down to 1.
        for (int j = ((r < k) ? r : k) - 1; j > 0; --j) {
            // Find the minimum:
            int minpos = -1;
            double mindist = Double.NaN;
            for (int i = 0; i < mindists.length; ++i) {
                double curdist = mindists[i];
                // Both values could be NaN, deliberately.
                // curdist == curdist is false only for NaN (skips excluded
                // entries); !(curdist > mindist) is true while mindist is
                // still NaN, so the first usable entry is always accepted.
                // On equal values the later index wins.
                if (curdist == curdist && !(curdist > mindist)) {
                    minpos = i;
                    mindist = curdist;
                }
            }
            // Weighted sum, decreasing weights
            acsum += mindist * j;
            // Mark the selected neighbor as processed.
            // NOTE(review): minpos stays -1 if no usable entry remains,
            // which would fail on the next line — confirm callers guarantee
            // enough distinct neighbors.
            mindists[minpos] = Double.NaN;
            // Position it1 on the selected neighbor (presumably seek is an
            // absolute, 0-based position — verify against the iterator API).
            it1.seek(minpos);
            // Update distances
            it2.seek(0);
            for (int i = 0; it2.valid(); it2.advance(), ++i) {
                final double curdist = mindists[i];
                if (curdist != curdist) {
                    // NaN = processed!
                    continue;
                }
                // Distance from the selected neighbor to this remaining one.
                double newdist = dq.distance(it1, it2);
                if (newdist < curdist) {
                    mindists[i] = newdist;
                }
            }
        }
        // Normalize by r * (r - 1) / 2.
        // NOTE(review): the weights above sum to m * (m - 1) / 2 with
        // m = min(r, k), so the two agree only when r <= k — confirm this is
        // intended if the list can be longer than k. For r <= 1 the divisor
        // is zero and acsum is 0, so the stored value is NaN.
        acds.putDouble(iter, acsum / (r * 0.5 * (r - 1.)));
        LOG.incrementProcessed(lrdsProgress);
    }
    LOG.ensureCompleted(lrdsProgress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)53 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)38 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)32 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)21 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)20 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)18 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)18 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)18 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)18 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)15 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)9 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)9 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)8 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6