Search in sources :

Example 46 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class OPTICSOF method run.

/**
 * Perform OPTICS-based outlier detection.
 *
 * The computation makes three passes over the relation:
 * pass 1 stores, for every object, its minpts-nearest-neighbor list, the kNN
 * distance of that list (used as core distance) and the number of objects
 * returned by a range query with that radius; pass 2 computes a local
 * reachability density as the neighborhood size divided by the sum of
 * max(core distance of neighbor, distance to neighbor) over the stored
 * neighbor list; pass 3 computes the outlier factor as the sum of
 * lrd(neighbor) / lrd(object) over the stored neighbor list, divided by the
 * neighborhood size.
 *
 * NOTE(review): if all reachability distances of an object are 0, its lrd
 * becomes infinite and the quotient in pass 3 can evaluate to NaN - confirm
 * whether duplicate-heavy data needs special treatment.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    DBIDs ids = relation.getDBIDs();
    // FIXME: implicit preprocessor.
    // Pass 1: neighbor lists, core distances and neighborhood sizes.
    WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
    WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
        double d = minptsNeighbours.getKNNDistance();
        nMinPts.put(iditer, minptsNeighbours);
        coreDistance.putDouble(iditer, d);
        // The range query with radius d may return more than minpts objects.
        minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
    }
    // Pass 2: local reachability densities.
    // Only the sum of the reachability distances is needed, so the individual
    // values are accumulated directly instead of being stored per object.
    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double lrd = 0;
        // TODO: optimize for double distances
        for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double coreDist = coreDistance.doubleValue(neighbor);
            double dist = distQuery.distance(iditer, neighbor);
            // Reachability distance of iditer with respect to neighbor.
            lrd += MathUtil.max(coreDist, dist);
        }
        lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
        lrds.putDouble(iditer, lrd);
    }
    // Pass 3: outlier factors.
    DoubleMinMax ofminmax = new DoubleMinMax();
    WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // The density of the current object is invariant in the inner loop.
        final double lrd = lrds.doubleValue(iditer);
        double of = 0;
        for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double lrdN = lrds.doubleValue(neighbor);
            of = of + lrdN / lrd;
        }
        of = of / minPtsNeighborhoodSize.intValue(iditer);
        ofs.putDouble(iditer, of);
        // update minimum and maximum
        ofminmax.put(of);
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 47 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class LinearScanRKNNQuery method getRKNNForDBID.

/**
 * Reverse k-nearest-neighbor search by linear scan: computes the kNN list of
 * every object in the relation in bulk and reports each object whose list
 * contains the query id, together with the distance recorded in that list.
 *
 * @param id Query object
 * @param k Number of neighbors used for each kNN list
 * @return Matching objects with their distances, sorted via the list's sort()
 */
@Override
public DoubleDBIDList getRKNNForDBID(DBIDRef id, int k) {
    final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
    final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
    // The bulk result is addressed by position, in the iteration order of candidates.
    int position = 0;
    DBIDIter candidate = candidates.iter();
    while (candidate.valid()) {
        DoubleDBIDListIter member = neighborhoods.get(position).iter();
        while (member.valid()) {
            if (DBIDUtil.equal(member, id)) {
                // The query object is among the candidate's neighbors.
                result.add(member.doubleValue(), candidate);
            }
            member.advance();
        }
        candidate.advance();
        position++;
    }
    result.sort();
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 48 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class KDEOS method estimateDensities.

/**
 * Perform the kernel density estimation step.
 *
 * For every object, the kmax + 1 nearest neighbors are retrieved. For each
 * neighborhood size k in kmin..kmax an inverse bandwidth is derived from the
 * sum of the first k neighbor distances (capped by the minimum bandwidth, if
 * one is set), and the resulting kernel contribution is added to the density
 * slot for that k of every neighbor, until a contribution drops below CUTOFF.
 *
 * @param rel Relation to query
 * @param knnq kNN query
 * @param ids IDs to process
 * @param densities Density storage
 */
protected void estimateDensities(Relation<O> rel, KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities) {
    final int dim = dimensionality(rel);
    final int knum = kmax + 1 - kmin;
    // Allocate one density slot per neighborhood size for every object.
    DBIDIter init = ids.iter();
    while (init.valid()) {
        densities.put(init, new double[knum]);
        init.advance();
    }
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Computing densities", ids.size(), LOG);
    }
    // Upper bound for the inverse bandwidth; unbounded without a minimum bandwidth.
    final double iminbw;
    if (minBandwidth > 0.) {
        iminbw = 1. / (minBandwidth * scale);
    } else {
        iminbw = Double.POSITIVE_INFINITY;
    }
    // Spread the kernel contributions of every object over its neighbors.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1);
        int rank = 1;
        int slot = 0;
        double distSum = 0.;
        for (DoubleDBIDListIter outer = neighbors.iter(); rank <= kmax && outer.valid(); outer.advance(), rank++) {
            distSum += outer.doubleValue();
            // Neighborhood sizes below kmin only contribute to the running sum.
            if (rank >= kmin) {
                final double ibw = Math.min(rank / (distSum * scale), iminbw);
                final double sca = MathUtil.powi(ibw, dim);
                for (DoubleDBIDListIter inner = neighbors.iter(); inner.valid(); inner.advance()) {
                    final double dist = inner.doubleValue();
                    // A non-finite (or NaN) scale arises with duplicate points; then
                    // only neighbors at distance zero receive a unit contribution.
                    final double contribution = (sca < Double.POSITIVE_INFINITY) //
                            ? sca * kernel.density(dist * ibw) //
                            : (dist == 0. ? 1. : 0.);
                    densities.get(inner)[slot] += contribution;
                    if (contribution < CUTOFF) {
                        break;
                    }
                }
                slot++;
            }
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 49 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class KDEOS method computeOutlierScores.

/**
 * Compute the final KDEOS scores.
 *
 * For every object and every neighborhood size, the object's density is
 * compared to the mean and sample standard deviation of the densities of its
 * kmax + 1 nearest neighbors. The standardized differences are averaged over
 * all neighborhood sizes and mapped through the standard normal CDF.
 *
 * @param knnq kNN query
 * @param ids IDs to process
 * @param densities Density estimates
 * @param kdeos Score outputs
 * @param minmax Minimum and maximum scores
 */
protected void computeOutlierScores(KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) {
    final int knum = kmax + 1 - kmin;
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Computing KDEOS scores", ids.size(), LOG);
    }
    // Reusable buffer: one row per neighborhood size, one column per neighbor.
    double[][] buffer = new double[knum][kmax + 5];
    final MeanVariance stats = new MeanVariance();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final double[] own = densities.get(iter);
        final KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1);
        final int count = neighbors.size();
        if (count > buffer[0].length) {
            // Too small for this neighborhood: reallocate with some slack.
            buffer = new double[knum][count + 5];
        }
        // Transpose the neighbors' density vectors into the buffer.
        int col = 0;
        DoubleDBIDListIter neighbor = neighbors.iter();
        while (neighbor.valid()) {
            final double[] theirs = densities.get(neighbor);
            for (int row = 0; row < knum; row++) {
                buffer[row][col] = theirs[row];
            }
            neighbor.advance();
            col++;
        }
        assert (col == count);
        // Accumulate the standardized density difference for each k.
        double total = 0.;
        for (int row = 0; row < knum; row++) {
            final double[] line = buffer[row];
            stats.reset();
            for (int c = 0; c < count; c++) {
                stats.put(line[c]);
            }
            final double mean = stats.getMean();
            final double stddev = stats.getSampleStddev();
            // Rows without any spread contribute nothing.
            if (stddev > 0.) {
                total += (mean - own[row]) / stddev;
            }
        }
        // Average over all k, then map to a probability-like score.
        final double score = NormalDistribution.standardNormalCDF(total / knum);
        minmax.put(score);
        kdeos.put(iter, score);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 50 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class COF method computeAverageChainingDistances.

/**
 * Computes the average chaining distance, the average length of a path
 * through the given set of points to each target. The authors of COF decided
 * to approximate this value using a weighted mean that assumes every object
 * is reached from the previous point (but actually every point could be best
 * reachable from the first, in which case this does not make much sense.)
 *
 * Implementation outline: for every object, the distances to its k nearest
 * neighbors initialize an array of "current lowest distance to the chain".
 * The closest unprocessed neighbor is then selected repeatedly, its distance
 * is added to a weighted sum (with weights decreasing by one per step), and
 * the remaining entries are lowered to their distance from the selected
 * neighbor where that is smaller. Processed entries are marked with NaN.
 *
 * TODO: can we accelerate this by using the kNN of the neighbors?
 *
 * @param knnq KNN query
 * @param dq Distance query
 * @param ids IDs to process
 * @param acds Storage for average chaining distances
 */
protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
    FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Computing average chaining distances", ids.size(), LOG) : null;
    // We do <i>not</i> bother to materialize the chaining order.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(iter, k);
        final int r = neighbors.size();
        // it1 ends up pointing at the most recently selected neighbor; it2 scans
        // all neighbors during the distance update.
        DoubleDBIDListIter it1 = neighbors.iter(), it2 = neighbors.iter();
        // Store the current lowest reachability.
        // The query object itself (if contained in its own kNN list) starts out
        // as NaN, i.e. as already processed.
        final double[] mindists = new double[r];
        for (int i = 0; it1.valid(); it1.advance(), ++i) {
            mindists[i] = DBIDUtil.equal(it1, iter) ? Double.NaN : it1.doubleValue();
        }
        double acsum = 0.;
        // min(r, k) - 1 selection steps; j doubles as the weight of the step.
        for (int j = ((r < k) ? r : k) - 1; j > 0; --j) {
            // Find the minimum:
            int minpos = -1;
            double mindist = Double.NaN;
            for (int i = 0; i < mindists.length; ++i) {
                double curdist = mindists[i];
                // Both values could be NaN, deliberately.
                // "curdist == curdist" rejects NaN (processed) entries, while
                // "!(curdist > mindist)" is also true while mindist is still NaN, so
                // the first unprocessed entry is always accepted. On ties the later
                // index wins.
                if (curdist == curdist && !(curdist > mindist)) {
                    minpos = i;
                    mindist = curdist;
                }
            }
            // Weighted sum, decreasing weights
            acsum += mindist * j;
            // Mark the selected neighbor as processed and position it1 on it.
            mindists[minpos] = Double.NaN;
            it1.seek(minpos);
            // Update distances
            it2.seek(0);
            for (int i = 0; it2.valid(); it2.advance(), ++i) {
                final double curdist = mindists[i];
                if (curdist != curdist) {
                    // NaN = processed!
                    continue;
                }
                // Lower the entry if the newly selected neighbor is closer than
                // anything selected so far.
                double newdist = dq.distance(it1, it2);
                if (newdist < curdist) {
                    mindists[i] = newdist;
                }
            }
        }
        // Normalize by r * (r - 1) / 2, which equals the sum of the weights
        // 1..(r - 1).
        // NOTE(review): for r == 1 the loop above does not run and this divides
        // 0 by 0, storing NaN - confirm that this is acceptable for callers.
        acds.putDouble(iter, acsum / (r * 0.5 * (r - 1.)));
        LOG.incrementProcessed(lrdsProgress);
    }
    LOG.ensureCompleted(lrdsProgress);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 69, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 38, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 34, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 20, ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 19, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 12, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 11, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 11, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 11, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 11, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 11, DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList): 10, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 9, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 7, DBID (de.lmu.ifi.dbs.elki.database.ids.DBID): 6, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 6, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 5, DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair): 5, ArrayList (java.util.ArrayList): 5, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 4