Search in sources :

Example 51 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class LOCI method precomputeInterestingRadii.

/**
 * Preprocessing step: for every object, collect the radii at which its
 * neighbor count changes, together with the neighbor count at that radius.
 *
 * For each object the range query is run with radius {@code rmax}. Every
 * distinct neighbor distance is recorded with the number of neighbors seen up
 * to and including it. Unless {@code alpha} is 1, the distance divided by
 * {@code alpha} is recorded as well (if it does not exceed {@code rmax}), and
 * its count is filled in afterwards from the closest preceding entry.
 *
 * @param ids IDs to process
 * @param rangeQuery Range query
 * @param interestingDistances Distances of interest
 */
protected void precomputeInterestingRadii(DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) {
    FiniteProgress progressPreproc = null;
    if (LOG.isVerbose()) {
        progressPreproc = new FiniteProgress("LOCI preprocessing", ids.size(), LOG);
    }
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
        // Critical distances; reserve two slots per neighbor (plain + scaled).
        DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
        // Number of neighbors visited so far, including the current one.
        int seen = 0;
        // NOTE(review): the tie check below only compares adjacent entries, so
        // it presumably relies on the range result being sorted by distance.
        for (DoubleDBIDListIter nb = neighbors.iter(); nb.valid();) {
            final double dist = nb.doubleValue();
            seen++;
            // Step forward first, so we can peek at the following distance.
            nb.advance();
            final boolean tiedWithNext = nb.valid() && nb.doubleValue() == dist;
            if (!tiedWithNext) {
                // Last member of a group of equal distances: record the count.
                cdist.append(dist, seen);
                // Also record the scaled radius; its count is not known yet,
                // so mark it with a placeholder to be resolved after sorting.
                if (alpha != 1.) {
                    final double scaled = dist / alpha;
                    if (scaled <= rmax) {
                        cdist.append(scaled, Integer.MIN_VALUE);
                    }
                }
            }
        }
        cdist.sort();
        // Resolve the placeholders: each one inherits the most recent real
        // count that precedes it in the sorted list (0 if there is none), to
        // have fast lookups of the number of neighbors at a given distance.
        final int entries = cdist.size();
        int carried = 0;
        for (int pos = 0; pos < entries; pos++) {
            final int cnt = cdist.getInt(pos);
            if (cnt != Integer.MIN_VALUE) {
                carried = cnt;
                continue;
            }
            cdist.setValue(pos, carried);
        }
        // TODO: shrink the list, removing duplicate radii?
        interestingDistances.put(iditer, cdist);
        LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 52 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class SimpleKernelDensityLOF method run.

/**
 * Run the naive kernel density LOF algorithm.
 *
 * Works in three steps: obtain a precomputed kNN query, estimate a kernel
 * density for every object from its neighbors, and finally compare the
 * average neighbor density to the density of the object itself.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = null;
    if (LOG.isVerbose()) {
        stepprog = new StepProgress("KernelDensityLOF", 3);
    }
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();

    LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

    // Step 2: kernel density estimate per object.
    LOG.beginStep(stepprog, 2, "Computing densities.");
    WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress densProgress = null;
    if (LOG.isVerbose()) {
        densProgress = new FiniteProgress("Densities", ids.size(), LOG);
    }
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(it, k);
        double sum = 0.0;
        int count = 0;
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // The query object itself does not contribute.
            if (!DBIDUtil.equal(neighbor, it)) {
                // Bandwidth: the kNN distance of the neighbor.
                final double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
                if (max == 0) {
                    // Zero bandwidth: the sum becomes infinite and the scan stops.
                    // If no neighbor was counted before this point, count is
                    // still 0 and the density stored below is 0.
                    sum = Double.POSITIVE_INFINITY;
                    break;
                }
                final double v = neighbor.doubleValue() / max;
                sum += kernel.density(v) / MathUtil.powi(max, dim);
                count++;
            }
        }
        double density = 0.;
        if (count > 0) {
            density = sum / count;
        }
        dens.putDouble(it, density);
        LOG.incrementProcessed(densProgress);
    }
    LOG.ensureCompleted(densProgress);

    // Step 3: outlier score of each object from the density ratios.
    LOG.beginStep(stepprog, 3, "Computing KLOFs.");
    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    // Observed score range, reported in the result metadata.
    DoubleMinMax lofminmax = new DoubleMinMax();
    FiniteProgress progressLOFs = null;
    if (LOG.isVerbose()) {
        progressLOFs = new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG);
    }
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double lrdp = dens.doubleValue(it);
        // Objects without a positive density get the neutral score 1.
        double lof = 1.0;
        if (lrdp > 0) {
            final KNNList neighbors = knnq.getKNNForDBID(it, k);
            int count = 0;
            double sum = 0.0;
            for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                // Again, leave out the point itself.
                if (!DBIDUtil.equal(neighbor, it)) {
                    sum += dens.doubleValue(neighbor);
                    count++;
                }
            }
            if (lrdp == Double.POSITIVE_INFINITY) {
                // Infinite own density: 1 if the neighbors are infinite as
                // well, otherwise 0.
                lof = (sum == Double.POSITIVE_INFINITY) ? 1 : 0.;
            } else {
                // Mean neighbor density relative to the own density.
                lof = sum / (count * lrdp);
            }
        }
        lofs.putDouble(it, lof);
        // update minimum and maximum
        lofminmax.put(lof);
        LOG.incrementProcessed(progressLOFs);
    }
    LOG.ensureCompleted(progressLOFs);
    LOG.setCompleted(stepprog);

    // Wrap the scores and their metadata into the result object.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 53 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class OUTRES method refineRange.

/**
 * Refine a range query result to a smaller radius.
 *
 * Copies every entry of the original result whose distance does not exceed
 * the new epsilon into a fresh list; the input list is not modified.
 *
 * @param neighc Original result
 * @param adjustedEps New epsilon
 * @return refined list
 */
private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) {
    ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList(neighc.size());
    // The input is not guaranteed to be sorted, so every entry is inspected.
    DoubleDBIDListIter cursor = neighc.iter();
    while (cursor.valid()) {
        final DoubleDBIDPair entry = cursor.getPair();
        final double d = entry.doubleValue();
        if (d <= adjustedEps) {
            refined.add(d, entry);
        }
        cursor.advance();
    }
    return refined;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)

Example 54 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class FastABOD method run.

/**
 * Run Fast-ABOD on the data set.
 *
 * For every object, the k nearest other objects (by kernel-induced squared
 * distance) are selected, and the score is the naive variance of the weighted
 * angle terms over all pairs of these neighbors.
 *
 * @param db Database, used to obtain the similarity query
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
    DBIDs ids = relation.getDBIDs();
    // Build a kernel matrix, to make O(n^3) slightly less bad.
    SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
    KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);

    WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmaxabod = new DoubleMinMax();
    // Accumulator and heap are reused (reset / cleared) for every object.
    MeanVariance s = new MeanVariance();
    KNNHeap nn = DBIDUtil.newHeap(k);

    for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
        final double simAA = kernelMatrix.getSimilarity(pA, pA);

        // Collect the nearest neighbors of A.
        nn.clear();
        for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
            if (!DBIDUtil.equal(nB, pA)) {
                double simBB = kernelMatrix.getSimilarity(nB, nB);
                double simAB = kernelMatrix.getSimilarity(pA, nB);
                // Squared distance expressed via the kernel values.
                double sqdAB = simAA + simBB - simAB - simAB;
                // Only strictly positive distances qualify (also rejects NaN).
                if (sqdAB > 0.) {
                    nn.insert(sqdAB, nB);
                }
            }
        }
        KNNList nl = nn.toKNNList();

        // Visit every unordered pair (B, C) of neighbors: C starts right after B.
        s.reset();
        DoubleDBIDListIter iB = nl.iter();
        DoubleDBIDListIter iC = nl.iter();
        while (iB.valid()) {
            double sqdAB = iB.doubleValue();
            double simAB = kernelMatrix.getSimilarity(pA, iB);
            if (sqdAB > 0.) {
                iC.seek(iB.getOffset() + 1);
                while (iC.valid()) {
                    double sqdAC = iC.doubleValue();
                    double simAC = kernelMatrix.getSimilarity(pA, iC);
                    if (sqdAC > 0.) {
                        // Exploit bilinearity of scalar product:
                        // <B-A, C-A> = <B, C-A> - <A,C-A>
                        // = <B,C> - <B,A> - <A,C> + <A,A>
                        double simBC = kernelMatrix.getSimilarity(iB, iC);
                        double numerator = simBC - simAB - simAC + simAA;
                        double div = 1. / (sqdAB * sqdAC);
                        s.put(numerator * div, FastMath.sqrt(div));
                    }
                    iC.advance();
                }
            }
            iB.advance();
        }

        // Sample variance would probably be better, but the ABOD publication
        // uses the naive variance.
        final double abof = s.getNaiveVariance();
        minmaxabod.put(abof);
        abodvalues.putDouble(pA, abof);
    }

    // Wrap the scores and their metadata into the result object.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KernelMatrix(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 55 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class ReferenceBasedOutlierDetection method updateDensities.

/**
 * Update the density estimates for each object.
 *
 * For every entry of the distance list, a density is computed and written to
 * the store unless it is greater than the value already stored there.
 *
 * @param rbod_score Density storage
 * @param referenceDists Distances from current reference point
 */
protected void updateDensities(WritableDoubleDataStore rbod_score, DoubleDBIDList referenceDists) {
    final DoubleDBIDListIter cursor = referenceDists.iter();
    int pos = 0;
    while (pos < referenceDists.size()) {
        final double candidate = computeDensity(referenceDists, cursor, pos);
        // computeDensity moves the iterator; put it back on the current entry.
        cursor.seek(pos);
        final double stored = rbod_score.doubleValue(cursor);
        // A stored NaN indicates the first run; the negated comparison is then
        // true, so the candidate is always written in that case.
        if (!(candidate > stored)) {
            rbod_score.putDouble(cursor, candidate);
        }
        pos++;
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)

Aggregations

DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)69 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)38 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)20 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)19 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)12 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)11 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)11 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)11 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)11 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)11 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)10 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)9 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)6 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)5 DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)5 ArrayList (java.util.ArrayList)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4