Search in sources :

Example 36 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class MaterializeKNNAndRKNNPreprocessor method objectsRemoved.

/**
 * Reacts to the removal of objects: deletes the materialized kNN lists and
 * RkNN sets stored for the removed ids, recomputes the kNN lists of the ids
 * returned by {@code affectedRkNN}, strips the removed ids from the RkNN sets
 * of the ids returned by {@code affectedkNN}, and finally notifies listeners.
 *
 * @param ids the ids of the removed objects
 */
@Override
protected void objectsRemoved(DBIDs ids) {
    // Progress is only tracked when the logger is verbose; otherwise null.
    StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;
    // For debugging: valid DBIDs still in the database.
    final DBIDs valid = DBIDUtil.ensureSet(distanceQuery.getRelation().getDBIDs());
    ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
    // delete the materialized (old) kNNs and RkNNs
    getLogger().beginStep(stepprog, 1, "New deletions ocurred, remove their materialized kNNs and RkNNs.");
    // Temporary storage of removed lists
    List<KNNList> kNNs = new ArrayList<>(ids.size());
    List<TreeSet<DoubleDBIDPair>> rkNNs = new ArrayList<>(ids.size());
    for (DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) {
        // Remember the old kNN list before it is deleted from the storage.
        kNNs.add(storage.get(iter));
        // Sanity check: warn about neighbors that are neither still in the
        // relation nor part of the current removal.
        for (DBIDIter it = storage.get(iter).iter(); it.valid(); it.advance()) {
            if (!valid.contains(it) && !ids.contains(it)) {
                LOG.warning("False kNN: " + it);
            }
        }
        storage.delete(iter);
        // Same procedure for the RkNN set of the removed object.
        rkNNs.add(materialized_RkNN.get(iter));
        for (DoubleDBIDPair it : materialized_RkNN.get(iter)) {
            if (!valid.contains(it) && !ids.contains(it)) {
                LOG.warning("False RkNN: " + it);
            }
        }
        materialized_RkNN.delete(iter);
    }
    // Keep only those IDs not also removed
    ArrayDBIDs kNN_ids = affectedkNN(kNNs, aids);
    ArrayDBIDs rkNN_ids = affectedRkNN(rkNNs, aids);
    // update the affected kNNs and RkNNs
    getLogger().beginStep(stepprog, 2, "New deletions ocurred, update the affected kNNs and RkNNs.");
    // Recompute the kNN for affected objects (in rkNN lists)
    {
        List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
        // i walks kNNList in lock-step with rkNN_ids.
        // NOTE(review): assumes the bulk query returns results in the order
        // of rkNN_ids - confirm against the KNNQuery contract.
        int i = 0;
        for (DBIDIter reknn = rkNN_ids.iter(); reknn.valid(); reknn.advance(), i++) {
            // No result and the object is not in the relation anymore: log
            // and skip instead of storing a null list.
            if (kNNList.get(i) == null && !valid.contains(reknn)) {
                LOG.warning("BUG in online kNN/RkNN maintainance: " + DBIDUtil.toString(reknn) + " no longer in database.");
                continue;
            }
            assert (kNNList.get(i) != null);
            storage.put(reknn, kNNList.get(i));
            // Add an entry for reknn to the RkNN set of each of its new
            // neighbors (entry built by makePair, which is not shown here).
            for (DoubleDBIDListIter it = kNNList.get(i).iter(); it.valid(); it.advance()) {
                materialized_RkNN.get(it).add(makePair(it, reknn));
            }
        }
    }
    // remove objects from RkNNs of objects (in kNN lists)
    {
        SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
        for (DBIDIter nn = kNN_ids.iter(); nn.valid(); nn.advance()) {
            TreeSet<DoubleDBIDPair> rkNN = materialized_RkNN.get(nn);
            // Explicit iterator so that entries can be removed while iterating.
            for (Iterator<DoubleDBIDPair> it = rkNN.iterator(); it.hasNext(); ) {
                if (idsSet.contains(it.next())) {
                    it.remove();
                }
            }
        }
    }
    // inform listener
    getLogger().beginStep(stepprog, 3, "New deletions ocurred, inform listeners.");
    fireKNNsRemoved(ids, rkNN_ids);
    getLogger().ensureCompleted(stepprog);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) ArrayList(java.util.ArrayList) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) TreeSet(java.util.TreeSet) DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Example 37 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class MaterializeKNNAndRKNNPreprocessor method affectedkNN.

/**
 * Collects the distinct DBIDs occurring in the given kNN lists and drops
 * those that are listed for removal.
 *
 * @param extract the kNN lists whose member ids are collected
 * @param remove the ids to exclude from the result
 * @return the collected ids without the excluded ones, as an array
 */
protected ArrayDBIDs affectedkNN(List<? extends KNNList> extract, DBIDs remove) {
    final HashSetModifiableDBIDs affected = DBIDUtil.newHashSet();
    for (KNNList neighbors : extract) {
        DBIDIter member = neighbors.iter();
        while (member.valid()) {
            // The hash set takes care of ids that occur in several lists.
            affected.add(member);
            member.advance();
        }
    }
    // Ids that are removed themselves are not part of the result.
    affected.removeDBIDs(remove);
    // Callers expect an array-based id collection.
    return DBIDUtil.newArray(affected);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 38 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class PartitionApproximationMaterializeKNNPreprocessor method preprocess.

/**
 * Materializes approximate kNN lists: the ids of the relation are randomly
 * split into {@code partitions} parts, and for every object the k nearest
 * neighbors are determined among the objects of its own part only.
 *
 * Within a part, each distance between two different objects is computed
 * once: the result is cached under the reversed pair and consumed when the
 * loops reach that reversed pair.
 */
@Override
protected void preprocess() {
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    MeanVariance ksize = new MeanVariance();
    if (LOG.isVerbose()) {
        LOG.verbose("Approximating nearest neighbor lists to database objects");
    }
    // Produce a random shuffling of the IDs:
    ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions", partitions, LOG) : null;
    for (int part = 0; part < partitions; part++) {
        final ArrayDBIDs ids = parts[part];
        final int size = ids.size();
        // Capacity hint of 3/8 * size^2. Computed in long arithmetic because
        // size * size * 3 overflows int for large partitions; saturated at
        // the largest value the int expression could produce without overflow.
        final int expected = (int) Math.min(((long) size * size * 3) >> 3, Integer.MAX_VALUE >> 3);
        Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>(expected);
        // NaN marks "not cached"
        cache.defaultReturnValue(Double.NaN);
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            KNNHeap kNN = DBIDUtil.newHeap(k);
            for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
                if (DBIDUtil.equal(iter, iter2)) {
                    // The self distance is needed exactly once. Caching it
                    // would leave an entry behind that is never consumed.
                    kNN.insert(distanceQuery.distance(iter, iter2), iter2);
                    continue;
                }
                // Consume a previously cached result, if there is one.
                double d = cache.removeDouble(DBIDUtil.newPair(iter, iter2));
                if (Double.isNaN(d)) {
                    // Not cached: compute, and store with the keys reversed
                    // for the later visit of (iter2, iter).
                    d = distanceQuery.distance(iter, iter2);
                    cache.put(DBIDUtil.newPair(iter2, iter), d);
                }
                kNN.insert(d, iter2);
            }
            ksize.put(kNN.size());
            storage.put(iter, kNN.toKNNList());
        }
        // Every cached distance should have been consumed by now.
        if (LOG.isDebugging() && !cache.isEmpty()) {
            LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isVerbose()) {
        LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
}
Also used : Object2DoubleOpenHashMap(it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDPair(de.lmu.ifi.dbs.elki.database.ids.DBIDPair) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Example 39 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class RandomSampleKNNPreprocessor method preprocess.

/**
 * Materializes an approximate kNN list for every object of the relation.
 * For each object a fresh random sample of {@code (int) (size * share)} ids
 * is drawn, and the k nearest neighbors are chosen among the sampled objects
 * only.
 */
@Override
protected void preprocess() {
    final DistanceQuery<O> dq = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    // Progress is only tracked when the logger is verbose.
    FiniteProgress prog = null;
    if (getLogger().isVerbose()) {
        prog = new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger());
    }
    final ArrayDBIDs all = DBIDUtil.ensureArray(relation.getDBIDs());
    final int samplesize = (int) (all.size() * share);
    final Random random = rnd.getSingleThreadedRandom();
    DBIDIter query = all.iter();
    while (query.valid()) {
        final KNNHeap heap = DBIDUtil.newHeap(k);
        // A new sample is drawn for every query object.
        DBIDIter candidate = DBIDUtil.randomSample(all, samplesize, random).iter();
        while (candidate.valid()) {
            heap.insert(dq.distance(query, candidate), candidate);
            candidate.advance();
        }
        storage.put(query, heap.toKNNList());
        getLogger().incrementProcessed(prog);
        query.advance();
    }
    getLogger().ensureCompleted(prog);
}
Also used : Random(java.util.Random) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 40 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SharedNearestNeighborSimilarityFunction method countSharedNeighbors.

/**
 * Counts the ids that occur in both neighbor collections by walking the two
 * iterators in parallel, as in a merge step.
 *
 * @param neighbors1 SORTED neighbors of first
 * @param neighbors2 SORTED neighbors of second
 * @return number of ids contained in both collections
 */
protected static int countSharedNeighbors(DBIDs neighbors1, DBIDs neighbors2) {
    int shared = 0;
    final DBIDIter left = neighbors1.iter();
    final DBIDIter right = neighbors2.iter();
    while (left.valid() && right.valid()) {
        final int order = DBIDUtil.compare(left, right);
        if (order == 0) {
            shared++;
        }
        // Advance the side(s) holding the smaller id; both on a match.
        if (order <= 0) {
            left.advance();
        }
        if (order >= 0) {
            right.advance();
        }
    }
    return shared;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21