Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class MaterializeKNNAndRKNNPreprocessor, method objectsRemoved.
@Override
protected void objectsRemoved(DBIDs ids) {
  StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;
  // For debugging: valid DBIDs still in the database.
  final DBIDs valid = DBIDUtil.ensureSet(distanceQuery.getRelation().getDBIDs());
  ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
  // delete the materialized (old) kNNs and RkNNs
  getLogger().beginStep(stepprog, 1, "New deletions occurred, remove their materialized kNNs and RkNNs.");
  // Temporary storage of removed lists
  List<KNNList> kNNs = new ArrayList<>(ids.size());
  List<TreeSet<DoubleDBIDPair>> rkNNs = new ArrayList<>(ids.size());
  for (DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) {
    kNNs.add(storage.get(iter));
    for (DBIDIter it = storage.get(iter).iter(); it.valid(); it.advance()) {
      if (!valid.contains(it) && !ids.contains(it)) {
        LOG.warning("False kNN: " + it);
      }
    }
    storage.delete(iter);
    rkNNs.add(materialized_RkNN.get(iter));
    for (DoubleDBIDPair it : materialized_RkNN.get(iter)) {
      if (!valid.contains(it) && !ids.contains(it)) {
        LOG.warning("False RkNN: " + it);
      }
    }
    materialized_RkNN.delete(iter);
  }
  // Keep only those IDs not also removed
  ArrayDBIDs kNN_ids = affectedkNN(kNNs, aids);
  ArrayDBIDs rkNN_ids = affectedRkNN(rkNNs, aids);
  // update the affected kNNs and RkNNs
  getLogger().beginStep(stepprog, 2, "New deletions occurred, update the affected kNNs and RkNNs.");
  // Recompute the kNN for affected objects (in rkNN lists)
  {
    List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
    int i = 0;
    for (DBIDIter reknn = rkNN_ids.iter(); reknn.valid(); reknn.advance(), i++) {
      if (kNNList.get(i) == null && !valid.contains(reknn)) {
        LOG.warning("BUG in online kNN/RkNN maintenance: " + DBIDUtil.toString(reknn) + " no longer in database.");
        continue;
      }
      assert (kNNList.get(i) != null);
      storage.put(reknn, kNNList.get(i));
      for (DoubleDBIDListIter it = kNNList.get(i).iter(); it.valid(); it.advance()) {
        materialized_RkNN.get(it).add(makePair(it, reknn));
      }
    }
  }
  // remove objects from RkNNs of objects (in kNN lists)
  {
    SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
    for (DBIDIter nn = kNN_ids.iter(); nn.valid(); nn.advance()) {
      TreeSet<DoubleDBIDPair> rkNN = materialized_RkNN.get(nn);
      for (Iterator<DoubleDBIDPair> it = rkNN.iterator(); it.hasNext(); ) {
        if (idsSet.contains(it.next())) {
          it.remove();
        }
      }
    }
  }
  // inform listener
  getLogger().beginStep(stepprog, 3, "New deletions occurred, inform listeners.");
  fireKNNsRemoved(ids, rkNN_ids);
  getLogger().ensureCompleted(stepprog);
}
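Every snippet on this page uses the same DBIDIter forward-iteration idiom: obtain an iterator from a DBIDs collection, test valid(), and advance() manually instead of using java.util.Iterator. A minimal standalone sketch of that idiom, assuming only the ELKI library on the classpath (the class name DBIDIterSketch and the count helper are hypothetical):

import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;

public final class DBIDIterSketch {
  // Count the entries of an arbitrary DBIDs collection using the idiom.
  public static int count(DBIDs ids) {
    int n = 0;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      // The iterator itself acts as a reference to the current object id, so it
      // can be passed directly to APIs such as storage.get(iter) above.
      n++;
    }
    return n;
  }
}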
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class MaterializeKNNAndRKNNPreprocessor, method affectedkNN.
/**
 * Extracts the DBIDs occurring in the given kNN lists and removes the deleted ids from the result.
 *
 * @param extract a list of kNN lists to extract the DBIDs from
 * @param remove the ids to remove
 * @return the DBIDs occurring in the given lists, excluding the removed ones
 */
protected ArrayDBIDs affectedkNN(List<? extends KNNList> extract, DBIDs remove) {
  HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
  for (KNNList drps : extract) {
    for (DBIDIter iter = drps.iter(); iter.valid(); iter.advance()) {
      ids.add(iter);
    }
  }
  ids.removeDBIDs(remove);
  // Convert back to array
  return DBIDUtil.newArray(ids);
}
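The sibling helper affectedRkNN, invoked from objectsRemoved above but not shown on this page, can be sketched along the same lines. This is an illustrative reconstruction, not the verbatim ELKI source; it assumes that DoubleDBIDPair is usable as a DBIDRef and can therefore be added to a HashSetModifiableDBIDs directly:

/**
 * Illustrative sketch: extract the DBIDs referenced in the given RkNN sets,
 * minus the removed ids (assumes DoubleDBIDPair can be used as a DBIDRef).
 */
protected ArrayDBIDs affectedRkNN(List<? extends Collection<DoubleDBIDPair>> extract, DBIDs remove) {
  HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
  for (Collection<DoubleDBIDPair> pairs : extract) {
    for (DoubleDBIDPair pair : pairs) {
      ids.add(pair);
    }
  }
  ids.removeDBIDs(remove);
  // Convert back to array
  return DBIDUtil.newArray(ids);
}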
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class PartitionApproximationMaterializeKNNPreprocessor, method preprocess.
@Override
protected void preprocess() {
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  MeanVariance ksize = new MeanVariance();
  if (LOG.isVerbose()) {
    LOG.verbose("Approximating nearest neighbor lists to database objects");
  }
  // Randomly split the IDs into the requested number of partitions:
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions", partitions, LOG) : null;
  for (int part = 0; part < partitions; part++) {
    final ArrayDBIDs ids = parts[part];
    final int size = ids.size();
    Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 3);
    cache.defaultReturnValue(Double.NaN);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      KNNHeap kNN = DBIDUtil.newHeap(k);
      for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
        DBIDPair key = DBIDUtil.newPair(iter, iter2);
        double d = cache.removeDouble(key);
        if (d == d) {
          // Not NaN: consume the previously computed distance.
          kNN.insert(d, iter2);
        } else {
          // Compute the new distance and store it for the mirrored pair.
          d = distanceQuery.distance(iter, iter2);
          kNN.insert(d, iter2);
          // put it into the cache, but with the keys reversed
          key = DBIDUtil.newPair(iter2, iter);
          cache.put(key, d);
        }
      }
      ksize.put(kNN.size());
      storage.put(iter, kNN.toKNNList());
    }
    if (LOG.isDebugging() && cache.size() > 0) {
      LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  if (LOG.isVerbose()) {
    LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
  }
}
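The inner double loop above avoids computing each symmetric distance twice: the first time a pair is visited, the distance is computed and cached under the reversed key, and the mirrored visit consumes (removes) the cached value, so the cache drains itself over the partition. A standalone sketch of that idiom, using string keys and a toy 1-d distance instead of DBIDPair and a DistanceQuery (the class name and data are illustrative; fastutil is assumed on the classpath):

import it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap;

public class SymmetricCacheSketch {
  public static void main(String[] args) {
    double[] points = { 0.0, 1.0, 3.0 };
    final int n = points.length;
    Object2DoubleOpenHashMap<String> cache = new Object2DoubleOpenHashMap<>();
    cache.defaultReturnValue(Double.NaN); // NaN marks "not cached", as above
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        double d = cache.removeDouble(i + "," + j);
        if (d != d) { // NaN: not cached yet, so compute it now
          d = Math.abs(points[i] - points[j]);
          if (i != j) { // the diagonal has no mirrored visit to consume it
            cache.put(j + "," + i, d);
          }
        }
        System.out.println("d(" + i + "," + j + ") = " + d);
      }
    }
    System.out.println("entries left in cache: " + cache.size()); // prints 0
  }
}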
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class RandomSampleKNNPreprocessor, method preprocess.
@Override
protected void preprocess() {
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int samplesize = (int) (ids.size() * share);
  Random random = rnd.getSingleThreadedRandom();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    KNNHeap kNN = DBIDUtil.newHeap(k);
    DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, random);
    for (DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) {
      double dist = distanceQuery.distance(iter, iter2);
      kNN.insert(dist, iter2);
    }
    storage.put(iter, kNN.toKNNList());
    getLogger().incrementProcessed(progress);
  }
  getLogger().ensureCompleted(progress);
}
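Note that share is the fraction of the relation to sample, and a fresh random sample is drawn for every object inside the loop. As a worked example with illustrative numbers only: with share = 0.1 on a relation of 10,000 objects, samplesize = (int) (10000 * 0.1) = 1000, so each object's approximate kNN is computed against a different random subset of 1,000 objects, roughly 10% of the distance computations of an exact materialization.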
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class SharedNearestNeighborSimilarityFunction, method countSharedNeighbors.
/**
 * Compute the intersection size of two neighbor lists.
 *
 * @param neighbors1 SORTED neighbors of the first object
 * @param neighbors2 SORTED neighbors of the second object
 * @return Intersection size
 */
protected static int countSharedNeighbors(DBIDs neighbors1, DBIDs neighbors2) {
  int intersection = 0;
  DBIDIter iter1 = neighbors1.iter();
  DBIDIter iter2 = neighbors2.iter();
  while (iter1.valid() && iter2.valid()) {
    final int comp = DBIDUtil.compare(iter1, iter2);
    if (comp == 0) {
      intersection++;
      iter1.advance();
      iter2.advance();
    } else if (comp < 0) {
      iter1.advance();
    } else {
      // iter2 < iter1
      iter2.advance();
    }
  }
  return intersection;
}
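Because both neighbor lists are sorted by DBID, the loop above is a plain two-pointer merge running in O(|neighbors1| + |neighbors2|). A standalone illustration of the same merge on int arrays in place of sorted DBIDs (class and method names are made up for the example):

public class SharedNeighborsSketch {
  // Two-pointer merge: both arrays must be sorted in ascending order.
  static int countShared(int[] a, int[] b) {
    int i = 0, j = 0, intersection = 0;
    while (i < a.length && j < b.length) {
      if (a[i] == b[j]) {
        intersection++;
        i++;
        j++;
      } else if (a[i] < b[j]) {
        i++;
      } else {
        j++;
      }
    }
    return intersection;
  }

  public static void main(String[] args) {
    // {1, 3, 5, 7} and {3, 4, 5, 9} share {3, 5}, so this prints 2.
    System.out.println(countShared(new int[] { 1, 3, 5, 7 }, new int[] { 3, 4, 5, 9 }));
  }
}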