use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
the class PartitionApproximationMaterializeKNNPreprocessor method preprocess.
/**
 * Materializes approximate k nearest neighbor lists.
 *
 * The object IDs are randomly split into {@code partitions} parts, and the
 * neighbors of each object are searched only among the objects of its own
 * part. Inside a part, the distance of an ordered pair (a, b) is computed when
 * the pair is first visited and cached under the reversed key (b, a), so that
 * the later visit of (b, a) consumes the cached value instead of recomputing
 * it.
 */
@Override
protected void preprocess() {
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  MeanVariance ksize = new MeanVariance();
  if (LOG.isVerbose()) {
    LOG.verbose("Approximating nearest neighbor lists to database objects");
  }
  // Produce a random shuffling of the IDs:
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions", partitions, LOG) : null;
  for (int part = 0; part < partitions; part++) {
    final ArrayDBIDs ids = parts[part];
    final int size = ids.size();
    // Capacity hint of 3/8 * size^2. Computed in 64 bit, because the product
    // overflows an int for partitions of more than roughly 26,000 objects and
    // would then yield a negative or otherwise meaningless hint. The clamp is
    // the largest value the 32-bit expression could produce without
    // overflowing, so smaller partitions get exactly the same hint as before.
    final long wanted = (((long) size) * size * 3) >> 3;
    final int expected = (int) Math.min(wanted, (long) (Integer.MAX_VALUE >> 3));
    Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>(expected);
    // NaN marks "not cached"; see the lookup below.
    cache.defaultReturnValue(Double.NaN);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      KNNHeap kNN = DBIDUtil.newHeap(k);
      for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
        if (DBIDUtil.equal(iter, iter2)) {
          // A self-pair is visited exactly once, so a cached value would
          // never be consumed and would linger in the cache (triggering the
          // "should be empty" warning below on every partition).
          kNN.insert(distanceQuery.distance(iter, iter2), iter2);
          continue;
        }
        DBIDPair key = DBIDUtil.newPair(iter, iter2);
        double d = cache.removeDouble(key);
        if (!Double.isNaN(d)) {
          // consume the previous result.
          kNN.insert(d, iter2);
        } else {
          // compute new and store the previous result.
          d = distanceQuery.distance(iter, iter2);
          kNN.insert(d, iter2);
          // put it into the cache, but with the keys reversed
          key = DBIDUtil.newPair(iter2, iter);
          cache.put(key, d);
        }
      }
      ksize.put(kNN.size());
      storage.put(iter, kNN.toKNNList());
    }
    if (LOG.isDebugging() && cache.size() > 0) {
      LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  if (LOG.isVerbose()) {
    LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
the class RandomSampleKNNPreprocessor method preprocess.
/**
 * Fills the kNN storage using random sampling.
 *
 * For every object of the relation, a new random sample of
 * {@code (int) (size * share)} objects is drawn, the distance from the object
 * to each sampled object is computed, and the {@code k} best of them are kept
 * as the object's neighbor list.
 */
@Override
protected void preprocess() {
  final DistanceQuery<O> dq = relation.getDistanceQuery(distanceFunction);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);

  // Progress is only tracked when verbose logging is on.
  FiniteProgress prog = null;
  if (getLogger().isVerbose()) {
    prog = new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger());
  }

  final ArrayDBIDs all = DBIDUtil.ensureArray(relation.getDBIDs());
  final int samplesize = (int) (all.size() * share);
  final Random random = rnd.getSingleThreadedRandom();

  DBIDIter query = all.iter();
  while (query.valid()) {
    final KNNHeap heap = DBIDUtil.newHeap(k);
    // Draw a fresh sample for this query object.
    final DBIDs sample = DBIDUtil.randomSample(all, samplesize, random);
    DBIDIter cand = sample.iter();
    while (cand.valid()) {
      heap.insert(dq.distance(query, cand), cand);
      cand.advance();
    }
    storage.put(query, heap.toKNNList());
    getLogger().incrementProcessed(prog);
    query.advance();
  }
  getLogger().ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
the class MetricalIndexKNNQuery method getKNNForObject.
/**
 * Best-first k nearest neighbor search for a query object.
 *
 * Tree nodes are taken from a priority queue ordered by the candidates'
 * minimum distance. Entries are first tested against the current kNN distance
 * using the stored parent distances, and only entries passing this test cause
 * an actual distance computation.
 *
 * @param q query object
 * @param k number of neighbors requested, at least 1
 * @return the neighbors collected in the heap, as a kNN list
 * @throws IllegalArgumentException if {@code k < 1}
 */
@Override
public KNNList getKNNForObject(O q, int k) {
  if (k < 1) {
    throw new IllegalArgumentException("At least one object has to be requested!");
  }
  index.statistics.countKNNQuery();

  final KNNHeap heap = DBIDUtil.newHeap(k);
  // Current pruning distance; updated after each insertion into the heap.
  double kDist = Double.POSITIVE_INFINITY;

  final ComparableMinHeap<MTreeSearchCandidate> queue = new ComparableMinHeap<>();
  // Start the search at the root, which has no routing object.
  queue.add(new MTreeSearchCandidate(0., index.getRootID(), null, 0.));

  while (!queue.isEmpty()) {
    final MTreeSearchCandidate cand = queue.poll();
    // Stop once the heap is full and the closest remaining node is too far.
    if (heap.size() >= k && cand.mindist > kDist) {
      break;
    }

    final AbstractMTreeNode<?, ?, ?> node = index.getNode(cand.nodeID);
    final DBID parentId = cand.routingObjectID;
    final double queryToParent = cand.routingDistance;

    if (node.isLeaf()) {
      // Data node: entries are candidate objects.
      for (int pos = 0; pos < node.getNumEntries(); pos++) {
        final MTreeEntry leafEntry = node.getEntry(pos);
        final DBID objectId = leafEntry.getRoutingObjectID();
        final double toParent = (parentId == null) ? 0. : leafEntry.getParentDistance();
        final double gap = Math.abs(queryToParent - toParent);
        // Skip the entry without computing a distance.
        if (!(gap <= kDist)) {
          continue;
        }
        final double dist = distanceQuery.distance(objectId, q);
        index.statistics.countDistanceCalculation();
        if (dist <= kDist) {
          heap.insert(dist, objectId);
          kDist = heap.getKNNDistance();
        }
      }
    } else {
      // Directory node: entries describe subtrees with a covering radius.
      for (int pos = 0; pos < node.getNumEntries(); pos++) {
        final MTreeEntry dirEntry = node.getEntry(pos);
        final DBID routingId = dirEntry.getRoutingObjectID();
        final double radius = dirEntry.getCoveringRadius();
        final double toParent = (parentId == null) ? 0. : dirEntry.getParentDistance();
        final double gap = Math.abs(queryToParent - toParent);
        final double reach = kDist + radius;
        // Skip the subtree without computing a distance.
        if (!(gap <= reach)) {
          continue;
        }
        final double dist = distanceQuery.distance(routingId, q);
        index.statistics.countDistanceCalculation();
        final double minDist = Math.max(dist - radius, 0.);
        if (minDist <= kDist) {
          queue.add(new MTreeSearchCandidate(minDist, ((DirectoryEntry) dirEntry).getPageID(), routingId, dist));
        }
      }
    }
  }
  return heap.toKNNList();
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
the class LBABOD method run.
/**
 * Run LB-ABOD on the data set.
 *
 * First pass: for every object a lower-bound score is computed from its
 * {@code k} nearest neighbors plus a remainder term, and the object is added to
 * a candidate list with that score. Second pass: candidates are visited in
 * sorted order and their score is replaced by the value of
 * {@code computeABOF}, until the stop condition on the tracked top scores
 * holds.
 *
 * @param db Database used to obtain the similarity query
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  DBIDArrayIter pB = ids.iter(), pC = ids.iter();
  SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
  KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
  // Output storage.
  WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmaxabod = new DoubleMinMax();
  double max = 0.;
  // Storage for squared distances (will be reused!)
  // NOTE(review): written below, but never read inside this method.
  WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  // Nearest neighbor heap (will be reused!)
  KNNHeap nn = DBIDUtil.newHeap(k);
  // Priority queue for candidates
  ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(relation.size());
  // get Candidate Ranking
  for (DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
    // Compute nearest neighbors and distances.
    nn.clear();
    double simAA = kernelMatrix.getSimilarity(pA, pA);
    // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
    double sumid = 0., sumisqd = 0.;
    for (pB.seek(0); pB.valid(); pB.advance()) {
      if (DBIDUtil.equal(pB, pA)) {
        continue;
      }
      double simBB = kernelMatrix.getSimilarity(pB, pB);
      double simAB = kernelMatrix.getSimilarity(pA, pB);
      double sqdAB = simAA + simBB - simAB - simAB;
      sqDists.putDouble(pB, sqdAB);
      // Update heap
      nn.insert(sqdAB, pB);
      // Objects at a non-positive (or NaN) squared distance are skipped by
      // the neighbor loops below; they must not enter the weight sums either,
      // as 1/0 would make the sums infinite and the lower bound NaN.
      if (!(sqdAB > 0.)) {
        continue;
      }
      final double isqdAB = 1. / sqdAB;
      sumid += FastMath.sqrt(isqdAB);
      sumisqd += isqdAB;
    }
    // Compute FastABOD approximation, adjust for lower bound.
    // LB-ABOF is defined via a numerically unstable formula.
    // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
    // TODO: ensure numerical precision!
    double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
    KNNList nl = nn.toKNNList();
    DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
    for (; iB.valid(); iB.advance()) {
      double sqdAB = iB.doubleValue();
      double simAB = kernelMatrix.getSimilarity(pA, iB);
      if (!(sqdAB > 0.)) {
        continue;
      }
      // Only pairs (B, C) with C after B in the neighbor list.
      for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
        double sqdAC = iC.doubleValue();
        double simAC = kernelMatrix.getSimilarity(pA, iC);
        if (!(sqdAC > 0.)) {
          continue;
        }
        // Exploit bilinearity of scalar product:
        // <B-A, C-A> = <B, C-A> - <A,C-A>
        // = <B,C> - <B,A> - <A,C> + <A,A>
        double simBC = kernelMatrix.getSimilarity(iB, iC);
        double numerator = simBC - simAB - simAC + simAA;
        double sqweight = 1. / (sqdAB * sqdAC);
        double weight = FastMath.sqrt(sqweight);
        double val = numerator * sqweight;
        nnsum += val * weight;
        nnsumsq += val * val * weight;
        nnsumisqd += sqweight;
      }
    }
    // Remaining weight, term R2:
    double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
    double tmp = (2. * nnsum + r2) / (sumid * sumid);
    double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
    // Track maximum?
    if (lbabof > max) {
      max = lbabof;
    }
    abodvalues.putDouble(pA, lbabof);
    candidates.add(lbabof, pA);
  }
  // Put maximum from approximate values.
  minmaxabod.put(max);
  candidates.sort();
  // refine Candidates
  int refinements = 0;
  // NOTE(review): the heap is constructed with l, but its fill level is
  // compared against k below, and peek() of a min-heap is the smallest
  // tracked score - confirm that both are intended.
  DoubleMinHeap topscores = new DoubleMinHeap(l);
  MeanVariance s = new MeanVariance();
  for (DoubleDBIDListIter pA = candidates.iter(); pA.valid(); pA.advance()) {
    // Stop refining
    if (topscores.size() >= k && pA.doubleValue() > topscores.peek()) {
      break;
    }
    final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
    // Store refined score:
    abodvalues.putDouble(pA, abof);
    minmaxabod.put(abof);
    // Update the heap tracking the top scores.
    if (topscores.size() < k) {
      topscores.add(abof);
    } else {
      if (topscores.peek() > abof) {
        topscores.replaceTopElement(abof);
      }
    }
    refinements += 1;
  }
  if (LOG.isStatistics()) {
    // NOTE(review): this changes the logging configuration as a side effect
    // of running the algorithm - presumably a debugging leftover; confirm.
    LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
    LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
  }
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
  OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
  return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
the class RdKNNTree method preInsert.
/**
 * Hook executed ahead of the insertion of an entry: creates a new kNN heap of
 * size {@code settings.k_max} and delegates to the three-argument
 * {@code preInsert}, starting at the root entry.
 *
 * @param entry the entry to be inserted
 */
@Override
protected void preInsert(RdKNNEntry entry) {
  final KNNHeap neighborHeap = DBIDUtil.newHeap(settings.k_max);
  preInsert(entry, getRootEntry(), neighborHeap);
}
Aggregations