use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class FastABOD method run.
/**
 * Compute the Fast-ABOD score of every object in the relation.
 * <p>
 * For each object, only the entries kept by a kNN heap of size {@code k}
 * (filled with squared kernel-space distances) are paired up to compute the
 * variance that is stored as the object's score.
 *
 * @param db Database used to obtain the similarity query
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
  DBIDs ids = relation.getDBIDs();
  // Build the kernel matrix once; every similarity below is read from it.
  SimilarityQuery<V> simQuery = db.getSimilarityQuery(relation, kernelFunction);
  KernelMatrix kernel = new KernelMatrix(simQuery, relation, ids);

  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  DoubleMinMax scoreRange = new DoubleMinMax();
  // Accumulator and heap are reused across objects (reset / cleared per object).
  MeanVariance stats = new MeanVariance();
  KNNHeap heap = DBIDUtil.newHeap(k);

  for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
    final double simAA = kernel.getSimilarity(query, query);

    // Phase 1: offer every other object to the kNN heap.
    heap.clear();
    for (DBIDIter cand = relation.iterDBIDs(); cand.valid(); cand.advance()) {
      if (!DBIDUtil.equal(cand, query)) {
        final double simBB = kernel.getSimilarity(cand, cand);
        final double simAB = kernel.getSimilarity(query, cand);
        // Squared distance in kernel space: <A,A> + <B,B> - 2 <A,B>
        final double sqDist = simAA + simBB - simAB - simAB;
        // Positive test on purpose: zero, negative and NaN are all rejected.
        if (sqDist > 0.) {
          heap.insert(sqDist, cand);
        }
      }
    }

    // Phase 2: weighted statistics over all unordered neighbor pairs (B, C).
    KNNList neighbors = heap.toKNNList();
    stats.reset();
    DoubleDBIDListIter first = neighbors.iter(), second = neighbors.iter();
    for (; first.valid(); first.advance()) {
      final double sqdAB = first.doubleValue();
      final double simAB = kernel.getSimilarity(query, first);
      if (sqdAB > 0.) {
        // Start behind "first" so that each pair is visited only once.
        for (second.seek(first.getOffset() + 1); second.valid(); second.advance()) {
          final double sqdAC = second.doubleValue();
          final double simAC = kernel.getSimilarity(query, second);
          if (sqdAC > 0.) {
            // By bilinearity of the scalar product:
            // <B-A, C-A> = <B,C> - <B,A> - <A,C> + <A,A>
            final double simBC = kernel.getSimilarity(first, second);
            final double numerator = simBC - simAB - simAC + simAA;
            final double div = 1. / (sqdAB * sqdAC);
            stats.put(numerator * div, FastMath.sqrt(div));
          }
        }
      }
    }

    // The naive variance is used because that is what the ABOD publication
    // does, although the sample variance would probably be the better choice.
    final double abof = stats.getNaiveVariance();
    scoreRange.put(abof);
    scores.putDouble(query, abof);
  }

  // Wrap the scores into the result objects.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY);
  return new OutlierResult(meta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class HiSCPreferenceVectorIndex method initialize.
/**
 * Compute the preference vector of every object in the relation and keep it
 * in {@code storage}.
 * <p>
 * Debug output for all objects is collected in a single buffer and logged
 * once at the end; the total runtime is logged when verbose.
 *
 * @throws EmptyDataException if there is no relation or it has no objects
 */
@Override
public void initialize() {
  if (relation == null || relation.size() <= 0) {
    throw new EmptyDataException();
  }
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);

  StringBuilder debugLog = new StringBuilder();
  final long begin = System.currentTimeMillis();
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Preprocessing preference vector", relation.size(), LOG);
  }
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k);

  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    if (LOG.isDebugging()) {
      debugLog.append("\n\nid = ").append(DBIDUtil.toString(iter)).append("\n knns: ");
    }
    // The preference vector is derived from the k nearest neighbors.
    KNNList neighbors = knnQuery.getKNNForDBID(iter, k);
    long[] prefVector = determinePreferenceVector(relation, iter, neighbors, debugLog);
    storage.put(iter, prefVector);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  if (LOG.isDebugging()) {
    LOG.debugFine(debugLog.toString());
  }
  final long finish = System.currentTimeMillis();
  // TODO: re-add timing code!
  if (LOG.isVerbose()) {
    LOG.verbose(this.getClass().getName() + " runtime: " + (finish - begin) + " milliseconds.");
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class RdKNNTree method preInsert.
/**
* Adapts the knn distances before insertion of entry q.
* <p>
* Walks the subtree below {@code nodeEntry} recursively. In leaf nodes, the
* stored entries are offered to the heap {@code knns_q} as neighbor
* candidates of q, and the stored knn distance of an entry is updated when q
* lies within it. On the way back up, the knn distance of {@code nodeEntry}
* is set to the maximum knn distance of the entries of its node.
* <p>
* Both {@code knns_q} and the visited entries are modified in place.
*
* @param q the entry to be inserted; cast to {@code LeafEntry} to obtain its
*        DBID
* @param nodeEntry the entry representing the root of the current subtree
* @param knns_q the knns of q
*/
private void preInsert(RdKNNEntry q, RdKNNEntry nodeEntry, KNNHeap knns_q) {
// Current knn distance of q; refreshed below whenever the heap may have
// changed.
double knnDist_q = knns_q.getKNNDistance();
RdKNNNode node = getNode(nodeEntry);
// Running maximum of the knn distances of this node's entries.
double knnDist_node = 0.;
// leaf node
if (node.isLeaf()) {
for (int i = 0; i < node.getNumEntries(); i++) {
RdKNNLeafEntry p = (RdKNNLeafEntry) node.getEntry(i);
double dist_pq = distanceQuery.distance(p.getDBID(), ((LeafEntry) q).getDBID());
// ==> p becomes a knn-candidate
if (dist_pq <= knnDist_q) {
knns_q.insert(dist_pq, p.getDBID());
// Only once the heap holds at least k_max entries is its knn distance
// stored in q.
if (knns_q.size() >= settings.k_max) {
knnDist_q = knns_q.getKNNDistance();
q.setKnnDistance(knnDist_q);
}
}
// q becomes knn of p
if (dist_pq <= p.getKnnDistance()) {
// Query the k_max nearest neighbors of p again; judging by the variable
// name, q is presumably not yet part of this result - TODO confirm.
O obj = relation.get(p.getDBID());
KNNList knns_without_q = knnQuery.getKNNForObject(obj, settings.k_max);
// Even with one more neighbor there would be fewer than k_max: the knn
// distance of p is set to NaN.
if (knns_without_q.size() + 1 < settings.k_max) {
p.setKnnDistance(Double.NaN);
} else {
// Otherwise use the smaller of the distance of the last list entry and
// the distance to q.
double knnDist_p = Math.min(knns_without_q.get(knns_without_q.size() - 1).doubleValue(), dist_pq);
p.setKnnDistance(knnDist_p);
}
}
// NOTE(review): Math.max returns NaN if either argument is NaN, so a NaN
// knn distance assigned above propagates into knnDist_node.
knnDist_node = Math.max(knnDist_node, p.getKnnDistance());
}
} else // directory node
{
O obj = relation.get(((LeafEntry) q).getDBID());
// Pairs of a double and a child entry; presumably the distance from obj to
// the child, in ascending order - TODO confirm against getSortedEntries.
List<DoubleObjPair<RdKNNEntry>> entries = getSortedEntries(node, obj, settings.distanceFunction);
for (DoubleObjPair<RdKNNEntry> distEntry : entries) {
RdKNNEntry entry = distEntry.second;
double entry_knnDist = entry.getKnnDistance();
// Descend only if the pair's value is below the child's knn distance or
// below the current knn distance of q.
if (distEntry.first < entry_knnDist || distEntry.first < knnDist_q) {
preInsert(q, entry, knns_q);
// The recursion may have inserted into the heap.
knnDist_q = knns_q.getKNNDistance();
}
// Read the knn distance again instead of using entry_knnDist: the
// recursive call sets it on the entry it was given.
knnDist_node = Math.max(knnDist_node, entry.getKnnDistance());
}
}
nodeEntry.setKnnDistance(knnDist_node);
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class CacheDoubleDistanceKNNLists method run.
/**
 * Compute the kNN of every object and write them to the cache file
 * {@code out}.
 * <p>
 * Data written, in this order: the {@code KNN_CACHE_MAGIC} int, then one
 * record per object consisting of the object index (unsigned varint), the
 * number of neighbors (unsigned varint) and, for each neighbor, its index
 * (unsigned varint) followed by its distance (double).
 * <p>
 * The file is locked while it is written. File, channel and lock are closed
 * by the try-with-resources statement. An {@link IOException} is reported via
 * {@code LOG.exception} and not rethrown.
 *
 * @throws AbortException if the number of neighbors written for an object
 *         differs from the size of its kNN list
 */
@Override
public void run() {
  database.initialize();
  Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
  DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
  KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);
  // open file.
  try (RandomAccessFile file = new RandomAccessFile(out, "rw");
      FileChannel channel = file.getChannel();
      // and acquire a file write lock
      FileLock lock = channel.lock()) {
    // write magic header
    file.writeInt(KNN_CACHE_MAGIC);
    // Initial size, enough for 2 kNN.
    // NOTE(review): the estimate budgets 12 bytes per neighbor (8 for the
    // double, which leaves 4 for the varint index) plus 10 bytes for the
    // record header. A varint of a full 32-bit value presumably needs 5
    // bytes - confirm against ByteArrayUtil for very large indexes.
    int bufsize = k * 12 * 2 + 10;
    ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final KNNList nn = knnQ.getKNNForDBID(it, k);
      final int nnsize = nn.size();
      // Grow the buffer when needed (the result may have more than k entries):
      if (nnsize * 12 + 10 > bufsize) {
        while (nnsize * 12 + 10 > bufsize) {
          bufsize <<= 1;
        }
        buffer = ByteBuffer.allocateDirect(bufsize);
      }
      buffer.clear();
      ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
      ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
      int c = 0;
      for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) {
        ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
        buffer.putDouble(ni.doubleValue());
      }
      // The neighbor count was already written into the record header, so a
      // mismatch would make the record unreadable.
      if (c != nn.size()) {
        throw new AbortException("Sizes did not agree. Cache is invalid.");
      }
      buffer.flip();
      // A single write() call is allowed to consume only part of the buffer;
      // keep writing until the whole record is on the channel.
      while (buffer.hasRemaining()) {
        channel.write(buffer);
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Release explicitly; closing the lock resource afterwards is then a
    // no-op.
    lock.release();
  } catch (IOException e) {
    LOG.exception(e);
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class DWOF method initializeRadii.
/**
 * Fills {@code radii} with the initial radius of every object:
 *
 * <pre>
 * radius(o) = absoluteMinDist * avgDist(o) / minAvgDist
 * </pre>
 *
 * Here {@code avgDist(o)} is the mean distance over all ordered pairs of
 * distinct kNN of o (o itself excluded), {@code minAvgDist} is the smallest
 * such mean over all objects, and {@code absoluteMinDist} is the smallest
 * strictly positive pairwise distance seen while computing the means. If
 * {@code minAvgDist} is not positive, every radius is set to positive
 * infinity instead.
 *
 * @param ids Database IDs to process
 * @param knnq kNN search function
 * @param distFunc Distance function
 * @param radii WritableDoubleDataStore to store radii
 */
private void initializeRadii(DBIDs ids, KNNQuery<O> knnq, DistanceQuery<O> distFunc, WritableDoubleDataStore radii) {
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Calculating average kNN distances-", ids.size(), LOG);
  }
  double smallestDist = Double.POSITIVE_INFINITY;
  double smallestAvg = Double.POSITIVE_INFINITY;
  // Reused accumulator for the per-object mean.
  Mean pairMean = new Mean();

  // Pass 1: store the average neighbor-pair distance of each object in radii.
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    KNNList knn = knnq.getKNNForDBID(obj, k);
    pairMean.reset();
    for (DBIDIter a = knn.iter(); a.valid(); a.advance()) {
      // The object itself does not take part in any pair.
      if (DBIDUtil.equal(a, obj)) {
        continue;
      }
      for (DBIDIter b = knn.iter(); b.valid(); b.advance()) {
        if (!DBIDUtil.equal(a, b) && !DBIDUtil.equal(b, obj)) {
          final double d = distFunc.distance(a, b);
          pairMean.put(d);
          // Zero distances are not candidates for the global minimum.
          if (d > 0. && d < smallestDist) {
            smallestDist = d;
          }
        }
      }
    }
    final double avg = pairMean.getMean();
    radii.putDouble(obj, avg);
    if (avg < smallestAvg) {
      smallestAvg = avg;
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Pass 2: rescale the stored averages into the initial radii.
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    final double radius;
    if (smallestAvg > 0) {
      radius = smallestDist * radii.doubleValue(obj) / smallestAvg;
    } else {
      radius = Double.POSITIVE_INFINITY;
    }
    radii.putDouble(obj, radius);
  }
}
Aggregations