Use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
The class LinearScanEuclideanDistanceKNNQuery, method linearScanBatchKNN.
/**
 * Perform a linear scan batch kNN for primitive distance functions.
 *
 * @param objs Objects list
 * @param heaps Output kNN heaps, one per query object
 */
@Override
protected void linearScanBatchKNN(List<O> objs, List<KNNHeap> heaps) {
  final SquaredEuclideanDistanceFunction squared = SquaredEuclideanDistanceFunction.STATIC;
  final Relation<? extends O> relation = getRelation();
  final int size = objs.size();
  // Linear scan style kNN: one pass over the relation, updating all heaps.
  for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
    O candidate = relation.get(iter);
    for (int index = 0; index < size; index++) {
      final KNNHeap heap = heaps.get(index);
      final double dist = squared.distance(objs.get(index), candidate);
      if (dist <= heap.getKNNDistance()) {
        heap.insert(dist, iter);
      }
    }
  }
}
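For orientation, the key trick above is the bounded insert: KNNHeap.getKNNDistance() returns the current k-th smallest distance (infinite while the heap holds fewer than k entries), so candidates that cannot enter the top k are skipped without touching the heap. Below is a minimal single-query sketch of the same pattern, using only the API calls shown on this page and assuming a hypothetical query object `query` over the same relation:

// Minimal sketch (assumptions: `relation` holds the data, `query` is the query object, k is given).
KNNHeap heap = DBIDUtil.newHeap(k);
final SquaredEuclideanDistanceFunction squared = SquaredEuclideanDistanceFunction.STATIC;
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
  final double dist = squared.distance(query, relation.get(iter));
  // Only insert if the candidate can still make it into the current top k.
  if (dist <= heap.getKNNDistance()) {
    heap.insert(dist, iter);
  }
}
KNNList knn = heap.toKNNList(); // distances are squared Euclidean; the ranking equals the Euclidean ranking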
Use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
The class LinearScanPrimitiveDistanceKNNQuery, method getKNNForBulkDBIDs.
@Override
public List<KNNList> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
  final Relation<? extends O> relation = getRelation();
  final int size = ids.size();
  final List<KNNHeap> heaps = new ArrayList<>(size);
  List<O> objs = new ArrayList<>(size);
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    heaps.add(DBIDUtil.newHeap(k));
    objs.add(relation.get(iter));
  }
  linearScanBatchKNN(objs, heaps);
  List<KNNList> result = new ArrayList<>(heaps.size());
  for (KNNHeap heap : heaps) {
    result.add(heap.toKNNList());
  }
  return result;
}
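A short usage sketch for the bulk variant, assuming `knnQuery` is an instance of this query class and `relation` the underlying relation; the KNNList iteration is the same as in the FastABOD snippet below, and the i-th result list corresponds to the i-th DBID in `ids`:

// Hypothetical caller (assumptions: `knnQuery` and `relation` are available, k = 10).
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
List<KNNList> lists = knnQuery.getKNNForBulkDBIDs(ids, 10);
for (int i = 0; i < lists.size(); i++) {
  KNNList knn = lists.get(i);
  // Each KNNList is sorted by ascending distance.
  for (DoubleDBIDListIter it = knn.iter(); it.valid(); it.advance()) {
    double dist = it.doubleValue(); // distance of this neighbor to the i-th query object
    // ... use `it` as the neighbor's DBID reference ...
  }
}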
Use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
The class FastABOD, method run.
/**
 * Run Fast-ABOD on the data set.
 *
 * @param db Database
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
  DBIDs ids = relation.getDBIDs();
  // Build a kernel matrix, to make O(n^3) slightly less bad.
  SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
  KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
  WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmaxabod = new DoubleMinMax();
  MeanVariance s = new MeanVariance();
  KNNHeap nn = DBIDUtil.newHeap(k);
  for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
    final double simAA = kernelMatrix.getSimilarity(pA, pA);
    // Choose the k nearest neighbors by squared distance in kernel space.
    nn.clear();
    for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
      if (DBIDUtil.equal(nB, pA)) {
        continue;
      }
      double simBB = kernelMatrix.getSimilarity(nB, nB);
      double simAB = kernelMatrix.getSimilarity(pA, nB);
      double sqdAB = simAA + simBB - simAB - simAB;
      if (!(sqdAB > 0.)) {
        continue;
      }
      nn.insert(sqdAB, nB);
    }
    KNNList nl = nn.toKNNList();
    s.reset();
    DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
    for (; iB.valid(); iB.advance()) {
      double sqdAB = iB.doubleValue();
      double simAB = kernelMatrix.getSimilarity(pA, iB);
      if (!(sqdAB > 0.)) {
        continue;
      }
      for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
        double sqdAC = iC.doubleValue();
        double simAC = kernelMatrix.getSimilarity(pA, iC);
        if (!(sqdAC > 0.)) {
          continue;
        }
        // Exploit bilinearity of the scalar product:
        // <B-A, C-A> = <B, C-A> - <A, C-A>
        //            = <B,C> - <B,A> - <A,C> + <A,A>
        double simBC = kernelMatrix.getSimilarity(iB, iC);
        double numerator = simBC - simAB - simAC + simAA;
        double div = 1. / (sqdAB * sqdAC);
        s.put(numerator * div, FastMath.sqrt(div));
      }
    }
    // Sample variance would probably be better, but the ABOD
    // publication uses the naive variance.
    final double abof = s.getNaiveVariance();
    minmaxabod.put(abof);
    abodvalues.putDouble(pA, abof);
  }
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
  return new OutlierResult(scoreMeta, scoreResult);
}
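The squared distances sqdAB and sqdAC above are distances in the kernel-induced feature space, obtained without ever materializing the feature vectors: ||A - B||^2 = <A,A> + <B,B> - 2<A,B>, with the inner products read off the kernel matrix. A small illustrative helper (not part of ELKI) stating the identity used twice in the loops; DBIDRef is assumed as the common argument type:

// Illustrative only: squared distance in kernel feature space, as computed inline above.
static double kernelSquaredDistance(KernelMatrix kernelMatrix, DBIDRef a, DBIDRef b) {
  final double simAA = kernelMatrix.getSimilarity(a, a);
  final double simBB = kernelMatrix.getSimilarity(b, b);
  final double simAB = kernelMatrix.getSimilarity(a, b);
  // May come out non-positive for degenerate or numerically unstable kernels; the loops above skip such pairs.
  return simAA + simBB - 2. * simAB;
}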
Use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
The class CTLuRandomWalkEC, method run.
/**
 * Run the algorithm.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
  DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
  WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
  WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
  // Make a static IDs array for matrix column indexing.
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  // Construct the relation matrix of the EC graph.
  double[][] E = new double[ids.size()][ids.size()];
  KNNHeap heap = DBIDUtil.newHeap(k);
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      final double val = relation.get(id).doubleValue(0);
      assert (heap.size() == 0);
      int j = 0;
      for (DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
        if (i == j) {
          continue;
        }
        final double e;
        final double distance = distFunc.distance(id, n);
        heap.insert(distance, n);
        if (distance == 0) {
          LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
          e = 0;
        } else {
          double diff = Math.abs(val - relation.get(n).doubleValue(0));
          double exp = FastMath.exp(FastMath.pow(diff, alpha));
          // Implementation note: not inverting exp worked a lot better.
          // Therefore we diverge from the article here.
          e = exp / distance;
        }
        E[j][i] = e;
      }
      // Convert the kNN heap into a DBID array.
      ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
      while (heap.size() > 0) {
        nids.add(heap.poll());
      }
      neighbors.put(id, nids);
    }
  }
  // Column-normalize the matrix; also do the -c multiplication in this pass.
  for (int i = 0; i < E[0].length; i++) {
    double sum = 0.0;
    for (int j = 0; j < E.length; j++) {
      sum += E[j][i];
    }
    if (sum == 0) {
      sum = 1.0;
    }
    for (int j = 0; j < E.length; j++) {
      E[j][i] = -c * E[j][i] / sum;
    }
  }
  // Add the identity matrix. The diagonal should still be 0s, so this is trivial.
  assert (E.length == E[0].length);
  for (int col = 0; col < E[0].length; col++) {
    assert (E[col][col] == 0.0);
    E[col][col] = 1.0;
  }
  E = timesEquals(inverse(E), 1 - c);
  // Split the matrix into columns.
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Note: matrix times i-th unit vector = i-th column.
      double[] sim = getCol(E, i);
      similarityVectors.put(id, sim);
    }
  }
  E = null;
  // Compute the relevance scores between each object and its neighbors.
  DoubleMinMax minmax = new DoubleMinMax();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    double gmean = 1.0;
    int cnt = 0;
    for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
      if (DBIDUtil.equal(id, iter)) {
        continue;
      }
      double sim = VMath.angle(similarityVectors.get(id), similarityVectors.get(iter));
      gmean *= sim;
      cnt++;
    }
    final double score = FastMath.pow(gmean, 1.0 / cnt);
    minmax.put(score);
    scores.putDouble(id, score);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
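For reference, the three matrix steps above (column normalization fused with the -c multiplication, adding the identity, then inverting and scaling by 1 - c) assemble the random-walk-with-restart similarity matrix

  E = (1 - c) * (I - c * P)^(-1)

where P is the column-normalized relation matrix and c the restart parameter. The column extracted for each object afterwards is its similarity vector under that walk, and the final score is the geometric mean of the vector similarities (VMath.angle) to its k nearest spatial neighbors.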
Use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.
The class CachedDoubleDistanceKNNPreprocessor, method preprocess.
@Override
protected void preprocess() {
  createStorage();
  // Open the cache file.
  try (RandomAccessFile file = new RandomAccessFile(filename, "rw");
      FileChannel channel = file.getChannel()) {
    // Check the magic header.
    int header = file.readInt();
    if (header != CacheDoubleDistanceKNNLists.KNN_CACHE_MAGIC) {
      throw new AbortException("Cache magic number does not match.");
    }
    MappedByteBuffer buffer = channel.map(MapMode.READ_ONLY, 4, file.length() - 4);
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
      int dbid = ByteArrayUtil.readUnsignedVarint(buffer);
      int nnsize = ByteArrayUtil.readUnsignedVarint(buffer);
      if (nnsize < k) {
        throw new AbortException("kNN cache contains fewer than k objects!");
      }
      // FIXME: avoid the KNNHeap to KNNList roundtrip.
      // FIXME: use a DBIDVar instead of importInteger.
      KNNHeap knn = DBIDUtil.newHeap(k);
      for (int i = 0; i < nnsize; i++) {
        int nid = ByteArrayUtil.readUnsignedVarint(buffer);
        double dist = buffer.getDouble();
        knn.insert(dist, DBIDUtil.importInteger(nid));
      }
      storage.put(DBIDUtil.importInteger(dbid), knn.toKNNList());
    }
    if (buffer.hasRemaining()) {
      LOG.warning("kNN cache has " + buffer.remaining() + " bytes remaining!");
    }
  } catch (IOException e) {
    throw new AbortException("I/O error in loading kNN cache: " + e.getMessage(), e);
  }
}
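For reference, the byte layout this loop expects, reconstructed from the reads above (not from a separate specification); all varints are unsigned:

  int32   magic number (CacheDoubleDistanceKNNLists.KNN_CACHE_MAGIC)
  one record per object in the relation:
    varint  object DBID
    varint  nnsize (number of cached neighbors, must be >= k)
    nnsize times:
      varint  neighbor DBID
      double  distance (8 bytes)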