Search in sources :

Example 16 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class LinearScanEuclideanDistanceKNNQuery method linearScanBatchKNN.

/**
 * Batch kNN by linear scan, using the squared Euclidean distance.
 *
 * Each object of the relation is compared against every query object and is
 * offered to that query's heap whenever its squared distance does not exceed
 * the heap's current kNN distance. The distances stored in the heaps by this
 * method are therefore squared distances.
 *
 * @param objs Query objects; the object at position i belongs to heap i
 * @param heaps Result heaps, one per query object, updated in place
 */
@Override
protected void linearScanBatchKNN(List<O> objs, List<KNNHeap> heaps) {
    final Relation<? extends O> data = getRelation();
    final SquaredEuclideanDistanceFunction sqEuclid = SquaredEuclideanDistanceFunction.STATIC;
    final int numQueries = objs.size();
    // Database objects drive the outer loop, so each one is fetched only once.
    DBIDIter it = data.getDBIDs().iter();
    while (it.valid()) {
        final O stored = data.get(it);
        for (int q = 0; q != numQueries; ++q) {
            final KNNHeap target = heaps.get(q);
            final double sqDist = sqEuclid.distance(objs.get(q), stored);
            if (!(sqDist <= target.getKNNDistance())) {
                // Farther than the current k-th neighbor (or not comparable).
                continue;
            }
            target.insert(sqDist, it);
        }
        it.advance();
    }
}
Also used : SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 17 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class LinearScanPrimitiveDistanceKNNQuery method getKNNForBulkDBIDs.

/**
 * Compute the k nearest neighbors for a whole batch of query ids at once.
 *
 * One heap is allocated per query id, the query objects are fetched from the
 * relation, and the batch linear scan fills all heaps together.
 *
 * @param ids Query object ids
 * @param k Number of neighbors requested per query
 * @return One kNN list per query id, in the iteration order of {@code ids}
 */
@Override
public List<KNNList> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
    final int numQueries = ids.size();
    final Relation<? extends O> data = getRelation();
    final List<KNNHeap> heaps = new ArrayList<>(numQueries);
    final List<O> queries = new ArrayList<>(numQueries);
    DBIDIter it = ids.iter();
    while (it.valid()) {
        heaps.add(DBIDUtil.newHeap(k));
        queries.add(data.get(it));
        it.advance();
    }
    // Fill all heaps in one scan.
    linearScanBatchKNN(queries, heaps);
    // Convert each heap into its final list representation.
    final int numHeaps = heaps.size();
    final List<KNNList> lists = new ArrayList<>(numHeaps);
    for (int i = 0; i < numHeaps; i++) {
        lists.add(heaps.get(i).toKNNList());
    }
    return lists;
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayList(java.util.ArrayList) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 18 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class FastABOD method run.

/**
 * Run Fast-ABOD on the data set.
 *
 * For every object A, the neighbors with the smallest positive
 * kernel-induced squared distance are collected in a heap of capacity k.
 * The score of A is the naive variance of the weighted angle terms over all
 * pairs (B, C) of these neighbors.
 *
 * @param db Database, used to obtain the similarity query
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    // Kernel matrix built up front, to make O(n^3) slightly less bad.
    final SimilarityQuery<V> kernelQuery = db.getSimilarityQuery(relation, kernelFunction);
    final KernelMatrix kernel = new KernelMatrix(kernelQuery, relation, ids);
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    final DoubleMinMax scoreRange = new DoubleMinMax();
    // Both helpers are reused for every object and reset per iteration.
    final MeanVariance angleStats = new MeanVariance();
    final KNNHeap nearest = DBIDUtil.newHeap(k);
    for (DBIDIter a = ids.iter(); a.valid(); a.advance()) {
        final double simAA = kernel.getSimilarity(a, a);
        // Collect the nearest neighbors of A.
        nearest.clear();
        for (DBIDIter b = relation.iterDBIDs(); b.valid(); b.advance()) {
            if (!DBIDUtil.equal(b, a)) {
                final double simBB = kernel.getSimilarity(b, b);
                final double simAB = kernel.getSimilarity(a, b);
                // Squared distance in kernel space: <A,A> + <B,B> - 2 <A,B>
                final double sqdAB = simAA + simBB - simAB - simAB;
                // Only strictly positive distances qualify (this also drops NaN).
                if (sqdAB > 0.) {
                    nearest.insert(sqdAB, b);
                }
            }
        }
        final KNNList neighbors = nearest.toKNNList();
        angleStats.reset();
        final DoubleDBIDListIter first = neighbors.iter();
        final DoubleDBIDListIter second = neighbors.iter();
        while (first.valid()) {
            final double sqdAB = first.doubleValue();
            final double simAB = kernel.getSimilarity(a, first);
            if (sqdAB > 0.) {
                // Visit each unordered pair once: C starts right after B.
                second.seek(first.getOffset() + 1);
                while (second.valid()) {
                    final double sqdAC = second.doubleValue();
                    final double simAC = kernel.getSimilarity(a, second);
                    if (sqdAC > 0.) {
                        // Exploit bilinearity of scalar product:
                        // <B-A, C-A> = <B, C-A> - <A,C-A>
                        // = <B,C> - <B,A> - <A,C> + <A,A>
                        final double simBC = kernel.getSimilarity(first, second);
                        final double numerator = simBC - simAB - simAC + simAA;
                        final double div = 1. / (sqdAB * sqdAC);
                        angleStats.put(numerator * div, FastMath.sqrt(div));
                    }
                    second.advance();
                }
            }
            first.advance();
        }
        // Sample variance would probably be better, but the ABOD
        // publication uses the naive variance.
        final double abof = angleStats.getNaiveVariance();
        scoreRange.put(abof);
        scores.putDouble(a, abof);
    }
    // Build result representation.
    final DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", scores, relation.getDBIDs());
    final OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KernelMatrix(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 19 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class CTLuRandomWalkEC method run.

/**
 * Run the algorithm.
 *
 * Outline of the steps performed below:
 * <ol>
 * <li>Build a square weight matrix E over all objects from the spatial
 * distance and the difference of the first attribute value, and collect the
 * nearest spatial neighbors of each object in a heap of capacity k.</li>
 * <li>Normalize each column of E by its sum and scale it by -c, add the
 * identity matrix, invert, and apply the factor (1 - c).</li>
 * <li>Store the columns of the resulting matrix as per-object similarity
 * vectors.</li>
 * <li>Score each object by the geometric mean of {@code VMath.angle} between
 * its similarity vector and those of its neighbors.</li>
 * </ol>
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
    DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
    // Temporary per-object storage: similarity vector and neighbor set.
    WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
    WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
    // Make a static IDs array for matrix column indexing
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    // construct the relation Matrix of the ec-graph
    double[][] E = new double[ids.size()][ids.size()];
    // A single heap is reused for all objects; it is drained at the end of
    // every outer iteration (see the assertion at the top of the loop).
    KNNHeap heap = DBIDUtil.newHeap(k);
    {
        // i is the matrix index of the current object, j that of the other one.
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Only the first attribute dimension is used.
            final double val = relation.get(id).doubleValue(0);
            assert (heap.size() == 0);
            int j = 0;
            for (DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
                // Skip the object itself; the diagonal of E stays 0.
                if (i == j) {
                    continue;
                }
                final double e;
                final double distance = distFunc.distance(id, n);
                // NOTE(review): the candidate is offered to the heap before the
                // zero-distance check below, so zero-distance objects can still
                // become neighbors even though their weight is 0 - confirm
                // that this is intended.
                heap.insert(distance, n);
                if (distance == 0) {
                    LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
                    e = 0;
                } else {
                    double diff = Math.abs(val - relation.get(n).doubleValue(0));
                    double exp = FastMath.exp(FastMath.pow(diff, alpha));
                    // Implementation note: not inverting exp worked a lot better.
                    // Therefore we diverge from the article here.
                    e = exp / distance;
                }
                // Column i collects the weights computed for the current object.
                E[j][i] = e;
            }
            // Convert kNN Heap into DBID array
            ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
            // Polling until empty also leaves the heap clean for the next object.
            while (heap.size() > 0) {
                nids.add(heap.poll());
            }
            neighbors.put(id, nids);
        }
    }
    // Normalize every column by its sum.
    // Also do the -c multiplication in this process.
    // NOTE(review): E[0] is accessed unconditionally here and below, which
    // fails for an empty id set - confirm that callers never pass one.
    for (int i = 0; i < E[0].length; i++) {
        double sum = 0.0;
        for (int j = 0; j < E.length; j++) {
            sum += E[j][i];
        }
        // Avoid a division by zero for all-zero columns.
        if (sum == 0) {
            sum = 1.0;
        }
        for (int j = 0; j < E.length; j++) {
            E[j][i] = -c * E[j][i] / sum;
        }
    }
    // Add identity matrix. The diagonal should still be 0s, so this is trivial.
    assert (E.length == E[0].length);
    for (int col = 0; col < E[0].length; col++) {
        assert (E[col][col] == 0.0);
        E[col][col] = 1.0;
    }
    // Invert the matrix; timesEquals presumably scales the result by (1 - c)
    // - helper not visible here, TODO confirm.
    E = timesEquals(inverse(E), 1 - c);
    // Split the matrix into columns
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Note: matrix times ith unit vector = ith column
            double[] sim = getCol(E, i);
            similarityVectors.put(id, sim);
        }
    }
    // Drop the reference to the matrix; it is not used below.
    E = null;
    // compute the relevance scores between specified Object and its neighbors
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        // Product of the neighbor similarities and the number of factors.
        double gmean = 1.0;
        int cnt = 0;
        for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(id, iter)) {
                continue;
            }
            double sim = VMath.angle(similarityVectors.get(id), similarityVectors.get(iter));
            gmean *= sim;
            cnt++;
        }
        // Geometric mean: cnt-th root of the product.
        // NOTE(review): if an object has no neighbors, cnt is 0 and the
        // exponent 1.0 / cnt is infinite - confirm this cannot happen.
        final double score = FastMath.pow(gmean, 1.0 / cnt);
        minmax.put(score);
        scores.putDouble(id, score);
    }
    // Wrap the scores into the result objects.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 20 with KNNHeap

use of de.lmu.ifi.dbs.elki.database.ids.KNNHeap in project elki by elki-project.

the class CachedDoubleDistanceKNNPreprocessor method preprocess.

/**
 * Load the precomputed kNN lists from the cache file into the storage.
 *
 * File layout as read by this method: a 4-byte magic number, followed for
 * each object of the relation by a varint object id, a varint neighbor count,
 * and that many (varint neighbor id, double distance) pairs.
 *
 * @throws AbortException if the magic number does not match, if a cached
 *         list has fewer than k entries, or if reading the file fails
 */
@Override
protected void preprocess() {
    createStorage();
    // The cache is only read here (and mapped READ_ONLY below), so open it
    // read-only: mode "rw" would require write permission and would create an
    // empty file as a side effect when the cache does not exist.
    try (RandomAccessFile file = new RandomAccessFile(filename, "r");
        FileChannel channel = file.getChannel()) {
        // check magic header
        int header = file.readInt();
        if (header != CacheDoubleDistanceKNNLists.KNN_CACHE_MAGIC) {
            throw new AbortException("Cache magic number does not match.");
        }
        // Map everything after the 4-byte header.
        MappedByteBuffer buffer = channel.map(MapMode.READ_ONLY, 4, file.length() - 4);
        // One record is consumed per object of the relation; the id stored in
        // the record (not the iterator position) decides where it is stored.
        for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
            int dbid = ByteArrayUtil.readUnsignedVarint(buffer);
            int nnsize = ByteArrayUtil.readUnsignedVarint(buffer);
            if (nnsize < k) {
                throw new AbortException("kNN cache contains fewer than k objects!");
            }
            // FIXME: avoid the KNNHeap to KNNList roundtrip.
            // FIXME: use a DBIDVar instead of importInteger.
            // All cached entries are read; the heap of capacity k decides
            // which of them are kept.
            KNNHeap knn = DBIDUtil.newHeap(k);
            for (int i = 0; i < nnsize; i++) {
                int nid = ByteArrayUtil.readUnsignedVarint(buffer);
                double dist = buffer.getDouble();
                knn.insert(dist, DBIDUtil.importInteger(nid));
            }
            storage.put(DBIDUtil.importInteger(dbid), knn.toKNNList());
        }
        // Leftover data hints at a cache built for a different data set.
        if (buffer.hasRemaining()) {
            LOG.warning("kNN cache has " + buffer.remaining() + " bytes remaining!");
        }
    } catch (IOException e) {
        throw new AbortException("I/O error in loading kNN cache: " + e.getMessage(), e);
    }
}
Also used : RandomAccessFile(java.io.RandomAccessFile) MappedByteBuffer(java.nio.MappedByteBuffer) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap): 24; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 17; KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 11; ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 6; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 5; ArrayList (java.util.ArrayList): 5; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 4; MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 4; WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 3; ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 3; DBID (de.lmu.ifi.dbs.elki.database.ids.DBID): 3; DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 3; DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 3; MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 3; DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 3; OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 3; OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 3; DBIDPair (de.lmu.ifi.dbs.elki.database.ids.DBIDPair): 2; HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs): 2; ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 2