Search in sources :

Example 66 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class FastABOD method run.

/**
 * Compute the Fast-ABOD score of every object of the relation. For each
 * object, only its k nearest neighbors (by kernel-induced squared distance)
 * contribute to the angle variance.
 *
 * @param db Database used to obtain the similarity query
 * @param relation Relation to process
 * @return Outlier detection result
 */
@Override
public OutlierResult run(Database db, Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    // Precompute the kernel matrix once; all similarities below are lookups.
    final SimilarityQuery<V> simQuery = db.getSimilarityQuery(relation, kernelFunction);
    final KernelMatrix kernel = new KernelMatrix(simQuery, relation, ids);
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    final DoubleMinMax range = new DoubleMinMax();
    final MeanVariance angles = new MeanVariance();
    final KNNHeap heap = DBIDUtil.newHeap(k);
    for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
        final double selfSim = kernel.getSimilarity(query, query);
        // Collect the k nearest neighbors of the query object.
        heap.clear();
        for (DBIDIter cand = relation.iterDBIDs(); cand.valid(); cand.advance()) {
            if (!DBIDUtil.equal(cand, query)) {
                final double candSim = kernel.getSimilarity(cand, cand);
                final double crossSim = kernel.getSimilarity(query, cand);
                // Squared distance in kernel space: <A,A> + <B,B> - 2 <A,B>
                final double sqDist = selfSim + candSim - crossSim - crossSim;
                // Written as a positive test so that NaN is rejected as well.
                if (sqDist > 0.) {
                    heap.insert(sqDist, cand);
                }
            }
        }
        final KNNList neighbors = heap.toKNNList();
        angles.reset();
        // Visit every unordered pair (B, C) of neighbors exactly once.
        final DoubleDBIDListIter second = neighbors.iter();
        for (DoubleDBIDListIter first = neighbors.iter(); first.valid(); first.advance()) {
            final double sqDistB = first.doubleValue();
            final double simToB = kernel.getSimilarity(query, first);
            if (sqDistB > 0.) {
                for (second.seek(first.getOffset() + 1); second.valid(); second.advance()) {
                    final double sqDistC = second.doubleValue();
                    final double simToC = kernel.getSimilarity(query, second);
                    if (sqDistC > 0.) {
                        // By bilinearity of the scalar product:
                        // <B-A, C-A> = <B,C> - <B,A> - <A,C> + <A,A>
                        final double simPair = kernel.getSimilarity(first, second);
                        final double numerator = simPair - simToB - simToC + selfSim;
                        final double div = 1. / (sqDistB * sqDistC);
                        angles.put(numerator * div, FastMath.sqrt(div));
                    }
                }
            }
        }
        // The ABOD publication uses the naive variance, although the sample
        // variance would probably be the better choice.
        final double score = angles.getNaiveVariance();
        range.put(score);
        scores.putDouble(query, score);
    }
    // Wrap the scores into an outlier result.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", scores, relation.getDBIDs());
    final OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(meta, scoreRelation);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KernelMatrix(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 67 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class HiSCPreferenceVectorIndex method initialize.

/**
 * Compute a preference vector for every object of the relation from its k
 * nearest neighbors (Euclidean distance), and keep it in the storage.
 *
 * @throws EmptyDataException if no relation is set, or the relation is empty
 */
@Override
public void initialize() {
    if (relation == null || relation.size() <= 0) {
        throw new EmptyDataException();
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);

    // Debug output is collected here and emitted once, after the main loop.
    final StringBuilder debugLog = new StringBuilder();
    final long started = System.currentTimeMillis();
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Preprocessing preference vector", relation.size(), LOG);
    }
    final KNNQuery<V> neighborQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k);

    for (DBIDIter current = relation.iterDBIDs(); current.valid(); current.advance()) {
        if (LOG.isDebugging()) {
            debugLog.append("\n\nid = ").append(DBIDUtil.toString(current));
            debugLog.append("\n knns: ");
        }
        final KNNList neighbors = neighborQuery.getKNNForDBID(current, k);
        storage.put(current, determinePreferenceVector(relation, current, neighbors, debugLog));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    if (LOG.isDebugging()) {
        LOG.debugFine(debugLog.toString());
    }
    final long finished = System.currentTimeMillis();
    // TODO: re-add timing code!
    if (LOG.isVerbose()) {
        LOG.verbose(getClass().getName() + " runtime: " + (finished - started) + " milliseconds.");
    }
}
Also used : EmptyDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.EmptyDataException) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 68 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class RdKNNTree method preInsert.

/**
 * Adapts the knn distances before insertion of entry q.
 *
 * Walks the subtree rooted at nodeEntry. In leaf nodes, every stored entry
 * p is offered to the candidate heap of q, and the knn distance of p is
 * updated if q is at most that far away. In directory nodes, the method
 * recurses into child entries. In both cases, the knn distance stored in
 * nodeEntry is finally set to the maximum knn distance seen among the
 * entries of the node.
 *
 * @param q the entry to be inserted; it is cast to LeafEntry to obtain its
 *        DBID
 * @param nodeEntry the entry representing the root of the current subtree
 * @param knns_q the knns of q; this heap is modified in place and shared
 *        across the recursive calls
 */
private void preInsert(RdKNNEntry q, RdKNNEntry nodeEntry, KNNHeap knns_q) {
    // Current knn distance of q according to the candidate heap.
    double knnDist_q = knns_q.getKNNDistance();
    RdKNNNode node = getNode(nodeEntry);
    // Running maximum of the knn distances of this node's entries.
    double knnDist_node = 0.;
    // leaf node
    if (node.isLeaf()) {
        for (int i = 0; i < node.getNumEntries(); i++) {
            RdKNNLeafEntry p = (RdKNNLeafEntry) node.getEntry(i);
            double dist_pq = distanceQuery.distance(p.getDBID(), ((LeafEntry) q).getDBID());
            // ==> p becomes a knn-candidate
            if (dist_pq <= knnDist_q) {
                knns_q.insert(dist_pq, p.getDBID());
                // Only update q's knn distance once the heap holds at least
                // k_max candidates.
                if (knns_q.size() >= settings.k_max) {
                    knnDist_q = knns_q.getKNNDistance();
                    q.setKnnDistance(knnDist_q);
                }
            }
            // q becomes knn of p
            if (dist_pq <= p.getKnnDistance()) {
                O obj = relation.get(p.getDBID());
                // Re-query the neighbors of p; judging by the variable name,
                // the result presumably does not contain q yet - TODO confirm.
                KNNList knns_without_q = knnQuery.getKNNForObject(obj, settings.k_max);
                if (knns_without_q.size() + 1 < settings.k_max) {
                    // Even counting q, p has fewer than k_max neighbors.
                    p.setKnnDistance(Double.NaN);
                } else {
                    // Smaller of the last list entry's distance (presumably the
                    // farthest neighbor - verify list ordering) and the
                    // distance to q.
                    double knnDist_p = Math.min(knns_without_q.get(knns_without_q.size() - 1).doubleValue(), dist_pq);
                    p.setKnnDistance(knnDist_p);
                }
            }
            // NOTE(review): Math.max returns NaN if either argument is NaN, so
            // a NaN knn distance set above makes knnDist_node NaN for the
            // rest of this node - confirm this is intended.
            knnDist_node = Math.max(knnDist_node, p.getKnnDistance());
        }
    } else // directory node
    {
        O obj = relation.get(((LeafEntry) q).getDBID());
        // Pairs of (double, child entry); presumably the distance from obj to
        // the child, in sorted order - verify against getSortedEntries.
        List<DoubleObjPair<RdKNNEntry>> entries = getSortedEntries(node, obj, settings.distanceFunction);
        for (DoubleObjPair<RdKNNEntry> distEntry : entries) {
            RdKNNEntry entry = distEntry.second;
            double entry_knnDist = entry.getKnnDistance();
            // Descend only if the child is closer than its own knn distance
            // or closer than the current knn distance of q.
            if (distEntry.first < entry_knnDist || distEntry.first < knnDist_q) {
                preInsert(q, entry, knns_q);
                // The recursion may have added candidates to the heap.
                knnDist_q = knns_q.getKNNDistance();
            }
            // Re-read from the entry: the recursive call sets the knn
            // distance of the entry it was invoked on.
            knnDist_node = Math.max(knnDist_node, entry.getKnnDistance());
        }
    }
    nodeEntry.setKnnDistance(knnDist_node);
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair)

Example 69 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class CacheDoubleDistanceKNNLists method run.

/**
 * Compute the k nearest neighbors of every object of the relation and write
 * them to the cache file {@code out}.
 *
 * The file starts with the magic number {@code KNN_CACHE_MAGIC}, followed
 * by one record per object: the object index, the number of neighbors, and
 * then for each neighbor its index and its distance as a double.
 *
 * The file is locked while it is written. An {@link IOException} is logged
 * and not rethrown; the cache file may then be incomplete.
 */
@Override
public void run() {
    database.initialize();
    Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);
    // Open the file, and acquire a file write lock. All three resources are
    // closed automatically, in reverse order, when the block is left.
    try (RandomAccessFile file = new RandomAccessFile(out, "rw");
        FileChannel channel = file.getChannel();
        FileLock lock = channel.lock()) {
        // write magic header
        file.writeInt(KNN_CACHE_MAGIC);
        // Initial size, enough for 2 kNN.
        // NOTE(review): this budgets 12 bytes per neighbor (8 for the double,
        // so 4 for the varint index) - confirm that writeUnsignedVarint never
        // needs more than 4 bytes for the indexes in use.
        int bufsize = k * 12 * 2 + 10;
        ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            final KNNList nn = knnQ.getKNNForDBID(it, k);
            final int nnsize = nn.size();
            // Grow the buffer when needed:
            if (nnsize * 12 + 10 > bufsize) {
                while (nnsize * 12 + 10 > bufsize) {
                    bufsize <<= 1;
                }
                buffer = ByteBuffer.allocateDirect(bufsize);
            }
            buffer.clear();
            ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
            ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
            int c = 0;
            for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) {
                ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
                buffer.putDouble(ni.doubleValue());
            }
            // The announced size must match the number of entries written.
            if (c != nnsize) {
                throw new AbortException("Sizes did not agree. Cache is invalid.");
            }
            buffer.flip();
            // A single write() call is not guaranteed to drain the buffer.
            while (buffer.hasRemaining()) {
                channel.write(buffer);
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        // The file was opened without truncation: cut off any stale bytes
        // remaining from a previous, longer cache file. The channel position
        // includes the header, as it is shared with the RandomAccessFile.
        channel.truncate(channel.position());
        // Release early; the implicit close of the lock is then a no-op.
        lock.release();
    } catch (IOException e) {
        LOG.exception(e);
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FileChannel(java.nio.channels.FileChannel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) RandomAccessFile(java.io.RandomAccessFile) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FileLock(java.nio.channels.FileLock) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 70 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class DWOF method initializeRadii.

/**
 * Fill the radii store with the initial radius of every object:
 *
 * initial radius = (smallest positive distance observed) * (average
 * distance among the neighbors of the object) / (smallest such average)
 *
 * If the smallest average is not positive, every radius is set to positive
 * infinity instead.
 *
 * @param ids Database IDs to process
 * @param knnq kNN search function
 * @param distFunc Distance function
 * @param radii WritableDoubleDataStore to store radii
 */
private void initializeRadii(DBIDs ids, KNNQuery<O> knnq, DistanceQuery<O> distFunc, WritableDoubleDataStore radii) {
    FiniteProgress avgDistProgress = null;
    if (LOG.isVerbose()) {
        avgDistProgress = new FiniteProgress("Calculating average kNN distances-", ids.size(), LOG);
    }
    double smallestDist = Double.POSITIVE_INFINITY;
    double smallestAvg = Double.POSITIVE_INFINITY;
    // Reused accumulator for the per-object average.
    final Mean avg = new Mean();
    // First pass: store the average pairwise neighbor distance of each object.
    for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
        final KNNList neighbors = knnq.getKNNForDBID(obj, k);
        avg.reset();
        for (DBIDIter a = neighbors.iter(); a.valid(); a.advance()) {
            // The object itself does not take part in the pairs.
            if (!DBIDUtil.equal(a, obj)) {
                for (DBIDIter b = neighbors.iter(); b.valid(); b.advance()) {
                    if (!DBIDUtil.equal(a, b) && !DBIDUtil.equal(b, obj)) {
                        final double d = distFunc.distance(a, b);
                        avg.put(d);
                        // Track the smallest strictly positive distance.
                        if (d > 0. && d < smallestDist) {
                            smallestDist = d;
                        }
                    }
                }
            }
        }
        final double objAvg = avg.getMean();
        radii.putDouble(obj, objAvg);
        if (objAvg < smallestAvg) {
            smallestAvg = objAvg;
        }
        LOG.incrementProcessed(avgDistProgress);
    }
    LOG.ensureCompleted(avgDistProgress);
    // Second pass: turn the stored averages into the initial radii.
    for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
        final double radius;
        if (smallestAvg > 0) {
            radius = smallestDist * radii.doubleValue(obj) / smallestAvg;
        } else {
            radius = Double.POSITIVE_INFINITY;
        }
        radii.putDouble(obj, radius);
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)53 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)38 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)32 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)21 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)20 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)18 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)18 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)18 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)18 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)15 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)9 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)9 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)8 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6