Search in sources :

Example 26 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class ALOCI method run.

/**
 * Run the aLOCI outlier detection on a relation.
 *
 * Builds {@code g} quadtrees over a cubic bounding box of the data (the first
 * one unshifted, the remaining ones with random shift vectors), and then
 * assigns every object the maximum normalized MDEF value found over all tree
 * levels.
 *
 * @param database Database (not referenced in this method body)
 * @param relation Relation to process
 * @return Outlier result; scores are stored under the name
 *         "aLOCI normalized MDEF"
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final Random random = rnd.getSingleThreadedRandom();
    // NOTE(review): "quadtress" in the progress title looks like a typo for
    // "quadtrees"; left unchanged because it is a runtime string.
    FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
    // Compute extent of dataset.
    double[] min, max;
    {
        double[][] hbbs = RelationUtil.computeMinMax(relation);
        min = hbbs[0];
        max = hbbs[1];
        // Longest side of the bounding box over all dimensions.
        double maxd = 0;
        for (int i = 0; i < dim; i++) {
            maxd = MathUtil.max(maxd, max[i] - min[i]);
        }
        // Enlarge bounding box to have equal lengths: every dimension is
        // padded symmetrically until its side equals the longest side.
        for (int i = 0; i < dim; i++) {
            double diff = (maxd - (max[i] - min[i])) * .5;
            min[i] -= diff;
            max[i] += diff;
        }
    }
    List<ALOCIQuadTree> qts = new ArrayList<>(g);
    // First tree: shift vector of all zeros (freshly allocated array).
    double[] nshift = new double[dim];
    ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
    qts.add(qt);
    LOG.incrementProcessed(progressPreproc);
    /*
     * create the remaining g-1 shifted QuadTrees. This is not clearly described
     * in the paper and therefore implemented in a way that achieves good
     * results with the test data.
     */
    for (int shift = 1; shift < g; shift++) {
        // Random shift per dimension, drawn from [0, side length).
        double[] svec = new double[dim];
        for (int i = 0; i < dim; i++) {
            svec[i] = random.nextDouble() * (max[i] - min[i]);
        }
        qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
        qts.add(qt);
        LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
    // aLOCI main loop: evaluate
    FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
    WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final O obj = relation.get(iditer);
        // Starts at 0, so the stored score is never negative.
        double maxmdefnorm = 0;
        // For each level; the loop has no upper bound and ends via the break
        // below, once no tree returns a node at level l.
        for (int l = 0; ; l++) {
            // Find the closest C_i: among all trees, the node at exactly
            // level l with the smallest distance to the object.
            Node ci = null;
            for (int i = 0; i < g; i++) {
                Node ci2 = qts.get(i).findClosestNode(obj, l);
                // Skip trees whose returned node is not at the requested level.
                if (ci2.getLevel() != l) {
                    continue;
                }
                // TODO: always use manhattan?
                if (ci == null || distFunc.distance(ci, obj) > distFunc.distance(ci2, obj)) {
                    ci = ci2;
                }
            }
            // LOG.debug("level:" + (ci != null ? ci.getLevel() : -1) +" l:"+l);
            if (ci == null) {
                // no matching tree for this level.
                break;
            }
            // Find the closest C_j: queried at level l - alpha around C_i.
            // Candidates with a lower level than the current best are
            // skipped; otherwise the candidate closer to C_i wins.
            Node cj = null;
            for (int i = 0; i < g; i++) {
                Node cj2 = qts.get(i).findClosestNode(ci, l - alpha);
                // TODO: allow higher levels or not?
                if (cj != null && cj2.getLevel() < cj.getLevel()) {
                    continue;
                }
                // TODO: always use manhattan?
                if (cj == null || distFunc.distance(cj, ci) > distFunc.distance(cj2, ci)) {
                    cj = cj2;
                }
            }
            // LOG.debug("level:" + (cj != null ? cj.getLevel() : -1) +" l:"+l);
            if (cj == null) {
                // no matching tree for this level.
                // NOTE(review): cj stays null only if every findClosestNode
                // call returned null - confirm whether this can happen.
                continue;
            }
            double mdefnorm = calculate_MDEF_norm(cj, ci);
            // LOG.warning("level:" + ci.getLevel() + "/" + cj.getLevel() +
            // " mdef: " + mdefnorm);
            // Keep the largest value seen over all levels.
            maxmdefnorm = MathUtil.max(maxmdefnorm, mdefnorm);
        }
        // Store results
        mdef_norm.putDouble(iditer, maxmdefnorm);
        minmax.put(maxmdefnorm);
        LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    // Wrap the scores and their observed min/max into the result objects.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 27 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class FlexibleLOF method computeLRDs.

/**
 * Compute the local reachability density (LRD) for each of the given objects
 * and write it into the supplied storage.
 *
 * For every object, the reachability terms of its neighbors (excluding the
 * object itself) are summed up; the LRD is the number of such neighbors
 * divided by that sum, or positive infinity when the sum is not positive.
 *
 * @param knnq kNN query used for both the object and its neighbors
 * @param ids the ids of the objects to process
 * @param lrds output storage receiving one LRD value per object
 */
protected void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("LRD", ids.size(), LOG);
    }
    DBIDIter current = ids.iter();
    while (current.valid()) {
        final KNNList knn = knnq.getKNNForDBID(current, kreach);
        double reachSum = 0.0;
        int used = 0;
        DoubleDBIDListIter nb = knn.iter();
        while (nb.valid()) {
            // The query object itself does not contribute.
            if (!DBIDUtil.equal(nb, current)) {
                KNNList nbKnn = knnq.getKNNForDBID(nb, kreach);
                // Reachability term: the larger of the distance to the
                // neighbor and the neighbor's own kNN distance.
                reachSum += MathUtil.max(nb.doubleValue(), nbKnn.getKNNDistance());
                used++;
            }
            nb.advance();
        }
        // Guard against a division by zero.
        final double density;
        if (reachSum > 0) {
            density = used / reachSum;
        } else {
            density = Double.POSITIVE_INFINITY;
        }
        lrds.putDouble(current, density);
        LOG.incrementProcessed(progress);
        current.advance();
    }
    LOG.ensureCompleted(progress);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)

Example 28 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class LDOF method run.

/**
 * Run the LDOF algorithm.
 *
 * For each object, the mean distance to its neighbors is divided by the mean
 * pairwise distance among those neighbors; the object itself is left out of
 * both means. A ratio that is NaN or infinite is replaced by 1.0.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(dq, k);
    // Observed score range, needed for the score meta data.
    DoubleMinMax range = new DoubleMinMax();
    // Storage for the LDOF value of every object.
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        LOG.verbose("Computing LDOFs");
        progress = new FiniteProgress("LDOF for objects", relation.size(), LOG);
    }
    // Reused accumulators: object-to-neighbor and neighbor-to-neighbor means.
    Mean toNeighbors = new Mean(), amongNeighbors = new Mean();
    DBIDIter obj = relation.iterDBIDs();
    while (obj.valid()) {
        KNNList knn = knnq.getKNNForDBID(obj, k);
        toNeighbors.reset();
        amongNeighbors.reset();
        DoubleDBIDListIter first = knn.iter(), second = knn.iter();
        while (first.valid()) {
            // The object itself is not one of its own neighbors.
            if (!DBIDUtil.equal(first, obj)) {
                toNeighbors.put(first.doubleValue());
                // Pair the current neighbor with every later one, so that
                // each unordered pair is counted exactly once.
                second.seek(first.getOffset() + 1);
                while (second.valid()) {
                    if (!DBIDUtil.equal(second, obj)) {
                        amongNeighbors.put(dq.distance(first, second));
                    }
                    second.advance();
                }
            }
            first.advance();
        }
        final double ratio = toNeighbors.getMean() / amongNeighbors.getMean();
        // Fall back to 1.0 when the ratio is undefined or unbounded.
        final double ldof = (Double.isNaN(ratio) || Double.isInfinite(ratio)) ? 1.0 : ratio;
        scores.putDouble(obj, ldof);
        range.put(ldof);
        LOG.incrementProcessed(progress);
        obj.advance();
    }
    LOG.ensureCompleted(progress);
    // Package scores and meta data into the result.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 29 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class HiCS method calculateSubspaces.

/**
 * Identifies high contrast subspaces in a given full-dimensional database.
 *
 * Starts from all two-dimensional subspaces and repeatedly joins the retained
 * candidates of one dimensionality into candidates of the next, until no
 * further candidates are produced.
 *
 * @param relation the relation the HiCS should be evaluated for
 * @param subspaceIndex Subspace indexes
 * @param random Random generator, passed on to the contrast computation
 * @return a set of high contrast subspaces
 */
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
    final int dbdim = RelationUtil.dimensionality(relation);
    FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
    if (dprog != null) {
        // Clamp to the total: for data with fewer than two dimensions the
        // value 2 would exceed it (assumes FiniteProgress rejects values
        // above its total - confirm against the ELKI logging API).
        dprog.setProcessed(Math.min(2, dbdim), LOG);
    }
    TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
    // Presumably bounded to "cutoff" entries, ordered by contrast - verify
    // against TopBoundedHeap.
    TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
    // compute two-element sets of subspaces
    for (int i = 0; i < dbdim; i++) {
        for (int j = i + 1; j < dbdim; j++) {
            HiCSSubspace ts = new HiCSSubspace();
            ts.set(i);
            ts.set(j);
            calculateContrast(relation, ts, subspaceIndex, random);
            dDimensionalList.add(ts);
            LOG.incrementProcessed(prog);
        }
    }
    LOG.ensureCompleted(prog);
    IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
    for (int d = 3; !dDimensionalList.isEmpty(); d++) {
        if (dprog != null) {
            // d runs one past the data dimensionality when the full space was
            // generated in the previous round (that round's result still has
            // to be collected), so clamp to keep the progress within bounds.
            dprog.setProcessed(Math.min(d, dbdim), LOG);
        }
        // The heap currently holds the retained (d-1)-dimensional subspaces:
        // record them as results and use them as join candidates.
        ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
        for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
            subspaceList.add(it.get());
            candidateList.add(it.get());
        }
        dDimensionalList.clear();
        // candidateList now contains the retained (d-1)-dimensional sets
        Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
        // TODO: optimize APRIORI style, by not even computing the bit set or?
        for (int i = 0; i < candidateList.size() - 1; i++) {
            for (int j = i + 1; j < candidateList.size(); j++) {
                HiCSSubspace set1 = candidateList.get(i);
                HiCSSubspace set2 = candidateList.get(j);
                HiCSSubspace joinedSet = new HiCSSubspace();
                joinedSet.or(set1);
                joinedSet.or(set2);
                // Only keep joins that add exactly one dimension.
                if (joinedSet.cardinality() != d) {
                    continue;
                }
                calculateContrast(relation, joinedSet, subspaceIndex, random);
                dDimensionalList.add(joinedSet);
                LOG.incrementProcessed(qprog);
            }
        }
        // Prune: drop a candidate from the result as soon as any newly
        // generated subspace has a strictly higher contrast.
        for (HiCSSubspace cand : candidateList) {
            for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
                if (it.get().contrast > cand.contrast) {
                    subspaceList.remove(cand);
                    break;
                }
            }
        }
    }
    LOG.setCompleted(qprog);
    if (dprog != null) {
        dprog.setProcessed(dbdim, LOG);
        dprog.ensureCompleted(LOG);
    }
    return subspaceList;
}
Also used : TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) TreeSet(java.util.TreeSet) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Example 30 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class DBOutlierDetection method computeOutlierScores.

/**
 * Compute binary outlier scores (1 = outlier, 0 = no outlier) for all objects.
 *
 * Uses an optimized kNN query if one is available, otherwise an optimized
 * range query, and falls back to a linear scan with early termination.
 *
 * @param database Database to obtain the queries from
 * @param relation Relation to process
 * @param d Neighborhood distance threshold
 * @return Storage holding one score per object
 */
@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    // Prefer a kNN query, as this will usually stop earlier; only request a
    // range query when no optimized kNN query is available.
    KNNQuery<O> knnq = database.getKNNQuery(dq, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    RangeQuery<O> rangeq = null;
    if (knnq == null) {
        rangeq = database.getRangeQuery(dq, DatabaseQuery.HINT_OPTIMIZED_ONLY, d);
    }
    // maximum number of objects in the D-neighborhood of an outlier
    int m = (int) Math.floor((dq.getRelation().size()) * (1 - p));
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(dq.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("DBOutlier detection", dq.getRelation().size(), LOG);
    }
    if (knnq != null) {
        // kNN distance for k = m larger than d -> object is an outlier.
        DBIDIter it = relation.iterDBIDs();
        while (it.valid()) {
            KNNList knns = knnq.getKNNForDBID(it, m);
            scores.putDouble(it, (knns.getKNNDistance() > d) ? 1. : 0.);
            LOG.incrementProcessed(prog);
            it.advance();
        }
    } else if (rangeq != null) {
        // Fewer than m objects within distance d -> object is an outlier.
        DBIDIter it = relation.iterDBIDs();
        while (it.valid()) {
            DoubleDBIDList inRange = rangeq.getRangeForDBID(it, d);
            scores.putDouble(it, (inRange.size() < m) ? 1. : 0.);
            LOG.incrementProcessed(prog);
            it.advance();
        }
    } else {
        // Linear scan neighbors for each object, but stop early.
        DBIDIter it = relation.iterDBIDs();
        while (it.valid()) {
            int within = 0;
            for (DBIDIter other = relation.iterDBIDs(); other.valid(); other.advance()) {
                // The counter is only incremented for objects within distance
                // d; stop as soon as m of them have been seen.
                if (dq.distance(it, other) <= d && ++within >= m) {
                    break;
                }
            }
            scores.putDouble(it, (within < m) ? 1.0 : 0);
            LOG.incrementProcessed(prog);
            it.advance();
        }
    }
    LOG.ensureCompleted(prog);
    return scores;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12