Search in sources :

Example 1 with SubspaceEuclideanDistanceFunction

Use of de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction in project elki by elki-project.

From the class OUTRES, method outresScore.

/**
 * Main loop of OUTRES, evaluated once per object and recursing into
 * higher-dimensional subspaces.
 * <p>
 * For every dimension index from {@code s} up to {@code kernel.dim} that is
 * not yet set in {@code subspace}, the dimension is temporarily added, the
 * neighborhood of {@code id} in that subspace is queried, and - if the
 * neighborhood has more than two members and the subspace passes the
 * relevance test - the running score is multiplied by the density/deviation
 * ratio (only when the deviation is at least 1) and by the score of the
 * recursive call starting at the next dimension.
 * <p>
 * The {@code subspace} bit set is modified while the method runs; every bit
 * set here is cleared again before moving on to the next dimension.
 *
 * @param s start dimension
 * @param subspace Current subspace
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
    final SubspaceEuclideanDistanceFunction dist = new SubspaceEuclideanDistanceFunction(subspace);
    // Reused accumulator for the neighbor densities; reset before each use.
    final MeanVariance stats = new MeanVariance();
    // The score starts at 1.0 and is only ever multiplied.
    double result = 1.0;
    for (int dim = s; dim < kernel.dim; dim++) {
        // Dimensions already contained in the subspace are skipped entirely.
        if (!BitsUtil.get(subspace, dim)) {
            // Extend the subspace by this dimension and sync the distance function.
            BitsUtil.setI(subspace, dim);
            dist.setSelectedDimensions(subspace);
            final double eps = kernel.adjustedEps(kernel.dim);
            // Query with a doubled window so that the neighbors of neighbors are
            // retrieved as well (subspace Euclidean distance is metric).
            final double window = 2. * eps;
            RangeQuery<V> query = QueryUtil.getRangeQuery(kernel.relation, dist, window);
            DoubleDBIDList candidates = query.getRangeForDBID(id, window);
            DoubleDBIDList nearby = refineRange(candidates, eps);
            // Needs more than two neighbors, and the subspace must pass the
            // relevance test (only evaluated when the size check succeeds).
            if (nearby.size() > 2 && relevantSubspace(subspace, nearby, kernel)) {
                final double density = kernel.subspaceDensity(subspace, nearby);
                // Collect mean and standard deviation of the neighbors' densities.
                stats.reset();
                DoubleDBIDListIter it = nearby.iter();
                while (it.valid()) {
                    DoubleDBIDList secondLevel = subsetNeighborhoodQuery(candidates, it, dist, eps, kernel);
                    stats.put(kernel.subspaceDensity(subspace, secondLevel));
                    it.advance();
                }
                final double deviation = (stats.getMean() - density) / (2. * stats.getSampleStddev());
                // Only a high deviation contributes to the score.
                if (deviation >= 1) {
                    result *= density / deviation;
                }
                // Descend into subspaces that additionally contain later dimensions.
                result *= outresScore(dim + 1, subspace, id, kernel);
            }
            // Restore the subspace before trying the next dimension.
            BitsUtil.clearI(subspace, dim);
        }
    }
    return result;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Example 2 with SubspaceEuclideanDistanceFunction

Use of de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction in project elki by elki-project.

From the class SOD, method subspaceOutlierDegree.

/**
 * Compute SOD score.
 * <p>
 * The score is the subspace Euclidean distance between the query object and
 * the center, restricted to the dimensions set in {@code weightVector} and
 * divided by the number of those dimensions. An empty weight vector yields 0.
 *
 * @param queryObject Query object
 * @param center Center vector
 * @param weightVector Weight vector
 * @return sod score
 */
private double subspaceOutlierDegree(V queryObject, double[] center, long[] weightVector) {
    final int selected = BitsUtil.cardinality(weightVector);
    if (selected != 0) {
        final SubspaceEuclideanDistanceFunction subspaceDist = new SubspaceEuclideanDistanceFunction(weightVector);
        // FIXME: defined and published as card, should be sqrt(card),
        // unfortunately. Kept as published.
        return subspaceDist.distance(queryObject, DoubleVector.wrap(center)) / selected;
    }
    // No dimension selected: nothing to measure.
    return 0;
}
Also used : SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction)

Example 3 with SubspaceEuclideanDistanceFunction

Use of de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction in project elki by elki-project.

From the class FeatureBagging, method run.

/**
 * Run the algorithm on a data set.
 * <p>
 * Runs LOF {@code num} times, each time with a subspace Euclidean distance on
 * a subspace obtained from {@code randomSubspace}, and then merges the
 * individual results into one score per object: either breadth-first over
 * the per-run orderings (when {@code breadth} is set) or as the sum of all
 * non-NaN scores.
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
    final int dbdim = RelationUtil.dimensionality(relation);
    // Bounds handed to randomSubspace: half the dimensionality and one less
    // than the full dimensionality.
    final int mindim = dbdim >> 1;
    final int maxdim = dbdim - 1;
    final Random rand = rnd.getSingleThreadedRandom();

    // Phase 1: build the ensemble, one LOF run per subspace.
    ArrayList<OutlierResult> results = new ArrayList<>(num);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
    for (int round = 0; round < num; round++) {
        long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand);
        SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
        // Run LOF in this subspace and keep its result.
        results.add(new LOF<NumberVector>(k, df).run(database, relation));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);

    // Phase 2: merge the ensemble members into a single score per object.
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    if (!breadth) {
        // Cumulative sum: add up the scores of all runs, ignoring NaN values.
        for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
            double sum = 0.0;
            for (OutlierResult r : results) {
                final double s = r.getScores().doubleValue(iter);
                if (Double.isNaN(s)) {
                    continue;
                }
                sum += s;
            }
            scores.putDouble(iter, sum);
            minmax.put(sum);
            LOG.incrementProcessed(cprog);
        }
    } else {
        // Breadth-first: pair each run's score-ordered DBID iterator with its
        // score relation. They are all created up front so that they can be
        // advanced "in parallel" below.
        @SuppressWarnings("unchecked") Pair<DBIDIter, DoubleRelation>[] cursors = (Pair<DBIDIter, DoubleRelation>[]) new Pair[results.size()];
        for (int j = 0; j < cursors.length; j++) {
            final OutlierResult r = results.get(j);
            cursors[j] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().order(relation.getDBIDs()).iter(), r.getScores());
        }
        // Outer loop: one step per rank position (a *line* of the AS_t(i)-matrix).
        for (int rank = 0; rank < relation.size(); rank++) {
            // Inner loop: the entries of that line, one per run (breadth-first).
            for (Pair<DBIDIter, DoubleRelation> cursor : cursors) {
                final DBIDIter iter = cursor.first;
                // An iterator stays valid for all steps if its run produced a
                // complete result (one score for every DBID).
                if (!iter.valid()) {
                    LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
                    continue;
                }
                final double score = cursor.second.doubleValue(iter);
                // Keep only the first score seen for an object.
                // NOTE(review): relies on unset entries of the store reading as
                // NaN - confirm against the data store's default value.
                if (Double.isNaN(scores.doubleValue(iter))) {
                    scores.putDouble(iter, score);
                    minmax.put(score);
                }
                iter.advance();
            }
            // Progress does not take the initial mapping into account.
            LOG.incrementProcessed(cprog);
        }
    }
    LOG.ensureCompleted(cprog);

    // Wrap the combined scores together with their observed value range.
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    DoubleRelation combined = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, combined);
}
Also used : LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

SubspaceEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction)3 LOF (de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF)1 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)1 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)1 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)1 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)1 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)1 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)1 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)1 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1