Search in sources :

Example 51 with MaterializedDoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation in project elki by elki-project.

The method run of the class SimpleOutlierEnsemble.

/**
 * Runs every configured inner algorithm on the database and merges the
 * outlier scores they produce into a single ensemble score per object.
 *
 * <p>All outlier results found in the output of each inner algorithm are
 * collected; for each object, the non-NaN scores across those results are
 * handed to the voting strategy. Objects that did not receive any non-NaN
 * score are not written to the output store; a warning is logged instead.
 *
 * @param database Database to run the inner algorithms on
 * @return Outlier result holding the combined scores and their observed min/max
 * @throws IllegalStateException declared by the signature; this method body
 *         does not throw it explicitly
 */
@Override
public OutlierResult run(Database database) throws IllegalStateException {
    int num = algorithms.size();
    // Run inner outlier algorithms
    ModifiableDBIDs ids = DBIDUtil.newHashSet();
    ArrayList<OutlierResult> results = new ArrayList<>(num);
    {
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
        for (Algorithm alg : algorithms) {
            Result res = alg.run(database);
            // A single algorithm run may yield zero, one or several outlier results.
            List<OutlierResult> ors = OutlierResult.getOutlierResults(res);
            for (OutlierResult or : ors) {
                results.add(or);
                // Union of all object ids that appear in any inner result.
                ids.addDBIDs(or.getScores().getDBIDs());
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    // Combine
    WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    // The score buffer needs one slot per collected result, NOT per algorithm:
    // sizing it by the algorithm count overflows as soon as one algorithm
    // contributes more than one outlier result.
    final int numResults = results.size();
    {
        FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            // Fresh array per object, since ownership passes to the voting strategy.
            double[] scores = new double[numResults];
            int i = 0;
            for (OutlierResult r : results) {
                double score = r.getScores().doubleValue(id);
                if (!Double.isNaN(score)) {
                    scores[i] = score;
                    i++;
                } else {
                    LOG.warning("DBID " + id + " was not given a score by result " + r);
                }
            }
            if (i > 0) {
                // Shrink array if necessary, so the voting only sees valid scores.
                if (i < scores.length) {
                    scores = Arrays.copyOf(scores, i);
                }
                double combined = voting.combine(scores);
                sumscore.putDouble(id, combined);
                minmax.put(combined);
            } else {
                LOG.warning("DBID " + id + " was not given any score at all.");
            }
            LOG.incrementProcessed(cprog);
        }
        LOG.ensureCompleted(cprog);
    }
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids);
    return new OutlierResult(meta, scores);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) Algorithm(de.lmu.ifi.dbs.elki.algorithm.Algorithm) OutlierAlgorithm(de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm) AbstractAlgorithm(de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) Result(de.lmu.ifi.dbs.elki.result.Result) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 52 with MaterializedDoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation in project elki by elki-project.

The method run of the class CTLuGLSBackwardSearchAlgorithm.

/**
 * Backward-search outlier detection: repeatedly asks
 * {@code singleIteration} for the next candidate among the objects that
 * are still considered, and flags it as an outlier as long as its value
 * is not below the normal quantile derived from {@code alpha}.
 *
 * @param database Database to process
 * @param relationx Spatial relation
 * @param relationy Attribute relation
 * @return Algorithm result
 */
public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    WritableDoubleDataStore outlierScores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax range = new DoubleMinMax(0.0, 0.0);
    // Working set of objects not yet flagged; the proxy view exposes only these.
    ModifiableDBIDs remaining = DBIDUtil.newHashSet(relationx.getDBIDs());
    ProxyView<V> view = new ProxyView<>(remaining, relationx);
    final double threshold = NormalDistribution.standardNormalQuantile(1.0 - alpha * .5);
    // Keep flagging candidates until one falls below the threshold.
    // The negated "<" is deliberate: a NaN value does not stop the search.
    for (Pair<DBIDVar, Double> best = singleIteration(view, relationy); !(best.second < threshold); best = singleIteration(view, relationy)) {
        final double value = best.second;
        outlierScores.putDouble(best.first, value);
        // NaN values are stored as score but kept out of the min/max tracking.
        if (!Double.isNaN(value)) {
            range.put(value);
        }
        remaining.remove(best.first);
    }
    // Everything left over is an inlier and gets a zero score.
    for (DBIDIter it = remaining.iter(); it.valid(); it.advance()) {
        outlierScores.putDouble(it, 0.0);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("GLSSODBackward", "GLSSODbackward-outlier", outlierScores, relationx.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0, Double.POSITIVE_INFINITY, 0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : ProxyView(de.lmu.ifi.dbs.elki.database.relation.ProxyView) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 53 with MaterializedDoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation in project elki by elki-project.

The method run of the class CTLuMoranScatterplotOutlier.

/**
 * Computes Moran-scatterplot outlier scores: for every object, the global
 * z-score of its (first-dimension) attribute value is multiplied with the
 * average z-score of its neighbors; the negated product, clamped at zero,
 * is the outlier score.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate neighborhood = getNeighborSetPredicateFactory().instantiate(database, nrel);
    // First pass: global mean and variance of the attribute.
    MeanVariance stats = new MeanVariance();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        stats.put(relation.get(it).doubleValue(0));
    }
    DoubleMinMax range = new DoubleMinMax();
    WritableDoubleDataStore outlierScores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Second pass: compare each object's z-score with its neighborhood average.
    for (DBIDIter cur = relation.iterDBIDs(); cur.valid(); cur.advance()) {
        final double mean = stats.getMean();
        final double stddev = stats.getNaiveStddev();
        // Global z score of the current object
        final double globalZ = (relation.get(cur).doubleValue(0) - mean) / stddev;
        // Average z score over the neighbors, excluding the object itself
        Mean neighborAvg = new Mean();
        for (DBIDIter nb = neighborhood.getNeighborDBIDs(cur).iter(); nb.valid(); nb.advance()) {
            if (!DBIDUtil.equal(cur, nb)) {
                neighborAvg.put((relation.get(nb).doubleValue(0) - mean) / stddev);
            }
        }
        // If s has no neighbors => Wzi = zi
        final double localZ = neighborAvg.getCount() > 0 ? neighborAvg.getMean() : globalZ;
        // Note: in the original moran scatterplot, any object with a score < 0 would be an outlier.
        final double score = Math.max(-globalZ * localZ, 0);
        range.put(score);
        outlierScores.putDouble(cur, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("MoranOutlier", "Moran Scatterplot Outlier", outlierScores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // Attach the neighborhood predicate as a child of the result.
    result.addChildResult(neighborhood);
    return result;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 54 with MaterializedDoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation in project elki by elki-project.

The method run of the class ParallelSimplifiedLOF.

/**
 * Runs the three-phase parallel pipeline: nearest-neighbor lists, then
 * simplified local reachability densities, then the final outlier factors.
 * Each phase wires processors together via shared variables and executes
 * them over all ids with {@code ParallelExecutor}.
 *
 * @param database Database providing the distance and kNN queries
 * @param relation Data relation to process
 * @return Outlier result with the computed factors and their observed min/max
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DBIDs ids = relation.getDBIDs();
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);

    // Phase one: KNN and k-dist
    WritableDataStore<KNNList> neighborLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, KNNList.class);
    KNNProcessor<O> knnStage = new KNNProcessor<>(k + 1, knnQuery);
    SharedObject<KNNList> knnChannel = new SharedObject<>();
    WriteDataStoreProcessor<KNNList> knnWriter = new WriteDataStoreProcessor<>(neighborLists);
    knnStage.connectKNNOutput(knnChannel);
    knnWriter.connectInput(knnChannel);
    ParallelExecutor.run(ids, knnStage, knnWriter);

    // Phase two: simplified-lrd
    WritableDoubleDataStore densities = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    SimplifiedLRDProcessor lrdStage = new SimplifiedLRDProcessor(neighborLists);
    SharedDouble lrdChannel = new SharedDouble();
    WriteDoubleDataStoreProcessor lrdWriter = new WriteDoubleDataStoreProcessor(densities);
    lrdStage.connectOutput(lrdChannel);
    lrdWriter.connectInput(lrdChannel);
    ParallelExecutor.run(ids, lrdStage, lrdWriter);

    // Phase three: Simplified-LOF
    WritableDoubleDataStore factors = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    // NOTE(review): the meaning of the boolean flag is defined by LOFProcessor - confirm there.
    LOFProcessor lofStage = new LOFProcessor(neighborLists, densities, true);
    SharedDouble lofChannel = new SharedDouble();
    DoubleMinMaxProcessor rangeTracker = new DoubleMinMaxProcessor();
    WriteDoubleDataStoreProcessor lofWriter = new WriteDoubleDataStoreProcessor(factors);
    lofStage.connectOutput(lofChannel);
    // The same shared value feeds both the min/max tracker and the writer.
    rangeTracker.connectInput(lofChannel);
    lofWriter.connectInput(lofChannel);
    ParallelExecutor.run(ids, lofStage, lofWriter, rangeTracker);
    DoubleMinMax range = rangeTracker.getMinMax();

    DoubleRelation scoreRelation = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", factors, ids);
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) WriteDataStoreProcessor(de.lmu.ifi.dbs.elki.parallel.processor.WriteDataStoreProcessor) SharedDouble(de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) KNNProcessor(de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) WriteDoubleDataStoreProcessor(de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) SharedObject(de.lmu.ifi.dbs.elki.parallel.variables.SharedObject) DoubleMinMaxProcessor(de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 55 with MaterializedDoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation in project elki by elki-project.

The method run of the class FeatureBagging.

/**
 * Runs LOF {@code num} times, each time on a randomly drawn subspace, and
 * merges the per-run scores into one score per object. Depending on the
 * {@code breadth} flag the merge is either breadth-first (each object gets
 * the first score encountered while walking all rankings in lockstep) or
 * cumulative (sum of all non-NaN scores).
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
    final int dims = RelationUtil.dimensionality(relation);
    final int lowerDim = dims >> 1;
    final int upperDim = dims - 1;
    final Random random = rnd.getSingleThreadedRandom();

    // One LOF run per randomly chosen subspace.
    ArrayList<OutlierResult> runs = new ArrayList<>(num);
    FiniteProgress lofProgress = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
    for (int round = 0; round < num; round++) {
        final long[] subspace = randomSubspace(dims, lowerDim, upperDim, random);
        LOF<NumberVector> lof = new LOF<>(k, new SubspaceEuclideanDistanceFunction(subspace));
        runs.add(lof.run(database, relation));
        LOG.incrementProcessed(lofProgress);
    }
    LOG.ensureCompleted(lofProgress);

    WritableDoubleDataStore merged = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax range = new DoubleMinMax();
    FiniteProgress mergeProgress = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
    if (!breadth) {
        // Cumulative sum: add up every available score per object.
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            double total = 0.0;
            for (OutlierResult r : runs) {
                final double s = r.getScores().doubleValue(it);
                if (Double.isNaN(s)) {
                    continue;
                }
                total += s;
            }
            merged.putDouble(it, total);
            range.put(total);
            LOG.incrementProcessed(mergeProgress);
        }
    } else {
        // Pair each run's score-ordered id iterator with its score relation,
        // set up in advance so that all rankings can be walked "in parallel".
        @SuppressWarnings("unchecked")
        Pair<DBIDIter, DoubleRelation>[] rankings = (Pair<DBIDIter, DoubleRelation>[]) new Pair[runs.size()];
        for (int slot = 0; slot < rankings.length; slot++) {
            final OutlierResult r = runs.get(slot);
            rankings[slot] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().order(relation.getDBIDs()).iter(), r.getScores());
        }
        // Outer loop: rank positions; inner loop: one step in every ranking.
        for (int rank = 0; rank < relation.size(); rank++) {
            for (Pair<DBIDIter, DoubleRelation> ranking : rankings) {
                final DBIDIter cursor = ranking.first;
                if (!cursor.valid()) {
                    LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
                    continue;
                }
                final double score = ranking.second.doubleValue(cursor);
                // Only the first score seen for an object is kept.
                // NOTE(review): relies on unset store entries reading as NaN - confirm the store default.
                if (Double.isNaN(merged.doubleValue(cursor))) {
                    merged.putDouble(cursor, score);
                    range.put(score);
                }
                cursor.advance();
            }
            // Progress does not take the initial mapping into account.
            LOG.incrementProcessed(mergeProgress);
        }
    }
    LOG.ensureCompleted(mergeProgress);

    OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", merged, relation.getDBIDs());
    return new OutlierResult(meta, scoreRelation);
}
Also used : LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) 72, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) 72, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) 72, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) 71, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) 70, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax) 62, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 55, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 38, BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) 35, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) 23, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList) 18, InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) 13, ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) 13, QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) 13, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) 11, StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress) 11, NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) 9, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance) 9, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) 7, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) 6