Search in sources :

Example 36 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class OutRankS1 method run.

@Override
public OutlierResult run(Database database) {
    DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
    // Run the primary algorithm
    Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
    WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        score.putDouble(iter, 0);
    }
    int maxdim = 0, maxsize = 0;
    // Find maximum dimensionality and cluster size
    for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
        maxsize = Math.max(maxsize, cluster.size());
        maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
    }
    // Iterate over all clusters:
    DoubleMinMax minmax = new DoubleMinMax();
    for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
        double relsize = cluster.size() / (double) maxsize;
        double reldim = BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim;
        // Process objects in the cluster
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
            double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
            score.putDouble(iter, newscore);
            minmax.put(newscore);
        }
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    OutlierResult res = new OutlierResult(meta, scoreResult);
    res.addChildResult(clustering);
    return res;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 37 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class KMeansOutlierDetection method run.

/**
 * Run the outlier detection algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceFunction<? super O> df = clusterer.getDistanceFunction();
    DistanceQuery<O> dq = database.getDistanceQuery(relation, df);
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    Clustering<?> c = clusterer.run(database, relation);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    DoubleMinMax mm = new DoubleMinMax();
    @SuppressWarnings("unchecked") NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    for (Cluster<?> cluster : clusters) {
        // FIXME: use a primitive distance function on number vectors instead.
        O mean = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
            double dist = dq.distance(mean, iter);
            scores.put(iter, dist);
            mm.put(dist);
        }
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DataStoreFactory(de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 38 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class DistanceStddevOutlier method run.

/**
 * Run the outlier detection algorithm
 *
 * @param database Database to use
 * @param relation Relation to analyze
 * @return Outlier score result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Get a nearest neighbor query on the relation.
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
    // Output data storage
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    // Track minimum and maximum scores
    DoubleMinMax minmax = new DoubleMinMax();
    // Iterate over all objects
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        KNNList neighbors = knnq.getKNNForDBID(iter, k);
        // Aggregate distances
        MeanVariance mv = new MeanVariance();
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // Skip the object itself. The 0 is not very informative.
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            mv.put(neighbor.doubleValue());
        }
        // Store score
        scores.putDouble(iter, mv.getSampleStddev());
    }
    // Wrap the result in the standard containers
    // Actual min-max, theoretical min-max!
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
    return new OutlierResult(meta, rel);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 39 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class TrivialGeneratedOutlier method run.

/**
 * Run the algorithm
 *
 * @param models Model relation
 * @param vecs Vector relation
 * @param labels Label relation
 * @return Outlier result
 */
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
    HashSet<GeneratorSingleCluster> generators = new HashSet<>();
    for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
        Model model = models.get(iditer);
        if (model instanceof GeneratorSingleCluster) {
            generators.add((GeneratorSingleCluster) model);
        }
    }
    if (generators.isEmpty()) {
        LOG.warning("No generator models found for dataset - all points will be considered outliers.");
    }
    for (GeneratorSingleCluster gen : generators) {
        for (int i = 0; i < gen.getDim(); i++) {
            Distribution dist = gen.getDistribution(i);
            if (!(dist instanceof NormalDistribution)) {
                throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
            }
        }
    }
    for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = 1.;
        double[] v = vecs.get(iditer).toArray();
        for (GeneratorSingleCluster gen : generators) {
            double[] tv = v;
            // Transform backwards
            if (gen.getTransformation() != null) {
                tv = gen.getTransformation().applyInverse(v);
            }
            final int dim = tv.length;
            double lensq = 0.0;
            int norm = 0;
            for (int i = 0; i < dim; i++) {
                Distribution dist = gen.getDistribution(i);
                if (dist instanceof NormalDistribution) {
                    NormalDistribution d = (NormalDistribution) dist;
                    double delta = (tv[i] - d.getMean()) / d.getStddev();
                    lensq += delta * delta;
                    norm += 1;
                } else {
                    throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
                }
            }
            if (norm > 0.) {
                // The squared distances are ChiSquared distributed
                score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
            } else {
                score = 0.;
            }
        }
        if (expect < 1) {
            score = expect * score / (1 - score + expect);
        }
        scores.putDouble(iditer, score);
    }
    DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) ChiSquaredDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution) Model(de.lmu.ifi.dbs.elki.data.model.Model) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) HashSet(java.util.HashSet) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 40 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class FarthestSumPointsInitialMeans method chooseInitialMeans.

@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    // Get a distance query
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // Chose first mean
    List<T> means = new ArrayList<>(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    T prevmean = relation.get(first);
    means.add(prevmean);
    // Find farthest object each.
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        double maxdist = Double.NEGATIVE_INFINITY;
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double dsum = prev + distQ.distance(prevmean, it);
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, dsum);
            }
            if (dsum > maxdist) {
                maxdist = dsum;
                best.set(it);
            }
        }
        // Add new mean (and drop the initial mean when desired)
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean = relation.get(best);
        means.add(prevmean);
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return unboxVectors(means);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)90 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)70 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)70 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)70 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)70 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)68 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)61 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)43 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)33 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)20 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)12 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)12 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)12 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)12 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9