Search in sources :

Example 31 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OutRankS1 method run.

/**
 * Run the subspace clustering algorithm and derive an outlier score for each
 * object from the clusters it is contained in.
 *
 * For every cluster an object belongs to, its score grows by
 * {@code alpha * relsize + (1 - alpha) * reldim}, where {@code relsize} and
 * {@code reldim} are the cluster size and subspace dimensionality relative to
 * the largest values over all clusters. Objects contained in no cluster keep
 * the initial score 0. The scores are reported with an inverted score meta.
 *
 * @param database Database to process
 * @return Outlier result, with the clustering attached as child result
 */
@Override
public OutlierResult run(Database database) {
    DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
    // Run the primary algorithm
    Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
    WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        score.putDouble(iter, 0);
    }
    int maxdim = 0, maxsize = 0;
    // Find maximum dimensionality and cluster size
    for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
        maxsize = Math.max(maxsize, cluster.size());
        maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
    }
    // Iterate over all clusters:
    for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
        // Guard the normalization: a zero maximum would otherwise yield 0/0 = NaN.
        double relsize = maxsize > 0 ? cluster.size() / (double) maxsize : 0.;
        double reldim = maxdim > 0 ? BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim : 0.;
        // Process objects in the cluster
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
            double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
            score.putDouble(iter, newscore);
        }
    }
    // Track the actual score range over the FINAL scores of ALL objects.
    // Doing this inside the cluster loop would record intermediate sums of
    // objects in several clusters, and would miss objects in no cluster.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        minmax.put(score.doubleValue(iter));
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    OutlierResult res = new OutlierResult(meta, scoreResult);
    res.addChildResult(clustering);
    return res;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 32 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class KMeansOutlierDetection method run.

/**
 * Run the outlier detection algorithm.
 *
 * The configured clustering algorithm is executed first; afterwards every
 * object of every cluster is scored by its distance to the prototype of the
 * cluster it was assigned to.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceFunction<? super O> distanceFunction = clusterer.getDistanceFunction();
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distanceFunction);
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    Clustering<?> clustering = clusterer.run(database, relation);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    // Observed range of the scores, needed for the result meta data.
    DoubleMinMax range = new DoubleMinMax();
    @SuppressWarnings("unchecked")
    NumberVector.Factory<O> vectorFactory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
    for (Cluster<?> cluster : clustering.getAllClusters()) {
        // FIXME: use a primitive distance function on number vectors instead.
        O prototype = vectorFactory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
        DBIDIter member = cluster.getIDs().iter();
        while (member.valid()) {
            final double distance = distanceQuery.distance(prototype, member);
            scores.put(member, distance);
            range.put(distance);
            member.advance();
        }
    }
    // Build result representation.
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DataStoreFactory(de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 33 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class DistanceStddevOutlier method run.

/**
 * Run the outlier detection algorithm.
 *
 * The score of an object is the sample standard deviation of the distances
 * to its k nearest neighbors, not counting the object itself.
 *
 * @param database Database to use
 * @param relation Relation to analyze
 * @return Outlier score result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Get a nearest neighbor query on the relation.
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
    // Output data storage
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    // Track minimum and maximum scores
    DoubleMinMax minmax = new DoubleMinMax();
    // Iterate over all objects
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        KNNList neighbors = knnq.getKNNForDBID(iter, k);
        // Aggregate distances
        MeanVariance mv = new MeanVariance();
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // Skip the object itself. The 0 is not very informative.
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            mv.put(neighbor.doubleValue());
        }
        // NOTE(review): with fewer than two aggregated distances the sample
        // standard deviation is presumably not well defined - confirm the
        // behavior of MeanVariance for very small k.
        final double stddev = mv.getSampleStddev();
        // Store score
        scores.putDouble(iter, stddev);
        // Record the score; without this the meta data below would report
        // the untouched initial state of the tracker as the observed range.
        minmax.put(stddev);
    }
    // Wrap the result in the standard containers
    // Actual min-max, theoretical min-max!
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
    return new OutlierResult(meta, rel);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 34 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class TrivialGeneratedOutlier method run.

/**
 * Run the algorithm.
 *
 * Collects the single-cluster generator models attached to the data, checks
 * that each of them uses normal distributions only, and scores every object
 * by the smallest chi-squared CDF value of its standardized squared distance
 * over all generators.
 *
 * @param models Model relation
 * @param vecs Vector relation
 * @param labels Label relation (not read by this method)
 * @return Outlier result
 */
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
    // Gather the distinct generators referenced by the model relation.
    HashSet<GeneratorSingleCluster> generators = new HashSet<>();
    for (DBIDIter it = models.iterDBIDs(); it.valid(); it.advance()) {
        Model candidate = models.get(it);
        if (candidate instanceof GeneratorSingleCluster) {
            generators.add((GeneratorSingleCluster) candidate);
        }
    }
    if (generators.isEmpty()) {
        LOG.warning("No generator models found for dataset - all points will be considered outliers.");
    }
    // Fail early, before any scoring, if a generator is not purely normal.
    for (GeneratorSingleCluster generator : generators) {
        for (int d = 0; d < generator.getDim(); d++) {
            Distribution distribution = generator.getDistribution(d);
            if (!(distribution instanceof NormalDistribution)) {
                throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + distribution);
            }
        }
    }
    for (DBIDIter it = models.iterDBIDs(); it.valid(); it.advance()) {
        final double[] raw = vecs.get(it).toArray();
        double score = 1.;
        for (GeneratorSingleCluster generator : generators) {
            // Transform backwards, if the generator has a transformation.
            final double[] local = generator.getTransformation() != null ? generator.getTransformation().applyInverse(raw) : raw;
            double squaredSum = 0.0;
            int degrees = 0;
            for (int d = 0; d < local.length; d++) {
                Distribution distribution = generator.getDistribution(d);
                if (!(distribution instanceof NormalDistribution)) {
                    throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + distribution);
                }
                NormalDistribution normal = (NormalDistribution) distribution;
                // Standardized deviation in this dimension.
                double z = (local[d] - normal.getMean()) / normal.getStddev();
                squaredSum += z * z;
                degrees++;
            }
            if (degrees <= 0) {
                score = 0.;
            } else {
                // The squared distances are ChiSquared distributed
                score = Math.min(score, ChiSquaredDistribution.cdf(squaredSum, degrees));
            }
        }
        // Rescale using the expected value, unless it is 1 or larger.
        if (expect < 1) {
            score = expect * score / (1 - score + expect);
        }
        scores.putDouble(it, score);
    }
    DoubleRelation relation = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
    return new OutlierResult(meta, relation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) ChiSquaredDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution) Model(de.lmu.ifi.dbs.elki.data.model.Model) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) HashSet(java.util.HashSet) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 35 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class ComputeKNNOutlierScores method writeResult.

/**
 * Write a single output line: the label, followed by one space-separated
 * score per object, terminated by a newline.
 *
 * @param out Output stream
 * @param ids DBIDs, iterated in their own order
 * @param result Outlier result
 * @param scaling Scaling function; if {@code null}, raw scores are written
 * @param label Identification label
 */
void writeResult(PrintStream out, DBIDs ids, OutlierResult result, ScalingFunction scaling, String label) {
    // Outlier-specific scalings are prepared on the result before use.
    if (scaling instanceof OutlierScalingFunction) {
        OutlierScalingFunction outlierScaling = (OutlierScalingFunction) scaling;
        outlierScaling.prepare(result);
    }
    out.append(label);
    final DoubleRelation scores = result.getScores();
    DBIDIter it = ids.iter();
    while (it.valid()) {
        final double raw = scores.doubleValue(it);
        final double scaled;
        if (scaling == null) {
            scaled = raw;
        } else {
            scaled = scaling.getScaled(raw);
        }
        out.append(' ');
        out.append(Double.toString(scaled));
        it.advance();
    }
    out.append(FormatUtil.NEWLINE);
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 89; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 72; MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 70; OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 70; OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 70; WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 69; DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 65; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 38; BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta): 34; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 21; KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 18; MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 17; InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta): 14; ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore): 13; QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta): 13; DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 11; StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress): 11; NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate): 9; ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 8; Mean (de.lmu.ifi.dbs.elki.math.Mean): 8