Search in sources :

Example 31 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class SilhouetteOutlierDetection method run.

/**
 * Run the wrapped clustering algorithm and assign every object its silhouette
 * coefficient as outlier score.
 *
 * For an object in a regular cluster, {@code a} is the mean distance to the
 * other members of its own cluster, and {@code min} is the smallest mean
 * distance to any other cluster (or, with TREAT_NOISE_AS_SINGLETONS, the
 * smallest distance to any single noise object). The score is
 * {@code (min - a) / max(min, a)}.
 *
 * Clusters that are flagged as noise or contain at most one object receive the
 * score 0, unless the noise option is MERGE_NOISE, in which case they are
 * processed like any other cluster.
 *
 * Degenerate cases that would otherwise produce NaN (a one-element cluster
 * under MERGE_NOISE, no other cluster to compare with, or {@code a} and
 * {@code min} both being zero) yield the neutral score 0.
 *
 * @param database Database to process
 * @return Outlier result with the silhouette coefficients as scores
 */
@Override
public OutlierResult run(Database database) {
    Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    Clustering<?> c = clusterer.run(database);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    DoubleMinMax mm = new DoubleMinMax();
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    for (Cluster<?> cluster : clusters) {
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                case TREAT_NOISE_AS_SINGLETONS:
                    // As suggested in Rousseeuw, we use 0 for singletons.
                    for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
                        scores.put(iter, 0.);
                    }
                    mm.put(0.);
                    // Skips to the next cluster of the enclosing for loop.
                    continue;
                case MERGE_NOISE:
                    // Treat as cluster below
                    break;
            }
        }
        ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
        // Number of other members in the same cluster; 0 for a one-element cluster.
        final int others = ids.size() - 1;
        // temporary storage: partial in-cluster distance sums, indexed by offset.
        double[] as = new double[ids.size()];
        DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
        for (it1.seek(0); it1.valid(); it1.advance()) {
            // a: In-cluster distances
            // Start from the distances already accumulated by earlier iterations.
            double a = as[it1.getOffset()];
            // Each pair is computed once and credited to both endpoints.
            for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
                final double dist = dq.distance(it1, it2);
                a += dist;
                as[it2.getOffset()] += dist;
            }
            // Avoid 0/0 for a one-element cluster (reachable with MERGE_NOISE).
            a = others > 0 ? a / others : 0.;
            // b: other clusters:
            double min = Double.POSITIVE_INFINITY;
            for (Cluster<?> ocluster : clusters) {
                if (ocluster == /* yes, reference identity */
                cluster) {
                    continue;
                }
                if (ocluster.isNoise()) {
                    switch(noiseOption) {
                        case IGNORE_NOISE:
                            continue;
                        case MERGE_NOISE:
                            // No special treatment
                            break;
                        case TREAT_NOISE_AS_SINGLETONS:
                            // Treat noise cluster as singletons:
                            for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
                                double dist = dq.distance(it1, it3);
                                if (dist < min) {
                                    min = dist;
                                }
                            }
                            continue;
                    }
                }
                final DBIDs oids = ocluster.getIDs();
                double b = 0.;
                for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
                    b += dq.distance(it1, it3);
                }
                // For an empty cluster this is NaN, which fails the comparison below.
                b /= oids.size();
                if (b < min) {
                    min = b;
                }
            }
            // Without any other cluster (min still infinite), or when both terms
            // are zero, the quotient would be NaN; use the neutral score instead.
            final double denom = Math.max(min, a);
            final double score = (min == Double.POSITIVE_INFINITY || denom == 0.) ? 0. : (min - a) / denom;
            scores.put(it1, score);
            mm.put(score);
        }
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 32 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class OutRankS1 method run.

/**
 * Run the subspace clustering algorithm and score every object by the
 * clusters it belongs to.
 *
 * Each cluster contributes {@code alpha * relsize + (1 - alpha) * reldim} to
 * the score of each of its members, where {@code relsize} is the cluster size
 * relative to the largest cluster and {@code reldim} is its subspace
 * dimensionality relative to the largest dimensionality. Objects that belong
 * to no cluster keep the initial score 0.
 *
 * The minimum and maximum reported in the score meta data are computed over
 * the final scores of all objects, so that objects in several clusters are
 * counted with their total score and unclustered objects are included.
 *
 * @param database Database to process
 * @return Outlier result, with the clustering attached as child result
 */
@Override
public OutlierResult run(Database database) {
    DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
    // Run the primary algorithm
    Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
    WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        score.putDouble(iter, 0);
    }
    int maxdim = 0, maxsize = 0;
    // Find maximum dimensionality and cluster size
    for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
        maxsize = Math.max(maxsize, cluster.size());
        maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
    }
    // Iterate over all clusters:
    for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
        // Guard the normalisation: a zero maximum would otherwise yield 0/0 = NaN.
        double relsize = maxsize > 0 ? cluster.size() / (double) maxsize : 0.;
        double reldim = maxdim > 0 ? BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim : 0.;
        // Process objects in the cluster
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
            double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
            score.putDouble(iter, newscore);
        }
    }
    // Collect the score range from the final values only: an object may be
    // updated by several clusters, and unclustered objects are never updated.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        minmax.put(score.doubleValue(iter));
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    OutlierResult res = new OutlierResult(meta, scoreResult);
    res.addChildResult(clustering);
    return res;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 33 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class KMeansOutlierDetection method run.

/**
 * Cluster the relation and use, for every object, the distance to the
 * prototype of its assigned cluster as outlier score.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceFunction<? super O> distanceFunction = clusterer.getDistanceFunction();
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distanceFunction);
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    final Clustering<?> clustering = clusterer.run(database, relation);
    final WritableDoubleDataStore outlierScores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    final DoubleMinMax range = new DoubleMinMax();
    // The factory turns a cluster prototype into a vector of the relation's type.
    @SuppressWarnings("unchecked")
    final NumberVector.Factory<O> vectorFactory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
    for (Cluster<?> current : clustering.getAllClusters()) {
        // FIXME: use a primitive distance function on number vectors instead.
        final O prototype = vectorFactory.newNumberVector(ModelUtil.getPrototype(current.getModel(), relation));
        DBIDIter member = current.getIDs().iter();
        while (member.valid()) {
            final double toPrototype = distanceQuery.distance(prototype, member);
            outlierScores.put(member, toPrototype);
            range.put(toPrototype);
            member.advance();
        }
    }
    // Wrap the scores and their observed range into the result objects.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", outlierScores, relation.getDBIDs());
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DataStoreFactory(de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 34 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class SimpleKernelDensityLOFTest method testLDF.

/**
 * Regression test: runs SimpleKernelDensityLOF with k=20 and a biweight
 * kernel on the 6d axis-subspaces data set, then checks the AUC and the
 * score of one object against stored reference values.
 */
@Test
public void testLDF() {
    final Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345);
    // Configure the algorithm first, then run it as a separate step.
    final SimpleKernelDensityLOF<DoubleVector> algorithm = new ELKIBuilder<SimpleKernelDensityLOF<DoubleVector>>(SimpleKernelDensityLOF.class)
        .with(LOF.Parameterizer.K_ID, 20)
        .with(SimpleKernelDensityLOF.Parameterizer.KERNEL_ID, BiweightKernelDensityFunction.class)
        .build();
    final OutlierResult result = algorithm.run(db);
    testAUC(db, "Noise", result, 0.87192156);
    testSingleScore(result, 1293, 12.271188);
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractOutlierAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)

Example 35 with OutlierResult

use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.

the class VarianceOfVolumeTest method testVOV.

/**
 * Regression test: runs VarianceOfVolume with k=10 on the 6d axis-subspaces
 * data set, then checks the score of one object and the AUC against stored
 * reference values.
 */
@Test
public void testVOV() {
    final Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345);
    // Configure the algorithm first, then run it as a separate step.
    final VarianceOfVolume<DoubleVector> algorithm = new ELKIBuilder<VarianceOfVolume<DoubleVector>>(VarianceOfVolume.class)
        .with(VarianceOfVolume.Parameterizer.K_ID, 10)
        .build();
    final OutlierResult result = algorithm.run(db);
    testSingleScore(result, 1293, 2.0733100852601836e13);
    testAUC(db, "Noise", result, 0.9306946778);
}
Also used : Database(de.lmu.ifi.dbs.elki.database.Database) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) Test(org.junit.Test) AbstractOutlierAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)

Aggregations

OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)144 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)72 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)71 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)71 Database (de.lmu.ifi.dbs.elki.database.Database)69 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)62 Test (org.junit.Test)58 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)57 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)45 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)26 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)23 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)22 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)18 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)13 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11