Search in sources :

Example 11 with MeasurementGroup

Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

The class EvaluateSimplifiedSilhouette, method evaluateClustering.

/**
 * Evaluate a single clustering with the simplified silhouette.
 *
 * For every object, {@code a} is the distance to the centroid of its own
 * cluster and {@code b} is the smallest distance to any other cluster's
 * centroid. The per-object score is {@code (b - a) / max(a, b)}, defined as 0
 * when {@code a == b}. The mean of these scores (optionally scaled by a
 * penalty for ignored noise) is logged, stored as an evaluation measure in
 * the database's result hierarchy, and returned.
 *
 * @param db Database whose result hierarchy receives the evaluation result
 * @param rel Data relation
 * @param c Clustering
 * @return Mean simplified silhouette (after applying the noise penalty, if any)
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    // One centroid slot per cluster, in iteration order of 'clusters'.
    // A null entry is treated below as a noise cluster without a centroid.
    // NOTE(review): centroids() is defined elsewhere; presumably it fills the
    // array and returns the number of ignored noise objects - confirm.
    NumberVector[] centroids = new NumberVector[clusters.size()];
    int ignorednoise = centroids(rel, clusters, centroids, noiseOption);
    MeanVariance mssil = new MeanVariance();
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    // 'i' tracks the position of 'cluster' in the centroids array.
    for (int i = 0; ci.hasNext(); i++) {
        Cluster<?> cluster = ci.next();
        if (cluster.size() <= 1) {
            // As suggested in Rousseeuw, we use 0 for singletons.
            // The second argument is the weight, so empty clusters add nothing.
            mssil.put(0., cluster.size());
            continue;
        }
        if (cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    // Ignore elements
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // As suggested in Rousseeuw, we use 0 for singletons.
                    mssil.put(0., cluster.size());
                    continue;
                case MERGE_NOISE:
                    // Treat as cluster below
                    break;
            }
        }
        // Cluster center:
        final NumberVector center = centroids[i];
        assert (center != null);
        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
            NumberVector obj = rel.get(it);
            // a: Distance to own centroid
            double a = distance.distance(center, obj);
            // b: Distance to other clusters centroids:
            double min = Double.POSITIVE_INFINITY;
            Iterator<? extends Cluster<?>> cj = clusters.iterator();
            for (int j = 0; cj.hasNext(); j++) {
                Cluster<?> ocluster = cj.next();
                if (i == j) {
                    // Skip the object's own cluster.
                    continue;
                }
                NumberVector other = centroids[j];
                if (other == null) {
                    // Noise!
                    switch(noiseOption) {
                        case IGNORE_NOISE:
                            continue;
                        case TREAT_NOISE_AS_SINGLETONS:
                            // Treat each object like a centroid!
                            for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) {
                                double dist = distance.distance(rel.get(it2), obj);
                                min = dist < min ? dist : min;
                            }
                            continue;
                        case MERGE_NOISE:
                            // Treat as cluster below, but should not be reachable.
                            break;
                    }
                }
                // Clusters: use centroid.
                double dist = distance.distance(other, obj);
                min = dist < min ? dist : min;
            }
            // One 'real' cluster only? Then there is no competitor; fall back to a.
            min = min < Double.POSITIVE_INFINITY ? min : a;
            // Rousseeuw defines s = 0 for a == b. Handling this explicitly avoids
            // 0/0 = NaN when both distances are zero (e.g. an object located exactly
            // on its centroid with no other cluster), which would otherwise turn
            // the whole mean into NaN.
            mssil.put(min == a ? 0. : (min - a) / (min > a ? min : a));
        }
    }
    double penalty = 1.;
    // Only if {@link NoiseHandling#IGNORE_NOISE}:
    // scale by the fraction of objects that were actually evaluated.
    if (penalize && ignorednoise > 0) {
        penalty = (rel.size() - ignorednoise) / (double) rel.size();
    }
    final double meanssil = penalty * mssil.getMean();
    final double stdssil = penalty * mssil.getSampleStddev();
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".simplified-silhouette.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".simplified-silhouette.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.mean", meanssil));
        LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.stddev", stdssil));
    }
    // Attach the measure to the (possibly pre-existing) evaluation result of this clustering.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    g.addMeasure("Simp. Silhouette +-" + FormatUtil.NF2.format(stdssil), meanssil, -1., 1., 0., false);
    db.getHierarchy().resultChanged(ev);
    return meanssil;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 12 with MeasurementGroup

Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

The class EvaluateSquaredErrors, method evaluateClustering.

/**
 * Compute distance-to-center error measures for one clustering.
 *
 * Accumulates, over all evaluated clusters, the distance of each member to
 * the cluster's prototype or centroid, and reports the mean distance, the
 * sum of squares and the RMSD both as log statistics and as measures of an
 * evaluation result attached to the clustering.
 *
 * @param db Database whose result hierarchy receives the evaluation result
 * @param rel Data relation
 * @param c Clustering
 * @return sum of squares (ssq)
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    // A distance that is already squared must not be squared a second time.
    final boolean needsSquaring = !distance.isSquared();
    final List<? extends Cluster<?>> allClusters = c.getAllClusters();
    int skippedObjects = 0;
    double squaredTotal = 0;
    double distanceTotal = 0;
    for (Cluster<?> current : allClusters) {
        // Singletons, empty clusters and noise are subject to the noise policy.
        if (cluster1OrNoise(current)) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    // Excluded from the totals and from the divisor.
                    skippedObjects += current.size();
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // Contributes nothing to the totals, but still counts in the divisor.
                    continue;
                case MERGE_NOISE:
                    // Falls through to the regular cluster handling.
                    break;
            }
        }
        final NumberVector prototype = ModelUtil.getPrototypeOrCentroid(current.getModel(), rel, current.getIDs());
        DBIDIter member = current.getIDs().iter();
        while (member.valid()) {
            final double dist = distance.distance(prototype, rel.get(member));
            distanceTotal += dist;
            if (needsSquaring) {
                squaredTotal += dist * dist;
            } else {
                squaredTotal += dist;
            }
            member.advance();
        }
    }
    // Never divide by zero, even if every object was skipped.
    final int divisor = Math.max(1, rel.size() - skippedObjects);
    final double meanDistance = distanceTotal / divisor;
    final double rmsd = FastMath.sqrt(squaredTotal / divisor);
    if (LOG.isStatistics()) {
        LOG.statistics(new DoubleStatistic(key + ".mean", meanDistance));
        LOG.statistics(new DoubleStatistic(key + ".ssq", squaredTotal));
        LOG.statistics(new DoubleStatistic(key + ".rmsd", rmsd));
    }
    final EvaluationResult evaluation = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    final MeasurementGroup group = evaluation.findOrCreateGroup("Distance-based Evaluation");
    group.addMeasure("Mean distance", meanDistance, 0., Double.POSITIVE_INFINITY, true);
    group.addMeasure("Sum of Squares", squaredTotal, 0., Double.POSITIVE_INFINITY, true);
    group.addMeasure("RMSD", rmsd, 0., Double.POSITIVE_INFINITY, true);
    db.getHierarchy().add(c, evaluation);
    return squaredTotal;
}

/**
 * Test whether a cluster falls under the noise policy.
 *
 * @param cluster Cluster to test
 * @return {@code true} if the cluster has at most one member or is flagged as noise
 */
private static boolean cluster1OrNoise(Cluster<?> cluster) {
    return cluster.size() <= 1 || cluster.isNoise();
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) 12, DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) 10, EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult) 9, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 7, LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) 7, StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) 7, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector) 6, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) 2, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) 2, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 2, DBIDsTest (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest) 2, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance) 2, Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) 2, SpatialComparable (de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable) 1, Database (de.lmu.ifi.dbs.elki.database.Database) 1, SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) 1, Relation (de.lmu.ifi.dbs.elki.database.relation.Relation) 1, SquaredEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) 1, OutlierScoreAdapter (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter) 1, SimpleAdapter (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter) 1