Search in sources:

Example 1 with MeasurementGroup

use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

the class EvaluateVarianceRatioCriteria method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * Sums the squared Euclidean distances of the objects to their own cluster
 * centroid ({@code a}) and to the overall centroid ({@code b}), derives the
 * variance ratio criterion from these sums, logs it as statistics and stores
 * it as a measure in the evaluation result attached to the clustering.
 * Clusterings with at most one cluster are reported with a value of 0.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    // FIXME: allow using a precomputed distance matrix!
    final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    double vrc = 0.;
    int ignorednoise = 0;
    if (clusters.size() > 1) {
        // One centroid slot per cluster, addressed by the cluster's position
        // in the list; the helper's return value is used as the ignored-noise count.
        NumberVector[] centroids = new NumberVector[clusters.size()];
        ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
        // Build global centroid and cluster count:
        final int dim = RelationUtil.dimensionality(rel);
        Centroid overallCentroid = new Centroid(dim);
        int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
        // a: Distance to own centroid
        // b: Distance to overall centroid
        double a = 0, b = 0;
        Iterator<? extends Cluster<?>> ci = clusters.iterator();
        // i tracks the list position of the current cluster, so that
        // centroids[i] stays aligned even when clusters are skipped.
        for (int i = 0; ci.hasNext(); i++) {
            Cluster<?> cluster = ci.next();
            if (cluster.size() <= 1 || cluster.isNoise()) {
                switch(noiseOption) {
                    case IGNORE_NOISE:
                        // Ignored
                        continue;
                    case TREAT_NOISE_AS_SINGLETONS:
                        // Singletons: a = 0 by definition.
                        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                            b += df.distance(overallCentroid, rel.get(it));
                        }
                        // with NEXT cluster.
                        continue;
                    case MERGE_NOISE:
                        // Treat like a cluster below:
                        break;
                }
            }
            for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                NumberVector vec = rel.get(it);
                a += df.distance(centroids[i], vec);
                b += df.distance(overallCentroid, vec);
            }
        }
        // NOTE(review): neither a == 0 nor clustercount == 1 is guarded; in
        // double arithmetic these yield Infinity or NaN rather than an exception.
        vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
        // Only if {@link NoiseHandling#IGNORE_NOISE}:
        if (penalize && ignorednoise > 0) {
            // Scale by the fraction of objects that were actually evaluated.
            vrc *= (rel.size() - ignorednoise) / (double) rel.size();
        }
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
    }
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
    // Signal the modified evaluation result to the result hierarchy, so that
    // listeners can pick up the newly added measure.
    db.getHierarchy().resultChanged(ev);
    return vrc;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 2 with MeasurementGroup

use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

the class OutlierRankingEvaluation method evaluateOrderingResult.

/**
 * Evaluate a ranking that is given as an ordered collection of object ids.
 *
 * Computes ROC AUC, average precision, R-precision and maximum F1 together
 * with their chance-adjusted variants, stores them in a freshly created
 * evaluation result and optionally logs them as statistics.
 *
 * @param size Expected number of ranked objects
 * @param positiveids Ids of the positive objects
 * @param order Ranking to evaluate
 * @return Newly created evaluation result
 * @throws IllegalStateException if the ranking does not contain exactly
 *         {@code size} objects
 */
private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
    if (order.size() != size) {
        throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    final EvaluationResult evaluation = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
    final DBIDsTest positives = new DBIDsTest(positiveids);
    // Share of positive objects; passed as the expected value of the
    // precision-type measures and used for the chance adjustment below.
    final double positiveRate = positiveids.size() / (double) size;
    final double negativeRate = 1 - positiveRate;

    // Raw measures; each evaluation gets a fresh iterator over the ranking.
    MeasurementGroup group = evaluation.newGroup("Evaluation measures:");
    final double auc = ROCEvaluation.STATIC.evaluate(positives, new SimpleAdapter(order.iter()));
    group.addMeasure("ROC AUC", auc, 0., 1., .5, false);
    final double averagePrecision = AveragePrecisionEvaluation.STATIC.evaluate(positives, new SimpleAdapter(order.iter()));
    group.addMeasure("Average Precision", averagePrecision, 0., 1., positiveRate, false);
    final double rPrecision = PrecisionAtKEvaluation.RPRECISION.evaluate(positives, new SimpleAdapter(order.iter()));
    group.addMeasure("R-Precision", rPrecision, 0., 1., positiveRate, false);
    final double maximumF1 = MaximumF1Evaluation.STATIC.evaluate(positives, new SimpleAdapter(order.iter()));
    group.addMeasure("Maximum F1", maximumF1, 0., 1., positiveRate, false);

    // Variants rescaled with respect to the chance level.
    group = evaluation.newGroup("Adjusted for chance:");
    final double adjustedAuc = 2 * auc - 1;
    group.addMeasure("Adjusted AUC", adjustedAuc, 0., 1., 0., false);
    final double adjustedAveragePrecision = (averagePrecision - positiveRate) / negativeRate;
    group.addMeasure("Adjusted AveP", adjustedAveragePrecision, 0., 1., 0., false);
    final double adjustedRPrecision = (rPrecision - positiveRate) / negativeRate;
    group.addMeasure("Adjusted R-Prec", adjustedRPrecision, 0., 1., 0., false);
    final double adjustedMaximumF1 = (maximumF1 - positiveRate) / negativeRate;
    group.addMeasure("Adjusted Max F1", adjustedMaximumF1, 0., 1., 0., false);

    if (LOG.isStatistics()) {
        LOG.statistics(new DoubleStatistic(key + ".rocauc", auc));
        LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjustedAuc));
        LOG.statistics(new DoubleStatistic(key + ".precision.average", averagePrecision));
        LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjustedAveragePrecision));
        LOG.statistics(new DoubleStatistic(key + ".precision.r", rPrecision));
        LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjustedRPrecision));
        LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maximumF1));
        LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjustedMaximumF1));
    }
    return evaluation;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) SimpleAdapter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)

Example 3 with MeasurementGroup

use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

the class OutlierROCCurve method processNewResult.

/**
 * Compute ROC curves for all outlier results and orderings found in a newly
 * added result.
 *
 * For every processed result, the ROC curve is attached to the result
 * hierarchy and the ROC AUC is registered as an evaluation measure, unless
 * such a measure is already present.
 *
 * @param hier Result hierarchy, used to locate the database
 * @param result Newly added result to process
 */
@Override
public void processNewResult(ResultHierarchy hier, Result result) {
    final Database database = ResultUtil.findDatabase(hier);
    // The objects whose label matches the positive class are the ground truth.
    final SetDBIDs positives = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName));
    if (positives.size() == 0) {
        LOG.warning("Computing a ROC curve failed - no objects matched.");
        return;
    }
    final List<OutlierResult> outlierResults = OutlierResult.getOutlierResults(result);
    final List<OrderingResult> remainingOrderings = ResultUtil.getOrderingResults(result);
    boolean processedAny = false;
    // Outlier results are the main use case.
    for (OutlierResult outlier : outlierResults) {
        final ROCResult curve = computeROCResult(outlier.getScores().size(), positives, outlier);
        database.getHierarchy().add(outlier, curve);
        final MeasurementGroup measures = EvaluationResult //
            .findOrCreate(database.getHierarchy(), outlier, "Evaluation of ranking", "ranking-evaluation") //
            .findOrCreateGroup("Evaluation measures");
        if (!measures.hasMeasure(ROCAUC_LABEL)) {
            measures.addMeasure(ROCAUC_LABEL, curve.auc, 0., 1., false);
        }
        // The ordering of this outlier result is covered now; do not process it again below.
        remainingOrderings.remove(outlier.getOrdering());
        processedAny = true;
    }
    // Remaining orderings: apply them to their ids to obtain a ranking.
    for (OrderingResult ordering : remainingOrderings) {
        final DBIDs ranked = ordering.order(ordering.getDBIDs());
        final ROCResult curve = computeROCResult(ordering.getDBIDs().size(), positives, ranked);
        database.getHierarchy().add(ordering, curve);
        final MeasurementGroup measures = EvaluationResult //
            .findOrCreate(database.getHierarchy(), ordering, "Evaluation of ranking", "ranking-evaluation") //
            .findOrCreateGroup("Evaluation measures");
        if (!measures.hasMeasure(ROCAUC_LABEL)) {
            measures.addMeasure(ROCAUC_LABEL, curve.auc, 0., 1., false);
        }
        processedAny = true;
    }
    if (!processedAny) {
        // Nothing found to process; intentionally silent (the warning is disabled).
        return;
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) Database(de.lmu.ifi.dbs.elki.database.Database) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Example 4 with MeasurementGroup

use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

the class OutlierRankingEvaluation method evaluateOutlierResult.

/**
 * Evaluate the ranking induced by an outlier result.
 *
 * Computes ROC AUC, average precision, R-precision, maximum F1, DCG and NDCG
 * together with chance-adjusted variants, stores them in the evaluation
 * result attached to the outlier result and optionally logs them as
 * statistics.
 *
 * @param size Total number of objects, used to derive the positive rate
 * @param positiveids Ids of the positive objects
 * @param or Outlier result to evaluate
 * @return Evaluation result holding the measures
 */
private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
    final EvaluationResult evaluation = EvaluationResult.findOrCreate(or.getHierarchy(), or, "Evaluation of ranking", "ranking-evaluation");
    final DBIDsTest positives = new DBIDsTest(positiveids);
    final int numPositive = positiveids.size();
    // Share of positive objects; passed as the expected value of the
    // precision-type measures and used for the chance adjustment below.
    final double positiveRate = numPositive / (double) size;
    final double negativeRate = 1 - positiveRate;

    // Raw measures; each evaluation gets its own adapter over the scores.
    MeasurementGroup group = evaluation.findOrCreateGroup("Evaluation measures");
    final double auc = ROCEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    // Only the ROC AUC is checked for an existing entry before being added.
    if (!group.hasMeasure("ROC AUC")) {
        group.addMeasure("ROC AUC", auc, 0., 1., .5, false);
    }
    final double averagePrecision = AveragePrecisionEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    group.addMeasure("Average Precision", averagePrecision, 0., 1., positiveRate, false);
    final double rPrecision = PrecisionAtKEvaluation.RPRECISION.evaluate(positives, new OutlierScoreAdapter(or));
    group.addMeasure("R-Precision", rPrecision, 0., 1., positiveRate, false);
    final double maximumF1 = MaximumF1Evaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    group.addMeasure("Maximum F1", maximumF1, 0., 1., positiveRate, false);
    final double dcgUpperBound = DCGEvaluation.maximum(numPositive);
    final double discountedGain = DCGEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    group.addMeasure("DCG", discountedGain, 0., dcgUpperBound, DCGEvaluation.STATIC.expected(numPositive, size), false);
    final double normalizedGain = NDCGEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    group.addMeasure("NDCG", normalizedGain, 0., 1., NDCGEvaluation.STATIC.expected(numPositive, size), false);

    // Variants rescaled with respect to the chance level.
    group = evaluation.findOrCreateGroup("Adjusted for chance");
    final double adjustedAuc = 2 * auc - 1;
    group.addMeasure("Adjusted AUC", adjustedAuc, 0., 1., 0., false);
    final double adjustedAveragePrecision = (averagePrecision - positiveRate) / negativeRate;
    group.addMeasure("Adjusted AveP", adjustedAveragePrecision, 0., 1., 0., false);
    final double adjustedRPrecision = (rPrecision - positiveRate) / negativeRate;
    group.addMeasure("Adjusted R-Prec", adjustedRPrecision, 0., 1., 0., false);
    final double adjustedMaximumF1 = (maximumF1 - positiveRate) / negativeRate;
    group.addMeasure("Adjusted Max F1", adjustedMaximumF1, 0., 1., 0., false);
    final double expectedNormalizedGain = NDCGEvaluation.STATIC.expected(numPositive, size);
    final double adjustedNormalizedGain = (normalizedGain - expectedNormalizedGain) / (1. - expectedNormalizedGain);
    // The label says DCG, but the value is derived from the normalized DCG.
    group.addMeasure("Adjusted DCG", adjustedNormalizedGain, 0., 1., 0., false);

    if (LOG.isStatistics()) {
        LOG.statistics(new DoubleStatistic(key + ".rocauc", auc));
        LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjustedAuc));
        LOG.statistics(new DoubleStatistic(key + ".precision.average", averagePrecision));
        LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjustedAveragePrecision));
        LOG.statistics(new DoubleStatistic(key + ".precision.r", rPrecision));
        LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjustedRPrecision));
        LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maximumF1));
        LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjustedMaximumF1));
        LOG.statistics(new DoubleStatistic(key + ".dcg", discountedGain));
        LOG.statistics(new DoubleStatistic(key + ".dcg.normalized", normalizedGain));
        LOG.statistics(new DoubleStatistic(key + ".dcg.adjusted", adjustedNormalizedGain));
    }
    return evaluation;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) OutlierScoreAdapter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)

Example 5 with MeasurementGroup

use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.

the class EvaluateConcordantPairs method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * Counts, over all combinations of a within-cluster distance and a
 * between-cluster distance, how often the within-cluster distance is the
 * smaller one (concordant) or the larger one (discordant). Gamma and Tau are
 * derived from these counts, logged as statistics and stored as measures in
 * the evaluation result attached to the clustering.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Gamma index
 * @throws AbortException if the number of within-cluster pairs does not fit
 *         into an {@code int}
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    int ignorednoise = 0;
    // Accumulated in 64 bit: the product size * (size - 1) exceeds the int
    // range already for moderately large clusters, and a wrapped 32 bit
    // product could turn into a wrong but non-negative count.
    long withinPairsTotal = 0L;
    for (Cluster<?> cluster : clusters) {
        if ((cluster.size() <= 1 || cluster.isNoise())) {
            switch(noiseHandling) {
                case IGNORE_NOISE:
                    ignorednoise += cluster.size();
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // No concordant distances.
                    continue;
                case MERGE_NOISE:
                    // Treat like a cluster below.
                    break;
            }
        }
        final long members = cluster.size();
        withinPairsTotal += (members * (members - 1)) >>> 1;
        // The distances are materialized in an array, hence the int limit.
        if (withinPairsTotal > Integer.MAX_VALUE) {
            throw new AbortException("Integer overflow - clusters too large to compute pairwise distances.");
        }
    }
    final int withinPairs = (int) withinPairsTotal;
    // Materialize within-cluster distances (sorted):
    double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs);
    int[] withinTies = new int[withinDistances.length];
    // Count ties within
    countTies(withinDistances, withinTies);
    long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0;
    // Step two, compute discordant distances:
    for (int i = 0; i < clusters.size(); i++) {
        Cluster<?> ocluster1 = clusters.get(i);
        if ((ocluster1.size() <= 1 || ocluster1.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
            continue;
        }
        for (int j = i + 1; j < clusters.size(); j++) {
            Cluster<?> ocluster2 = clusters.get(j);
            if ((ocluster2.size() <= 1 || ocluster2.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
                continue;
            }
            // Widen before multiplying; the int product may overflow.
            betweenPairs += (long) ocluster1.size() * ocluster2.size();
            for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) {
                NumberVector obj = rel.get(oit1);
                for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) {
                    double dist = distanceFunction.distance(obj, rel.get(oit2));
                    int p = Arrays.binarySearch(withinDistances, dist);
                    if (p >= 0) {
                        // Tied distances: binary search may hit any of the
                        // equal entries, so move to the first one.
                        while (p > 0 && withinDistances[p - 1] >= dist) {
                            --p;
                        }
                        // Smaller within-distances are concordant; the
                        // withinTies[p] entries are counted as neither.
                        concordantPairs += p;
                        discordantPairs += withinDistances.length - p - withinTies[p];
                        continue;
                    }
                    // Not found: recover the insertion point from the result.
                    p = -p - 1;
                    concordantPairs += p;
                    discordantPairs += withinDistances.length - p;
                }
            }
        }
    }
    // Total number of pairs possible:
    final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1;
    // NOTE(review): t * (t - 1) can exceed the long range for very large
    // relations; left unchanged as the contract of computeTau is not visible here.
    final long tt = (t * (t - 1)) >>> 1;
    double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs);
    double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs);
    // Avoid NaN when everything is in a single cluster:
    // NOTE(review): this also maps negative values to 0, although the
    // measures below are registered with a minimum of -1 - confirm intent.
    gamma = gamma > 0. ? gamma : 0.;
    tau = tau > 0. ? tau : 0.;
    if (LOG.isStatistics()) {
        // NOTE(review): the ".pbm." statistic keys look copied from another
        // evaluator; kept as is because consumers may depend on them.
        LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".gamma", gamma));
        LOG.statistics(new DoubleStatistic(key + ".tau", tau));
    }
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation");
    g.addMeasure("Gamma", gamma, -1., 1., 0., false);
    g.addMeasure("Tau", tau, -1., +1., 0., false);
    db.getHierarchy().resultChanged(ev);
    return gamma;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)12 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)10 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)9 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)7 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)7 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 DBIDsTest (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest)2 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)2 Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid)2 SpatialComparable (de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable)1 Database (de.lmu.ifi.dbs.elki.database.Database)1 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)1 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)1 SquaredEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction)1 OutlierScoreAdapter (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter)1 SimpleAdapter (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter)1