Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in the project elki by elki-project.
From the class EvaluateVarianceRatioCriteria, method evaluateClustering:
/**
 * Evaluate a single clustering with the Variance Ratio Criterion (VRC,
 * Calinski-Harabasz index): the ratio of between-cluster scatter to
 * within-cluster scatter, scaled by the degrees of freedom.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
// FIXME: allow using a precomputed distance matrix!
final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
List<? extends Cluster<?>> clusters = c.getAllClusters();
double vrc = 0.;
int ignorednoise = 0;
// VRC is undefined for fewer than two clusters; vrc then remains 0.
if (clusters.size() > 1) {
// Per-cluster centroids; also reports the noise points skipped by the helper.
NumberVector[] centroids = new NumberVector[clusters.size()];
ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
// Build global centroid and cluster count:
final int dim = RelationUtil.dimensionality(rel);
Centroid overallCentroid = new Centroid(dim);
int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
// a: sum of squared distances to the own cluster centroid (within-cluster scatter)
// b: sum of squared distances to the overall centroid (total scatter)
double a = 0, b = 0;
Iterator<? extends Cluster<?>> ci = clusters.iterator();
for (int i = 0; ci.hasNext(); i++) {
Cluster<?> cluster = ci.next();
// Noise clusters and singletons get special treatment per noiseOption:
if (cluster.size() <= 1 || cluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
// Ignored entirely; contributes to neither a nor b.
continue;
case TREAT_NOISE_AS_SINGLETONS:
// Singletons: a = 0 by definition, but they still contribute to b.
for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
b += df.distance(overallCentroid, rel.get(it));
}
// Continue with NEXT cluster.
continue;
case MERGE_NOISE:
// Treat like a regular cluster: fall through to the loop below.
break;
}
}
// Regular cluster: accumulate within-cluster and total scatter.
for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
NumberVector vec = rel.get(it);
a += df.distance(centroids[i], vec);
b += df.distance(overallCentroid, vec);
}
}
// VRC = (SSB / (k-1)) / (SSW / (n-k)), with SSB = b - a and SSW = a.
// NOTE(review): this yields NaN when a == 0 (every point equals its
// centroid) - confirm whether callers tolerate NaN here.
vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
// Only if {@link NoiseHandling#IGNORE_NOISE}: penalize for ignored noise.
if (penalize && ignorednoise > 0) {
vrc *= (rel.size() - ignorednoise) / (double) rel.size();
}
}
// Log statistics (noise handling mode, ignored count, final score).
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
if (ignorednoise > 0) {
LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
}
LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
}
// Attach the measurement to the (shared) evaluation result for display.
EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
return vrc;
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in the project elki by elki-project.
From the class OutlierRankingEvaluation, method evaluateOrderingResult:
/**
 * Evaluate an object ordering against the set of known positive objects, and
 * collect ranking quality measures into a new evaluation result.
 *
 * @param size Expected number of ranked objects (database size)
 * @param positiveids IDs of the known positive objects
 * @param order Ordering of object IDs to evaluate
 * @return Evaluation result holding the computed measures
 */
private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
  if(order.size() != size) {
    throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
  }
  EvaluationResult result = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
  final DBIDsTest positiveTest = new DBIDsTest(positiveids);
  // Expected value of precision-type measures under a random ranking.
  final double expected = positiveids.size() / (double) size;
  // Raw ranking measures:
  MeasurementGroup grp = result.newGroup("Evaluation measures:");
  final double auc = ROCEvaluation.STATIC.evaluate(positiveTest, new SimpleAdapter(order.iter()));
  grp.addMeasure("ROC AUC", auc, 0., 1., .5, false);
  final double avgPrec = AveragePrecisionEvaluation.STATIC.evaluate(positiveTest, new SimpleAdapter(order.iter()));
  grp.addMeasure("Average Precision", avgPrec, 0., 1., expected, false);
  final double rPrec = PrecisionAtKEvaluation.RPRECISION.evaluate(positiveTest, new SimpleAdapter(order.iter()));
  grp.addMeasure("R-Precision", rPrec, 0., 1., expected, false);
  final double bestF1 = MaximumF1Evaluation.STATIC.evaluate(positiveTest, new SimpleAdapter(order.iter()));
  grp.addMeasure("Maximum F1", bestF1, 0., 1., expected, false);
  // Rescaled variants, such that a random result scores 0:
  grp = result.newGroup("Adjusted for chance:");
  final double adjAuc = 2 * auc - 1;
  grp.addMeasure("Adjusted AUC", adjAuc, 0., 1., 0., false);
  final double adjAvgPrec = (avgPrec - expected) / (1 - expected);
  grp.addMeasure("Adjusted AveP", adjAvgPrec, 0., 1., 0., false);
  final double adjRPrec = (rPrec - expected) / (1 - expected);
  grp.addMeasure("Adjusted R-Prec", adjRPrec, 0., 1., 0., false);
  final double adjBestF1 = (bestF1 - expected) / (1 - expected);
  grp.addMeasure("Adjusted Max F1", adjBestF1, 0., 1., 0., false);
  // Mirror all measures into the statistics log:
  if(LOG.isStatistics()) {
    LOG.statistics(new DoubleStatistic(key + ".rocauc", auc));
    LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjAuc));
    LOG.statistics(new DoubleStatistic(key + ".precision.average", avgPrec));
    LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjAvgPrec));
    LOG.statistics(new DoubleStatistic(key + ".precision.r", rPrec));
    LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjRPrec));
    LOG.statistics(new DoubleStatistic(key + ".f1.maximum", bestF1));
    LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjBestF1));
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in the project elki by elki-project.
From the class OutlierROCCurve, method processNewResult:
/**
 * Process a new result: compute a ROC curve (and record its AUC) for every
 * outlier score result and every remaining object ordering found below
 * {@code result}.
 *
 * @param hier Result hierarchy
 * @param result Newly added result to process
 */
@Override
public void processNewResult(ResultHierarchy hier, Result result) {
  Database db = ResultUtil.findDatabase(hier);
  // Prepare the ground truth: objects whose label matches the positive class.
  SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
  if(positiveids.size() == 0) {
    LOG.warning("Computing a ROC curve failed - no objects matched.");
    return;
  }
  boolean nonefound = true;
  List<OutlierResult> oresults = OutlierResult.getOutlierResults(result);
  List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
  // Outlier results are the main use case.
  for(OutlierResult o : oresults) {
    attachROCResult(db, o, computeROCResult(o.getScores().size(), positiveids, o));
    // Process each ordering only once: drop the ordering belonging to this
    // outlier result so the loop below does not evaluate it a second time.
    orderings.remove(o.getOrdering());
    nonefound = false;
  }
  // Otherwise, apply an ordering to the database IDs.
  for(OrderingResult or : orderings) {
    DBIDs sorted = or.order(or.getDBIDs());
    attachROCResult(db, or, computeROCResult(or.getDBIDs().size(), positiveids, sorted));
    nonefound = false;
  }
  if(nonefound) {
    // Nothing to process; a warning used to be emitted here:
    // LOG.warning("No results found to process with ROC curve analyzer.");
    return;
  }
}

/**
 * Attach a computed ROC curve to its parent result, and record the AUC value
 * in the shared "Evaluation measures" group (at most once per parent).
 *
 * <p>This is the common tail of both loops in {@link #processNewResult},
 * extracted to avoid duplicating the hierarchy/measurement wiring.</p>
 *
 * @param db Database (provides the result hierarchy)
 * @param parent Result the ROC curve was computed for
 * @param rocres Computed ROC curve, carrying its AUC value
 */
private void attachROCResult(Database db, Result parent, ROCResult rocres) {
  db.getHierarchy().add(parent, rocres);
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), parent, "Evaluation of ranking", "ranking-evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Evaluation measures");
  // Another evaluator may already have contributed this measure.
  if(!g.hasMeasure(ROCAUC_LABEL)) {
    g.addMeasure(ROCAUC_LABEL, rocres.auc, 0., 1., false);
  }
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in the project elki by elki-project.
From the class OutlierRankingEvaluation, method evaluateOutlierResult:
/**
 * Evaluate an outlier scoring against the known positive objects, and attach
 * the ranking quality measures to the result hierarchy.
 *
 * @param size Total number of ranked objects
 * @param positiveids IDs of the known positive objects
 * @param or Outlier result to evaluate
 * @return Evaluation result holding the computed measures
 */
private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
// Reuse an existing evaluation result attached to this outlier result, if any.
EvaluationResult res = EvaluationResult.findOrCreate(or.getHierarchy(), or, "Evaluation of ranking", "ranking-evaluation");
DBIDsTest test = new DBIDsTest(positiveids);
final int pos = positiveids.size();
// Expected value of precision-type measures under a random ranking.
final double rate = pos / (double) size;
MeasurementGroup g = res.findOrCreateGroup("Evaluation measures");
double rocauc = ROCEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
// Guarded: another evaluator may already have added ROC AUC to this group.
// NOTE(review): the remaining measures below are added unconditionally -
// confirm they cannot be duplicated when the group already existed.
if (!g.hasMeasure("ROC AUC")) {
g.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
}
double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
g.addMeasure("Average Precision", avep, 0., 1., rate, false);
double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new OutlierScoreAdapter(or));
g.addMeasure("R-Precision", rprec, 0., 1., rate, false);
double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
g.addMeasure("Maximum F1", maxf1, 0., 1., rate, false);
// DCG is reported relative to its best achievable value for pos positives.
double maxdcg = DCGEvaluation.maximum(pos);
double dcg = DCGEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
g.addMeasure("DCG", dcg, 0., maxdcg, DCGEvaluation.STATIC.expected(pos, size), false);
double ndcg = NDCGEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
g.addMeasure("NDCG", ndcg, 0., 1., NDCGEvaluation.STATIC.expected(pos, size), false);
// Rescaled variants, such that a random result scores 0:
g = res.findOrCreateGroup("Adjusted for chance");
double adjauc = 2 * rocauc - 1;
g.addMeasure("Adjusted AUC", adjauc, 0., 1., 0., false);
double adjavep = (avep - rate) / (1 - rate);
g.addMeasure("Adjusted AveP", adjavep, 0., 1., rate, false);
double adjrprec = (rprec - rate) / (1 - rate);
g.addMeasure("Adjusted R-Prec", adjrprec, 0., 1., 0., false);
double adjmaxf1 = (maxf1 - rate) / (1 - rate);
g.addMeasure("Adjusted Max F1", adjmaxf1, 0., 1., 0., false);
double endcg = NDCGEvaluation.STATIC.expected(pos, size);
double adjndcg = (ndcg - endcg) / (1. - endcg);
// NOTE(review): the label says "Adjusted DCG", but the value stored is the
// adjusted NDCG (and is logged below as ".dcg.adjusted") - verify intent.
g.addMeasure("Adjusted DCG", adjndcg, 0., 1., 0., false);
// Mirror all measures into the statistics log:
if (LOG.isStatistics()) {
LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjauc));
LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjavep));
LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjrprec));
LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjmaxf1));
LOG.statistics(new DoubleStatistic(key + ".dcg", dcg));
LOG.statistics(new DoubleStatistic(key + ".dcg.normalized", ndcg));
LOG.statistics(new DoubleStatistic(key + ".dcg.adjusted", adjndcg));
}
return res;
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in the project elki by elki-project.
From the class EvaluateConcordantPairs, method evaluateClustering:
/**
 * Evaluate a single clustering with concordance-based measures (Gamma and
 * Tau): compare within-cluster distances to between-cluster distances, and
 * count concordant vs. discordant pairs.
 *
 * <p>Fixes applied: pair counts are now computed in {@code long} arithmetic
 * before accumulation, since the previous {@code int} products could silently
 * overflow for large clusters (and {@code >>> 1} masks a negative product, so
 * the old {@code withinPairs < 0} check could not always fire).</p>
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Gamma index
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  int ignorednoise = 0, withinPairs = 0;
  // Count within-cluster pairs, honoring the configured noise handling.
  for(Cluster<?> cluster : clusters) {
    if((cluster.size() <= 1 || cluster.isNoise())) {
      switch(noiseHandling) {
      case IGNORE_NOISE:
        ignorednoise += cluster.size();
        continue;
      case TREAT_NOISE_AS_SINGLETONS:
        // No concordant distances.
        continue;
      case MERGE_NOISE:
        // Treat like a cluster below.
        break;
      }
    }
    // Compute n*(n-1)/2 in long to detect overflow reliably; the int version
    // could wrap around and, after ">>> 1", still appear non-negative.
    final long pairs = (cluster.size() * (long) (cluster.size() - 1)) >>> 1;
    if(pairs > Integer.MAX_VALUE - withinPairs) {
      throw new AbortException("Integer overflow - clusters too large to compute pairwise distances.");
    }
    withinPairs += pairs;
  }
  // Materialize within-cluster distances (sorted):
  double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs);
  int[] withinTies = new int[withinDistances.length];
  // Count ties within
  countTies(withinDistances, withinTies);
  long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0;
  // Step two, compute discordant distances:
  for(int i = 0; i < clusters.size(); i++) {
    Cluster<?> ocluster1 = clusters.get(i);
    if((ocluster1.size() <= 1 || ocluster1.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
      continue;
    }
    for(int j = i + 1; j < clusters.size(); j++) {
      Cluster<?> ocluster2 = clusters.get(j);
      if((ocluster2.size() <= 1 || ocluster2.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
        continue;
      }
      // Widen to long BEFORE multiplying: int * int could overflow here.
      betweenPairs += ocluster1.size() * (long) ocluster2.size();
      for(DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) {
        NumberVector obj = rel.get(oit1);
        for(DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) {
          double dist = distanceFunction.distance(obj, rel.get(oit2));
          int p = Arrays.binarySearch(withinDistances, dist);
          if(p >= 0) {
            // Tied distances: rewind to the first occurrence of this value,
            // then exclude the tied block from the discordant count.
            while(p > 0 && withinDistances[p - 1] >= dist) {
              --p;
            }
            concordantPairs += p;
            discordantPairs += withinDistances.length - p - withinTies[p];
            continue;
          }
          // Not found: binarySearch returned -(insertion point) - 1.
          p = -p - 1;
          concordantPairs += p;
          discordantPairs += withinDistances.length - p;
        }
      }
    }
  }
  // Total number of pairs possible:
  final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1;
  final long tt = (t * (t - 1)) >>> 1;
  double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs);
  double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs);
  // Avoid NaN when everything is in a single cluster (0/0 above compares
  // false against 0., so NaN is mapped to 0 here):
  gamma = gamma > 0. ? gamma : 0.;
  tau = tau > 0. ? tau : 0.;
  if(LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
    if(ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".gamma", gamma));
    LOG.statistics(new DoubleStatistic(key + ".tau", tau));
  }
  // Attach both measures to the shared evaluation result and notify listeners.
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation");
  g.addMeasure("Gamma", gamma, -1., 1., 0., false);
  g.addMeasure("Tau", tau, -1., +1., 0., false);
  db.getHierarchy().resultChanged(ev);
  return gamma;
}
Aggregations