Use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.
The class EvaluateVarianceRatioCriteria, method evaluateClustering.
/**
 * Evaluate a single clustering.
 * <p>
 * Sums the squared Euclidean distances of the points to their own cluster
 * centroid ({@code a}) and to the overall centroid ({@code b}), and derives
 * the variance ratio criteria from them. Clusters of size &lt;= 1 and noise
 * clusters are handled according to the configured noise option. For a
 * clustering with at most one cluster, the value 0 is reported.
 * <p>
 * The value is logged as statistic (if enabled), added to the "Distance-based
 * Evaluation" group of the evaluation result attached to the clustering, and
 * the result hierarchy is notified of the change.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  // FIXME: allow using a precomputed distance matrix!
  final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  double vrc = 0.;
  int ignorednoise = 0;
  if (clusters.size() > 1) {
    NumberVector[] centroids = new NumberVector[clusters.size()];
    ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
    // Build global centroid and cluster count:
    final int dim = RelationUtil.dimensionality(rel);
    Centroid overallCentroid = new Centroid(dim);
    int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
    // a: Distance to own centroid
    // b: Distance to overall centroid
    double a = 0, b = 0;
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    // i is the index into centroids[], kept in sync with the iterator.
    for (int i = 0; ci.hasNext(); i++) {
      Cluster<?> cluster = ci.next();
      if (cluster.size() <= 1 || cluster.isNoise()) {
        switch(noiseOption) {
        case IGNORE_NOISE:
          // Ignored
          continue;
        case TREAT_NOISE_AS_SINGLETONS:
          // Singletons: a = 0 by definition.
          for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
            b += df.distance(overallCentroid, rel.get(it));
          }
          // Continue with NEXT cluster.
          continue;
        case MERGE_NOISE:
          // Treat like a cluster below:
          break;
        }
      }
      for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
        NumberVector vec = rel.get(it);
        a += df.distance(centroids[i], vec);
        b += df.distance(overallCentroid, vec);
      }
    }
    // NOTE(review): a == 0 or clustercount == 1 makes this a floating-point
    // division by zero (Infinity / NaN); behavior kept as is - confirm intent.
    vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
    // Only if {@link NoiseHandling#IGNORE_NOISE}:
    if (penalize && ignorednoise > 0) {
      vrc *= (rel.size() - ignorednoise) / (double) rel.size();
    }
  }
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
    if (ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
  }
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
  g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
  // Notify listeners that the (possibly pre-existing) evaluation result was
  // modified, as the other internal evaluation measures do.
  db.getHierarchy().resultChanged(ev);
  return vrc;
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.
The class EvaluationVisualization, method processNewResult.
/**
 * Register a visualization task for each new {@link EvaluationResult} found
 * below {@code start}, unless a task of this factory already exists for it.
 *
 * @param context Visualizer context
 * @param start Starting point for the search of new results
 */
@Override
public void processNewResult(VisualizerContext context, Object start) {
  VisualizationTree.findNewResults(context, start).filter(EvaluationResult.class).forEach(result -> {
    // Nothing to do if this factory already attached a task to the result.
    It<VisualizationTask> attached = VisualizationTree.findVis(context, result).filter(VisualizationTask.class);
    while (attached.valid()) {
      if (attached.get().getFactory() instanceof EvaluationVisualization) {
        return;
      }
      attached.advance();
    }
    if (!result.visualizeSingleton()) {
      // Regular case: one task attached directly to the result.
      context.addVis(result, //
          new VisualizationTask(this, NAME, result, null).requestSize(.5, result.numLines() * .05).level(VisualizationTask.LEVEL_STATIC));
      return;
    }
    // Hack: for clusterings, only show the currently visible clustering.
    // The task is keyed by the result class and attached to the base result.
    final Class<? extends EvaluationResult> type = result.getClass();
    // Ensure there is only a single such task below the base result.
    It<VisualizationTask> siblings = context.getVisHierarchy().iterChildren(context.getBaseResult()).filter(VisualizationTask.class);
    while (siblings.valid()) {
      final VisualizationTask other = siblings.get();
      if (other.getFactory() instanceof EvaluationVisualization && other.getResult() == type) {
        return;
      }
      siblings.advance();
    }
    context.addVis(context.getBaseResult(), //
        new VisualizationTask(this, NAME, type, null) //
            .requestSize(.5, result.numLines() * .05) //
            .level(VisualizationTask.LEVEL_STATIC) //
            .with(UpdateFlag.ON_STYLEPOLICY));
  });
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.
The class EvaluationVisualization, method makeVisualization.
/**
 * Render an evaluation result as header lines followed by one bar chart per
 * measurement.
 * <p>
 * The task either references an {@link EvaluationResult} directly, or a
 * result class; in the latter case, the evaluation belonging to the
 * clustering of the current cluster styling policy is looked up. If no
 * result can be determined, an empty visualization is returned.
 *
 * @param context Visualizer context
 * @param task Visualization task
 * @param plot Plot to draw to
 * @param width Available width
 * @param height Available height
 * @param proj Projection (not used here)
 * @return Visualization
 */
@Override
public Visualization makeVisualization(VisualizerContext context, VisualizationTask task, VisualizationPlot plot, double width, double height, Projection proj) {
  // TODO: make a utility class to wrap SVGPlot + parent layer + ypos.
  // TODO: use CSSClass and StyleLibrary
  final Element layer = plot.svgElement(SVGConstants.SVG_G_TAG);
  final Object target = task.getResult();
  EvaluationResult result = null;
  if (target instanceof EvaluationResult) {
    result = (EvaluationResult) target;
  } else if (target instanceof Class && EvaluationResult.class.isAssignableFrom((Class<?>) target)) {
    // Use cluster evaluation of current style instead.
    final StylingPolicy policy = context.getStylingPolicy();
    if (policy instanceof ClusterStylingPolicy) {
      final ClusterStylingPolicy clusterPolicy = (ClusterStylingPolicy) policy;
      // will be a subtype, actually!
      @SuppressWarnings("unchecked")
      final Class<EvaluationResult> type = (Class<EvaluationResult>) target;
      It<EvaluationResult> candidates = VisualizationTree.findNewResults(context, clusterPolicy.getClustering()).filter(type);
      while (candidates.valid()) {
        final EvaluationResult candidate = candidates.get();
        // Only accept a result that has the clustering among its ancestors,
        // otherwise we may end up displaying the wrong evaluation.
        if (context.getHierarchy().iterAncestors(candidate).find(clusterPolicy.getClustering())) {
          result = candidate;
          break;
        }
        candidates.advance();
      }
    }
  }
  if (result == null) {
    // Failed: return the empty layer.
    return new StaticVisualizationInstance(context, task, plot, width, height, layer);
  }
  // Skip space before first header
  double y = -.5;
  for (String line : result.getHeaderLines()) {
    y = addHeader(plot, layer, y, line);
  }
  for (EvaluationResult.MeasurementGroup group : result) {
    y = addHeader(plot, layer, y, group.getName());
    for (EvaluationResult.Measurement measure : group) {
      y = addBarChart(plot, layer, y, measure.getName(), measure.getVal(), measure.getMin(), measure.getMax(), measure.getExp(), measure.lowerIsBetter());
    }
  }
  // Scale the layer to the available space.
  final double columns = 10;
  final StyleLibrary library = context.getStyleLibrary();
  final double border = library.getSize(StyleLibrary.MARGIN);
  SVGUtil.setAtt(layer, SVGConstants.SVG_TRANSFORM_ATTRIBUTE, //
      SVGUtil.makeMarginTransform(width, height, columns, y, border / StyleLibrary.SCALE));
  return new StaticVisualizationInstance(context, task, plot, width, height, layer);
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.
The class EvaluateConcordantPairs, method evaluateClustering.
/**
 * Evaluate a single clustering.
 * <p>
 * Materializes the sorted within-cluster distances, then compares every
 * between-cluster distance against them by binary search to count concordant
 * and discordant pairs. From these counts, Gamma and Tau are derived, logged
 * as statistics (if enabled) and added to the "Concordance-based Evaluation"
 * group of the evaluation result attached to the clustering.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Gamma index
 * @throws AbortException when the number of within-cluster pairs does not
 *         fit into an {@code int} (the distances are stored in an array)
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  int ignorednoise = 0;
  // Accumulate in long: size * (size - 1) overflows int for clusters with
  // more than ~46k members, and the unsigned shift could hide the overflow.
  long withinPairs = 0L;
  for (Cluster<?> cluster : clusters) {
    if (cluster.size() <= 1 || cluster.isNoise()) {
      switch(noiseHandling) {
      case IGNORE_NOISE:
        ignorednoise += cluster.size();
        continue;
      case TREAT_NOISE_AS_SINGLETONS:
        // No concordant distances.
        continue;
      case MERGE_NOISE:
        // Treat like a cluster below.
        break;
      }
    }
    final long size = cluster.size();
    withinPairs += (size * (size - 1)) >>> 1;
    if (withinPairs > Integer.MAX_VALUE) {
      throw new AbortException("Integer overflow - clusters too large to compute pairwise distances.");
    }
  }
  // Materialize within-cluster distances (sorted):
  double[] withinDistances = computeWithinDistances(rel, clusters, (int) withinPairs);
  int[] withinTies = new int[withinDistances.length];
  // Count ties within
  countTies(withinDistances, withinTies);
  long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0;
  // Step two, compute discordant distances:
  for (int i = 0; i < clusters.size(); i++) {
    Cluster<?> ocluster1 = clusters.get(i);
    if ((ocluster1.size() <= 1 || ocluster1.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
      continue;
    }
    for (int j = i + 1; j < clusters.size(); j++) {
      Cluster<?> ocluster2 = clusters.get(j);
      if ((ocluster2.size() <= 1 || ocluster2.isNoise()) && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) {
        continue;
      }
      // Widen before multiplying, the product may exceed the int range.
      betweenPairs += (long) ocluster1.size() * ocluster2.size();
      for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) {
        NumberVector obj = rel.get(oit1);
        for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) {
          double dist = distanceFunction.distance(obj, rel.get(oit2));
          int p = Arrays.binarySearch(withinDistances, dist);
          if (p >= 0) {
            // Tied distances: move to the first occurrence of this value.
            while (p > 0 && withinDistances[p - 1] >= dist) {
              --p;
            }
            // Smaller within-distances are concordant, larger ones are
            // discordant; the ties are counted for neither.
            concordantPairs += p;
            discordantPairs += withinDistances.length - p - withinTies[p];
            continue;
          }
          // Not found: binarySearch returned (-(insertion point) - 1).
          p = -p - 1;
          concordantPairs += p;
          discordantPairs += withinDistances.length - p;
        }
      }
    }
  }
  // Total number of pairs possible:
  final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1;
  final long tt = (t * (t - 1)) >>> 1;
  double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs);
  double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs);
  // Avoid NaN when everything is in a single cluster:
  // NOTE(review): this also replaces negative values by 0, although the
  // measures below are registered with a minimum of -1 - confirm intent.
  gamma = gamma > 0. ? gamma : 0.;
  tau = tau > 0. ? tau : 0.;
  if (LOG.isStatistics()) {
    // NOTE(review): the ".pbm" keys look copied from another evaluation
    // measure; kept unchanged to not break consumers of these statistics.
    LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
    if (ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".gamma", gamma));
    LOG.statistics(new DoubleStatistic(key + ".tau", tau));
  }
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation");
  g.addMeasure("Gamma", gamma, -1., 1., 0., false);
  g.addMeasure("Tau", tau, -1., +1., 0., false);
  db.getHierarchy().resultChanged(ev);
  return gamma;
}
Use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.
The class EvaluateDaviesBouldin, method evaluateClustering.
/**
 * Evaluate a single clustering.
 * <p>
 * For every cluster, the maximum ratio of within-group distances to the
 * distance between the clusters is determined; the reported index is the mean
 * of these maxima. Clusters without a centroid are only taken into account
 * when noise is not ignored, using the closest member instead of a centroid.
 * The value is logged (if enabled), added to the "Distance-based Evaluation"
 * group of the evaluation result, and the result hierarchy is notified.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return DB-index
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  final List<? extends Cluster<?>> clusters = c.getAllClusters();
  final int k = clusters.size();
  final NumberVector[] centroids = new NumberVector[k];
  final int noisecount = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
  final double[] spread = withinGroupDistances(rel, clusters, centroids);
  final Mean index = new Mean();
  for (int i = 0; i < k; i++) {
    final NumberVector own = centroids[i];
    final double ownSpread = spread[i];
    // Largest within-to-between cluster spread seen for cluster i.
    double worst = 0;
    for (int j = 0; j < k; j++) {
      final NumberVector other = centroids[j];
      // Skips the cluster itself, and pairs where neither has a centroid.
      if (other == own) {
        continue;
      }
      if (own != null && other != null) {
        // Both are real clusters: compare to the between group distance.
        final double between = distanceFunction.distance(own, other);
        final double ratio = (ownSpread + spread[j]) / between;
        if (ratio > worst) {
          worst = ratio;
        }
        continue;
      }
      if (noiseOption == NoiseHandling.IGNORE_NOISE) {
        continue;
      }
      if (own != null) {
        // Distance from own centroid to the closest element of cluster j.
        double nearest = Double.POSITIVE_INFINITY;
        DBIDIter member = clusters.get(j).getIDs().iter();
        while (member.valid()) {
          final double dist = distanceFunction.distance(own, rel.get(member));
          if (dist < nearest) {
            nearest = dist;
          }
          member.advance();
        }
        final double ratio = ownSpread / nearest;
        if (ratio > worst) {
          worst = ratio;
        }
      } else if (other != null) {
        // Distance from the closest element of cluster i to the other centroid.
        double nearest = Double.POSITIVE_INFINITY;
        DBIDIter member = clusters.get(i).getIDs().iter();
        while (member.valid()) {
          final double dist = distanceFunction.distance(rel.get(member), other);
          if (dist < nearest) {
            nearest = dist;
          }
          member.advance();
        }
        final double ratio = spread[j] / nearest;
        if (ratio > worst) {
          worst = ratio;
        }
      }
      // else: (0+0) / d = 0.
    }
    index.put(worst);
  }
  // For a single cluster, we return 2 (result for equidistant points)
  final double daviesBouldinMean;
  if (index.getCount() > 1) {
    daviesBouldinMean = index.getMean();
  } else {
    daviesBouldinMean = 2.;
  }
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".db-index.noise-handling", noiseOption.toString()));
    if (noisecount > 0) {
      LOG.statistics(new LongStatistic(key + ".db-index.ignored", noisecount));
    }
    LOG.statistics(new DoubleStatistic(key + ".db-index", daviesBouldinMean));
  }
  final EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  final MeasurementGroup group = ev.findOrCreateGroup("Distance-based Evaluation");
  group.addMeasure("Davies Bouldin Index", daviesBouldinMean, 0., Double.POSITIVE_INFINITY, 0., true);
  db.getHierarchy().resultChanged(ev);
  return daviesBouldinMean;
}
Aggregations