use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.
the class EvaluateSimplifiedSilhouette method evaluateClustering.
/**
* Evaluate a single clustering.
*
* @param db Database
* @param rel Data relation
* @param c Clustering
* @return Mean simplified silhouette
*/
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
List<? extends Cluster<?>> clusters = c.getAllClusters();
NumberVector[] centroids = new NumberVector[clusters.size()];
int ignorednoise = centroids(rel, clusters, centroids, noiseOption);
MeanVariance mssil = new MeanVariance();
Iterator<? extends Cluster<?>> ci = clusters.iterator();
for (int i = 0; ci.hasNext(); i++) {
Cluster<?> cluster = ci.next();
if (cluster.size() <= 1) {
// As suggested in Rousseeuw, we use 0 for singletons.
mssil.put(0., cluster.size());
continue;
}
if (cluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
// Ignore elements
continue;
case TREAT_NOISE_AS_SINGLETONS:
// As suggested in Rousseeuw, we use 0 for singletons.
mssil.put(0., cluster.size());
continue;
case MERGE_NOISE:
// Treat as cluster below
break;
}
}
// Cluster center:
final NumberVector center = centroids[i];
assert (center != null);
for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
NumberVector obj = rel.get(it);
// a: Distance to own centroid
double a = distance.distance(center, obj);
// b: Distance to other clusters centroids:
double min = Double.POSITIVE_INFINITY;
Iterator<? extends Cluster<?>> cj = clusters.iterator();
for (int j = 0; cj.hasNext(); j++) {
Cluster<?> ocluster = cj.next();
if (i == j) {
continue;
}
NumberVector other = centroids[j];
if (other == null) {
// Noise!
switch(noiseOption) {
case IGNORE_NOISE:
continue;
case TREAT_NOISE_AS_SINGLETONS:
// Treat each object like a centroid!
for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) {
double dist = distance.distance(rel.get(it2), obj);
min = dist < min ? dist : min;
}
continue;
case MERGE_NOISE:
// Treat as cluster below, but should not be reachable.
break;
}
}
// Clusters: use centroid.
double dist = distance.distance(other, obj);
min = dist < min ? dist : min;
}
// One 'real' cluster only?
min = min < Double.POSITIVE_INFINITY ? min : a;
mssil.put((min - a) / (min > a ? min : a));
}
}
double penalty = 1.;
// Only if {@link NoiseHandling#IGNORE_NOISE}:
if (penalize && ignorednoise > 0) {
penalty = (rel.size() - ignorednoise) / (double) rel.size();
}
final double meanssil = penalty * mssil.getMean();
final double stdssil = penalty * mssil.getSampleStddev();
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(key + ".simplified-silhouette.noise-handling", noiseOption.toString()));
if (ignorednoise > 0) {
LOG.statistics(new LongStatistic(key + ".simplified-silhouette.ignored", ignorednoise));
}
LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.mean", meanssil));
LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.stddev", stdssil));
}
EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
g.addMeasure("Simp. Silhouette +-" + FormatUtil.NF2.format(stdssil), meanssil, -1., 1., 0., false);
db.getHierarchy().resultChanged(ev);
return meanssil;
}
use of de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup in project elki by elki-project.
the class EvaluateSquaredErrors method evaluateClustering.
/**
* Evaluate a single clustering.
*
* @param db Database
* @param rel Data relation
* @param c Clustering
* @return ssq
*/
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
boolean square = !distance.isSquared();
int ignorednoise = 0;
List<? extends Cluster<?>> clusters = c.getAllClusters();
double ssq = 0, sum = 0;
for (Cluster<?> cluster : clusters) {
if (cluster.size() <= 1 || cluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
ignorednoise += cluster.size();
continue;
case TREAT_NOISE_AS_SINGLETONS:
continue;
case MERGE_NOISE:
// Treat as cluster below:
break;
}
}
NumberVector center = ModelUtil.getPrototypeOrCentroid(cluster.getModel(), rel, cluster.getIDs());
for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) {
final double d = distance.distance(center, rel.get(it1));
sum += d;
ssq += square ? d * d : d;
}
}
final int div = Math.max(1, rel.size() - ignorednoise);
if (LOG.isStatistics()) {
LOG.statistics(new DoubleStatistic(key + ".mean", sum / div));
LOG.statistics(new DoubleStatistic(key + ".ssq", ssq));
LOG.statistics(new DoubleStatistic(key + ".rmsd", FastMath.sqrt(ssq / div)));
}
EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
g.addMeasure("Mean distance", sum / div, 0., Double.POSITIVE_INFINITY, true);
g.addMeasure("Sum of Squares", ssq, 0., Double.POSITIVE_INFINITY, true);
g.addMeasure("RMSD", FastMath.sqrt(ssq / div), 0., Double.POSITIVE_INFINITY, true);
db.getHierarchy().add(c, ev);
return ssq;
}
Aggregations