Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
The class EvaluateCIndex, method processCluster.
protected double processCluster(Cluster<?> cluster, List<? extends Cluster<?>> clusters, int i, DistanceQuery<O> dq, DoubleHeap maxDists, DoubleHeap minDists, int w) {
  double theta = 0.;
  for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) {
    // Compare object to every cluster, but only once
    for (int j = i; j < clusters.size(); j++) {
      Cluster<?> ocluster = clusters.get(j);
      if (ocluster.size() <= 1 || ocluster.isNoise()) {
        switch(noiseOption) {
        case IGNORE_NOISE:
          // Ignore this cluster.
          continue;
        case TREAT_NOISE_AS_SINGLETONS:
          // Treat like a cluster
          break;
        case MERGE_NOISE:
          // Treat like a cluster
          break;
        }
      }
      for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) {
        if (DBIDUtil.compare(it1, it2) <= 0) {
          // Only once.
          continue;
        }
        double dist = dq.distance(it1, it2);
        minDists.add(dist, w);
        maxDists.add(dist, w);
        if (ocluster == cluster) {
          // Within-cluster distances.
          theta += dist;
        }
      }
    }
  }
  return theta;
}
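The nested it1/it2 loops show the usual ELKI idiom for visiting each unordered pair of DBIDs exactly once via DBIDUtil.compare. A minimal, self-contained sketch of just that idiom follows; the class and method names are invented for illustration, only the DBIDIter, DBIDUtil and DistanceQuery calls are ELKI API:

import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;

// Illustrative helper (not part of ELKI): sum of all pairwise distances in one
// DBID set, visiting every unordered pair exactly once, the same idiom as the
// it1/it2 loops in processCluster above.
public final class PairwiseDistanceSum {
  public static double sumOfPairwiseDistances(DBIDs ids, DistanceQuery<?> dq) {
    double sum = 0.;
    for (DBIDIter it1 = ids.iter(); it1.valid(); it1.advance()) {
      for (DBIDIter it2 = ids.iter(); it2.valid(); it2.advance()) {
        if (DBIDUtil.compare(it1, it2) <= 0) {
          continue; // skip the self-pair and pairs already seen in the other order
        }
        sum += dq.distance(it1, it2);
      }
    }
    return sum;
  }
}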
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
The class EvaluateVarianceRatioCriteria, method evaluateClustering.
/**
 * Evaluate a single clustering.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  // FIXME: allow using a precomputed distance matrix!
  final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  double vrc = 0.;
  int ignorednoise = 0;
  if (clusters.size() > 1) {
    NumberVector[] centroids = new NumberVector[clusters.size()];
    ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
    // Build global centroid and cluster count:
    final int dim = RelationUtil.dimensionality(rel);
    Centroid overallCentroid = new Centroid(dim);
    int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
    // a: Distance to own centroid
    // b: Distance to overall centroid
    double a = 0, b = 0;
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    for (int i = 0; ci.hasNext(); i++) {
      Cluster<?> cluster = ci.next();
      if (cluster.size() <= 1 || cluster.isNoise()) {
        switch(noiseOption) {
        case IGNORE_NOISE:
          // Ignored
          continue;
        case TREAT_NOISE_AS_SINGLETONS:
          // Singletons: a = 0 by definition.
          for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
            b += df.distance(overallCentroid, rel.get(it));
          }
          // with NEXT cluster.
          continue;
        case MERGE_NOISE:
          // Treat like a cluster below:
          break;
        }
      }
      for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
        NumberVector vec = rel.get(it);
        a += df.distance(centroids[i], vec);
        b += df.distance(overallCentroid, vec);
      }
    }
    vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
    // Only if {@link NoiseHandling#IGNORE_NOISE}:
    if (penalize && ignorednoise > 0) {
      vrc *= (rel.size() - ignorednoise) / (double) rel.size();
    }
  }
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
    if (ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
  }
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
  g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
  return vrc;
}
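The assignment to vrc above corresponds to the Calinski-Harabasz Variance Ratio Criterion. A minimal sketch of that formula (not ELKI code; variable names follow the method above, with n = rel.size() and k = clustercount):

// Sketch only: a = sum of squared distances to the own cluster centroid,
// b = sum of squared distances to the global centroid, so b - a is the
// between-cluster scatter and VRC = ((b - a) / (k - 1)) / (a / (n - k)).
static double varianceRatioCriterion(double a, double b, int n, int k) {
  return ((b - a) / a) * ((n - k) / (k - 1.));
}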
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
The class EvaluateVarianceRatioCriteria, method globalCentroid.
/**
 * Update the global centroid.
 *
 * @param overallCentroid Centroid to update
 * @param rel Data relation
 * @param clusters Clusters
 * @param centroids Cluster centroids
 * @param noiseOption Noise handling option
 * @return Number of clusters
 */
public static int globalCentroid(Centroid overallCentroid, Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, NumberVector[] centroids, NoiseHandling noiseOption) {
  int clustercount = 0;
  Iterator<? extends Cluster<?>> ci = clusters.iterator();
  for (int i = 0; ci.hasNext(); i++) {
    Cluster<?> cluster = ci.next();
    if (cluster.size() <= 1 || cluster.isNoise()) {
      switch(noiseOption) {
      case IGNORE_NOISE:
        // Ignore completely
        continue;
      case TREAT_NOISE_AS_SINGLETONS:
        clustercount += cluster.size();
        // Update global centroid:
        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
          overallCentroid.put(rel.get(it));
        }
        // With NEXT cluster.
        continue;
      case MERGE_NOISE:
        // Treat as cluster below:
        break;
      }
    }
    // Update centroid:
    assert (centroids[i] != null);
    overallCentroid.put(centroids[i], cluster.size());
    ++clustercount;
  }
  return clustercount;
}
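The TREAT_NOISE_AS_SINGLETONS branch feeds vectors into the Centroid one at a time via a DBIDIter scan. A self-contained sketch of the same pattern applied to a whole relation (the class and method names here are invented for illustration):

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;

// Illustrative sketch (not ELKI code): build a centroid over an entire relation
// with the same DBIDIter scan used in the TREAT_NOISE_AS_SINGLETONS branch.
public final class CentroidScan {
  public static Centroid fullCentroid(Relation<? extends NumberVector> rel) {
    Centroid c = new Centroid(RelationUtil.dimensionality(rel));
    for (DBIDIter it = rel.iterDBIDs(); it.valid(); it.advance()) {
      c.put(rel.get(it)); // unweighted update, one vector at a time
    }
    return c;
  }
}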
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
The class Segments, method recursivelyFill.
private void recursivelyFill(List<List<? extends Cluster<?>>> cs, int depth, SetDBIDs first, SetDBIDs second, int[] path, boolean objectsegment) {
  final int numclusterings = cs.size();
  Iterator<? extends Cluster<?>> iter = cs.get(depth).iterator();
  for (int cnum = 0; iter.hasNext(); cnum++) {
    Cluster<?> clust = iter.next();
    // Compute intersections with new cluster.
    // nfp := intersection( first, cluster )
    // Adding asymmetric differences to nd1, nd2.
    // nse := intersection( second, cluster )
    HashSetModifiableDBIDs nfirstp = DBIDUtil.newHashSet(first.size());
    HashSetModifiableDBIDs ndelta1 = DBIDUtil.newHashSet(first);
    HashSetModifiableDBIDs ndelta2 = DBIDUtil.newHashSet();
    HashSetModifiableDBIDs nsecond = DBIDUtil.newHashSet(second.size());
    for (DBIDIter iter2 = clust.getIDs().iter(); iter2.valid(); iter2.advance()) {
      if (ndelta1.remove(iter2)) {
        nfirstp.add(iter2);
      } else {
        ndelta2.add(iter2);
      }
      if (second.contains(iter2)) {
        nsecond.add(iter2);
      }
    }
    if (nsecond.size() <= 0) {
      // disjoint
      continue;
    }
    if (nfirstp.size() > 0) {
      path[depth] = cnum;
      if (depth < numclusterings - 1) {
        recursivelyFill(cs, depth + 1, nfirstp, nsecond, path, objectsegment);
      } else {
        // Add to results.
        // In fact, nfirstp should equal nsecond here
        int selfpairs = DBIDUtil.intersectionSize(nfirstp, nsecond);
        if (objectsegment) {
          makeOrUpdateSegment(path, nfirstp, (nfirstp.size() * nsecond.size()) - selfpairs);
        } else {
          makeOrUpdateSegment(path, null, (nfirstp.size() * nsecond.size()) - selfpairs);
        }
      }
    }
    // Elements that were in first, but not in the cluster
    if (ndelta1.size() > 0) {
      path[depth] = Segment.UNCLUSTERED;
      if (depth < numclusterings - 1) {
        recursivelyFill(cs, depth + 1, ndelta1, nsecond, path, false);
      } else {
        // Add to results.
        int selfpairs = DBIDUtil.intersection(ndelta1, nsecond).size();
        makeOrUpdateSegment(path, null, (ndelta1.size() * nsecond.size()) - selfpairs);
      }
    }
    // It used to work in revision 9236, eventually go back to this code!
    if (ndelta2.size() > 0 && objectsegment) {
      int[] npath = new int[path.length];
      Arrays.fill(npath, Segment.UNCLUSTERED);
      npath[depth] = cnum;
      if (depth < numclusterings - 1) {
        recursivelyFill(cs, depth + 1, ndelta2, nsecond, npath, false);
      } else {
        // Add to results.
        int selfpairs = DBIDUtil.intersection(ndelta2, nsecond).size();
        makeOrUpdateSegment(npath, null, (ndelta2.size() * nsecond.size()) - selfpairs);
      }
    }
  }
}
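The set arithmetic at the top of the loop relies on HashSetModifiableDBIDs and DBIDUtil. A small self-contained sketch of that splitting step (class and method names invented for illustration):

import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;

// Illustrative sketch (not ELKI code): split a candidate ID set into the part
// inside a cluster and the part outside it, mirroring the nfirstp / ndelta1
// bookkeeping in recursivelyFill.
public final class SplitByCluster {
  public static HashSetModifiableDBIDs[] split(DBIDs candidate, DBIDs cluster) {
    HashSetModifiableDBIDs inside = DBIDUtil.newHashSet(candidate.size());
    HashSetModifiableDBIDs outside = DBIDUtil.newHashSet(candidate);
    for (DBIDIter it = cluster.iter(); it.valid(); it.advance()) {
      if (outside.remove(it)) { // the id was in the candidate set
        inside.add(it);
      }
    }
    return new HashSetModifiableDBIDs[] { inside, outside };
  }
}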
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
The class EvaluateConcordantPairs, method computeWithinDistances.
protected double[] computeWithinDistances(Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, int withinPairs) {
  double[] concordant = new double[withinPairs];
  int i = 0;
  for (Cluster<?> cluster : clusters) {
    if (cluster.size() <= 1 || cluster.isNoise()) {
      switch(noiseHandling) {
      case IGNORE_NOISE:
        continue;
      case TREAT_NOISE_AS_SINGLETONS:
        // No concordant distances.
        continue;
      case MERGE_NOISE:
        // Treat like a cluster below.
        break;
      }
    }
    for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) {
      NumberVector obj = rel.get(it1);
      for (DBIDIter it2 = cluster.getIDs().iter(); it2.valid(); it2.advance()) {
        if (DBIDUtil.compare(it1, it2) <= 0) {
          continue;
        }
        concordant[i++] = distanceFunction.distance(obj, rel.get(it2));
      }
    }
  }
  assert (concordant.length == i);
  Arrays.sort(concordant);
  return concordant;
}
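The withinPairs argument must equal the number of unordered within-cluster pairs, which the assert at the end checks indirectly. A minimal sketch of that count under the simplifying assumption that every cluster is included (not ELKI code):

import java.util.List;
import de.lmu.ifi.dbs.elki.data.Cluster;

// Illustrative sketch (not ELKI code): count the within-cluster pairs, i.e. the
// expected length of the concordant array filled above; noise handling is
// omitted here for brevity.
public final class WithinPairCount {
  public static int countWithinPairs(List<? extends Cluster<?>> clusters) {
    int withinPairs = 0;
    for (Cluster<?> cluster : clusters) {
      final int n = cluster.size();
      withinPairs += (n * (n - 1)) >>> 1; // n choose 2 unordered pairs
    }
    return withinPairs;
  }
}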