Search in sources :

Example 6 with EvaluationResult

use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.

the class EvaluateCIndex method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * Computes the C-Index as {@code (theta - min) / (max - min)}, where
 * {@code theta} is the sum of within-cluster distances accumulated from
 * {@code processCluster}, and {@code min} / {@code max} are the sums of the
 * values left in the two heaps after all clusters were processed. If
 * {@code max} is not greater than {@code min}, the index is reported as 1.
 * The value is also registered as an evaluation measure on the result
 * hierarchy of the database.
 *
 * @param db Database
 * @param rel Data relation
 * @param dq Distance query
 * @param c Clustering
 * @return C-Index
 */
public double evaluateClustering(Database db, Relation<? extends O> rel, DistanceQuery<O> dq, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    // Count ignored noise, and within-cluster distances
    // (w = number of within-cluster object pairs over all counted clusters).
    int ignorednoise = 0, w = 0;
    for (Cluster<?> cluster : clusters) {
        // Clusters with at most one element are handled like noise clusters.
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    ignorednoise += cluster.size();
                    // Ignore
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // No within-cluster distances!
                    continue;
                case MERGE_NOISE:
                    // Treat like a cluster
                    break;
                default:
                    // Only warns; the cluster is then counted like a regular cluster.
                    LOG.warning("Unknown noise handling option: " + noiseOption);
            }
        }
        // n * (n - 1) / 2 pairs; the unsigned shift halves the product.
        w += (cluster.size() * (cluster.size() - 1)) >>> 1;
    }
    // TODO: for small k=2, and balanced clusters, it may be more efficient to
    // just build a long array with all distances, and select the quantiles.
    // The heaps used below pay off in memory consumption for k > 2
    // Yes, maxDists is supposed to be a min heap, and the other way.
    // Because we want to replace the smallest of the current k-largest
    // distances.
    // NOTE(review): the constructor argument w is presumably an initial
    // capacity; the asserts below expect exactly w entries after processing.
    DoubleHeap maxDists = new DoubleMinHeap(w);
    DoubleHeap minDists = new DoubleMaxHeap(w);
    // Sum of within-cluster distances
    double theta = 0.;
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing clusters for C-Index", clusters.size(), LOG) : null;
    // Second pass: must skip exactly the same clusters as the counting pass
    // above, otherwise w would not match the accumulated distances.
    for (int i = 0; i < clusters.size(); i++) {
        Cluster<?> cluster = clusters.get(i);
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    LOG.incrementProcessed(prog);
                    // Ignore
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // Contributes to the heaps only; nothing is added to theta.
                    processSingleton(cluster, rel, dq, maxDists, minDists, w);
                    LOG.incrementProcessed(prog);
                    continue;
                case MERGE_NOISE:
                    // Treat like a cluster, below
                    break;
            }
        }
        theta += processCluster(cluster, clusters, i, dq, maxDists, minDists, w);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Simulate best and worst cases:
    // Sum of largest and smallest
    double min = 0, max = 0;
    assert (minDists.size() == w);
    assert (maxDists.size() == w);
    for (DoubleHeap.UnsortedIter it = minDists.unsortedIter(); it.valid(); it.advance()) {
        min += it.get();
    }
    for (DoubleHeap.UnsortedIter it = maxDists.unsortedIter(); it.valid(); it.advance()) {
        max += it.get();
    }
    assert (max >= min);
    // Degenerate case max == min: report 1 instead of dividing by zero.
    double cIndex = (max > min) ? (theta - min) / (max - min) : 1.;
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".c-index.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".c-index.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".c-index", cIndex));
    }
    // Attach the measure to the evaluation result of this clustering and
    // notify the hierarchy about the change.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    g.addMeasure("C-Index", cIndex, 0., 1., 0., true);
    db.getHierarchy().resultChanged(ev);
    return cIndex;
}
Also used : DoubleMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleHeap) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DoubleMaxHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 7 with EvaluationResult

use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.

the class EvaluateDBCV method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * Works in three passes over the clusters: (1) a core distance for every
 * object of every non-noise cluster with at least two members, (2) the
 * density sparseness of each such cluster, taken as the largest edge of a
 * minimum spanning tree over mutual reachability distances, and (3) the
 * density separation to all other such clusters. The per-cluster validity
 * values are combined into a sum weighted by cluster size relative to the
 * relation size. The result is also registered as an evaluation measure on
 * the result hierarchy of the database.
 *
 * @param db Database
 * @param rel Data relation
 * @param cl Clustering
 *
 * @return DBCV index of the clustering
 */
public double evaluateClustering(Database db, Relation<O> rel, Clustering<?> cl) {
    final DistanceQuery<O> dq = rel.getDistanceQuery(distanceFunction);
    List<? extends Cluster<?>> clusters = cl.getAllClusters();
    final int numc = clusters.size();
    // DBCV needs a "dimensionality".
    // The cast is unchecked: the relation is assumed to hold spatial objects.
    @SuppressWarnings("unchecked") final Relation<? extends SpatialComparable> vrel = (Relation<? extends SpatialComparable>) rel;
    final int dim = RelationUtil.dimensionality(vrel);
    // precompute all core distances
    ArrayDBIDs[] cids = new ArrayDBIDs[numc];
    double[][] coreDists = new double[numc][];
    for (int c = 0; c < numc; c++) {
        Cluster<?> cluster = clusters.get(c);
        // Singletons are considered as Noise, because they have no sparseness
        if (cluster.isNoise() || cluster.size() < 2) {
            coreDists[c] = null;
            continue;
        }
        // Store for use below:
        ArrayDBIDs ids = cids[c] = DBIDUtil.ensureArray(cluster.getIDs());
        double[] clusterCoreDists = coreDists[c] = new double[ids.size()];
        // it2 is reused for every it and rewound with seek(0).
        for (DBIDArrayIter it = ids.iter(), it2 = ids.iter(); it.valid(); it.advance()) {
            double currentCoreDist = 0;
            int neighbors = 0;
            for (it2.seek(0); it2.valid(); it2.advance()) {
                if (DBIDUtil.equal(it, it2)) {
                    continue;
                }
                double dist = dq.distance(it, it2);
                // A distance of 0 would cause a division by zero in 1 / dist.
                // We ignore such objects.
                if (dist > 0) {
                    currentCoreDist += MathUtil.powi(1. / dist, dim);
                    ++neighbors;
                }
            }
            // Average, and undo power.
            // NOTE(review): if all other members are at distance 0, neighbors
            // stays 0 and this divides 0.0 by 0 - confirm how the resulting
            // value is meant to be handled.
            clusterCoreDists[it.getOffset()] = FastMath.pow(currentCoreDist / neighbors, -1. / dim);
        }
    }
    // compute density sparseness of all clusters
    int[][] clusterDegrees = new int[numc][];
    double[] clusterDscMax = new double[numc];
    // describes if a cluster contains any internal edges
    boolean[] internalEdges = new boolean[numc];
    for (int c = 0; c < numc; c++) {
        Cluster<?> cluster = clusters.get(c);
        if (cluster.isNoise() || cluster.size() < 2) {
            clusterDegrees[c] = null;
            clusterDscMax[c] = Double.NaN;
            continue;
        }
        double[] clusterCoreDists = coreDists[c];
        ArrayDBIDs ids = cids[c];
        // Density Sparseness of the Cluster
        double dscMax = 0;
        double[][] distances = new double[cluster.size()][cluster.size()];
        // create the symmetric mutual reachability distance matrix for the
        // Minimum Spanning Tree (only the upper triangle is computed)
        for (DBIDArrayIter it = ids.iter(), it2 = ids.iter(); it.valid(); it.advance()) {
            double currentCoreDist = clusterCoreDists[it.getOffset()];
            for (it2.seek(it.getOffset() + 1); it2.valid(); it2.advance()) {
                // Maximum of both core distances and the actual distance.
                double mutualReachDist = MathUtil.max(currentCoreDist, clusterCoreDists[it2.getOffset()], dq.distance(it, it2));
                distances[it.getOffset()][it2.getOffset()] = mutualReachDist;
                distances[it2.getOffset()][it.getOffset()] = mutualReachDist;
            }
        }
        // generate Minimum Spanning Tree
        // The loops below read the result as consecutive pairs
        // (nodes[i], nodes[i + 1]), one pair per edge.
        int[] nodes = PrimsMinimumSpanningTree.processDense(distances);
        // get degree of all nodes in the spanning tree
        int[] degree = new int[cluster.size()];
        for (int i = 0; i < nodes.length; i++) {
            degree[nodes[i]]++;
        }
        // check if cluster contains any internal edges
        // (an edge is internal if both of its endpoints have degree > 1)
        for (int i = 0; i < nodes.length; i += 2) {
            if (degree[nodes[i]] > 1 && degree[nodes[i + 1]] > 1) {
                internalEdges[c] = true;
            }
        }
        clusterDegrees[c] = degree;
        // find maximum sparseness in the Minimum Spanning Tree
        for (int i = 0; i < nodes.length; i = i + 2) {
            final int n1 = nodes[i], n2 = nodes[i + 1];
            // If a cluster has no internal nodes we consider all edges.
            if (distances[n1][n2] > dscMax && (!internalEdges[c] || (degree[n1] > 1 && degree[n2] > 1))) {
                dscMax = distances[n1][n2];
            }
        }
        clusterDscMax[c] = dscMax;
    }
    // compute density separation of all clusters
    double dbcv = 0;
    for (int c = 0; c < numc; c++) {
        Cluster<?> cluster = clusters.get(c);
        if (cluster.isNoise() || cluster.size() < 2) {
            continue;
        }
        double currentDscMax = clusterDscMax[c];
        double[] clusterCoreDists = coreDists[c];
        int[] currentDegree = clusterDegrees[c];
        // minimal Density Separation of the Cluster
        double dspcMin = Double.POSITIVE_INFINITY;
        for (DBIDArrayIter it = cids[c].iter(); it.valid(); it.advance()) {
            // Skip objects with spanning tree degree below 2, but only if the
            // cluster has internal edges; otherwise all objects are used.
            if (currentDegree[it.getOffset()] < 2 && internalEdges[c]) {
                continue;
            }
            double currentCoreDist = clusterCoreDists[it.getOffset()];
            for (int oc = 0; oc < numc; oc++) {
                Cluster<?> ocluster = clusters.get(oc);
                // Skip noise, clusters with fewer than two members, and the
                // cluster itself (reference identity).
                if (ocluster.isNoise() || ocluster.size() < 2 || cluster == ocluster) {
                    continue;
                }
                int[] oDegree = clusterDegrees[oc];
                double[] oclusterCoreDists = coreDists[oc];
                for (DBIDArrayIter it2 = cids[oc].iter(); it2.valid(); it2.advance()) {
                    // Same degree filter for the objects of the other cluster.
                    if (oDegree[it2.getOffset()] < 2 && internalEdges[oc]) {
                        continue;
                    }
                    double mutualReachDist = MathUtil.max(currentCoreDist, oclusterCoreDists[it2.getOffset()], dq.distance(it, it2));
                    dspcMin = mutualReachDist < dspcMin ? mutualReachDist : dspcMin;
                }
            }
        }
        // compute DBCV
        // NOTE(review): with no other eligible cluster, dspcMin is still
        // positive infinity here - confirm the intended value of vc then.
        double vc = (dspcMin - currentDscMax) / MathUtil.max(dspcMin, currentDscMax);
        // Weight by the share of this cluster in the whole relation.
        double weight = cluster.size() / (double) rel.size();
        dbcv += weight * vc;
    }
    // Attach the measure to the evaluation result of this clustering and
    // notify the hierarchy about the change.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), cl, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    // NOTE(review): vc is negative whenever dspcMin < currentDscMax, yet the
    // measure is registered with 0 and positive infinity as bounds - confirm
    // the range and orientation arguments against addMeasure.
    g.addMeasure("Density Based Clustering Validation", dbcv, 0., Double.POSITIVE_INFINITY, 0., true);
    db.getHierarchy().resultChanged(ev);
    return dbcv;
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) SpatialComparable(de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Example 8 with EvaluationResult

use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.

the class EvaluatePBMIndex method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * Computes {@code ((1 / k) * (b / a) * max)^2}, where {@code k} is the
 * number of clusters returned by the clustering, {@code a} is the sum of
 * distances of the objects to their own cluster centroid, {@code b} is the
 * sum of distances of the objects to the overall centroid, and {@code max}
 * is the largest distance between two cluster representatives. All three
 * quantities are measured with the configured {@code distanceFunction}. The
 * value is also registered as an evaluation measure on the result hierarchy
 * of the database.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return PBM
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    NumberVector[] centroids = new NumberVector[clusters.size()];
    // Fills the centroids array; entries that remain null are handled as
    // noise below (presumably noise / one-element clusters - see the helper).
    int ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseHandling);
    // Build global centroid and cluster count:
    final int dim = RelationUtil.dimensionality(rel);
    Centroid overallCentroid = new Centroid(dim);
    EvaluateVarianceRatioCriteria.globalCentroid(overallCentroid, rel, clusters, centroids, noiseHandling);
    // Maximum distance between centroids:
    double max = 0;
    for (int i = 0; i < centroids.length; i++) {
        // Clusters without centroid only take part when every noise object
        // is treated as its own cluster.
        if (centroids[i] == null && noiseHandling != NoiseHandling.TREAT_NOISE_AS_SINGLETONS) {
            continue;
        }
        for (int j = i + 1; j < centroids.length; j++) {
            if (centroids[j] == null && noiseHandling != NoiseHandling.TREAT_NOISE_AS_SINGLETONS) {
                continue;
            }
            if (centroids[i] == null && centroids[j] == null) {
                // Need to compute pairwise distances of noise clusters.
                for (DBIDIter iti = clusters.get(i).getIDs().iter(); iti.valid(); iti.advance()) {
                    for (DBIDIter itj = clusters.get(j).getIDs().iter(); itj.valid(); itj.advance()) {
                        double dist = distanceFunction.distance(rel.get(iti), rel.get(itj));
                        max = dist > max ? dist : max;
                    }
                }
            } else if (centroids[i] == null) {
                // Each object of cluster i against the centroid of cluster j.
                for (DBIDIter iti = clusters.get(i).getIDs().iter(); iti.valid(); iti.advance()) {
                    double dist = distanceFunction.distance(rel.get(iti), centroids[j]);
                    max = dist > max ? dist : max;
                }
            } else if (centroids[j] == null) {
                // The centroid of cluster i against each object of cluster j.
                for (DBIDIter itj = clusters.get(j).getIDs().iter(); itj.valid(); itj.advance()) {
                    double dist = distanceFunction.distance(centroids[i], rel.get(itj));
                    max = dist > max ? dist : max;
                }
            } else {
                double dist = distanceFunction.distance(centroids[i], centroids[j]);
                max = dist > max ? dist : max;
            }
        }
    }
    // a: Distance to own centroid
    // b: Distance to overall centroid
    double a = 0, b = 0;
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    // i is kept in step with the iterator to index the centroids array.
    for (int i = 0; ci.hasNext(); i++) {
        Cluster<?> cluster = ci.next();
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseHandling) {
                case IGNORE_NOISE:
                    // Ignored
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // Singletons: a = 0 by definition.
                    // b must be measured with the same distance function as
                    // for regular clusters below, otherwise the sum would mix
                    // distances on different scales.
                    for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                        b += distanceFunction.distance(overallCentroid, rel.get(it));
                    }
                    // Continue with the NEXT cluster.
                    continue;
                case MERGE_NOISE:
                    // Treat like a cluster below:
                    break;
            }
        }
        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
            NumberVector obj = rel.get(it);
            a += distanceFunction.distance(centroids[i], obj);
            b += distanceFunction.distance(overallCentroid, obj);
        }
    }
    // NOTE(review): a stays 0 if no object contributed a distance to its own
    // centroid; b / a is then not a finite number.
    final double pbm = FastMath.pow((1. / centroids.length) * (b / a) * max, 2.);
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".pbm", pbm));
    }
    // Attach the measure to the evaluation result of this clustering and
    // notify the hierarchy about the change.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    g.addMeasure("PBM-Index", pbm, 0., Double.POSITIVE_INFINITY, 0., false);
    db.getHierarchy().resultChanged(ev);
    return pbm;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 9 with EvaluationResult

use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.

the class EvaluateSilhouette method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * For every object, {@code a} is the average distance to the other members
 * of its cluster and {@code b} is the smallest average distance to any other
 * cluster; the silhouette of the object is {@code (b - a) / max(a, b)}.
 * Objects of one-element clusters get a silhouette of 0 whenever they are
 * scored. The returned value is the mean silhouette, optionally multiplied
 * with a penalty for ignored noise, and is also registered as an evaluation
 * measure on the result hierarchy of the database.
 *
 * @param db Database
 * @param rel Data relation
 * @param dq Distance query
 * @param c Clustering
 * @return Average silhouette
 */
public double evaluateClustering(Database db, Relation<O> rel, DistanceQuery<O> dq, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    MeanVariance msil = new MeanVariance();
    int ignorednoise = 0;
    for (Cluster<?> cluster : clusters) {
        // Note: we treat 1-element clusters the same as noise.
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    ignorednoise += cluster.size();
                    // Ignore noise elements
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // As suggested in Rousseeuw, we use 0 for singletons.
                    msil.put(0., cluster.size());
                    continue;
                case MERGE_NOISE:
                    // Treat as cluster below
                    break;
            }
        }
        ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
        if (ids.size() == 1) {
            // A one-element cluster that is treated as a regular cluster has
            // no in-cluster distances: the average below would be 0 / 0 and
            // the resulting NaN would spoil the overall mean. Use 0, the same
            // value that is used for singletons above.
            msil.put(0.);
            continue;
        }
        // temporary storage.
        double[] as = new double[ids.size()];
        DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
        for (it1.seek(0); it1.valid(); it1.advance()) {
            // a: In-cluster distances
            // Already computed distances
            double a = as[it1.getOffset()];
            // Each pair is computed once and added to both of its objects.
            for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
                final double dist = dq.distance(it1, it2);
                a += dist;
                as[it2.getOffset()] += dist;
            }
            // Average over the other members (size is at least 2 here).
            a /= (ids.size() - 1);
            // b: minimum average distance to other clusters:
            double b = Double.POSITIVE_INFINITY;
            for (Cluster<?> ocluster : clusters) {
                if (ocluster == /* yes, reference identity */
                cluster) {
                    // Same cluster
                    continue;
                }
                if (ocluster.size() <= 1 || ocluster.isNoise()) {
                    switch(noiseOption) {
                        case IGNORE_NOISE:
                            // Ignore noise elements
                            continue;
                        case TREAT_NOISE_AS_SINGLETONS:
                            // Treat noise cluster as singletons:
                            for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
                                final double dist = dq.distance(it1, it3);
                                // Minimum average
                                b = dist < b ? dist : b;
                            }
                            continue;
                        case MERGE_NOISE:
                            // Treat as cluster below
                            break;
                    }
                }
                final DBIDs oids = ocluster.getIDs();
                double btmp = 0.;
                for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
                    btmp += dq.distance(it1, it3);
                }
                // Average
                btmp /= oids.size();
                // Minimum average
                b = btmp < b ? btmp : b;
            }
            // One cluster only?
            b = b < Double.POSITIVE_INFINITY ? b : a;
            msil.put((b - a) / (b > a ? b : a));
        }
    }
    double penalty = 1.;
    // Only if {@link NoiseHandling#IGNORE_NOISE}:
    // scale by the fraction of objects that were actually scored.
    if (penalize && ignorednoise > 0) {
        penalty = (rel.size() - ignorednoise) / (double) rel.size();
    }
    final double meansil = penalty * msil.getMean();
    final double stdsil = penalty * msil.getSampleStddev();
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".silhouette.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".silhouette.noise", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".silhouette.mean", meansil));
        LOG.statistics(new DoubleStatistic(key + ".silhouette.stddev", stdsil));
    }
    // Attach the measure to the evaluation result of this clustering and
    // notify the hierarchy about the change.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    g.addMeasure("Silhouette +-" + FormatUtil.NF2.format(stdsil), meansil, -1., 1., 0., false);
    db.getHierarchy().resultChanged(ev);
    return meansil;
}
Also used : ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Example 10 with EvaluationResult

use of de.lmu.ifi.dbs.elki.result.EvaluationResult in project elki by elki-project.

the class EvaluateSimplifiedSilhouette method evaluateClustering.

/**
 * Evaluate a single clustering.
 *
 * For every scored object, {@code a} is the distance to the centroid of its
 * own cluster and the second quantity is the smallest distance to the
 * representative of any other cluster; the simplified silhouette of the
 * object is their difference divided by the larger of the two. The mean of
 * these values, optionally penalized for ignored noise, is returned and also
 * registered as an evaluation measure on the result hierarchy of the
 * database.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Mean simplified silhouette
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    final List<? extends Cluster<?>> clusters = c.getAllClusters();
    final NumberVector[] centers = new NumberVector[clusters.size()];
    final int skipped = centroids(rel, clusters, centers, noiseOption);
    final MeanVariance stats = new MeanVariance();
    // Position of the current cluster, kept in step with the iteration so
    // that it can index the centers array.
    int own = -1;
    for (Cluster<?> cluster : clusters) {
        ++own;
        final int members = cluster.size();
        if (members <= 1) {
            // Singletons score 0, following Rousseeuw.
            stats.put(0., members);
            continue;
        }
        if (cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    // Not scored at all.
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // Every noise object is a singleton and scores 0.
                    stats.put(0., members);
                    continue;
                case MERGE_NOISE:
                    // Scored like a regular cluster below.
                    break;
            }
        }
        final NumberVector center = centers[own];
        assert (center != null);
        for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
            final NumberVector point = rel.get(member);
            // Distance to the centroid of the own cluster.
            final double a = distance.distance(center, point);
            // Smallest distance to the representative of any other cluster.
            double nearest = Double.POSITIVE_INFINITY;
            int other = -1;
            for (Cluster<?> otherCluster : clusters) {
                ++other;
                if (other == own) {
                    continue;
                }
                final NumberVector otherCenter = centers[other];
                if (otherCenter == null) {
                    // No centroid available: this is a noise cluster.
                    switch(noiseOption) {
                        case IGNORE_NOISE:
                            continue;
                        case TREAT_NOISE_AS_SINGLETONS:
                            // Every noise object acts as its own centroid.
                            for (DBIDIter noise = otherCluster.getIDs().iter(); noise.valid(); noise.advance()) {
                                final double toNoise = distance.distance(rel.get(noise), point);
                                if (toNoise < nearest) {
                                    nearest = toNoise;
                                }
                            }
                            continue;
                        case MERGE_NOISE:
                            // Falls through to the centroid case; not expected
                            // to be reachable.
                            break;
                    }
                }
                // Regular cluster: compare against its centroid.
                final double toCenter = distance.distance(otherCenter, point);
                if (toCenter < nearest) {
                    nearest = toCenter;
                }
            }
            // No other cluster was usable: fall back to a.
            if (!(nearest < Double.POSITIVE_INFINITY)) {
                nearest = a;
            }
            double larger = a;
            if (nearest > a) {
                larger = nearest;
            }
            stats.put((nearest - a) / larger);
        }
    }
    // Penalty only applies if {@link NoiseHandling#IGNORE_NOISE} skipped
    // objects: the fraction of objects that were not ignored.
    double penalty = 1.;
    if (penalize && skipped > 0) {
        final int total = rel.size();
        penalty = (total - skipped) / (double) total;
    }
    final double meanssil = penalty * stats.getMean();
    final double stdssil = penalty * stats.getSampleStddev();
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".simplified-silhouette.noise-handling", noiseOption.toString()));
        if (skipped > 0) {
            LOG.statistics(new LongStatistic(key + ".simplified-silhouette.ignored", skipped));
        }
        LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.mean", meanssil));
        LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.stddev", stdssil));
    }
    // Attach the measure to the evaluation result of this clustering and
    // notify the hierarchy about the change.
    final EvaluationResult result = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    final MeasurementGroup group = result.findOrCreateGroup("Distance-based Evaluation");
    group.addMeasure("Simp. Silhouette +-" + FormatUtil.NF2.format(stdssil), meanssil, -1., 1., 0., false);
    db.getHierarchy().resultChanged(result);
    return meanssil;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Aggregations

EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)11 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)9 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)8 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)7 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)7 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)2 Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid)2 SpatialComparable (de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)1 SquaredEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 Mean (de.lmu.ifi.dbs.elki.math.Mean)1 DoubleHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleHeap)1 DoubleMaxHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap)1 DoubleMinHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap)1