
Example 1 with DBIDIter

Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

The class EvaluateCIndex, method processCluster:

protected double processCluster(Cluster<?> cluster, List<? extends Cluster<?>> clusters, int i, DistanceQuery<O> dq, DoubleHeap maxDists, DoubleHeap minDists, int w) {
    double theta = 0.;
    for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) {
        // Compare object to every cluster, but only once
        for (int j = i; j < clusters.size(); j++) {
            Cluster<?> ocluster = clusters.get(j);
            if (ocluster.size() <= 1 || ocluster.isNoise()) {
                switch(noiseOption) {
                    case IGNORE_NOISE:
                        // Ignore this cluster.
                        continue;
                    case TREAT_NOISE_AS_SINGLETONS:
                        // Treat like a cluster
                        break;
                    case MERGE_NOISE:
                        // Treat like a cluster
                        break;
                }
            }
            for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) {
                if (DBIDUtil.compare(it1, it2) <= 0) {
                    // Count each unordered pair only once (also skips self-pairs).
                    continue;
                }
                double dist = dq.distance(it1, it2);
                // Bounded heaps: keep only the w smallest and the w largest pairwise distances.
                minDists.add(dist, w);
                maxDists.add(dist, w);
                if (ocluster == cluster) {
                    // Within-cluster distances.
                    theta += dist;
                }
            }
        }
    }
    return theta;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
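
For context: in processCluster, theta accumulates only the within-cluster distances, while both heaps see every pairwise distance, bounded to the w smallest (minDists) and the w largest (maxDists). A minimal sketch of how these three quantities combine into the C-Index afterwards; the helper name and the heap draining are assumptions here, not ELKI's exact code (EvaluateCIndex does this work inside its evaluateClustering method):

protected double cIndexFrom(double theta, DoubleHeap minDists, DoubleHeap maxDists) {
    double minSum = 0., maxSum = 0.;
    // Drain the bounded heaps (this consumes them) to obtain the sums of the
    // w smallest and the w largest pairwise distances.
    while (!minDists.isEmpty()) {
        minSum += minDists.poll();
    }
    while (!maxDists.isEmpty()) {
        maxSum += maxDists.poll();
    }
    // C-Index: (S_w - S_min) / (S_max - S_min); smaller values indicate more compact clusters.
    return (maxSum > minSum) ? (theta - minSum) / (maxSum - minSum) : 0.;
}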

Example 2 with DBIDIter

Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

The class EvaluateVarianceRatioCriteria, method evaluateClustering:

/**
 * Evaluate a single clustering.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    // FIXME: allow using a precomputed distance matrix!
    final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    double vrc = 0.;
    int ignorednoise = 0;
    if (clusters.size() > 1) {
        NumberVector[] centroids = new NumberVector[clusters.size()];
        ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
        // Build global centroid and cluster count:
        final int dim = RelationUtil.dimensionality(rel);
        Centroid overallCentroid = new Centroid(dim);
        int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
        // a: Distance to own centroid
        // b: Distance to overall centroid
        double a = 0, b = 0;
        Iterator<? extends Cluster<?>> ci = clusters.iterator();
        for (int i = 0; ci.hasNext(); i++) {
            Cluster<?> cluster = ci.next();
            if (cluster.size() <= 1 || cluster.isNoise()) {
                switch(noiseOption) {
                    case IGNORE_NOISE:
                        // Ignored
                        continue;
                    case TREAT_NOISE_AS_SINGLETONS:
                        // Singletons: a = 0 by definition.
                        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                            b += df.distance(overallCentroid, rel.get(it));
                        }
                        // with NEXT cluster.
                        continue;
                    case MERGE_NOISE:
                        // Treat like a cluster below:
                        break;
                }
            }
            for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                NumberVector vec = rel.get(it);
                a += df.distance(centroids[i], vec);
                b += df.distance(overallCentroid, vec);
            }
        }
        vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
        // Only if {@link NoiseHandling#IGNORE_NOISE}:
        if (penalize && ignorednoise > 0) {
            vrc *= (rel.size() - ignorednoise) / (double) rel.size();
        }
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
    }
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
    return vrc;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)
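
To unpack the formula line above: a is the within-cluster sum of squared distances (SSW) and b is the total sum of squared distances to the global centroid (SST), so b - a is the between-cluster sum (SSB) and the expression is the usual Calinski-Harabasz ratio. A small sketch, with the helper name assumed, that is algebraically equivalent to the line vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.)):

public static double varianceRatio(double a, double b, int n, int k) {
    // a: within-cluster sum of squares (SSW); b - a: between-cluster sum (SSB).
    // n corresponds to rel.size(), k to clustercount.
    double ssb = b - a;
    // (SSB / (k - 1)) / (SSW / (n - k)) == ((b - a) / a) * ((n - k) / (k - 1))
    return (ssb / (k - 1.)) / (a / (n - k));
}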

Example 3 with DBIDIter

Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

The class EvaluateVarianceRatioCriteria, method globalCentroid:

/**
 * Update the global centroid.
 *
 * @param overallCentroid Centroid to update
 * @param rel Data relation
 * @param clusters Clusters
 * @param centroids Cluster centroids
 * @param noiseOption Noise handling option
 * @return Number of clusters
 */
public static int globalCentroid(Centroid overallCentroid, Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, NumberVector[] centroids, NoiseHandling noiseOption) {
    int clustercount = 0;
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    for (int i = 0; ci.hasNext(); i++) {
        Cluster<?> cluster = ci.next();
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                    // Ignore completely
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    clustercount += cluster.size();
                    // Update global centroid:
                    for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                        overallCentroid.put(rel.get(it));
                    }
                    // With NEXT cluster.
                    continue;
                case MERGE_NOISE:
                    // Treat as cluster below:
                    break;
            }
        }
        // Update centroid:
        assert (centroids[i] != null);
        overallCentroid.put(centroids[i], cluster.size());
        ++clustercount;
    }
    return clustercount;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
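
For comparison, when every object belongs to a regular cluster, the weighted combination above yields the same result as averaging all vectors of the relation directly. A minimal sketch; the helper name and the rel.iterDBIDs() call (a standard ELKI Relation method, though not shown in the snippets above) are the only additions:

public static Centroid globalCentroidSimple(Relation<? extends NumberVector> rel) {
    Centroid c = new Centroid(RelationUtil.dimensionality(rel));
    for (DBIDIter it = rel.iterDBIDs(); it.valid(); it.advance()) {
        // Every object contributes with weight 1.
        c.put(rel.get(it));
    }
    return c;
}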

Example 4 with DBIDIter

Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

The class Segments, method recursivelyFill:

private void recursivelyFill(List<List<? extends Cluster<?>>> cs, int depth, SetDBIDs first, SetDBIDs second, int[] path, boolean objectsegment) {
    final int numclusterings = cs.size();
    Iterator<? extends Cluster<?>> iter = cs.get(depth).iterator();
    for (int cnum = 0; iter.hasNext(); cnum++) {
        Cluster<?> clust = iter.next();
        // Compute intersections with the new cluster:
        // nfirstp := intersection(first, cluster)
        // ndelta1 / ndelta2 := the asymmetric differences between first and cluster
        // nsecond := intersection(second, cluster)
        HashSetModifiableDBIDs nfirstp = DBIDUtil.newHashSet(first.size());
        HashSetModifiableDBIDs ndelta1 = DBIDUtil.newHashSet(first);
        HashSetModifiableDBIDs ndelta2 = DBIDUtil.newHashSet();
        HashSetModifiableDBIDs nsecond = DBIDUtil.newHashSet(second.size());
        for (DBIDIter iter2 = clust.getIDs().iter(); iter2.valid(); iter2.advance()) {
            if (ndelta1.remove(iter2)) {
                nfirstp.add(iter2);
            } else {
                ndelta2.add(iter2);
            }
            if (second.contains(iter2)) {
                nsecond.add(iter2);
            }
        }
        if (nsecond.size() <= 0) {
            // disjoint
            continue;
        }
        if (nfirstp.size() > 0) {
            path[depth] = cnum;
            if (depth < numclusterings - 1) {
                recursivelyFill(cs, depth + 1, nfirstp, nsecond, path, objectsegment);
            } else {
                // Add to results.
                // In fact, nfirstp should equal nsecond here
                int selfpairs = DBIDUtil.intersectionSize(nfirstp, nsecond);
                if (objectsegment) {
                    makeOrUpdateSegment(path, nfirstp, (nfirstp.size() * nsecond.size()) - selfpairs);
                } else {
                    makeOrUpdateSegment(path, null, (nfirstp.size() * nsecond.size()) - selfpairs);
                }
            }
        }
        // Elements that were in first, but in not in the cluster
        if (ndelta1.size() > 0) {
            path[depth] = Segment.UNCLUSTERED;
            if (depth < numclusterings - 1) {
                recursivelyFill(cs, depth + 1, ndelta1, nsecond, path, false);
            } else {
                // Add to results.
                int selfpairs = DBIDUtil.intersection(ndelta1, nsecond).size();
                makeOrUpdateSegment(path, null, (ndelta1.size() * nsecond.size()) - selfpairs);
            }
        }
        // It used to work in revision 9236, eventually go back to this code!
        if (ndelta2.size() > 0 && objectsegment) {
            int[] npath = new int[path.length];
            Arrays.fill(npath, Segment.UNCLUSTERED);
            npath[depth] = cnum;
            if (depth < numclusterings - 1) {
                recursivelyFill(cs, depth + 1, ndelta2, nsecond, npath, false);
            } else {
                // Add to results.
                int selfpairs = DBIDUtil.intersection(ndelta2, nsecond).size();
                makeOrUpdateSegment(npath, null, (ndelta2.size() * nsecond.size()) - selfpairs);
            }
        }
    }
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
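
The core of the recursion is the one-pass split in the inner DBIDIter loop: a single scan of the cluster's IDs partitions first into the intersection and the two asymmetric differences. Isolated as a sketch (the helper name is hypothetical; the DBIDUtil and set operations are the ones used above):

private static void splitSets(DBIDs base, DBIDs other) {
    HashSetModifiableDBIDs intersection = DBIDUtil.newHashSet(base.size());
    // Start with a copy of base; elements found in other are moved to the intersection.
    HashSetModifiableDBIDs onlyInBase = DBIDUtil.newHashSet(base);
    HashSetModifiableDBIDs onlyInOther = DBIDUtil.newHashSet();
    for (DBIDIter it = other.iter(); it.valid(); it.advance()) {
        if (onlyInBase.remove(it)) {
            // Present in both sets.
            intersection.add(it);
        } else {
            // Present only in other.
            onlyInOther.add(it);
        }
    }
    // intersection, onlyInBase and onlyInOther now partition the union of base and other.
}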

Example 5 with DBIDIter

Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

The class EvaluateConcordantPairs, method computeWithinDistances:

protected double[] computeWithinDistances(Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, int withinPairs) {
    double[] concordant = new double[withinPairs];
    int i = 0;
    for (Cluster<?> cluster : clusters) {
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseHandling) {
                case IGNORE_NOISE:
                    continue;
                case TREAT_NOISE_AS_SINGLETONS:
                    // No concordant distances.
                    continue;
                case MERGE_NOISE:
                    // Treat like a cluster below.
                    break;
            }
        }
        for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) {
            NumberVector obj = rel.get(it1);
            for (DBIDIter it2 = cluster.getIDs().iter(); it2.valid(); it2.advance()) {
                if (DBIDUtil.compare(it1, it2) <= 0) {
                    // Count each unordered pair only once.
                    continue;
                }
                concordant[i++] = distanceFunction.distance(obj, rel.get(it2));
            }
        }
    }
    assert (concordant.length == i);
    Arrays.sort(concordant);
    return concordant;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
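
The caller sizes the concordant array, so withinPairs must equal the number of unordered within-cluster pairs visited by the double loop above. A sketch of that count; the helper name is assumed and the noise handling is simplified to IGNORE_NOISE-like behavior:

protected static int countWithinPairs(List<? extends Cluster<?>> clusters) {
    int withinPairs = 0;
    for (Cluster<?> cluster : clusters) {
        if (cluster.size() <= 1 || cluster.isNoise()) {
            // Simplified: the real code distinguishes the NoiseHandling modes.
            continue;
        }
        // n * (n - 1) / 2 unordered pairs per cluster.
        withinPairs += cluster.size() * (cluster.size() - 1) / 2;
    }
    return withinPairs;
}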

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 329 usages
FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 78 usages
DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 76 usages
DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 72 usages
WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 70 usages
ArrayList (java.util.ArrayList): 61 usages
KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 56 usages
OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 56 usages
MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 55 usages
OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 55 usages
DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 54 usages
ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 53 usages
NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 42 usages
ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 40 usages
DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 34 usages
MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 31 usages
BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta): 30 usages
ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 25 usages
ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 24 usages
AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 21 usages