Search in sources:

Example 1 with Centroid

use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.

In the class EvaluateVarianceRatioCriteria, method evaluateClustering:

/**
 * Evaluate a single clustering with the variance ratio criterion and register
 * the score as a measurement on the clustering's evaluation result.
 *
 * The score stays 0 when the clustering has at most one cluster.
 *
 * @param db Database (its result hierarchy receives the evaluation result)
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    // FIXME: allow using a precomputed distance matrix!
    final SquaredEuclideanDistanceFunction sqdist = SquaredEuclideanDistanceFunction.STATIC;
    final List<? extends Cluster<?>> allClusters = c.getAllClusters();
    double score = 0.;
    int ignorednoise = 0;
    if (allClusters.size() > 1) {
        // Per-cluster centroids, indexed in the iteration order of allClusters.
        final NumberVector[] centroids = new NumberVector[allClusters.size()];
        ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, allClusters, centroids, noiseOption);
        // Build global centroid and cluster count:
        final Centroid overallCentroid = new Centroid(RelationUtil.dimensionality(rel));
        final int clustercount = globalCentroid(overallCentroid, rel, allClusters, centroids, noiseOption);
        // within: summed squared distance of each point to its own centroid
        // total: summed squared distance of each point to the overall centroid
        double within = 0;
        double total = 0;
        int idx = -1;
        for (Cluster<?> cluster : allClusters) {
            // Advance first, so that skipped clusters still consume an index.
            idx++;
            if (cluster.size() <= 1 || cluster.isNoise()) {
                switch(noiseOption) {
                    case IGNORE_NOISE:
                        // Ignored: contributes to neither sum.
                        continue;
                    case TREAT_NOISE_AS_SINGLETONS:
                        // Singletons: the "within" part is 0 by definition,
                        // so only the distance to the overall centroid counts.
                        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                            total += sqdist.distance(overallCentroid, rel.get(it));
                        }
                        // Proceed with the NEXT cluster.
                        continue;
                    case MERGE_NOISE:
                        // Treat like a regular cluster below.
                        break;
                }
            }
            final NumberVector ownCentroid = centroids[idx];
            for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                final NumberVector point = rel.get(it);
                within += sqdist.distance(ownCentroid, point);
                total += sqdist.distance(overallCentroid, point);
            }
        }
        final double betweenOverWithin = (total - within) / within;
        final double countFactor = (rel.size() - clustercount) / (clustercount - 1.);
        score = betweenOverWithin * countFactor;
        // Only if {@link NoiseHandling#IGNORE_NOISE}:
        // scale the score down by the fraction of objects that were not ignored.
        if (penalize && ignorednoise > 0) {
            score *= (rel.size() - ignorednoise) / (double) rel.size();
        }
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".vrc", score));
    }
    // Attach the measurement to the (possibly pre-existing) evaluation result.
    final EvaluationResult evaluation = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    final MeasurementGroup group = evaluation.findOrCreateGroup("Distance-based Evaluation");
    group.addMeasure("Variance Ratio Criteria", score, 0., 1., 0., false);
    return score;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 2 with Centroid

use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.

In the class DependencyDerivator, method run:

/**
 * Computes quantitatively linear dependencies among the attributes of the
 * given database based on a linear correlation PCA.
 *
 * The model is built either on all objects of the relation (when no positive
 * sample size is configured), on a random sample, or on the nearest
 * neighbors of the data centroid.
 *
 * @param database the database to run this DependencyDerivator on
 * @param relation the relation to use
 * @return the CorrelationAnalysisSolution computed by this
 *         DependencyDerivator
 */
public CorrelationAnalysisSolution<V> run(Database database, Relation<V> relation) {
    if (LOG.isVerbose()) {
        LOG.verbose("retrieving database objects...");
    }
    // Centroid of the complete relation; also used as the kNN query object.
    final Centroid mean = Centroid.make(relation, relation.getDBIDs());
    final NumberVector.Factory<V> vectorFactory = RelationUtil.getNumberVectorFactory(relation);
    final V meanVector = vectorFactory.newNumberVector(mean.getArrayRef());

    final DBIDs selected;
    if (this.sampleSize <= 0) {
        // No sampling requested: use every object.
        selected = relation.getDBIDs();
    } else if (randomsample) {
        selected = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, RandomFactory.DEFAULT);
    } else {
        // Sample the sampleSize nearest neighbors of the centroid.
        final DistanceQuery<V> distances = database.getDistanceQuery(relation, getDistanceFunction());
        final KNNList neighbors = database.getKNNQuery(distances, this.sampleSize).getKNNForObject(meanVector, this.sampleSize);
        selected = DBIDUtil.newHashSet(neighbors);
    }
    return generateModel(relation, selected, mean.getArrayRef());
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs)

Example 3 with Centroid

use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.

In the class AutotuningPCA, method processIds:

@Override
public PCAResult processIds(DBIDs ids, Relation<? extends NumberVector> database) {
    // Euclidean distance is assumed here: in the context of PCA, the
    // neighborhood should be L2-spherical to be unbiased.
    final EuclideanDistanceFunction euclidean = EuclideanDistanceFunction.STATIC;
    final Centroid mean = Centroid.make(database, ids);
    // Collect every object together with its distance to the centroid.
    final ModifiableDoubleDBIDList byDistance = DBIDUtil.newDistanceDBIDList(ids.size());
    final DBIDIter cursor = ids.iter();
    while (cursor.valid()) {
        final double toMean = euclidean.distance(mean, database.get(cursor));
        byDistance.add(toMean, cursor);
        cursor.advance();
    }
    byDistance.sort();
    return processQueryResult(byDistance, database);
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with Centroid

use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.

In the class LinearDiscriminantAnalysisFilter, method computeCentroids:

/**
 * Compute the centroid for each class.
 *
 * The result list follows the order of {@code keys}: entry {@code i} is the
 * centroid of the vectors whose column indexes are listed for
 * {@code keys.get(i)} in {@code classes}.
 *
 * @param dim Dimensionality
 * @param vectorcolumn Vector column
 * @param keys Key index
 * @param classes Classes
 * @return Centroids for each class.
 */
protected List<Centroid> computeCentroids(int dim, List<V> vectorcolumn, List<ClassLabel> keys, Map<ClassLabel, IntList> classes) {
    final List<Centroid> perClass = new ArrayList<>(keys.size());
    for (ClassLabel label : keys) {
        final Centroid mean = new Centroid(dim);
        // Feed every member vector of this class into the centroid.
        final IntIterator members = classes.get(label).iterator();
        while (members.hasNext()) {
            mean.put(vectorcolumn.get(members.nextInt()));
        }
        perClass.add(mean);
    }
    return perClass;
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) ArrayList(java.util.ArrayList)

Example 5 with Centroid

use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.

In the class DiSH, method findParent:

/**
 * Returns the parent of the specified cluster.
 *
 * A candidate parent must have a bit vector of strictly lower cardinality
 * than the child's, must be unchanged when ANDed with the child's bit vector,
 * and must have a projected centroid within {@code 2 * epsilon} (weighted
 * distance) of the child's centroid. Among several matches, the last one
 * encountered with the highest cardinality is kept.
 *
 * @param relation the relation storing the objects
 * @param child the child to search the parent for
 * @param clustersMap the map containing the clusters
 * @return the parent of the specified cluster, or {@code null} if none matches
 */
private Pair<long[], ArrayModifiableDBIDs> findParent(Relation<V> relation, Pair<long[], ArrayModifiableDBIDs> child, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
    final long[] childPV = child.first;
    final Centroid childCentroid = ProjectedCentroid.make(childPV, relation, child.second);
    final int childCardinality = BitsUtil.cardinality(childPV);

    Pair<long[], ArrayModifiableDBIDs> best = null;
    // -1 marks "no parent found yet".
    int bestCardinality = -1;
    for (long[] parentPV : clustersMap.keySet()) {
        final int parentCardinality = BitsUtil.cardinality(parentPV);
        // A parent needs strictly lower cardinality than the child.
        if (parentCardinality >= childCardinality) {
            continue;
        }
        // Only candidates with higher cardinality than the current best can replace it.
        if (bestCardinality != -1 && parentCardinality <= bestCardinality) {
            continue;
        }
        // The parent's bits must survive the AND with the child's bits unchanged.
        final long[] common = BitsUtil.andCMin(childPV, parentPV);
        if (!BitsUtil.equal(common, parentPV)) {
            continue;
        }
        for (ArrayModifiableDBIDs candidate : clustersMap.get(parentPV)) {
            final NumberVector parentCentroid = ProjectedCentroid.make(parentPV, relation, candidate);
            final double distance = weightedDistance(childCentroid, parentCentroid, parentPV);
            if (distance <= 2 * epsilon) {
                best = new Pair<>(parentPV, candidate);
                bestCardinality = parentCardinality;
                // First matching cluster for this bit vector is enough.
                break;
            }
        }
    }
    return best;
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) ProjectedCentroid(de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectedCentroid) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector)

Aggregations

Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid)9 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)6 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)5 ArrayList (java.util.ArrayList)3 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)2 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)2 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)2 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)2 CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)2 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)2 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)2 IntIterator (it.unimi.dsi.fastutil.ints.IntIterator)2 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)1 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 MeanModel (de.lmu.ifi.dbs.elki.data.model.MeanModel)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1