Search in sources :

Example 1 with SquaredEuclideanDistanceFunction

use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.

The class EvaluateVarianceRatioCriteria, method evaluateClustering.

/**
 * Compute the Variance Ratio Criteria (VRC) of one clustering, report it via
 * the statistics log, and attach it to the evaluation result of the
 * clustering.
 * <p>
 * Two sums of squared Euclidean distances are accumulated: the distance of
 * every object to the centroid of its own cluster, and the distance of every
 * object to the overall centroid. Clusters that are flagged as noise or hold
 * at most one object are handled according to the configured noise option.
 * With fewer than two clusters the result is 0.
 * <p>
 * NOTE(review): when the summed distance to the own centroids is 0, or the
 * cluster count returned by {@code globalCentroid} is 1, the floating-point
 * divisions below produce Infinity or NaN - confirm this is intended.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    // FIXME: allow using a precomputed distance matrix!
    final SquaredEuclideanDistanceFunction sqDist = SquaredEuclideanDistanceFunction.STATIC;
    final List<? extends Cluster<?>> clusters = c.getAllClusters();
    double vrc = 0.;
    int ignorednoise = 0;
    if (clusters.size() > 1) {
        // Per-cluster centroids; also yields the number of ignored noise objects.
        final NumberVector[] centroids = new NumberVector[clusters.size()];
        ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
        // Global centroid and effective cluster count.
        final int dim = RelationUtil.dimensionality(rel);
        final Centroid overallCentroid = new Centroid(dim);
        final int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
        // within: summed distance of objects to their own centroid
        // total: summed distance of objects to the overall centroid
        double within = 0, total = 0;
        int pos = 0;
        for (Cluster<?> cluster : clusters) {
            // Position of this cluster, matching its slot in centroids[].
            // Advanced up front so that skipped clusters still consume a slot.
            final int i = pos++;
            if (cluster.size() <= 1 || cluster.isNoise()) {
                switch(noiseOption) {
                    case IGNORE_NOISE:
                        // Contributes to neither sum.
                        continue;
                    case TREAT_NOISE_AS_SINGLETONS:
                        // Singletons: distance to own centroid is 0 by definition,
                        // so only the overall sum grows.
                        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                            total += sqDist.distance(overallCentroid, rel.get(it));
                        }
                        // Proceed with the NEXT cluster.
                        continue;
                    case MERGE_NOISE:
                        // Fall out of the switch and treat like a regular cluster.
                        break;
                }
            }
            for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
                final NumberVector vec = rel.get(it);
                within += sqDist.distance(centroids[i], vec);
                total += sqDist.distance(overallCentroid, vec);
            }
        }
        final double ratio = (total - within) / within;
        final double scale = (rel.size() - clustercount) / (clustercount - 1.);
        vrc = ratio * scale;
        // Only if {@link NoiseHandling#IGNORE_NOISE}:
        // scale down by the share of objects that were actually evaluated.
        if (penalize && ignorednoise > 0) {
            vrc *= (rel.size() - ignorednoise) / (double) rel.size();
        }
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
        if (ignorednoise > 0) {
            LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
        }
        LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
    }
    // Register the value as a measure on the clustering's evaluation result.
    EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation") //
        .findOrCreateGroup("Distance-based Evaluation") //
        .addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
    return vrc;
}
Also used : MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 2 with SquaredEuclideanDistanceFunction

use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.

The class PassingDataToELKI, method main.

/**
 * Demo entry point: generates random 2-dimensional data, loads it into an
 * in-memory database, runs k-means with k = 3 on it and prints every
 * resulting cluster to standard output.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set (1000 rows, 2 columns).
    // Note: ELKI has a nice data generator class, use that instead.
    final double[][] data = new double[1000][2];
    for (double[] row : data) {
        for (int j = 0; j < row.length; j++) {
            row[j] = Math.random();
        }
    }
    // Adapter to load data from an existing array.
    final DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    // Create a database (which may contain multiple relations!)
    final Database db = new StaticArrayDatabase(dbc, null);
    // Load the data into the database (do NOT forget to initialize...)
    db.initialize();
    // Relation containing the number vectors:
    final Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We know that the ids must be a continuous range:
    final DBIDRange ids = (DBIDRange) rel.getDBIDs();
    // K-means should be used with squared Euclidean (least squares):
    final SquaredEuclideanDistanceFunction dist = SquaredEuclideanDistanceFunction.STATIC;
    // Default initialization, using global random:
    // To fix the random seed, use: new RandomFactory(seed);
    final RandomlyGeneratedInitialMeans init = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
    // Textbook k-means clustering. Arguments: distance function,
    // k (number of partitions), maximum number of iterations (0: no limit),
    // initialization method.
    final KMeansLloyd<NumberVector> km = new KMeansLloyd<>(dist, 3, 0, init);
    // K-means will automatically choose a numerical relation from the data set:
    // But we could make it explicit (if there were more than one numeric
    // relation!): km.run(db, rel);
    final Clustering<KMeansModel> result = km.run(db);
    // Output all clusters, numbered in iteration order:
    int clusterNo = 0;
    for (Cluster<KMeansModel> clu : result.getAllClusters()) {
        // K-means will name all clusters "Cluster" in lack of noise support:
        System.out.println("#" + clusterNo + ": " + clu.getNameAutomatic());
        System.out.println("Size: " + clu.size());
        System.out.println("Center: " + clu.getModel().getPrototype().toString());
        // Iterate over objects:
        System.out.print("Objects: ");
        DBIDIter it = clu.getIDs().iter();
        while (it.valid()) {
            // To get the vector use:
            // NumberVector v = rel.get(it);
            // Offset within our DBID range: "line number"
            // Do NOT rely on using "internalGetIndex()" directly!
            final int offset = ids.getOffset(it);
            System.out.print(" " + offset);
            it.advance();
        }
        System.out.println();
        clusterNo++;
    }
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) RandomlyGeneratedInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 3 with SquaredEuclideanDistanceFunction

use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.

The class UKMeans, method getExpectedRepDistance.

/**
 * Get the expected distance between a vector and an uncertain object, taken
 * as the arithmetic mean of the {@link SquaredEuclideanDistanceFunction}
 * distances from the vector to each sample of the object.
 * <p>
 * If the object reports zero samples, the result is {@code 0.0 / 0}, i.e.
 * NaN.
 *
 * @param rep A vector, e.g. a cluster representative
 * @param uo A discrete uncertain object
 * @return The distance
 */
protected double getExpectedRepDistance(NumberVector rep, DiscreteUncertainObject uo) {
    final SquaredEuclideanDistanceFunction sqDist = SquaredEuclideanDistanceFunction.STATIC;
    double total = 0.0;
    // Doubles as sample index and as the number of samples visited so far.
    int visited = 0;
    while (visited < uo.getNumberSamples()) {
        total += sqDist.distance(rep, uo.getSample(visited));
        visited++;
    }
    return total / visited;
}
Also used : SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction)

Example 4 with SquaredEuclideanDistanceFunction

use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.

The class LinearScanEuclideanDistanceKNNQuery, method linearScan.

/**
 * Main loop of the linear scan: walks the iterator to its end and offers
 * each object to the heap.
 * <p>
 * Distances are computed with {@link SquaredEuclideanDistanceFunction}. An
 * object is inserted only if its distance does not exceed the current
 * threshold, which starts at positive infinity and is replaced by the
 * return value of every {@code heap.insert} call (presumably the current
 * kNN distance of the heap - confirm against KNNHeap).
 *
 * @param relation Data relation
 * @param iter ID iterator
 * @param obj Query object
 * @param heap Output heap
 * @return Heap
 */
private KNNHeap linearScan(Relation<? extends O> relation, DBIDIter iter, final O obj, KNNHeap heap) {
    final SquaredEuclideanDistanceFunction sqDist = SquaredEuclideanDistanceFunction.STATIC;
    // Pruning threshold, refreshed after each successful insert.
    double threshold = Double.POSITIVE_INFINITY;
    for (; iter.valid(); iter.advance()) {
        final double d = sqDist.distance(obj, relation.get(iter));
        if (d <= threshold) {
            threshold = heap.insert(d, iter);
        }
    }
    return heap;
}
Also used : SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction)

Example 5 with SquaredEuclideanDistanceFunction

use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.

The class LinearScanEuclideanDistanceKNNQuery, method linearScanBatchKNN.

/**
 * Perform a linear scan batch kNN for primitive distance functions.
 * <p>
 * The relation is traversed once; every object in it is compared against
 * each query object using {@link SquaredEuclideanDistanceFunction}, and
 * inserted into the heap at the same list position as the query whenever
 * its distance does not exceed that heap's current kNN distance.
 *
 * @param objs Objects list
 * @param heaps Heaps array
 */
@Override
protected void linearScanBatchKNN(List<O> objs, List<KNNHeap> heaps) {
    final SquaredEuclideanDistanceFunction sqDist = SquaredEuclideanDistanceFunction.STATIC;
    final Relation<? extends O> data = getRelation();
    final int numQueries = objs.size();
    // Outer loop over the data, inner loop over the queries: each stored
    // object is fetched from the relation only once.
    for (DBIDIter iter = data.getDBIDs().iter(); iter.valid(); iter.advance()) {
        final O candidate = data.get(iter);
        for (int q = 0; q < numQueries; q++) {
            final KNNHeap heap = heaps.get(q);
            final O query = objs.get(q);
            final double d = sqDist.distance(query, candidate);
            if (d <= heap.getKNNDistance()) {
                heap.insert(d, iter);
            }
        }
    }
}
Also used : SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

SquaredEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction)8 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 KMeansLloyd (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd)1 RandomlyGeneratedInitialMeans (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans)1 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)1 Database (de.lmu.ifi.dbs.elki.database.Database)1 StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)1 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)1 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)1 ArrayAdapterDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection)1 DatabaseConnection (de.lmu.ifi.dbs.elki.datasource.DatabaseConnection)1 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)1 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)1 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)1 Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid)1 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)1 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)1