Search in sources :

Example 11 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class IndexTree method initialize.

/**
 * Sets up the index: calls {@code initializeCapacities} and
 * {@code createEmptyRoot} with the given example entry, optionally reports
 * the directory and leaf capacity / minimum-fill values to the statistics
 * log, and finally sets the {@code initialized} flag.
 *
 * @param exampleLeaf an object that will be stored in the index
 */
protected final void initialize(E exampleLeaf) {
    initializeCapacities(exampleLeaf);
    // Start out with an empty root node.
    createEmptyRoot(exampleLeaf);
    // Report the page layout, but only when statistics logging is enabled.
    final Logging logger = getLogger();
    if (logger.isStatistics()) {
        // Statistic keys are prefixed with the runtime class name.
        final String prefix = getClass().getName();
        logger.statistics(new LongStatistic(prefix + ".directory.capacity", dirCapacity));
        logger.statistics(new LongStatistic(prefix + ".directory.minfill", dirMinimum));
        logger.statistics(new LongStatistic(prefix + ".leaf.capacity", leafCapacity));
        logger.statistics(new LongStatistic(prefix + ".leaf.minfill", leafMinimum));
    }
    initialized = true;
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 12 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class HopkinsStatisticClusteringTendency method run.

/**
 * Runs the algorithm in the timed evaluation part.
 * <p>
 * The sampling is repeated {@code rep} times; the per-round values
 * {@code u}, {@code w} and {@code h = u / (u + w)} are aggregated and
 * written to the statistics log together with a p-value derived from the
 * mean of {@code h}.
 *
 * @param database Database context
 * @param relation Relation to analyze
 * @return always {@code null}; all output goes to the statistics log
 */
public Result run(Database database, Relation<NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // k + 1 neighbors are requested from the query.
    final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    final double[] min = new double[dim];
    final double[] extend = new double[dim];
    initializeDataExtends(relation, dim, min, extend);
    // The outcome is only visible in the statistics log, so warn if it is disabled.
    if (!LOG.isStatistics()) {
        LOG.warning("This algorithm must be used with at least logging level " + Level.STATISTICS);
    }
    final MeanVariance hStats = new MeanVariance();
    final MeanVariance uStats = new MeanVariance();
    final MeanVariance wStats = new MeanVariance();
    // Repeat the experiment for a more stable result.
    int round = 0;
    while (round < rep) {
        // NN distances for random objects from within the database
        final double realDist = computeNNForRealData(knnQuery, relation, dim);
        // NN distances for randomly created new uniform objects
        final double uniformDist = computeNNForUniformData(knnQuery, min, extend);
        // Hopkins statistic of this round: a / (1 + a)
        hStats.put(uniformDist / (uniformDist + realDist));
        uStats.put(uniformDist);
        wStats.put(realDist);
        round++;
    }
    final String prefix = getClass().getName();
    LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
    LOG.statistics(new LongStatistic(prefix + ".dim", dim));
    LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hStats.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", uStats.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wStats.getMean()));
    // Standard deviations are only reported when there was more than one round.
    if (rep > 1) {
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hStats.getSampleStddev()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", uStats.getSampleStddev()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wStats.getSampleStddev()));
    }
    // Evaluate: see Hopkins for a proof that the statistic is supposedly Beta distributed.
    final double hopkins = hStats.getMean();
    final double cdf = BetaDistribution.regularizedIncBeta(hopkins, sampleSize, sampleSize);
    // Use the upper tail above .5, the lower tail otherwise.
    final double p;
    if (hopkins > .5) {
        p = 1. - cdf;
    } else {
        p = cdf;
    }
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
    return null;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 13 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class InMemoryIDistanceIndex method logStatistics.

/**
 * Logs index statistics: delegates to the superclass first, then reports the
 * minimum, mean and maximum of {@code size()} over all entries of
 * {@code index}.
 */
@Override
public void logStatistics() {
    super.logStatistics();
    final String prefix = InMemoryIDistanceIndex.class.getName();
    // Aggregate the size of every entry of the index array.
    final MeanVarianceMinMax sizes = new MeanVarianceMinMax();
    int pos = 0;
    while (pos < index.length) {
        sizes.put(index[pos].size());
        pos++;
    }
    // Minimum and maximum are narrowed to int before being reported.
    final int smallest = (int) sizes.getMin();
    final int largest = (int) sizes.getMax();
    LOG.statistics(new LongStatistic(prefix + ".size.min", smallest));
    LOG.statistics(new DoubleStatistic(prefix + ".size.mean", sizes.getMean()));
    LOG.statistics(new LongStatistic(prefix + ".size.max", largest));
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)

Example 14 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class KMeansBatchedLloyd method run.

/**
 * Runs the iteration on the given relation, processing the data in batches.
 * <p>
 * The object ids are split randomly into {@code blocks} parts. In every
 * iteration each part is assigned to its nearest cluster and the means are
 * updated right after that part. The loop ends when an iteration changes no
 * assignment, or when {@code maxiter} iterations were run (a value of
 * {@code maxiter <= 0} disables the limit).
 *
 * @param database Database context
 * @param relation Relation to cluster
 * @return clustering with one top-level cluster per non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Set up the cluster membership sets and the assignment store.
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);
    // Scratch space that is reset before every batch.
    double[][] meanshift = new double[k][dim];
    int[] changesize = new int[k];
    double[] varsum = new double[k];
    IndefiniteProgress iterProgress = null;
    if (LOG.isVerbose()) {
        iterProgress = new IndefiniteProgress("K-Means iteration", LOG);
    }
    DoubleStatistic varstat = null;
    if (LOG.isStatistics()) {
        varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    }
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(iterProgress);
        boolean changed = false;
        FiniteProgress batchProgress = null;
        if (LOG.isVerbose()) {
            batchProgress = new FiniteProgress("Batch", parts.length, LOG);
        }
        for (ArrayDBIDs batch : parts) {
            // Initialize new means scratch space.
            for (double[] shift : meanshift) {
                Arrays.fill(shift, 0.);
            }
            Arrays.fill(changesize, 0);
            // NOTE(review): varsum is cleared per batch but read after the batch
            // loop (statistics and cluster models) - confirm this is intended.
            Arrays.fill(varsum, 0.);
            changed |= assignToNearestCluster(relation, batch, means, meanshift, changesize, clusters, assignment, varsum);
            // Recompute means.
            updateMeans(means, meanshift, clusters, changesize);
            LOG.incrementProcessed(batchProgress);
        }
        LOG.ensureCompleted(batchProgress);
        logVarstat(varstat, varsum);
        // Stop if no cluster assignment changed; the counter is not advanced then.
        if (!changed) {
            break;
        }
        iteration++;
    }
    LOG.setCompleted(iterProgress);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result, leaving out empty clusters.
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int c = 0; c < clusters.size(); c++) {
        DBIDs ids = clusters.get(c);
        if (ids.size() != 0) {
            KMeansModel model = new KMeansModel(means[c], varsum[c]);
            result.addToplevelCluster(new Cluster<>(ids, model));
        }
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 15 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class KMediansLloyd method run.

/**
 * Runs the iteration on the given relation.
 * <p>
 * Objects are assigned to their nearest cluster and the medians are
 * recomputed until an assignment pass changes nothing, or until
 * {@code maxiter} iterations were run (a value of {@code maxiter <= 0}
 * disables the limit).
 *
 * @param database Database context
 * @param relation Relation to cluster
 * @return clustering with one top-level cluster per cluster; an empty
 *         clustering if the relation has no objects
 */
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    // Nothing to cluster: hand back an empty result right away.
    if (relation.size() <= 0) {
        return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    }
    // Choose initial medians
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Set up the cluster membership sets and the assignment store.
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] distsum = new double[k];
    IndefiniteProgress iterProgress = null;
    if (LOG.isVerbose()) {
        iterProgress = new IndefiniteProgress("K-Medians iteration", LOG);
    }
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(iterProgress);
        // Stop if no cluster assignment changed; the counter is not advanced then.
        if (!assignToNearestCluster(relation, centers, clusters, assignment, distsum)) {
            break;
        }
        // Recompute medians.
        centers = medians(clusters, centers, relation);
        iteration++;
    }
    LOG.setCompleted(iterProgress);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result: every cluster is emitted, paired with its median.
    Clustering<MeanModel> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    int pos = 0;
    for (ModifiableDBIDs members : clusters) {
        MeanModel model = new MeanModel(centers[pos]);
        result.addToplevelCluster(new Cluster<>(members, model));
        pos++;
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic): 44
DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 27
ArrayList (java.util.ArrayList): 20
StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic): 19
DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 17
Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 14
DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 14
IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress): 14
ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 12
WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore): 11
KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel): 10
NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 9
FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 8
EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult): 7
MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup): 7
ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 5
MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 5
WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 4
Logging (de.lmu.ifi.dbs.elki.logging.Logging): 4
Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration): 4