Search in sources :

Example 11 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class CoverTree method initialize.

/**
 * Bulk-loads all ids of the relation and, if verbose logging is enabled,
 * runs {@code checkCoverTree} from the root and logs the collected counters.
 */
@Override
public void initialize() {
    bulkLoad(relation.getDBIDs());
    if (!LOG.isVerbose()) {
        // The tree check below is only performed to produce statistics.
        return;
    }
    // Counter slots filled by checkCoverTree, reported below as:
    // [0] nodes, [1] / [0] average depth, [2] max depth, [3] singletons, [4] entries.
    final int[] stats = new int[5];
    checkCoverTree(root, stats, 0);
    final String prefix = this.getClass().getName();
    LOG.statistics(new LongStatistic(prefix + ".nodes", stats[0]));
    // Convert to double before dividing, to avoid integer division.
    final double avgDepth = (double) stats[1] / stats[0];
    LOG.statistics(new DoubleStatistic(prefix + ".avg-depth", avgDepth));
    LOG.statistics(new LongStatistic(prefix + ".max-depth", stats[2]));
    LOG.statistics(new LongStatistic(prefix + ".singletons", stats[3]));
    LOG.statistics(new LongStatistic(prefix + ".entries", stats[4]));
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 12 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class EvaluateRetrievalPerformance method run.

/**
 * Run the algorithm: for a random sample of query objects, rank the relation
 * by distance and score how well objects with the same label are retrieved.
 *
 * @param database Database to run on (provides the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated query objects, the averaged
 *         average-precision and ROC scores, and the averaged kNN performance
 *         for k = 1..maxk
 * @throws AbortException if no query object had any matching object, or if
 *         the accumulated scores are NaN or negative
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

    // Scratch structures, shared by all query objects:
    // objects matching the label of the current query,
    ModifiableDBIDs posn = DBIDUtil.newHashSet();
    // distances from the current query,
    ModifiableDoubleDBIDList nlist = DBIDUtil.newDistanceDBIDList(relation.size());
    // and label counts seen in the kNN evaluation.
    Object2IntOpenHashMap<Object> counters = new Object2IntOpenHashMap<>();

    // Accumulators; these hold sums until they are divided by the sample count below.
    double map = 0.;
    double mroc = 0.;
    double[] knnperf = new double[maxk];
    int samples = 0;

    FiniteProgress objloop = null;
    if (LOG.isVerbose()) {
        objloop = new FiniteProgress("Processing query objects", ids.size(), LOG);
    }
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final Object label = lrelation.get(iter);
        findMatches(posn, lrelation, label);
        // Query objects without any match are not counted as samples.
        if (posn.size() > 0) {
            computeDistances(nlist, iter, distQuery, relation);
            // The list should cover the whole relation, minus the query itself unless includeSelf is set.
            final int expected = relation.size() - (includeSelf ? 0 : 1);
            if (nlist.size() != expected) {
                LOG.warning("Neighbor list does not have the desired size: " + nlist.size());
            }
            map += AveragePrecisionEvaluation.STATIC.evaluate(posn, nlist);
            mroc += ROCEvaluation.STATIC.evaluate(posn, nlist);
            KNNEvaluator.STATIC.evaluateKNN(knnperf, nlist, lrelation, counters, label);
            samples++;
        }
        LOG.incrementProcessed(objloop);
    }
    LOG.ensureCompleted(objloop);

    if (samples < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // Written as a negated ">=" so that NaN (for which every comparison is false) is rejected, too.
    if (!(map >= 0 && mroc >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }

    // Turn the sums into averages over the evaluated samples.
    map /= samples;
    mroc /= samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
    for (int i = 0; i < maxk; i++) {
        knnperf[i] /= samples;
        // Array slot i belongs to k = i + 1.
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (i + 1), knnperf[i]));
    }
    return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 13 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class HopkinsStatisticClusteringTendency method run.

/**
 * Runs the algorithm in the timed evaluation part.
 *
 * The Hopkins statistic is computed {@code rep} times; mean (and, for more
 * than one repetition, sample standard deviation) of the results are written
 * to the statistics log only.
 *
 * @param database Database context
 * @param relation Relation to analyze
 * @return always {@code null}; all output goes to the statistics log
 */
public Result run(Database database, Relation<NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Request one neighbor more than k.
    final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    final double[] min = new double[dim];
    final double[] extend = new double[dim];
    initializeDataExtends(relation, dim, min, extend);

    // Every result of this method is emitted via LOG.statistics.
    if (!LOG.isStatistics()) {
        LOG.warning("This algorithm must be used with at least logging level " + Level.STATISTICS);
    }

    MeanVariance hmean = new MeanVariance();
    MeanVariance umean = new MeanVariance();
    MeanVariance wmean = new MeanVariance();
    // Repeat the experiment and aggregate the outcomes, for a more stable result.
    for (int round = 0; round < this.rep; round++) {
        // Nearest neighbor distances for random objects from within the database
        final double w = computeNNForRealData(knnQuery, relation, dim);
        // Nearest neighbor distances for randomly created new uniform objects
        final double u = computeNNForUniformData(knnQuery, min, extend);
        // Hopkins statistic, = a / (1+a)
        final double h = u / (u + w);
        hmean.put(h);
        umean.put(u);
        wmean.put(w);
    }

    final String prefix = this.getClass().getName();
    LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
    LOG.statistics(new LongStatistic(prefix + ".dim", dim));
    LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hmean.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", umean.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wmean.getMean()));
    // Standard deviations are only reported for more than one repetition.
    if (rep > 1) {
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hmean.getSampleStddev()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", umean.getSampleStddev()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wmean.getSampleStddev()));
    }

    // Evaluate:
    final double x = hmean.getMean();
    // See Hopkins for a proof that x is supposedly Beta distributed.
    final double ix = BetaDistribution.regularizedIncBeta(x, sampleSize, sampleSize);
    // Report the complement when the mean statistic lies above one half.
    final double p;
    if (x > .5) {
        p = 1. - ix;
    } else {
        p = ix;
    }
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
    return null;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 14 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class InMemoryIDistanceIndex method logStatistics.

/**
 * Logs the statistics of the superclass, followed by the minimum, mean and
 * maximum of {@code size()} over all elements of {@code index}.
 */
@Override
public void logStatistics() {
    super.logStatistics();
    final MeanVarianceMinMax sizes = new MeanVarianceMinMax();
    for (int pos = 0; pos < index.length; pos++) {
        sizes.put(index[pos].size());
    }
    final String prefix = InMemoryIDistanceIndex.class.getName();
    // Minimum and maximum are truncated to int before being logged as long statistics.
    final int smallest = (int) sizes.getMin();
    LOG.statistics(new LongStatistic(prefix + ".size.min", smallest));
    LOG.statistics(new DoubleStatistic(prefix + ".size.mean", sizes.getMean()));
    final int largest = (int) sizes.getMax();
    LOG.statistics(new LongStatistic(prefix + ".size.max", largest));
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)

Example 15 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class KMeansBatchedLloyd method run.

/**
 * Run the batched k-means variant on the given relation.
 *
 * The ids are randomly split into {@code blocks} parts. In every iteration,
 * each part is passed to {@code assignToNearestCluster} and the means are
 * updated right afterwards. Iteration stops once a full pass over all parts
 * reported no change, or after {@code maxiter} iterations (a non-positive
 * {@code maxiter} disables this limit).
 *
 * @param database Database, passed on to the initializer
 * @param relation Relation to cluster
 * @return Clustering with one top-level cluster per non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

    // Cluster assignment store: one id set per cluster, presized to twice the average cluster size.
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    // Initial value -1 for every object.
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);

    // Scratch space, cleared before every batch.
    double[][] meanshift = new double[k][dim];
    int[] changesize = new int[k];
    double[] varsum = new double[k];

    IndefiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new IndefiniteProgress("K-Means iteration", LOG);
    }
    DoubleStatistic varstat = null;
    if (LOG.isStatistics()) {
        varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    }

    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        boolean changed = false;
        FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Batch", parts.length, LOG) : null;
        for (ArrayDBIDs part : parts) {
            // Initialize new means scratch space.
            for (double[] shift : meanshift) {
                Arrays.fill(shift, 0.);
            }
            Arrays.fill(changesize, 0);
            // NOTE(review): varsum is cleared for every batch, so the values logged below and stored
            // in the models may only reflect the last batch - confirm against assignToNearestCluster.
            Arrays.fill(varsum, 0.);
            if (assignToNearestCluster(relation, part, means, meanshift, changesize, clusters, assignment, varsum)) {
                changed = true;
            }
            // Recompute means.
            updateMeans(means, meanshift, clusters, changesize);
            LOG.incrementProcessed(pprog);
        }
        LOG.ensureCompleted(pprog);
        logVarstat(varstat, varsum);
        // Stop if no cluster assignment changed.
        if (!changed) {
            break;
        }
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Wrap result; empty clusters are left out.
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int c = 0; c < clusters.size(); c++) {
        DBIDs ids = clusters.get(c);
        if (ids.size() != 0) {
            KMeansModel model = new KMeansModel(means[c], varsum[c]);
            result.addToplevelCluster(new Cluster<>(ids, model));
        }
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)38 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)27 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)17 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)13 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)13 ArrayList (java.util.ArrayList)13 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)12 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)10 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)10 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)8 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)7 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)3 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3