Search in sources :

Example 16 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class DistanceQuantileSampler method run.

/**
 * Run the distance quantile sampler.
 *
 * @param database
 * @param rel
 * @return Distances sample
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
    DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
    int size = rel.size();
    long pairs = (size * (long) size) >> 1;
    final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
    if (ssize > Integer.MAX_VALUE) {
        throw new AbortException("Sampling size too large.");
    }
    final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
    DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
    ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
    DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
    Random r = rand.getSingleThreadedRandom();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
    for (long i = 0; i < ssize; i++) {
        int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
        double dist = dq.distance(i1.seek(x), i2.seek(y));
        // Skip NaN, and/or zeros.
        if (dist != dist || (nozeros && dist < Double.MIN_NORMAL)) {
            continue;
        }
        heap.add(dist, qsize);
        LOG.incrementProcessed(prog);
    }
    LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
    LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
    LOG.statistics(new DoubleStatistic(PREFIX + ".distance", heap.peek()));
    LOG.ensureCompleted(prog);
    Collection<String> header = Arrays.asList(new String[] { "Distance" });
    Collection<double[]> data = Arrays.asList(new double[][] { new double[] { heap.peek() } });
    return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleMaxHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) Random(java.util.Random) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 17 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class RangeQuerySelectivity method run.

public Result run(Database database, Relation<V> relation) {
    DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<V> rangeQuery = database.getRangeQuery(distQuery, radius);
    MeanVariance numres = new MeanVariance();
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Performing range queries", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        numres.put(rangeQuery.getRangeForDBID(iter, radius).size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    final String prefix = this.getClass().getName();
    LOG.statistics(new DoubleStatistic(prefix + ".mean", numres.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".std", numres.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".norm.mean", numres.getMean() / relation.size()));
    LOG.statistics(new DoubleStatistic(prefix + ".norm.std", numres.getSampleStddev() / relation.size()));
    LOG.statistics(new LongStatistic(prefix + ".samplesize", ids.size()));
    return null;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 18 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class INFLO method run.

/**
 * Run the algorithm
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("INFLO", 3) : null;
    // Step one: find the kNN
    LOG.beginStep(stepprog, 1, "Materializing nearest-neighbor sets.");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), kplus1);
    // Step two: find the RkNN, minus kNN.
    LOG.beginStep(stepprog, 2, "Materialize reverse NN.");
    ModifiableDBIDs pruned = DBIDUtil.newHashSet();
    // RNNS
    WritableDataStore<ModifiableDBIDs> rnns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
    // init the rNN
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        rnns.put(iditer, DBIDUtil.newArray());
    }
    computeNeighborhoods(relation, knnq, pruned, rnns);
    // Step three: compute INFLO scores
    LOG.beginStep(stepprog, 3, "Compute INFLO scores.");
    // Calculate INFLO for any Object
    DoubleMinMax inflominmax = new DoubleMinMax();
    WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Note: this modifies knns, by adding rknns!
    computeINFLO(relation, pruned, knnq, rnns, inflos, inflominmax);
    LOG.setCompleted(stepprog);
    LOG.statistics(new LongStatistic(INFLO.class.getName() + ".pruned", pruned.size()));
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Influence Outlier Score", "inflo-outlier", inflos, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 19 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class KMeansElkan method run.

@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    // Elkan bounds
    WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
    WritableDataStore<double[]> lower = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, double[].class);
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        // Filled with 0.
        lower.put(it, new double[k]);
    }
    // Storage for updated means:
    final int dim = means[0].length;
    double[][] sums = new double[k][dim];
    // Cluster separation
    double[] sep = new double[k];
    // Cluster distances
    double[][] cdist = new double[k][k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(this.getClass().getName() + ".reassignments") : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        int changed;
        if (iteration == 0) {
            changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
        } else {
            // #1
            recomputeSeperation(means, sep, cdist);
            changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, cdist, upper, lower);
        }
        if (rstat != null) {
            rstat.setLong(changed);
            LOG.statistics(rstat);
        }
        // Stop if no cluster assignment changed.
        if (changed == 0) {
            break;
        }
        // Recompute means.
        for (int i = 0; i < k; i++) {
            final int s = clusters.get(i).size();
            timesEquals(sums[i], s > 0 ? 1. / s : 1.);
        }
        // Overwrites sep
        maxMoved(means, sums, sep);
        updateBounds(relation, assignment, upper, lower, sep);
        for (int i = 0; i < k; i++) {
            final int s = clusters.get(i).size();
            System.arraycopy(sums[i], 0, means[i], 0, dim);
            // Restore to sum for next iteration
            timesEquals(sums[i], s > 0 ? s : 1.);
        }
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    upper.destroy();
    lower.destroy();
    // Wrap result
    double totalvariance = 0.;
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        DBIDs ids = clusters.get(i);
        if (ids.size() == 0) {
            continue;
        }
        double[] mean = means[i];
        double varsum = 0.;
        if (varstat) {
            DoubleVector mvec = DoubleVector.wrap(mean);
            for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
                varsum += distanceFunction.distance(mvec, relation.get(it));
            }
            totalvariance += varsum;
        }
        KMeansModel model = new KMeansModel(mean, varsum);
        result.addToplevelCluster(new Cluster<>(ids, model));
    }
    if (LOG.isStatistics() && varstat) {
        LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 20 with LongStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.

the class KMeansHybridLloydMacQueen method run.

@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration += 2) {
        {
            // MacQueen
            LOG.incrementProcessed(prog);
            boolean changed = macQueenIterate(relation, means, clusters, assignment, varsum);
            logVarstat(varstat, varsum);
            if (!changed) {
                break;
            }
        }
        {
            // Lloyd
            LOG.incrementProcessed(prog);
            boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
            logVarstat(varstat, varsum);
            // Stop if no cluster assignment changed.
            if (!changed) {
                break;
            }
            // Recompute means.
            means = means(clusters, means, relation);
        }
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        DBIDs ids = clusters.get(i);
        if (ids.size() == 0) {
            continue;
        }
        KMeansModel model = new KMeansModel(means[i], varsum[i]);
        result.addToplevelCluster(new Cluster<>(ids, model));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)44 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)27 ArrayList (java.util.ArrayList)20 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)19 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)12 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)9 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)7 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)7 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 Logging (de.lmu.ifi.dbs.elki.logging.Logging)4 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)4