Search in sources :

Example 76 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class DistanceQuantileSampler method run.

/**
 * Run the distance quantile sampler.
 *
 * Draws random pairs of distinct objects from the relation, computes their
 * distance, and reports the requested quantile of the sampled distances.
 *
 * @param database Database (not referenced by this method body)
 * @param rel Relation to draw the object pairs from
 * @return Distances sample: a single row holding the estimated quantile
 *         distance, with the column header "Distance"
 * @throws AbortException if the resulting sample size exceeds
 *         {@link Integer#MAX_VALUE}
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
    DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
    int size = rel.size();
    // Half of size squared; the cast to long avoids int overflow.
    long pairs = (size * (long) size) >> 1;
    // sampling <= 1 is a share of the pairs; larger values are an absolute count.
    final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
    if (ssize > Integer.MAX_VALUE) {
        throw new AbortException("Sampling size too large.");
    }
    // Number of distances to retain; at least one, even for quantile <= 0.
    final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
    DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
    ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
    DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
    Random r = rand.getSingleThreadedRandom();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
    for (long i = 0; i < ssize; i++) {
        // x is drawn from [1, size) and y from [0, x), so x != y always holds.
        // NOTE(review): nextInt(size - 1) requires at least two objects in the
        // relation - confirm whether callers guarantee this.
        int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
        double dist = dq.distance(i1.seek(x), i2.seek(y));
        // The progress total is ssize, so every drawn sample has to be counted,
        // including the ones rejected below; otherwise the progress can never
        // reach its total when samples are skipped.
        LOG.incrementProcessed(prog);
        // Skip NaN, and/or zeros.
        final boolean skip = Double.isNaN(dist) || (nozeros && dist < Double.MIN_NORMAL);
        if (!skip) {
            // Bounded insert - presumably retains only the qsize smallest
            // distances, which makes the heap top the quantile estimate.
            heap.add(dist, qsize);
        }
    }
    // NOTE(review): if every sample was skipped the heap is empty here - confirm
    // what peek() yields in that case.
    final double qdist = heap.peek();
    LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
    LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
    LOG.statistics(new DoubleStatistic(PREFIX + ".distance", qdist));
    LOG.ensureCompleted(prog);
    Collection<String> header = Arrays.asList("Distance");
    Collection<double[]> data = Arrays.asList(new double[][] { new double[] { qdist } });
    return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleMaxHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) Random(java.util.Random) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 77 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class DistanceStatisticsWithClasses method exactMinMax.

/**
 * Determine the exact smallest and largest pairwise distance.
 *
 * Every ordered pair of distinct objects in the relation is evaluated, so
 * the number of distance computations is quadratic in the relation size.
 *
 * @param relation Relation to process
 * @param distFunc Distance query used to compare two objects
 * @return Exact minimum and maximum of the observed distances
 */
private DoubleMinMax exactMinMax(Relation<O> relation, DistanceQuery<O> distFunc) {
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Exact fitting distance computations", relation.size(), LOG);
    }
    final DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter outer = relation.iterDBIDs(); outer.valid(); outer.advance()) {
        for (DBIDIter inner = relation.iterDBIDs(); inner.valid(); inner.advance()) {
            // An object is never compared with itself.
            if (!DBIDUtil.equal(outer, inner)) {
                range.put(distFunc.distance(outer, inner));
            }
        }
        // One progress step per outer object.
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    return range;
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)

Example 78 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class EvaluateRankingQuality method run.

/**
 * Evaluate the ranking quality of the distance function per cluster.
 *
 * The data is clustered by label. The members of each cluster are ordered
 * by their Mahalanobis distance to the cluster mean, and for every member
 * the ranking of its neighbors over the full relation is scored against the
 * cluster. The scores are collected in a histogram indexed by the member's
 * relative position within its cluster.
 *
 * @param database Database to process
 * @return Histogram rows of (bin center, count, mean, sample variance)
 */
@Override
public HistogramResult run(Database database) {
    final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
    final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<V> knnQuery = database.getKNNQuery(distQuery, relation.size());
    if (LOG.isVerbose()) {
        LOG.verbose("Preprocessing clusters...");
    }
    // Label-based clustering provides the reference partition.
    final Collection<Cluster<Model>> clusters = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
    // Per-cluster mean vector and covariance matrix.
    final HashMap<Cluster<?>, double[]> means = new HashMap<>(clusters.size());
    final HashMap<Cluster<?>, double[][]> covariances = new HashMap<>(clusters.size());
    for (Cluster<?> cluster : clusters) {
        final CovarianceMatrix cm = CovarianceMatrix.make(relation, cluster.getIDs());
        means.put(cluster, cm.getMeanVector());
        covariances.put(cluster, cm.destroyToPopulationMatrix());
    }
    final MeanVarianceStaticHistogram histogram = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);
    if (LOG.isVerbose()) {
        LOG.verbose("Processing points...");
    }
    FiniteProgress rocloop = null;
    if (LOG.isVerbose()) {
        rocloop = new FiniteProgress("Computing ROC AUC values", relation.size(), LOG);
    }
    final ROCEvaluation roc = new ROCEvaluation();
    for (Cluster<?> cluster : clusters) {
        final double[] mean = means.get(cluster);
        final double[][] cov = covariances.get(cluster);
        // Order the cluster members by their distance to the cluster mean.
        final ModifiableDoubleDBIDList members = DBIDUtil.newDistanceDBIDList(cluster.size());
        for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
            members.add(mahalanobisDistance(cov, relation.get(member).toArray(), mean), member);
        }
        members.sort();
        // Score the full neighbor ranking of each member against its cluster.
        for (DBIDArrayIter pos = members.iter(); pos.valid(); pos.advance()) {
            final KNNList neighbors = knnQuery.getKNNForDBID(pos, relation.size());
            final double score = EvaluateClustering.evaluateRanking(roc, cluster, neighbors);
            histogram.put(pos.getOffset() / (double) cluster.size(), score);
            LOG.incrementProcessed(rocloop);
        }
    }
    LOG.ensureCompleted(rocloop);
    // Flatten the histogram into one double[] row per bin.
    final Collection<double[]> rows = new ArrayList<>(relation.size());
    for (ObjHistogram.Iter<MeanVariance> bin = histogram.iter(); bin.valid(); bin.advance()) {
        final double center = bin.getCenter();
        final double count = bin.getValue().getCount();
        final double mean = bin.getValue().getMean();
        final double variance = bin.getValue().getSampleVariance();
        rows.add(new double[] { center, count, mean, variance });
    }
    return new HistogramResult("Ranking Quality Histogram", "ranking-histogram", rows);
}
Also used : ObjHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ROCEvaluation(de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MeanVarianceStaticHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.MeanVarianceStaticHistogram) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 79 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class RangeQuerySelectivity method run.

/**
 * Measure how many results range queries of the configured radius return.
 *
 * Range queries are executed for a random sample of the relation, and the
 * mean and sample standard deviation of the result sizes are written to the
 * statistics log, both as absolute values and divided by the relation size.
 *
 * @param database Database providing the distance and range queries
 * @param relation Relation to query
 * @return always {@code null}; the output goes to the statistics log only
 */
public Result run(Database database, Relation<V> relation) {
    final DistanceQuery<V> dq = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<V> rq = database.getRangeQuery(dq, radius);
    final MeanVariance resultSizes = new MeanVariance();
    final DBIDs sampled = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Performing range queries", sampled.size(), LOG);
    }
    for (DBIDIter it = sampled.iter(); it.valid(); it.advance()) {
        final int hits = rq.getRangeForDBID(it, radius).size();
        resultSizes.put(hits);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Statistic keys are prefixed with the runtime class name.
    final String key = getClass().getName();
    LOG.statistics(new DoubleStatistic(key + ".mean", resultSizes.getMean()));
    LOG.statistics(new DoubleStatistic(key + ".std", resultSizes.getSampleStddev()));
    final double normMean = resultSizes.getMean() / relation.size();
    LOG.statistics(new DoubleStatistic(key + ".norm.mean", normMean));
    final double normStd = resultSizes.getSampleStddev() / relation.size();
    LOG.statistics(new DoubleStatistic(key + ".norm.std", normStd));
    LOG.statistics(new LongStatistic(key + ".samplesize", sampled.size()));
    return null;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 80 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class KNNDistancesSampler method run.

/**
 * Collects the kNN-distances of a random sample of the objects in the
 * specified relation.
 *
 * The sample size is a share of the relation size when the {@code sample}
 * setting is at most 1, and an absolute count otherwise.
 *
 * @param database Database providing the distance and kNN queries
 * @param relation Relation to sample from
 * @return Result wrapping the sampled kNN distances and {@code k}
 */
public KNNDistanceOrderResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Queries use k + 1 neighbors - presumably because the query object is
    // part of its own result; verify against the kNN query contract.
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    final int size;
    if (sample <= 1.) {
        size = (int) Math.ceil(relation.size() * sample);
    } else {
        size = (int) sample;
    }
    final DBIDs sampleIds = DBIDUtil.randomSample(relation.getDBIDs(), size, rnd);
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Sampling kNN distances", size, LOG);
    }
    final double[] knnDistances = new double[size];
    int pos = 0;
    for (DBIDIter it = sampleIds.iter(); it.valid(); it.advance()) {
        knnDistances[pos++] = knnQuery.getKNNForDBID(it, k + 1).getKNNDistance();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return new KNNDistanceOrderResult(knnDistances, k);
}
Also used : KNNDistanceOrderResult(de.lmu.ifi.dbs.elki.algorithm.KNNDistancesSampler.KNNDistanceOrderResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12