Search in sources :

Example 1 with CollectionResult

use of de.lmu.ifi.dbs.elki.result.CollectionResult in project elki by elki-project.

The method run of the class AveragePrecisionAtK.

/**
 * Evaluate the precision at every rank 1..k over a random sample of query
 * objects.
 *
 * For each sampled object, its nearest neighbors are visited in the order
 * returned by the kNN query. At each rank, the fraction of neighbors seen so
 * far whose label matches the query label (as decided by {@code match}) is
 * recorded. The per-rank values are then aggregated over all sampled objects.
 *
 * @param database Database used to obtain the distance and kNN queries
 * @param relation Relation the distances are computed on
 * @param lrelation Relation providing the labels that are compared
 * @return One row per rank, containing: rank (1-based), mean precision,
 *         sample standard deviation (0 if at most one value was recorded),
 *         minimum, maximum and the number of recorded values.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // When the query object itself is skipped, request one extra neighbor.
    final int numNeighbors = includeSelf ? k : k + 1;
    final KNNQuery<O> neighborQuery = database.getKNNQuery(distanceQuery, numNeighbors);
    final MeanVarianceMinMax[] perRank = MeanVarianceMinMax.newArray(k);
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Computing nearest neighbors", sample.size(), LOG);
    }

    for (DBIDIter query = sample.iter(); query.valid(); query.advance()) {
        final KNNList neighbors = neighborQuery.getKNNForDBID(query, numNeighbors);
        final Object queryLabel = lrelation.get(query);
        int hits = 0;
        int rank = 0;
        DBIDIter neighbor = neighbors.iter();
        while (rank < k && neighbor.valid()) {
            // The query object only consumes a rank when includeSelf is set.
            if (includeSelf || !DBIDUtil.equal(query, neighbor)) {
                if (match(queryLabel, lrelation.get(neighbor))) {
                    hits++;
                }
                perRank[rank].put(hits / (double) (rank + 1));
                rank++;
            }
            neighbor.advance();
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Flatten the per-rank statistics into one double[] row per rank.
    final Collection<double[]> rows = new ArrayList<>(k);
    for (int pos = 0; pos < k; pos++) {
        final MeanVarianceMinMax stats = perRank[pos];
        double stddev = 0.;
        if (stats.getCount() > 1.) {
            stddev = stats.getSampleStddev();
        }
        rows.add(new double[] { pos + 1, stats.getMean(), stddev, stats.getMin(), stats.getMax(), stats.getCount() });
    }
    return new CollectionResult<>("Average Precision", "average-precision", rows);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 2 with CollectionResult

use of de.lmu.ifi.dbs.elki.result.CollectionResult in project elki by elki-project.

The method run of the class DistanceQuantileSampler.

/**
 * Run the distance quantile sampler.
 *
 * Draws random pairs of two different positions in the relation, computes
 * their distance and offers it to a max-heap limited to the number of values
 * that corresponds to the requested quantile. The top of that heap is
 * reported as the result. NaN distances (and, if {@code nozeros} is set,
 * distances below {@code Double.MIN_NORMAL}) are not added to the heap, but
 * still count as a drawn sample.
 *
 * @param database Database (not used by this method)
 * @param rel Relation whose objects are sampled
 * @return Distances sample: a single row holding the value at the top of the
 *         heap, with the column header "Distance"
 * @throws AbortException if the relation holds fewer than two objects, or if
 *         the resulting sample size exceeds {@code Integer.MAX_VALUE}
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
    DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
    int size = rel.size();
    // Random.nextInt(bound) needs a positive bound, and a pair needs two
    // different objects; fail with a clear message instead.
    if (size < 2) {
        throw new AbortException("At least two objects are required to sample distances, but the relation contains " + size + ".");
    }
    long pairs = (size * (long) size) >> 1;
    // Values up to 1 are a fraction of the pairs, larger values an absolute count.
    final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
    if (ssize > Integer.MAX_VALUE) {
        throw new AbortException("Sampling size too large.");
    }
    final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
    DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
    ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
    DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
    Random r = rand.getSingleThreadedRandom();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
    for (long i = 0; i < ssize; i++) {
        // x is in [1, size - 1] and y in [0, x - 1], hence always y < x.
        int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
        double dist = dq.distance(i1.seek(x), i2.seek(y));
        // Skip NaN, and/or zeros.
        final boolean skip = Double.isNaN(dist) || (nozeros && dist < Double.MIN_NORMAL);
        if (!skip) {
            heap.add(dist, qsize);
        }
        // Count every drawn sample, including skipped ones, so that the
        // progress reaches ssize when the loop ends.
        LOG.incrementProcessed(prog);
    }
    LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
    LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
    // Read the heap top once and use it for both the statistic and the result.
    final double result = heap.peek();
    LOG.statistics(new DoubleStatistic(PREFIX + ".distance", result));
    LOG.ensureCompleted(prog);
    Collection<String> header = Arrays.asList("Distance");
    Collection<double[]> data = Arrays.asList(new double[][] { new double[] { result } });
    return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleMaxHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) Random(java.util.Random) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 CollectionResult (de.lmu.ifi.dbs.elki.result.CollectionResult)2 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)1 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)1 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)1 MeanVarianceMinMax (de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)1 DoubleMaxHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1