
Example 16 with DoubleStatistic

Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class OutlierRankingEvaluation, method evaluateOrderingResult:

private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
    if (order.size() != size) {
        throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    EvaluationResult res = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
    DBIDsTest test = new DBIDsTest(positiveids);
    double rate = positiveids.size() / (double) size;
    MeasurementGroup g = res.newGroup("Evaluation measures:");
    double rocauc = ROCEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
    double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("Average Precision", avep, 0., 1., rate, false);
    double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("R-Precision", rprec, 0., 1., rate, false);
    double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("Maximum F1", maxf1, 0., 1., rate, false);
    g = res.newGroup("Adjusted for chance:");
    double adjauc = 2 * rocauc - 1;
    g.addMeasure("Adjusted AUC", adjauc, 0., 1., 0., false);
    double adjavep = (avep - rate) / (1 - rate);
    g.addMeasure("Adjusted AveP", adjavep, 0., 1., 0., false);
    double adjrprec = (rprec - rate) / (1 - rate);
    g.addMeasure("Adjusted R-Prec", adjrprec, 0., 1., 0., false);
    double adjmaxf1 = (maxf1 - rate) / (1 - rate);
    g.addMeasure("Adjusted Max F1", adjmaxf1, 0., 1., 0., false);
    if (LOG.isStatistics()) {
        LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
        LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjauc));
        LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
        LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjavep));
        LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
        LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjrprec));
        LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
        LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjmaxf1));
    }
    return res;
}
Also used: DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic), SimpleAdapter (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter), DBIDsTest (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest), MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)
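The "Adjusted for chance" group rescales each measure so that the expected score of a random ranking becomes 0 and a perfect ranking stays 1; the method uses the positive rate as the random-ranking baseline for Average Precision, R-Precision, and Maximum F1, and 0.5 for ROC AUC. A minimal sketch of that normalization, using a hypothetical adjustForChance helper that is not part of ELKI:

static double adjustForChance(double value, double expected) {
    // Map the expected (random-ranking) score to 0 and a perfect score of 1 to 1.
    return (value - expected) / (1 - expected);
}

// With avep, rocauc and rate as in the method above:
// adjustForChance(avep, rate)  is the same as (avep - rate) / (1 - rate)
// adjustForChance(rocauc, .5)  is the same as 2 * rocauc - 1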

Example 17 with DoubleStatistic

Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class IntrinsicNearestNeighborAffinityMatrixBuilder, method computePij:

/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output array for the computed affinities
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    Mean mid = LOG.isStatistics() ? new Mean() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
        double beta = computeSigma(ix.getOffset(), dists, perplexity, logPerp, pij[ix.getOffset()] = new double[dists.size()]);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[ix.getOffset()] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (mid != null) {
        LOG.statistics(new DoubleStatistic(getClass() + ".average-original-id", mid.getMean()));
    }
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            if (j > i) {
                // Exploit symmetry.
                continue;
            }
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
            }
        }
    }
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry:
                if (i < j) {
                    // Symmetrize
                    final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found, so zero.
                pij_i[offi] = 0;
            }
        }
    }
    if (LOG.isStatistics()) {
        // timer != null, mv != null
        LOG.statistics(timer.end());
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used: Mean (de.lmu.ifi.dbs.elki.math.Mean), FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress), Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration), IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray), DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic), MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance), DoubleArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)
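The symmetrization loops rely on containsIndex, a lookup defined elsewhere in the affinity matrix builder that returns the offset of a given point index inside a neighbor index array, or a negative value if it is absent. A minimal sketch of such a lookup, assuming an unsorted index array; the actual ELKI implementation may differ:

static int containsIndex(int[] neighborIndices, int i) {
    // Linear scan for the position of i; return -1 when i is not among the neighbors.
    for (int off = 0; off < neighborIndices.length; off++) {
        if (neighborIndices[off] == i) {
            return off;
        }
    }
    return -1;
}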

Example 18 with DoubleStatistic

Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class DistanceQuantileSampler, method run:

/**
 * Run the distance quantile sampler.
 *
 * @param database Database to process
 * @param rel Relation to process
 * @return Distances sample
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
    DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
    int size = rel.size();
    long pairs = (size * (long) size) >> 1;
    final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
    if (ssize > Integer.MAX_VALUE) {
        throw new AbortException("Sampling size too large.");
    }
    final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
    DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
    ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
    DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
    Random r = rand.getSingleThreadedRandom();
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
    for (long i = 0; i < ssize; i++) {
        int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
        double dist = dq.distance(i1.seek(x), i2.seek(y));
        // Skip NaN, and/or zeros.
        if (dist != dist || (nozeros && dist < Double.MIN_NORMAL)) {
            continue;
        }
        heap.add(dist, qsize);
        LOG.incrementProcessed(prog);
    }
    LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
    LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
    LOG.statistics(new DoubleStatistic(PREFIX + ".distance", heap.peek()));
    LOG.ensureCompleted(prog);
    Collection<String> header = Arrays.asList(new String[] { "Distance" });
    Collection<double[]> data = Arrays.asList(new double[][] { new double[] { heap.peek() } });
    return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
Also used: FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress), DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter), DoubleMaxHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap), DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic), CollectionResult (de.lmu.ifi.dbs.elki.result.CollectionResult), Random (java.util.Random), LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic), ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs), AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)
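The bounded max-heap is what turns random sampling into a quantile estimate: heap.add(dist, qsize) keeps only the qsize smallest sampled distances, so heap.peek() is the largest of those, i.e. approximately the requested quantile of the sampled pairwise distances. A self-contained sketch of the same idea using java.util.PriorityQueue, for illustration only; ELKI's DoubleMaxHeap does the same without boxing:

import java.util.Collections;
import java.util.PriorityQueue;

/** Illustrative only: keep the k smallest values; peek() is then the k-th smallest. */
static double kthSmallest(double[] samples, int k) {
    // Max-heap over the k smallest values seen so far (assumes 1 <= k <= samples.length).
    PriorityQueue<Double> heap = new PriorityQueue<>(Collections.reverseOrder());
    for (double d : samples) {
        if (heap.size() < k) {
            heap.add(d);
        } else if (d < heap.peek()) {
            heap.poll(); // drop the current largest of the k smallest
            heap.add(d);
        }
    }
    return heap.peek(); // approximately the (k / samples.length) quantile of the sample
}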

Example 19 with DoubleStatistic

Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class EstimateIntrinsicDimensionality, method run:

public Result run(Database database, Relation<O> relation) {
    DBIDs allids = relation.getDBIDs();
    // Number of samples to draw.
    int ssize = (int) ((samples > 1.) ? samples : Math.ceil(samples * allids.size()));
    // Number of neighbors to fetch (+ query point)
    int kk = 1 + (int) ((krate > 1.) ? krate : Math.ceil(krate * allids.size()));
    DBIDs sampleids = DBIDUtil.randomSample(allids, ssize, RandomFactory.DEFAULT);
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(dq, kk);
    double[] idim = new double[ssize];
    int samples = 0;
    for (DBIDIter iter = sampleids.iter(); iter.valid(); iter.advance()) {
        idim[samples] = estimator.estimate(knnq, iter, kk);
        ++samples;
    }
    double id = (samples > 1) ? QuickSelect.median(idim, 0, samples) : -1;
    LOG.statistics(new DoubleStatistic(EstimateIntrinsicDimensionality.class.getName() + ".intrinsic-dimensionality", id));
    return null;
}
Also used: DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic), DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs), DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
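What estimator.estimate(knnq, iter, kk) returns depends on the configured IntrinsicDimensionalityEstimator; the method then reports the median of the per-point estimates over the sample. As a hedged illustration of the general idea, here is a standalone sketch of the classic maximum-likelihood (Hill / Levina-Bickel) estimate computed from sorted k-nearest-neighbor distances; this is one common estimator, not necessarily the one configured:

/** Illustrative ML estimate of intrinsic dimensionality from sorted kNN distances. */
static double mleEstimate(double[] sortedKnnDists) {
    final int k = sortedKnnDists.length;
    final double dk = sortedKnnDists[k - 1]; // distance to the k-th neighbor
    double sum = 0.;
    int valid = 0;
    for (int j = 0; j < k - 1; j++) {
        if (sortedKnnDists[j] > 0.) {
            sum += Math.log(dk / sortedKnnDists[j]);
            valid++;
        }
    }
    // Larger log-ratios (neighbors much closer than dk) yield a lower dimensionality estimate.
    return sum > 0 ? valid / sum : 0.;
}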

Example 20 with DoubleStatistic

Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class RangeQuerySelectivity, method run:

public Result run(Database database, Relation<V> relation) {
    DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<V> rangeQuery = database.getRangeQuery(distQuery, radius);
    MeanVariance numres = new MeanVariance();
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Performing range queries", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        numres.put(rangeQuery.getRangeForDBID(iter, radius).size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    final String prefix = this.getClass().getName();
    LOG.statistics(new DoubleStatistic(prefix + ".mean", numres.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".std", numres.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".norm.mean", numres.getMean() / relation.size()));
    LOG.statistics(new DoubleStatistic(prefix + ".norm.std", numres.getSampleStddev() / relation.size()));
    LOG.statistics(new LongStatistic(prefix + ".samplesize", ids.size()));
    return null;
}
Also used: DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic), MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance), LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic), DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs), FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress), DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
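MeanVariance accumulates the result set sizes one query at a time, and the ".norm" statistics divide by relation.size() so the logged values can be read as selectivity, i.e. the average fraction of the data set returned per range query. A minimal standalone sketch of such an online accumulator using Welford's update; this is an assumption about the approach for illustration, ELKI's MeanVariance is the authoritative implementation:

/** Illustrative online mean / sample standard deviation accumulator (Welford's algorithm). */
static final class OnlineMeanVariance {
    private long n = 0;
    private double mean = 0., m2 = 0.;

    void put(double x) {
        n++;
        double delta = x - mean;
        mean += delta / n;
        m2 += delta * (x - mean);
    }

    double getMean() {
        return mean;
    }

    double getSampleStddev() {
        return n > 1 ? Math.sqrt(m2 / (n - 1)) : 0.;
    }
}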

Aggregations

DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 38 usages
LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic): 27 usages
StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic): 17 usages
DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 14 usages
DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 14 usages
Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 13 usages
IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress): 13 usages
ArrayList (java.util.ArrayList): 13 usages
ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 12 usages
KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel): 10 usages
WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore): 10 usages
MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup): 10 usages
FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 9 usages
MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 8 usages
EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult): 8 usages
NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 7 usages
Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration): 5 usages
ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 3 usages
ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 3 usages
AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 3 usages
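All of the examples above follow the same logging pattern: build a statistic object with a hierarchical key (usually the class name plus a suffix) and hand it to LOG.statistics(), typically guarded by LOG.isStatistics() when the value is expensive to compute. A minimal sketch of that pattern; the class name and method below are illustrative, while the Logging, DoubleStatistic, and LongStatistic calls are the ones used in the examples:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;

/** Hypothetical class illustrating the statistics logging pattern from the examples above. */
public class StatisticsLoggingExample {
    private static final Logging LOG = Logging.getLogger(StatisticsLoggingExample.class);

    public void logResults(double meanDistance, long sampleSize) {
        final String prefix = this.getClass().getName();
        if (LOG.isStatistics()) {
            LOG.statistics(new DoubleStatistic(prefix + ".distance.mean", meanDistance));
            LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
        }
    }
}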