Search in sources :

Example 21 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class OutlierRankingEvaluation, method evaluateOutlierResult.

/**
 * Computes ranking-quality measures for one outlier result and records them.
 *
 * <p>The values are stored in two measurement groups of the evaluation result:
 * "Evaluation measures" holds the raw values, "Adjusted for chance" holds each
 * value rescaled as {@code (value - expected) / (1 - expected)} (for ROC AUC:
 * {@code 2 * auc - 1}). If statistics logging is enabled, every value is also
 * logged under a name prefixed with {@code key}.
 *
 * @param size number of objects the positive rate is computed against
 *        (presumably the size of the evaluated data set; confirm with caller)
 * @param positiveids identifiers of the objects counted as positives
 * @param or outlier result whose ranking is evaluated
 * @return the evaluation result that received the measures
 */
private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
    final EvaluationResult evaluation = EvaluationResult.findOrCreate(or.getHierarchy(), or, "Evaluation of ranking", "ranking-evaluation");
    final DBIDsTest positives = new DBIDsTest(positiveids);
    final int numPositives = positiveids.size();
    // Share of positives; serves as the expected value for several measures.
    final double positiveRate = numPositives / (double) size;

    // --- Raw measures ---
    final MeasurementGroup rawGroup = evaluation.findOrCreateGroup("Evaluation measures");

    final double auc = ROCEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    // Only the ROC AUC entry is guarded against being added a second time.
    final boolean aucAlreadyRecorded = rawGroup.hasMeasure("ROC AUC");
    if (!aucAlreadyRecorded) {
        rawGroup.addMeasure("ROC AUC", auc, 0., 1., .5, false);
    }

    final double averagePrecision = AveragePrecisionEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    rawGroup.addMeasure("Average Precision", averagePrecision, 0., 1., positiveRate, false);

    final double rPrecision = PrecisionAtKEvaluation.RPRECISION.evaluate(positives, new OutlierScoreAdapter(or));
    rawGroup.addMeasure("R-Precision", rPrecision, 0., 1., positiveRate, false);

    final double bestF1 = MaximumF1Evaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    rawGroup.addMeasure("Maximum F1", bestF1, 0., 1., positiveRate, false);

    final double dcgUpperBound = DCGEvaluation.maximum(numPositives);
    final double dcgValue = DCGEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    final double dcgExpected = DCGEvaluation.STATIC.expected(numPositives, size);
    rawGroup.addMeasure("DCG", dcgValue, 0., dcgUpperBound, dcgExpected, false);

    final double ndcgValue = NDCGEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
    rawGroup.addMeasure("NDCG", ndcgValue, 0., 1., NDCGEvaluation.STATIC.expected(numPositives, size), false);

    // --- Chance-adjusted measures ---
    final MeasurementGroup adjustedGroup = evaluation.findOrCreateGroup("Adjusted for chance");

    final double adjustedAuc = 2 * auc - 1;
    adjustedGroup.addMeasure("Adjusted AUC", adjustedAuc, 0., 1., 0., false);

    final double adjustedAveragePrecision = (averagePrecision - positiveRate) / (1 - positiveRate);
    adjustedGroup.addMeasure("Adjusted AveP", adjustedAveragePrecision, 0., 1., 0., false);

    final double adjustedRPrecision = (rPrecision - positiveRate) / (1 - positiveRate);
    adjustedGroup.addMeasure("Adjusted R-Prec", adjustedRPrecision, 0., 1., 0., false);

    final double adjustedBestF1 = (bestF1 - positiveRate) / (1 - positiveRate);
    adjustedGroup.addMeasure("Adjusted Max F1", adjustedBestF1, 0., 1., 0., false);

    final double ndcgExpected = NDCGEvaluation.STATIC.expected(numPositives, size);
    final double adjustedNdcg = (ndcgValue - ndcgExpected) / (1. - ndcgExpected);
    adjustedGroup.addMeasure("Adjusted DCG", adjustedNdcg, 0., 1., 0., false);

    // Without statistics logging there is nothing left to do.
    if (!LOG.isStatistics()) {
        return evaluation;
    }
    LOG.statistics(new DoubleStatistic(key + ".rocauc", auc));
    LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjustedAuc));
    LOG.statistics(new DoubleStatistic(key + ".precision.average", averagePrecision));
    LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjustedAveragePrecision));
    LOG.statistics(new DoubleStatistic(key + ".precision.r", rPrecision));
    LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjustedRPrecision));
    LOG.statistics(new DoubleStatistic(key + ".f1.maximum", bestF1));
    LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjustedBestF1));
    LOG.statistics(new DoubleStatistic(key + ".dcg", dcgValue));
    LOG.statistics(new DoubleStatistic(key + ".dcg.normalized", ndcgValue));
    LOG.statistics(new DoubleStatistic(key + ".dcg.adjusted", adjustedNdcg));
    return evaluation;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) OutlierScoreAdapter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)

Example 22 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class KMeansElkan, method run.

/**
 * Runs k-means on the given relation, maintaining per-object upper and lower
 * distance bounds (see "Elkan bounds" below) that are handed to the assignment
 * helpers.
 *
 * <p>The loop stops when an assignment pass reports zero changes, or when
 * {@code maxiter} iterations have been run (a non-positive {@code maxiter}
 * disables the limit). Empty clusters are omitted from the returned clustering.
 *
 * @param database database passed on to the initializer
 * @param relation relation holding the vectors to cluster
 * @return clustering with one top-level cluster per non-empty cluster; an
 *         empty clustering if the relation is empty
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    // Nothing to cluster: return an empty result.
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    // Choose initial means (the initializer is logged first when statistics are enabled)
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store: one id set per cluster, presized to 2 * size / k
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    // Per-object cluster index; -1 is presumably the "not yet assigned" default (confirm).
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    // Elkan bounds: one upper bound per object (initially +infinity) and k lower bounds per object
    WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
    WritableDataStore<double[]> lower = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, double[].class);
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        // Filled with 0.
        lower.put(it, new double[k]);
    }
    // Storage for updated means: per-cluster vector sums, rescaled to means and back inside the loop
    final int dim = means[0].length;
    double[][] sums = new double[k][dim];
    // Cluster separation
    double[] sep = new double[k];
    // Cluster distances
    double[][] cdist = new double[k][k];
    // Progress and reassignment statistic are only created when the respective logging is enabled.
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(this.getClass().getName() + ".reassignments") : null;
    int iteration = 0;
    // A non-positive maxiter means no iteration limit.
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        int changed;
        if (iteration == 0) {
            // First pass uses the dedicated initial-assignment helper (no sep/cdist yet).
            changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
        } else {
            // #1 (presumably step 1 of Elkan's algorithm, confirm): refresh sep and cdist from the current means
            recomputeSeperation(means, sep, cdist);
            changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, cdist, upper, lower);
        }
        // Log the number of reassignments of this pass.
        if (rstat != null) {
            rstat.setLong(changed);
            LOG.statistics(rstat);
        }
        // Stop if no cluster assignment changed.
        // Note: breaking here skips the loop increment, so the final pass is not counted in iteration.
        if (changed == 0) {
            break;
        }
        // Recompute means: scale each sum by 1 / cluster size (factor 1 for empty clusters).
        // timesEquals presumably multiplies in place (confirm).
        for (int i = 0; i < k; i++) {
            final int s = clusters.get(i).size();
            timesEquals(sums[i], s > 0 ? 1. / s : 1.);
        }
        // Overwrites sep
        // (sums holds the new means at this point, means still holds the old ones)
        maxMoved(means, sums, sep);
        updateBounds(relation, assignment, upper, lower, sep);
        for (int i = 0; i < k; i++) {
            final int s = clusters.get(i).size();
            // Adopt the new mean
            System.arraycopy(sums[i], 0, means[i], 0, dim);
            // Restore to sum for next iteration
            timesEquals(sums[i], s > 0 ? s : 1.);
        }
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // The bound stores are no longer needed.
    upper.destroy();
    lower.destroy();
    // Wrap result
    double totalvariance = 0.;
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        DBIDs ids = clusters.get(i);
        // Empty clusters are not reported.
        if (ids.size() == 0) {
            continue;
        }
        double[] mean = means[i];
        double varsum = 0.;
        // Only when varstat is set: sum the distances between the mean and each member.
        // Otherwise the model is created with a variance sum of 0.
        if (varstat) {
            DoubleVector mvec = DoubleVector.wrap(mean);
            for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
                varsum += distanceFunction.distance(mvec, relation.get(it));
            }
            totalvariance += varsum;
        }
        KMeansModel model = new KMeansModel(mean, varsum);
        result.addToplevelCluster(new Cluster<>(ids, model));
    }
    if (LOG.isStatistics() && varstat) {
        LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 23 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class KMeansHybridLloydMacQueen, method run.

/**
 * Runs the hybrid k-means variant that alternates MacQueen and Lloyd passes.
 *
 * <p>Each round of the main loop performs one MacQueen pass followed by one
 * Lloyd pass, so the iteration counter advances by two per completed round.
 * The loop ends as soon as either pass reports no change, or once
 * {@code maxiter} is reached (a non-positive {@code maxiter} disables the
 * limit).
 *
 * @param database database passed on to the initializer
 * @param relation relation holding the vectors to cluster
 * @return clustering with one top-level cluster per non-empty cluster; an
 *         empty clustering if the relation is empty
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    // Nothing to cluster: hand back an empty result.
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    // Initial cluster centers
    double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

    // One id set per cluster, each presized to twice the average cluster size.
    final List<ModifiableDBIDs> members = new ArrayList<>();
    final int expectedSize = (int) (relation.size() * 2. / k);
    for (int c = 0; c < k; c++) {
        members.add(DBIDUtil.newHashSet(expectedSize));
    }
    final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    final double[] clusterVariance = new double[k];

    // Progress and variance statistic exist only if the matching log level is on.
    IndefiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new IndefiniteProgress("K-Means iteration", LOG);
    }
    DoubleStatistic varianceStat = null;
    if (LOG.isStatistics()) {
        varianceStat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    }

    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        // MacQueen pass
        LOG.incrementProcessed(progress);
        final boolean macQueenChanged = macQueenIterate(relation, centers, members, assignment, clusterVariance);
        logVarstat(varianceStat, clusterVariance);
        if (!macQueenChanged) {
            break;
        }

        // Lloyd pass
        LOG.incrementProcessed(progress);
        final boolean lloydChanged = assignToNearestCluster(relation, centers, members, assignment, clusterVariance);
        logVarstat(varianceStat, clusterVariance);
        // Converged: no object switched clusters.
        if (!lloydChanged) {
            break;
        }
        // Update the centers from the new assignment.
        centers = means(members, centers, relation);

        // Two passes were performed in this round.
        iteration += 2;
    }
    LOG.setCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Build the result, leaving out empty clusters.
    final Clustering<KMeansModel> clustering = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int c = 0; c < members.size(); c++) {
        final DBIDs ids = members.get(c);
        if (ids.size() != 0) {
            final KMeansModel clusterModel = new KMeansModel(centers[c], clusterVariance[c]);
            clustering.addToplevelCluster(new Cluster<>(ids, clusterModel));
        }
    }
    return clustering;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 24 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class KMeansMacQueen, method run.

/**
 * Runs k-means using repeated MacQueen passes.
 *
 * <p>The loop ends when a pass reports no change, or once {@code maxiter}
 * passes have been run (a non-positive {@code maxiter} disables the limit).
 *
 * @param database database passed on to the initializer
 * @param relation relation holding the vectors to cluster
 * @return clustering with one top-level cluster per non-empty cluster; an
 *         empty clustering if the relation is empty
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    // Nothing to cluster: hand back an empty result.
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    // Initial cluster centers; the MacQueen helper receives this array on every pass.
    final double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

    // One id set per cluster, each presized to twice the average cluster size.
    final List<ModifiableDBIDs> members = new ArrayList<>();
    final int expectedSize = (int) (relation.size() * 2. / k);
    for (int c = 0; c < k; c++) {
        members.add(DBIDUtil.newHashSet(expectedSize));
    }
    final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    final double[] clusterVariance = new double[k];

    // Progress and variance statistic exist only if the matching log level is on.
    IndefiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new IndefiniteProgress("K-Means iteration", LOG);
    }
    DoubleStatistic varianceStat = null;
    if (LOG.isStatistics()) {
        varianceStat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    }

    // MacQueen passes until nothing changes or the limit is hit.
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(progress);
        final boolean anyChange = macQueenIterate(relation, centers, members, assignment, clusterVariance);
        logVarstat(varianceStat, clusterVariance);
        if (!anyChange) {
            break;
        }
        iteration++;
    }
    LOG.setCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Build the result, leaving out empty clusters.
    final Clustering<KMeansModel> clustering = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int c = 0; c < members.size(); c++) {
        final DBIDs ids = members.get(c);
        if (ids.size() != 0) {
            final KMeansModel clusterModel = new KMeansModel(centers[c], clusterVariance[c]);
            clustering.addToplevelCluster(new Cluster<>(ids, clusterModel));
        }
    }
    return clustering;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 25 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

The class KMeansSort, method run.

/**
 * Runs k-means with per-iteration cluster-distance tables that are passed to
 * the assignment step.
 *
 * <p>Every iteration first refreshes the cluster distance tables, then
 * reassigns objects, and finally recomputes the means. The loop ends when an
 * assignment pass reports no change, or once {@code maxiter} iterations have
 * been run (a non-positive {@code maxiter} disables the limit).
 *
 * @param database database passed on to the initializer
 * @param relation relation holding the vectors to cluster
 * @return clustering with one top-level cluster per non-empty cluster; an
 *         empty clustering if the relation is empty
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    // Nothing to cluster: hand back an empty result.
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    // Initial cluster centers
    double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

    // One id set per cluster, each presized to twice the average cluster size.
    final List<ModifiableDBIDs> members = new ArrayList<>();
    final int expectedSize = (int) (relation.size() * 2. / k);
    for (int c = 0; c < k; c++) {
        members.add(DBIDUtil.newHashSet(expectedSize));
    }
    final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    final double[] clusterVariance = new double[k];
    // Pairwise cluster distances
    final double[][] centerDistances = new double[k][k];
    // k - 1 entries per cluster; presumably indexes of the other clusters (confirm).
    final int[][] centerOrder = new int[k][k - 1];

    // Progress and statistics objects exist only if the matching log level is on.
    IndefiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new IndefiniteProgress("K-Means iteration", LOG);
    }
    DoubleStatistic varianceStat = null;
    if (LOG.isStatistics()) {
        varianceStat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    }
    LongStatistic distanceStat = null;
    if (LOG.isStatistics()) {
        distanceStat = new LongStatistic(KEY + ".distance-computations");
    }

    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(progress);
        recomputeSeperation(centers, centerDistances, centerOrder, distanceStat);
        final boolean anyChange = assignToNearestCluster(relation, centers, members, assignment, clusterVariance, centerDistances, centerOrder, distanceStat);
        logVarstat(varianceStat, clusterVariance);
        if (LOG.isStatistics()) {
            LOG.statistics(distanceStat);
        }
        // Converged: no object switched clusters.
        if (!anyChange) {
            break;
        }
        // Update the centers from the new assignment.
        centers = means(members, centers, relation);
        iteration++;
    }
    LOG.setCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Build the result, leaving out empty clusters.
    final Clustering<KMeansModel> clustering = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int c = 0; c < members.size(); c++) {
        final DBIDs ids = members.get(c);
        if (ids.size() != 0) {
            final KMeansModel clusterModel = new KMeansModel(centers[c], clusterVariance[c]);
            clustering.addToplevelCluster(new Cluster<>(ids, clusterModel));
        }
    }
    return clustering;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)38 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)27 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)17 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)13 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)13 ArrayList (java.util.ArrayList)13 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)12 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)10 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)10 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)8 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)7 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)3 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3