Search in sources:

Example 56 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In the class DBSCAN, method runDBSCAN.

/**
 * Execute DBSCAN over all objects of the relation.
 *
 * Each object that is not yet contained in the processed set is handed to
 * {@code expandCluster}. The iteration stops early as soon as the processed
 * set has reached the size of the relation.
 *
 * @param relation Data relation
 * @param rangeQuery Range query class
 */
protected void runDBSCAN(Relation<O> relation, RangeQuery<O> rangeQuery) {
    final int total = relation.size();
    // Progress loggers only exist in verbose mode; otherwise they stay null.
    FiniteProgress objectProgress = null;
    if (LOG.isVerbose()) {
        objectProgress = new FiniteProgress("Processing objects", total, LOG);
    }
    IndefiniteProgress clusterProgress = null;
    if (LOG.isVerbose()) {
        clusterProgress = new IndefiniteProgress("Number of clusters", LOG);
    }
    processedIDs = DBIDUtil.newHashSet(total);
    ArrayModifiableDBIDs seedBuffer = DBIDUtil.newArray();
    DBIDIter cursor = relation.iterDBIDs();
    while (cursor.valid()) {
        if (!processedIDs.contains(cursor)) {
            expandCluster(relation, rangeQuery, cursor, seedBuffer, objectProgress, clusterProgress);
        }
        // Report the current state after every visited object.
        if (objectProgress != null && clusterProgress != null) {
            objectProgress.setProcessed(processedIDs.size(), LOG);
            clusterProgress.setProcessed(resultList.size(), LOG);
        }
        // Nothing left to visit once every object has been processed.
        if (processedIDs.size() == total) {
            break;
        }
        cursor.advance();
    }
    // Close both progress loggers.
    LOG.ensureCompleted(objectProgress);
    LOG.setCompleted(clusterProgress);
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 57 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In the class EvaluateRetrievalPerformance, method run.

/**
 * Evaluate retrieval performance on a random sample of query objects.
 *
 * For every sampled object that has at least one match, the distances are
 * computed and scored with average precision, ROC and the kNN evaluator.
 * The accumulated scores are then divided by the number of evaluated
 * samples and logged as statistics.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result built from the number of evaluated samples, the averaged
 *         average-precision score, the averaged ROC score and the averaged
 *         per-k kNN scores.
 * @throws AbortException if no sampled object had a match, or if an
 *         accumulated score is not {@code >= 0} (e.g. NaN)
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs queryIds = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    // Matches ("positives") of the current query object.
    ModifiableDBIDs positives = DBIDUtil.newHashSet();
    // Reusable distance list.
    ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(relation.size());
    // Label counters handed to the kNN evaluator.
    Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();
    // Running sums; turned into means after the loop.
    double mapSum = 0.;
    double rocSum = 0.;
    double[] knnperf = new double[maxk];
    int evaluated = 0;
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Processing query objects", queryIds.size(), LOG);
    }
    for (DBIDIter query = queryIds.iter(); query.valid(); query.advance()) {
        final Object queryLabel = lrelation.get(query);
        findMatches(positives, lrelation, queryLabel);
        if (positives.size() > 0) {
            computeDistances(neighbors, query, distQuery, relation);
            // The query object itself is only expected when includeSelf is set.
            final int expected = relation.size() - (includeSelf ? 0 : 1);
            if (neighbors.size() != expected) {
                LOG.warning("Neighbor list does not have the desired size: " + neighbors.size());
            }
            mapSum += AveragePrecisionEvaluation.STATIC.evaluate(positives, neighbors);
            rocSum += ROCEvaluation.STATIC.evaluate(positives, neighbors);
            KNNEvaluator.STATIC.evaluateKNN(knnperf, neighbors, lrelation, labelCounts, queryLabel);
            ++evaluated;
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (evaluated < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // Written as a negated comparison so that NaN is rejected as well.
    if (!(mapSum >= 0 && rocSum >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }
    final double map = mapSum / evaluated;
    final double mroc = rocSum / evaluated;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", evaluated));
    for (int i = 0; i < maxk; i++) {
        knnperf[i] /= evaluated;
        // Statistic keys are 1-based.
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (i + 1), knnperf[i]));
    }
    return new RetrievalPerformanceResult(evaluated, map, mroc, knnperf);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 58 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In the class RankingQualityHistogram, method run.

/**
 * Process a database.
 *
 * The objects are grouped into clusters by
 * {@link ByLabelOrAllInOneClustering}. For every object, the complete
 * neighbor list is retrieved and scored against the object's own cluster
 * via {@code EvaluateClustering.evaluateRanking}. The scores are collected
 * in a histogram over [0, 1] with {@code numbins} bins, where each object
 * contributes a weight of {@code 1 / relation.size()}.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Histogram of ranking qualities, with the mean and sample variance
 *         of the scores added as a header
 */
public HistogramResult run(Database database, Relation<O> relation) {
    final int size = relation.size();
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, size);
    if (LOG.isVerbose()) {
        LOG.verbose("Preprocessing clusters...");
    }
    // Cluster by labels
    Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
    DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0);
    if (LOG.isVerbose()) {
        LOG.verbose("Processing points...");
    }
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", size, LOG) : null;
    ROCEvaluation roc = new ROCEvaluation();
    MeanVariance mv = new MeanVariance();
    // Every object contributes the same weight to the histogram.
    final double weight = 1. / size;
    // Score the full neighbor ranking of each object against its own cluster.
    for (Cluster<?> clus : split) {
        for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
            KNNList knn = knnQuery.getKNNForDBID(iter, size);
            double score = EvaluateClustering.evaluateRanking(roc, clus, knn);
            mv.put(score);
            hist.increment(score, weight);
            LOG.incrementProcessed(progress);
        }
    }
    LOG.ensureCompleted(progress);
    // Transform Histogram into a Double Vector array.
    // The list receives one entry per histogram bin, so it is sized by the
    // number of bins rather than by the (potentially huge) relation size.
    Collection<double[]> res = new ArrayList<>(numbins);
    for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
        res.add(new double[] { iter.getCenter(), iter.getValue() });
    }
    HistogramResult result = new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
    result.addHeader("Mean: " + mv.getMean() + " Variance: " + mv.getSampleVariance());
    return result;
}
Also used : ROCEvaluation(de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DoubleStaticHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram)

Example 59 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In the class DOC, method runDOC.

/**
 * Performs a single run of DOC, finding a single cluster.
 *
 * For each of the {@code n} randomly chosen seed points, {@code m} random
 * samples of size {@code r} are drawn. Each sample determines a set of
 * relevant dimensions; if that set is non-empty, the neighbors of the seed
 * point form a cluster candidate. The candidate with the highest quality
 * that has at least {@code minClusterSize} members is returned.
 *
 * @param database Database context
 * @param relation used to get actual values for DBIDs.
 * @param S The set of points we're working on.
 * @param d Dimensionality of the data set we're currently working on.
 * @param n Number of outer iterations (seed points).
 * @param m Number of inner iterations (per seed point).
 * @param r Size of random samples.
 * @param minClusterSize Minimum size a cluster must have to be accepted.
 * @return a cluster, if one is found, else <code>null</code>.
 */
protected Cluster<SubspaceModel> runDOC(Database database, Relation<V> relation, ArrayModifiableDBIDs S, final int d, int n, int m, int r, int minClusterSize) {
    // Best cluster for the current run.
    DBIDs C = null;
    // Relevant attributes for the best cluster.
    long[] D = null;
    // Quality of the best cluster.
    double quality = Double.NEGATIVE_INFINITY;
    // Inform the user about the progress in the current iteration.
    FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;
    Random random = rnd.getSingleThreadedRandom();
    DBIDArrayIter iter = S.iter();
    for (int i = 0; i < n; ++i) {
        // Pick a random seed point.
        iter.seek(random.nextInt(S.size()));
        for (int j = 0; j < m; ++j) {
            // Choose a set of random points.
            DBIDs randomSet = DBIDUtil.randomSample(S, r, random);
            // Initialize cluster info.
            long[] nD = BitsUtil.zero(d);
            // Mark each dimension that is relevant for the random sample.
            for (int k = 0; k < d; ++k) {
                if (dimensionIsRelevant(k, relation, randomSet)) {
                    BitsUtil.setI(nD, k);
                }
            }
            // Number of relevant dimensions; nD is not modified below.
            final int nDims = BitsUtil.cardinality(nD);
            if (nDims > 0) {
                DBIDs nC = findNeighbors(iter, nD, S, relation);
                if (LOG.isDebuggingFiner()) {
                    LOG.finer("Testing a cluster candidate, |C| = " + nC.size() + ", |D| = " + nDims);
                }
                // Is the cluster large enough?
                if (nC.size() < minClusterSize) {
                    // Too small.
                    if (LOG.isDebuggingFiner()) {
                        LOG.finer("... but it's too small.");
                    }
                } else {
                    // Better cluster than before?
                    double nQuality = computeClusterQuality(nC.size(), nDims);
                    if (nQuality > quality) {
                        if (LOG.isDebuggingFiner()) {
                            LOG.finer("... and it's the best so far: " + nQuality + " vs. " + quality);
                        }
                        C = nC;
                        D = nD;
                        quality = nQuality;
                    } else {
                        if (LOG.isDebuggingFiner()) {
                            LOG.finer("... but we already have a better one.");
                        }
                    }
                }
            }
            // Count every inner iteration, including rejected candidates, so
            // the progress matches the announced total of m * n steps.
            LOG.incrementProcessed(iprogress);
        }
    }
    LOG.ensureCompleted(iprogress);
    return (C != null) ? makeCluster(relation, C, D) : null;
}
Also used : Random(java.util.Random) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)

Example 60 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

In the class FastOPTICS, method run.

/**
 * Run the algorithm.
 *
 * Prepares the reachability storage, lets the index compute its sets,
 * inverse densities and neighbors, and then expands the cluster order from
 * every object that has not been processed yet.
 *
 * @param db Database
 * @param rel Relation
 * @return the resulting cluster order
 */
public ClusterOrder run(Database db, Relation<V> rel) {
    final DBIDs allIds = rel.getDBIDs();
    final DistanceQuery<V> distances = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
    // Reachability distances start out as undefined.
    final int storageHints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP;
    reachDist = DataStoreUtil.makeDoubleStorage(allIds, storageHints, UNDEFINED_DISTANCE);
    // Let the index do its preprocessing, then fetch densities and neighbors.
    index.computeSetsBounds(rel, minPts, allIds);
    inverseDensities = index.computeAverageDistInSet();
    neighs = index.getNeighs();
    // Progress logger only exists in verbose mode.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("FastOPTICS clustering", allIds.size(), LOG);
    }
    processed = DBIDUtil.newHashSet(allIds.size());
    order = new ClusterOrder(allIds, "FastOPTICS Cluster Order", "fast-optics");
    for (DBIDIter cursor = allIds.iter(); cursor.valid(); cursor.advance()) {
        // Skip objects that are already in the processed set.
        if (processed.contains(cursor)) {
            continue;
        }
        expandClusterOrder(DBIDUtil.deref(cursor), order, distances, progress);
    }
    index.logStatistics();
    LOG.ensureCompleted(progress);
    return order;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) 145, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 78, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList) 34, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) 33, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 29, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax) 25, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) 25, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) 23, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) 23, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) 23, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) 21, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) 20, ArrayList (java.util.ArrayList) 18, DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) 17, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance) 17, Clustering (de.lmu.ifi.dbs.elki.data.Clustering) 16, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) 16, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) 14, Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration) 13, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) 12