Search in sources :

Example 6 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class NearestNeighborAffinityMatrixBuilder method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of distances
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds);
        double beta = computeSigma(// 
        ix.getOffset(), // 
        dists, // 
        perplexity, // 
        logPerp, pij[ix.getOffset()] = new double[dists.size()]);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[ix.getOffset()] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int j = 0; j < pij_i.length; j++) {
            sum += pij_i[j];
        }
    }
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry:
                if (i < j) {
                    // Symmetrize
                    final double val = pij_i[offi] + pij[j][offj];
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found
                // TODO: the original code produces a symmetric matrix
                // And it will now not sum to EARLY_EXAGGERATION anymore.
                pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
            }
        }
    }
    if (LOG.isStatistics()) {
        // timer != null, mv != null
        LOG.statistics(timer.end());
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)

Example 7 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class UKMeans method run.

/**
 * Run the clustering.
 *
 * @param database the Database
 * @param relation the Relation
 * @return Clustering result
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    }
    // Choose initial means randomly
    DBIDs sampleids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
    List<double[]> means = new ArrayList<>(k);
    for (DBIDIter iter = sampleids.iter(); iter.valid(); iter.advance()) {
        means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(iter).getCenterOfMass()));
    }
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
    DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
        logVarstat(varstat, varsum);
        // Stop if no cluster assignment changed.
        if (!changed) {
            break;
        }
        // Recompute means.
        means = means(clusters, means, relation);
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result
    Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        DBIDs ids = clusters.get(i);
        if (ids.isEmpty()) {
            continue;
        }
        result.addToplevelCluster(new Cluster<>(ids, new KMeansModel(means.get(i), varsum[i])));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 8 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class DBSCAN method run.

/**
 * Performs the DBSCAN algorithm on the given database.
 */
public Clustering<Model> run(Relation<O> relation) {
    final int size = relation.size();
    if (size < minpts) {
        Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
        result.addToplevelCluster(new Cluster<Model>(relation.getDBIDs(), true, ClusterModel.CLUSTER));
        return result;
    }
    RangeQuery<O> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
    resultList = new ArrayList<>();
    noise = DBIDUtil.newHashSet();
    runDBSCAN(relation, rangeQuery);
    double averagen = ncounter / (double) relation.size();
    LOG.statistics(new DoubleStatistic(DBSCAN.class.getName() + ".average-neighbors", averagen));
    if (averagen < 1 + 0.1 * (minpts - 1)) {
        LOG.warning("There are very few neighbors found. Epsilon may be too small.");
    }
    if (averagen > 100 * minpts) {
        LOG.warning("There are very many neighbors found. Epsilon may be too large.");
    }
    Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
    for (ModifiableDBIDs res : resultList) {
        result.addToplevelCluster(new Cluster<Model>(res, ClusterModel.CLUSTER));
    }
    result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
    return result;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 9 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class SimplifiedCoverTree method initialize.

@Override
public void initialize() {
    bulkLoad(relation.getDBIDs());
    if (LOG.isVerbose()) {
        int[] counts = new int[5];
        checkCoverTree(root, counts, 0);
        LOG.statistics(new LongStatistic(this.getClass().getName() + ".nodes", counts[0]));
        LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".avg-depth", counts[1] / (double) counts[0]));
        LOG.statistics(new LongStatistic(this.getClass().getName() + ".max-depth", counts[2]));
        LOG.statistics(new LongStatistic(this.getClass().getName() + ".singletons", counts[3]));
        LOG.statistics(new LongStatistic(this.getClass().getName() + ".entries", counts[4]));
    }
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 10 with DoubleStatistic

use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.

the class InMemoryInvertedIndex method logStatistics.

@Override
public void logStatistics() {
    long count = 0L;
    for (ModifiableDoubleDBIDList column : index) {
        count += column.size();
    }
    double sparsity = count / (index.size() * (double) relation.size());
    LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".sparsity", sparsity));
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Aggregations

DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)38 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)27 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)17 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)13 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)13 ArrayList (java.util.ArrayList)13 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)12 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)10 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)10 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)8 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)7 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)3 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3