Search in sources :

Example 6 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

The run method of the class KNNJoin.

/**
 * Inner run method. This returns a double store, and is used by
 * {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
 *
 * @param relation Data relation
 * @param ids Object IDs
 * @return Data store mapping each object id to its computed kNN list
 */
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
    // The join prunes via page MBR distances, so a spatial primitive distance is required.
    if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
        throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
    }
    Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
    if (indexes.size() != 1) {
        throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
    }
    // FIXME: Ensure we're looking at the right relation!
    SpatialIndexTree<N, E> index = indexes.iterator().next();
    SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
    // data pages: the leaf entries of the spatial index
    List<E> ps_candidates = new ArrayList<>(index.getLeaves());
    // knn heaps: one list of per-object heaps for each data page
    List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
    // Initialize with the page self-pairing (join each page with itself)
    for (int i = 0; i < ps_candidates.size(); i++) {
        E pr_entry = ps_candidates.get(i);
        N pr = index.getNode(pr_entry);
        heaps.add(initHeaps(distFunction, pr));
    }
    // Build priority queue of candidate page pairs, ordered by minimum MBR distance.
    // Number of unordered pairs: n * (n - 1) / 2.
    final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
    ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
    }
    FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
    for (int i = 0; i < ps_candidates.size(); i++) {
        E pr_entry = ps_candidates.get(i);
        N pr = index.getNode(pr_entry);
        List<KNNHeap> pr_heaps = heaps.get(i);
        double pr_knn_distance = computeStopDistance(pr_heaps);
        for (int j = i + 1; j < ps_candidates.size(); j++) {
            E ps_entry = ps_candidates.get(j);
            N ps = index.getNode(ps_entry);
            List<KNNHeap> ps_heaps = heaps.get(j);
            double ps_knn_distance = computeStopDistance(ps_heaps);
            double minDist = distFunction.minDist(pr_entry, ps_entry);
            // Resolve immediately: pages at MBR distance 0 must always be joined.
            if (minDist <= 0.) {
                processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
            } else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
                // The pair may still contribute neighbors to one of the pages: enqueue.
                pq.add(new Task(minDist, i, j));
            }
            LOG.incrementProcessed(mprogress);
        }
    }
    LOG.ensureCompleted(mprogress);
    // Process the queue in ascending order of minimum MBR distance.
    FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
    IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
    while (!pq.isEmpty()) {
        Task task = pq.poll();
        List<KNNHeap> pr_heaps = heaps.get(task.i);
        List<KNNHeap> ps_heaps = heaps.get(task.j);
        // Stop distances may have shrunk since the pair was enqueued; re-check both sides.
        double pr_knn_distance = computeStopDistance(pr_heaps);
        double ps_knn_distance = computeStopDistance(ps_heaps);
        boolean dor = task.mindist <= pr_knn_distance;
        boolean dos = task.mindist <= ps_knn_distance;
        if (dor || dos) {
            N pr = index.getNode(ps_candidates.get(task.i));
            N ps = index.getNode(ps_candidates.get(task.j));
            if (dor && dos) {
                // Both pages can still gain neighbors: update both heap lists.
                processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
            } else {
                if (dor) {
                    // Only page r can still gain neighbors.
                    processDataPages(distFunction, pr_heaps, null, pr, ps);
                } else /* dos */
                {
                    // Only page s can still gain neighbors.
                    processDataPages(distFunction, ps_heaps, null, ps, pr);
                }
            }
            LOG.incrementProcessed(fprogress);
        }
        LOG.incrementProcessed(qprogress);
    }
    LOG.ensureCompleted(qprogress);
    LOG.setCompleted(fprogress);
    // Materialize the per-object heaps into the final kNN list store.
    WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
    FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
    for (int i = 0; i < ps_candidates.size(); i++) {
        N pr = index.getNode(ps_candidates.get(i));
        List<KNNHeap> pr_heaps = heaps.get(i);
        // Finalize lists
        for (int j = 0; j < pr.getNumEntries(); j++) {
            knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
        }
        // Forget heaps and pq, so the memory can be reclaimed early.
        heaps.set(i, null);
        LOG.incrementProcessed(pageprog);
    }
    LOG.ensureCompleted(pageprog);
    return knnLists;
}
Also used : ComparableMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap) ArrayList(java.util.ArrayList) SpatialIndexTree(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree) MissingPrerequisitesException(de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) SpatialPrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 7 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

The run method of the class AffinityPropagationClusteringAlgorithm.

/**
 * Perform affinity propagation clustering.
 *
 * Iteratively exchanges "responsibility" and "availability" messages (damped
 * by {@code lambda}) until the exemplar assignment has been stable for
 * {@code convergence} iterations, or {@code maxiter} is reached.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    // assignment[i] = index of the current exemplar of object i
    int[] assignment = new int[size];
    double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
    // r = responsibility messages, a = availability messages
    double[][] r = new double[size][size];
    double[][] a = new double[size][size];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
    MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
    // Number of consecutive iterations without any assignment change.
    int inactive = 0;
    for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
        // Update responsibility matrix:
        for (int i = 0; i < size; i++) {
            double[] ai = a[i], ri = r[i], si = s[i];
            // Find the two largest values (as initially maxk == i)
            double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
            int maxk = -1;
            for (int k = 0; k < size; k++) {
                double val = ai[k] + si[k];
                if (val > max1) {
                    max2 = max1;
                    max1 = val;
                    maxk = k;
                } else if (val > max2) {
                    max2 = val;
                }
            }
            // With the maximum value known, update r (damped by lambda):
            for (int k = 0; k < size; k++) {
                double val = si[k] - ((k != maxk) ? max1 : max2);
                ri[k] = ri[k] * lambda + val * (1. - lambda);
            }
        }
        // Update availability matrix
        for (int k = 0; k < size; k++) {
            // Compute sum of max(0, r_ik) for all i.
            // For r_kk, don't apply the max.
            double colposum = 0.;
            for (int i = 0; i < size; i++) {
                if (i == k || r[i][k] > 0.) {
                    colposum += r[i][k];
                }
            }
            for (int i = 0; i < size; i++) {
                double val = colposum;
                // Adjust column sum by the one extra term.
                if (i == k || r[i][k] > 0.) {
                    val -= r[i][k];
                }
                if (i != k && val > 0.) {
                    // min
                    val = 0.;
                }
                a[i][k] = a[i][k] * lambda + val * (1 - lambda);
            }
        }
        // Recompute the exemplar assignment, counting changes.
        int changed = 0;
        for (int i = 0; i < size; i++) {
            double[] ai = a[i], ri = r[i];
            double max = Double.NEGATIVE_INFINITY;
            int maxj = -1;
            for (int j = 0; j < size; j++) {
                double v = ai[j] + ri[j];
                // Ties are broken in favor of self-assignment (i == j).
                if (v > max || (i == j && v >= max)) {
                    max = v;
                    maxj = j;
                }
            }
            if (assignment[i] != maxj) {
                changed += 1;
                assignment[i] = maxj;
            }
        }
        inactive = (changed > 0) ? 0 : (inactive + 1);
        LOG.incrementProcessed(prog);
        if (aprog != null) {
            aprog.setProcessed(size - changed, LOG);
        }
    }
    if (aprog != null) {
        aprog.setProcessed(aprog.getTotal(), LOG);
    }
    LOG.setCompleted(prog);
    // Cluster map, by lead object
    Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
    DBIDArrayIter i1 = ids.iter();
    for (int i = 0; i1.valid(); i1.advance(), i++) {
        int c = assignment[i];
        // Add to cluster members:
        ModifiableDBIDs cids = map.get(c);
        if (cids == null) {
            cids = DBIDUtil.newArray();
            map.put(c, cids);
        }
        cids.add(i1);
    }
    // If we stopped early, the cluster lead might be in a different cluster.
    for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
        final int key = entry.getIntKey();
        int targetkey = key;
        ModifiableDBIDs tids = null;
        // Chase arrows:
        // BUG FIX: the guard previously read "ids == null", but ids (the relation
        // DBIDs) is never null, so this loop was dead code and early-stop leads
        // were never merged. The loop must run until a cluster for the chased
        // lead is found (tids != null) or the lead is its own exemplar.
        while (tids == null && assignment[targetkey] != targetkey) {
            targetkey = assignment[targetkey];
            tids = map.get(targetkey);
        }
        if (tids != null && targetkey != key) {
            tids.addDBIDs(entry.getValue());
            iter.remove();
        }
    }
    Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
    ModifiableDBIDs noise = DBIDUtil.newArray();
    for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
        i1.seek(entry.getIntKey());
        if (entry.getValue().size() > 1) {
            // Real cluster: the exemplar (lead object) becomes the medoid.
            MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
            clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
        } else {
            // Singleton clusters are collected as noise.
            noise.add(i1);
        }
    }
    if (noise.size() > 0) {
        MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
        clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
    }
    return clustering;
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) MedoidModel(de.lmu.ifi.dbs.elki.data.model.MedoidModel) MutableProgress(de.lmu.ifi.dbs.elki.logging.progress.MutableProgress) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 8 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

The run method of the class UKMeans.

/**
 * Run the clustering.
 *
 * @param database the Database
 * @param relation the Relation
 * @return Clustering result
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
    // Degenerate input: nothing to cluster.
    if (relation.size() <= 0) {
        return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    }
    // Seed the means with the centers of mass of k randomly drawn objects.
    DBIDs initialSample = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
    List<double[]> centers = new ArrayList<>(k);
    for (DBIDIter it = initialSample.iter(); it.valid(); it.advance()) {
        centers.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(it).getCenterOfMass()));
    }
    // One member set per cluster, presized for roughly balanced clusters.
    List<ModifiableDBIDs> partitions = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        partitions.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
    DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
    int iteration = 0;
    // Iterate until convergence, or until maxiter (if positive) is reached.
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(prog);
        boolean anyReassigned = assignToNearestCluster(relation, centers, partitions, assignment, varsum);
        logVarstat(varstat, varsum);
        // Converged: no cluster assignment changed.
        if (!anyReassigned) {
            break;
        }
        // Recompute means from the new partitions.
        centers = means(partitions, centers, relation);
        iteration++;
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap the non-empty partitions as clusters with k-means models.
    Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    for (int c = 0; c < partitions.size(); c++) {
        DBIDs members = partitions.get(c);
        if (members.isEmpty()) {
            continue;
        }
        result.addToplevelCluster(new Cluster<>(members, new KMeansModel(centers.get(c), varsum[c])));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 9 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

The runDBSCAN method of the class DBSCAN.

/**
 * Run the DBSCAN algorithm
 *
 * Visits every object once, expanding a cluster from each object that has
 * not yet been processed, and stops early once all objects are processed.
 *
 * @param relation Data relation
 * @param rangeQuery Range query class
 */
protected void runDBSCAN(Relation<O> relation, RangeQuery<O> rangeQuery) {
    final int total = relation.size();
    FiniteProgress objectProgress = LOG.isVerbose() ? new FiniteProgress("Processing objects", total, LOG) : null;
    IndefiniteProgress clusterProgress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
    processedIDs = DBIDUtil.newHashSet(total);
    ArrayModifiableDBIDs seedList = DBIDUtil.newArray();
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        // Only unprocessed objects start a new expansion.
        if (!processedIDs.contains(it)) {
            expandCluster(relation, rangeQuery, it, seedList, objectProgress, clusterProgress);
        }
        // Keep both progress displays in sync with the actual state.
        if (objectProgress != null && clusterProgress != null) {
            objectProgress.setProcessed(processedIDs.size(), LOG);
            clusterProgress.setProcessed(resultList.size(), LOG);
        }
        // Early exit: every object has been assigned already.
        if (processedIDs.size() == total) {
            break;
        }
        it.advance();
    }
    // Finish progress logging
    LOG.ensureCompleted(objectProgress);
    LOG.setCompleted(clusterProgress);
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 10 with IndefiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.

The run method of the class DOC.

/**
 * Performs the DOC or FastDOC (as configured) algorithm on the given
 * Database.
 *
 * This will run exhaustively, i.e. run DOC until no clusters are found
 * anymore / the database size has shrunk below the threshold for minimum
 * cluster size.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering of the discovered subspace clusters, plus noise
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
    // Dimensionality of our set.
    final int dim = RelationUtil.dimensionality(relation);
    // Working set of DBIDs; found clusters are removed from it.
    ArrayModifiableDBIDs unassigned = DBIDUtil.newArray(relation.getDBIDs());
    // Precompute values as described in Figure 2.
    double r = Math.abs(FastMath.log(dim + dim) / FastMath.log(beta * .5));
    // Outer loop count.
    int outerIterations = (int) (2. / alpha);
    // Inner loop count.
    int innerIterations = (int) (FastMath.pow(2. / alpha, r) * FastMath.log(4));
    // TODO: This should only apply for FastDOC.
    innerIterations = Math.min(innerIterations, Math.min(1000000, dim * dim));
    // Minimum size for a cluster for it to be accepted.
    int minClusterSize = (int) (alpha * unassigned.size());
    // List of all clusters we found.
    Clustering<SubspaceModel> result = new Clustering<>("DOC Clusters", "DOC");
    // Inform the user about the number of actual clusters found so far.
    IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
    // Keep mining until the remaining point set is too small.
    while (unassigned.size() > minClusterSize) {
        Cluster<SubspaceModel> found = runDOC(database, relation, unassigned, dim, outerIterations, innerIterations, (int) r, minClusterSize);
        // Stop trying if we couldn't find a cluster.
        if (found == null) {
            break;
        }
        // Remember the cluster and remove its points from the working set.
        result.addToplevelCluster(found);
        unassigned.removeDBIDs(found.getIDs());
        if (cprogress != null) {
            cprogress.setProcessed(result.getAllClusters().size(), LOG);
        }
    }
    // Add the remainder as noise.
    if (unassigned.size() > 0) {
        long[] alldims = BitsUtil.ones(dim);
        result.addToplevelCluster(new Cluster<>(unassigned, true, new SubspaceModel(new Subspace(alldims), Centroid.make(relation, unassigned).getArrayRef())));
    }
    LOG.setCompleted(cprogress);
    return result;
}
Also used : IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Aggregations

IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)28 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)20 ArrayList (java.util.ArrayList)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)14 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)13 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)11 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)11 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 Model (de.lmu.ifi.dbs.elki.data.model.Model)4 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)3 AbstractProjectedClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering)2 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2