Search in sources:

Example 1 with DBIDMIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDMIter in project elki by elki-project.

From the class FastutilIntOpenHashSetModifiableDBIDs, method retainAll.

/**
 * Keep only those elements of this collection that are also contained in
 * the given set; every other element is removed through the iterator.
 *
 * @param set DBIDs that are allowed to stay
 * @return {@code true} if at least one element was removed
 */
@Override
public boolean retainAll(DBIDs set) {
    boolean changed = false;
    DBIDMIter cursor = iter();
    while (cursor.valid()) {
        if (!set.contains(cursor)) {
            // Not part of the given set: drop it via the iterator.
            cursor.remove();
            changed = true;
        }
        cursor.advance();
    }
    return changed;
}
Also used : DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter)

Example 2 with DBIDMIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDMIter in project elki by elki-project.

From the class CanopyPreClustering, method run.

/**
 * Run the canopy pre-clustering.
 * <p>
 * Repeatedly pops one object from the set of remaining ids and builds a
 * cluster around it: every remaining object whose distance is not larger
 * than {@code t1} joins the cluster, and those with a distance of at most
 * {@code t2} are additionally removed from the remaining set.
 *
 * @param database Database
 * @param relation Relation to process
 * @return the clustering, one cluster per popped object
 * @throws AbortException if {@code t1 >= t2} does not hold
 */
public Clustering<PrototypeModel<O>> run(Database database, Relation<O> relation) {
    // Written as a negation so that NaN thresholds are rejected as well.
    if (!(t1 >= t2)) {
        throw new AbortException("T1 must be at least as large as T2.");
    }
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    ModifiableDBIDs remaining = DBIDUtil.newHashSet(relation.getDBIDs());
    ArrayList<Cluster<PrototypeModel<O>>> result = new ArrayList<>();
    final int total = relation.size();
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Canopy clustering", total, LOG);
    }
    DBIDVar center = DBIDUtil.newVar();
    while (!remaining.isEmpty()) {
        // Take one object out of the remaining set; it seeds the new cluster.
        remaining.pop(center);
        ModifiableDBIDs members = DBIDUtil.newArray();
        members.add(center);
        // Test every object that is still unassigned against the seed.
        DBIDMIter cand = remaining.iter();
        while (cand.valid()) {
            final double d = dq(distQuery, center, cand);
            // Negated form of "d > t1": objects beyond the inclusion threshold are skipped.
            if (!(d > t1)) {
                members.add(cand);
                // Within the removal threshold: no longer available for later clusters.
                if (d <= t2) {
                    cand.remove();
                }
            }
            cand.advance();
        }
        // TODO: remember the central object using a CanopyModel?
        result.add(new Cluster<>(members, new SimplePrototypeModel<>(relation.get(center))));
        if (progress != null) {
            progress.setProcessed(total - remaining.size(), LOG);
        }
    }
    LOG.ensureCompleted(progress);
    return new Clustering<>("Canopy clustering", "canopy-clustering", result);
}

/**
 * Distance between the cluster seed and a candidate object.
 *
 * @param query Distance query to use
 * @param seed Cluster seed
 * @param other Candidate object
 * @return distance as reported by the query
 */
private double dq(DistanceQuery<O> query, DBIDRef seed, DBIDRef other) {
    return query.distance(seed, other);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with DBIDMIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDMIter in project elki by elki-project.

From the class SameSizeKMeansAlgorithm, method refineResult.

/**
 * Perform k-means style iterations to improve the clustering result.
 * <p>
 * Each iteration calls {@code updateDistances}, sorts {@code tids} by the
 * {@code priority()} of the objects' metadata, and then visits every object:
 * for each other cluster (in the object's preference order) it first tries
 * to pair the object with one waiting in that cluster's transfer list, and
 * otherwise moves the object alone if the size bounds permit. The loop ends
 * when an iteration performed no transfer, or after {@code maxiter}
 * iterations if {@code maxiter} is positive.
 *
 * @param relation Data relation
 * @param means Means list
 * @param clusters Cluster list
 * @param metas Metadata storage
 * @param tids DBIDs array
 * @return final means
 */
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
    NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
    // Our desired cluster size:
    // rounded down
    final int minsize = tids.size() / k;
    // rounded up
    final int maxsize = (tids.size() + k - 1) / k;
    // Comparator: sort by largest gain by transfer
    // (compares the priority() values of the two objects' metadata)
    final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {

        @Override
        public int compare(DBIDRef o1, DBIDRef o2) {
            Meta c1 = metas.get(o1), c2 = metas.get(o2);
            return Double.compare(c1.priority(), c2.priority());
        }
    };
    // List for sorting cluster preferences
    final int[] preferences = MathUtil.sequence(0, k);
    // Comparator for this list.
    final PreferenceComparator pcomp = new PreferenceComparator();
    // Initialize transfer lists:
    // one list per cluster, holding objects recorded for a later paired transfer.
    ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
    for (int i = 0; i < k; i++) {
        transfers[i] = DBIDUtil.newArray();
    }
    // One iterator is reused for all passes; it is rewound with seek(0) below.
    DBIDArrayIter id = tids.iter();
    // A non-positive maxiter disables the iteration limit.
    for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
        updateDistances(relation, means, metas, df);
        tids.sort(comp);
        // Track if anything has changed
        int active = 0;
        for (id.seek(0); id.valid(); id.advance()) {
            Meta c = metas.get(id);
            // Re-sort the cluster indices using the comparator selected for this object.
            IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
            ModifiableDBIDs source = clusters.get(c.primary);
            assert (source.contains(id));
            tloop: for (int i : preferences) {
                if (i == c.primary) {
                    // Already assigned here
                    continue;
                }
                ModifiableDBIDs dest = clusters.get(i);
                // Can we pair this transfer?
                final double gain = c.gain(i);
                for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
                    Meta c2 = metas.get(other);
                    // Pair only if the combined gain of both moves is positive.
                    if (gain + c2.gain(c.primary) > 0) {
                        // Move the waiting object into our cluster first; c.primary
                        // is presumably updated by the second call -- TODO confirm.
                        transfer(metas, c2, dest, source, other, c.primary);
                        transfer(metas, c, source, dest, id, i);
                        active += 2;
                        // last, as this invalidates the reference!
                        other.remove();
                        // We are assigned here now.
                        source = dest;
                        // Can try another transfer, with next cluster.
                        continue tloop;
                    }
                }
                // If cluster sizes allow, move a single object.
                if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
                    transfer(metas, c, source, dest, id, i);
                    active += 1;
                    // We are assigned here now.
                    source = dest;
                    continue tloop;
                }
            }
            // If the object is not in the first cluster of its preference order,
            // and that cluster has a strictly smaller dists entry than the current
            // one, record the object in the transfer list of its current cluster.
            if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
                transfers[c.primary].add(id);
            }
        }
        // TODO: try to get more transfers out of the transfer lists done by
        // considering more than one object?
        // Number of objects left in the transfer lists (only used for logging).
        int pending = 0;
        // Clear transfer lists for next iteration.
        for (int i = 0; i < k; i++) {
            pending += transfers[i].size();
            transfers[i].clear();
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
        }
        // Nothing was transferred in this pass: stop iterating.
        if (active <= 0) {
            break;
        }
        // Recompute means after reassignment
        means = means(clusters, means, relation);
    }
    return means;
}
Also used : DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) Comparator(java.util.Comparator) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 4 with DBIDMIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDMIter in project elki by elki-project.

From the class P3C, method findOutliers.

/**
 * Detects outliers inside the cluster candidates. For each model, the
 * Mahalanobis distance of every point of the candidate at the same list
 * position is compared with the {@code 1 - alpha} quantile of the
 * ChiSquared distribution, whose degrees of freedom are the cardinality of
 * the candidate's dimension set. Points at or above that critical value are
 * moved from the candidate into the noise set.
 *
 * @param relation Data relation
 * @param models Cluster models
 * @param clusterCandidates the list of clusters to check.
 * @param noise the set to which to add points deemed outliers.
 */
private void findOutliers(Relation<V> relation, List<MultivariateGaussianModel> models, ArrayList<ClusterCandidate> clusterCandidates, ModifiableDBIDs noise) {
    // Position of the candidate that belongs to the current model.
    int index = 0;
    for (MultivariateGaussianModel model : models) {
        final ClusterCandidate candidate = clusterCandidates.get(index++);
        final int dof = BitsUtil.cardinality(candidate.dimensions);
        final double threshold = ChiSquaredDistribution.quantile(1 - alpha, dof);
        DBIDMIter member = candidate.ids.iter();
        while (member.valid()) {
            if (model.mahalanobisDistance(relation.get(member)) >= threshold) {
                // Record as noise before removing, while the iterator still points at it.
                noise.add(member);
                member.remove();
            }
            member.advance();
        }
    }
}
Also used : MultivariateGaussianModel(de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModel) DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter)

Aggregations

DBIDMIter (de.lmu.ifi.dbs.elki.database.ids.DBIDMIter)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 MultivariateGaussianModel (de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModel)1 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 SimplePrototypeModel (de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel)1 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)1 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)1 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)1 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 IntegerComparator (de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1