Search in sources :

Example 51 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class PROCLUS method findDimensions.

/**
 * Derives the correlated dimensions of every medoid. For each medoid the
 * per-dimension mean absolute difference between the medoid and the objects
 * of its locality is computed; these averages are then handed to
 * {@code computeZijs} and {@code computeDimensionMap}.
 *
 * @param medoids the set of medoids
 * @param database the database containing the objects
 * @param distFunc the distance function (forwarded to the locality lookup)
 * @param rangeQuery the range query (forwarded to the locality lookup)
 * @return the set of correlated dimensions for each medoid in the specified
 *         medoid set
 */
private long[][] findDimensions(ArrayDBIDs medoids, Relation<V> database, DistanceQuery<V> distFunc, RangeQuery<V> rangeQuery) {
    // Locality of each medoid, looked up by the medoid's DBID.
    final DataStore<DBIDs> localityOf = getLocalities(medoids, database, distFunc, rangeQuery);
    final int dimensionality = RelationUtil.dimensionality(database);
    final int medoidCount = medoids.size();
    // Row i holds x_ij: the average distance in dimension j from the
    // locality of medoid i to medoid i itself.
    final double[][] avgPerMedoid = new double[medoidCount][];
    for (DBIDArrayIter medoidIt = medoids.iter(); medoidIt.valid(); medoidIt.advance()) {
        final V center = database.get(medoidIt);
        final DBIDs locality = localityOf.get(medoidIt);
        final double[] sums = new double[dimensionality];
        // Accumulate absolute per-dimension deviations over the locality.
        for (DBIDIter memberIt = locality.iter(); memberIt.valid(); memberIt.advance()) {
            final V member = database.get(memberIt);
            for (int j = 0; j < dimensionality; j++) {
                final double diff = center.doubleValue(j) - member.doubleValue(j);
                sums[j] += Math.abs(diff);
            }
        }
        // Turn the sums into averages.
        final int localitySize = locality.size();
        for (int j = 0; j < dimensionality; j++) {
            sums[j] /= localitySize;
        }
        avgPerMedoid[medoidIt.getOffset()] = sums;
    }
    return computeDimensionMap(computeZijs(avgPerMedoid, dimensionality), dimensionality, medoidCount);
}
Also used : ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 52 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class PROCLUS method greedy.

/**
 * Greedily picks m medoids from the sample set. The first medoid is drawn at
 * random; each further medoid is the remaining sample whose distance to its
 * closest already chosen medoid is largest (the first such sample on ties).
 *
 * @param distFunc the distance function
 * @param sampleSet the sample set
 * @param m the number of medoids to be returned
 * @param random random number generator
 * @return a piercing set of m medoids from the specified sample set
 */
private ArrayDBIDs greedy(DistanceQuery<V> distFunc, DBIDs sampleSet, int m, Random random) {
    ArrayModifiableDBIDs medoids = DBIDUtil.newArray(m);
    ArrayModifiableDBIDs candidates = DBIDUtil.newArray(sampleSet);
    DBIDArrayIter cursor = candidates.iter();
    DBIDVar latest = DBIDUtil.newVar();
    int remaining = candidates.size();
    // Seed medoid: swap a random candidate into the last slot, then pop it.
    final int seedIndex = random.nextInt(remaining);
    remaining--;
    candidates.swap(seedIndex, remaining);
    medoids.add(candidates.pop(latest));
    if (LOG.isDebugging()) {
        LOG.debugFiner("medoids " + medoids.toString());
    }
    // Distance of every remaining candidate to its closest medoid so far.
    WritableDoubleDataStore closest = DataStoreUtil.makeDoubleStorage(candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    // Offset and distance of the candidate farthest from all medoids.
    int farthest = -1;
    double farthestDist = Double.NEGATIVE_INFINITY;
    cursor.seek(0);
    while (cursor.getOffset() < remaining) {
        final double toSeed = distFunc.distance(cursor, latest);
        closest.putDouble(cursor, toSeed);
        if (toSeed > farthestDist) {
            farthestDist = toSeed;
            farthest = cursor.getOffset();
        }
        cursor.advance();
    }
    for (int chosen = 1; chosen < m; chosen++) {
        // Promote the farthest candidate to be the next medoid.
        remaining--;
        candidates.swap(farthest, remaining);
        medoids.add(candidates.pop(latest));
        // Refresh closest-medoid distances and find the new farthest one.
        farthest = -1;
        farthestDist = Double.NEGATIVE_INFINITY;
        cursor.seek(0);
        while (cursor.getOffset() < remaining) {
            final double toLatest = distFunc.distance(cursor, latest);
            final double previous = closest.doubleValue(cursor);
            double best = previous;
            if (toLatest < previous) {
                best = toLatest;
            }
            closest.putDouble(cursor, best);
            if (best > farthestDist) {
                farthestDist = best;
                farthest = cursor.getOffset();
            }
            cursor.advance();
        }
        if (LOG.isDebugging()) {
            LOG.debugFiner("medoids " + medoids.toString());
        }
    }
    return medoids;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 53 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class KNNKernelDensityMinimaClustering method run.

/**
 * Run the clustering algorithm on a data relation.
 *
 * The method works in two steps on the ids sorted by a single dimension
 * ({@code dim}): first a density value is estimated for every object from
 * its up to {@code k} predecessors and {@code k} successors in sorted order,
 * then the sorted sequence is cut into clusters at local density minima
 * (window size controlled by {@code minwindow}). The fields {@code dim},
 * {@code k}, {@code mode}, {@code kernel} and {@code minwindow} are defined
 * outside this method.
 *
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    final int size = ids.size();
    // Sort by the sole dimension
    ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
    // Density storage, initialized to 0 for every id.
    WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // iter walks the current object, iter2 walks its neighbors in sorted order.
    DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
    StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
    LOG.beginStep(sprog, 1, "Kernel density estimation.");
    {
        // Differences to at most k predecessors and k successors.
        double[] scratch = new double[2 * k];
        iter.seek(0);
        for (int i = 0; i < size; i++, iter.advance()) {
            // Current value.
            final double curv = relation.get(iter).doubleValue(dim);
            // Neighbor window [pre, pos] clipped to the array bounds; prek and
            // posk are the number of predecessors and successors inside it.
            final int pre = Math.max(i - k, 0), prek = i - pre;
            final int pos = Math.min(i + k, size - 1), posk = pos - i;
            // Differences to the predecessors go to scratch[0 .. prek).
            iter2.seek(pre);
            for (int j = 0; j < prek; j++, iter2.advance()) {
                scratch[j] = curv - relation.get(iter2).doubleValue(dim);
            }
            assert (iter2.getOffset() == i);
            // Skip the current object itself.
            iter2.advance();
            // Differences to the successors go to scratch[prek .. prek + posk).
            for (int j = 0; j < posk; j++, iter2.advance()) {
                scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
            }
            assert (prek + posk >= k);
            // Bandwidth for this object.
            // NOTE(review): presumably the rank-k element of
            // scratch[0 .. prek + posk) - confirm against QuickSelect's contract.
            double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
            switch(mode) {
                case BALLOON:
                    {
                        // The density of the current object is the sum of the
                        // kernel values of its scaled neighbor differences, or
                        // infinite if the bandwidth is not positive.
                        double dens = 0.;
                        if (kdist > 0.) {
                            for (int j = 0; j < prek + posk; j++) {
                                dens += kernel.density(scratch[j] / kdist);
                            }
                        } else {
                            dens = Double.POSITIVE_INFINITY;
                        }
                        assert (iter.getOffset() == i);
                        density.putDouble(iter, dens);
                        break;
                    }
                case SAMPLE:
                    {
                        // The current object contributes to the density of each
                        // of its neighbors, using the current object's bandwidth.
                        if (kdist > 0.) {
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                        } else {
                            // Non-positive bandwidth: every neighbor whose
                            // difference is not strictly positive gets infinite
                            // density.
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                        }
                        break;
                    }
                default:
                    throw new UnsupportedOperationException("Unknown mode specified.");
            }
        }
    }
    LOG.beginStep(sprog, 2, "Local minima detection.");
    Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
    {
        // Ring buffer holding the most recent 2 * minwindow + 1 densities.
        double[] scratch = new double[2 * minwindow + 1];
        // Offset (in sorted order) at which the currently open cluster starts.
        int begin = 0;
        int halfw = (minwindow + 1) >> 1;
        iter.seek(0);
        // Fill initial buffer.
        for (int i = 0; i < size; i++, iter.advance()) {
            // m is the ring buffer slot of the current object, t the slot of
            // the object minwindow + 1 positions earlier. t can be negative
            // for small i, but is only used once i > scratch.length.
            final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
            scratch[m] = density.doubleValue(iter);
            if (i > scratch.length) {
                // Smallest density in the buffer, excluding slot t.
                double min = Double.POSITIVE_INFINITY;
                for (int j = 0; j < scratch.length; j++) {
                    if (j != t && scratch[j] < min) {
                        min = scratch[j];
                    }
                }
                // Local minimum: slot t is strictly below every other slot.
                if (scratch[t] < min) {
                    // Tentative cut position; the open cluster is [begin, end).
                    int end = i - minwindow + 1;
                    {
                        // Test on which side the kNN is
                        // NOTE(review): if ids are in ascending order of the
                        // dimension (as the differences in step 1 suggest), both
                        // "left" and "right" are <= 0 here - confirm the
                        // intended sign convention.
                        // NOTE(review): for minwindow == 1 the seek to
                        // end + halfw targets offset i + 1, which may equal
                        // size - confirm the lower bound of minwindow.
                        iter2.seek(end);
                        double curv = relation.get(iter2).doubleValue(dim);
                        iter2.seek(end - halfw);
                        double left = relation.get(iter2).doubleValue(dim) - curv;
                        iter2.seek(end + halfw);
                        double right = curv - relation.get(iter2).doubleValue(dim);
                        if (left < right) {
                            end++;
                        }
                    }
                    // Emit the ids at offsets [begin, end) as one cluster.
                    iter2.seek(begin);
                    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
                    for (int j = 0; j < end - begin; j++, iter2.advance()) {
                        cids.add(iter2);
                    }
                    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
                    begin = end;
                }
            }
        }
        // Extract last cluster: everything from the last cut to the end.
        int end = size;
        iter2.seek(begin);
        ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
        for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
        }
        clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
    }
    LOG.ensureCompleted(sprog);
    return clustering;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) VectorUtil(de.lmu.ifi.dbs.elki.data.VectorUtil) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)

Example 54 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class ExternalClustering method attachToRelation.

/**
 * Groups the DBIDs of a relation by their externally supplied cluster label
 * and adds the resulting clustering to the database's result hierarchy,
 * below the given relation.
 *
 * @param database Database
 * @param r Relation whose DBIDs are labeled; the clustering is attached to it
 * @param assignment Cluster assignment; entry i labels the DBID at offset i
 * @param name Name parts, joined with spaces to form the clustering name
 */
private void attachToRelation(Database database, Relation<?> r, IntArrayList assignment, ArrayList<String> name) {
    final DBIDs relationIds = r.getDBIDs();
    if (!(relationIds instanceof ArrayDBIDs)) {
        throw new AbortException("External clusterings can only be used with static DBIDs.");
    }
    // Pass 1: count the members of every label.
    final Int2IntOpenHashMap labelCounts = new Int2IntOpenHashMap();
    final IntListIterator labelIt = assignment.iterator();
    while (labelIt.hasNext()) {
        labelCounts.addTo(labelIt.nextInt(), 1);
    }
    // Pass 2: allocate one presized id array per label.
    final Int2ObjectOpenHashMap<ArrayModifiableDBIDs> membersByLabel = new Int2ObjectOpenHashMap<>(labelCounts.size());
    final ObjectIterator<Int2IntMap.Entry> countIt = labelCounts.int2IntEntrySet().fastIterator();
    while (countIt.hasNext()) {
        final Int2IntMap.Entry count = countIt.next();
        membersByLabel.put(count.getIntKey(), DBIDUtil.newArray(count.getIntValue()));
    }
    // Pass 3: put the DBID at each offset into the array of its label.
    final DBIDArrayIter idCursor = ((ArrayDBIDs) relationIds).iter();
    final int assigned = assignment.size();
    for (int offset = 0; offset < assigned; offset++) {
        final ArrayModifiableDBIDs members = membersByLabel.get(assignment.getInt(offset));
        members.add(idCursor.seek(offset));
    }
    final String longName = FormatUtil.format(name, " ");
    final String shortName = longName.toLowerCase().replace(' ', '-');
    final Clustering<ClusterModel> clustering = new Clustering<>(longName, shortName);
    final ObjectIterator<Int2ObjectMap.Entry<ArrayModifiableDBIDs>> clusterIt = membersByLabel.int2ObjectEntrySet().fastIterator();
    while (clusterIt.hasNext()) {
        final Int2ObjectMap.Entry<ArrayModifiableDBIDs> labeled = clusterIt.next();
        // Negative labels are passed on as the noise flag.
        final boolean isNoise = labeled.getIntKey() < 0;
        clustering.addToplevelCluster(new Cluster<>(labeled.getValue(), isNoise, ClusterModel.CLUSTER));
    }
    database.getHierarchy().add(r, clustering);
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IntListIterator(it.unimi.dsi.fastutil.ints.IntListIterator) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 55 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class ClusteringVectorDumper method dumpClusteringOutput.

/**
 * Dump a single clustering result as one line: the cluster number of every
 * object of a DBID range, separated by spaces, followed by a label.
 *
 * The DBID range is taken from the first parent relation of the clustering
 * whose ids are a {@code DBIDRange}; if there is none, the databases in the
 * hierarchy are tried. Without such a range a warning is logged and nothing
 * is written. The label is {@code forceLabel} when that is non-null (omitted
 * if empty), otherwise the long name of the clustering.
 *
 * @param writer Output writer
 * @param hierarchy Cluster hierarchy to process
 * @param c Clustering result
 */
protected void dumpClusteringOutput(PrintStream writer, ResultHierarchy hierarchy, Clustering<?> c) {
    DBIDRange range = null;
    // Preferred source: a parent relation backed by a continuous range.
    It<Relation<?>> parentIt = hierarchy.iterParents(c).filter(Relation.class);
    while (parentIt.valid()) {
        final DBIDs candidate = parentIt.get().getDBIDs();
        if (candidate instanceof DBIDRange) {
            range = (DBIDRange) candidate;
            break;
        }
        LOG.warning("Parent result " + parentIt.get().getLongName() + " has DBID type " + candidate.getClass());
        parentIt.advance();
    }
    // Fallback: try to locate a database.
    if (range == null) {
        It<Database> databaseIt = hierarchy.iterAll().filter(Database.class);
        while (databaseIt.valid()) {
            final DBIDs candidate = databaseIt.get().getRelation(TypeUtil.ANY).getDBIDs();
            if (candidate instanceof DBIDRange) {
                range = (DBIDRange) candidate;
                break;
            }
            LOG.warning("Parent result " + databaseIt.get().getLongName() + " has DBID type " + candidate.getClass());
            databaseIt.advance();
        }
    }
    if (range == null) {
        LOG.warning("Cannot dump cluster assignment, as I do not have a well-defined DBIDRange to use for a unique column assignment. DBIDs must be a continuous range.");
        return;
    }
    // Record, per object, the running number of the cluster containing it.
    final WritableIntegerDataStore clusterOf = DataStoreUtil.makeIntegerStorage(range, DataStoreFactory.HINT_TEMP);
    int clusterNumber = 0;
    for (Cluster<?> cluster : c.getAllClusters()) {
        for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
            clusterOf.putInt(member, clusterNumber);
        }
        clusterNumber++;
    }
    // One column per object of the range, space separated.
    for (DBIDArrayIter column = range.iter(); column.valid(); column.advance()) {
        if (column.getOffset() > 0) {
            writer.append(' ');
        }
        final String value = Integer.toString(clusterOf.intValue(column));
        writer.append(value);
    }
    // Trailing label.
    if (forceLabel == null) {
        writer.append(' ').append(c.getLongName());
    } else if (forceLabel.length() > 0) {
        writer.append(' ').append(forceLabel);
    }
    writer.append('\n');
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) Database(de.lmu.ifi.dbs.elki.database.Database) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)64 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)15 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)13 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)12 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)9 Test (org.junit.Test)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 IOException (java.io.IOException)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)3 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3