Search in sources :

Example 6 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class FarthestSumPointsInitialMeans method chooseInitialMeans.

/**
 * Choose initial means by a greedy "farthest sum" traversal: starting from a
 * randomly sampled object, each round picks the not-yet-chosen object whose
 * summed distance to all previously chosen means is largest.
 *
 * When {@code dropfirst} is set, the random starting object is only used to
 * find the first real mean and is then discarded (it stays eligible for later
 * rounds). Otherwise the starting object is kept as the first mean and is
 * excluded from all later rounds.
 *
 * If fewer unchosen objects remain than means are requested, the inner scan
 * finds no candidate and the previously chosen object is added again.
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function
 * @return the chosen means, as produced by {@code unboxVectors}
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    // Get a distance query
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    // Per-object running sum of distances to the chosen means; NaN marks
    // objects that have already been chosen.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // Choose first mean
    List<T> means = new ArrayList<>(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    T prevmean = relation.get(first);
    means.add(prevmean);
    if (!dropfirst) {
        // The first mean is part of the result: exclude it from later rounds,
        // otherwise its growing distance sum could get it selected a second time.
        store.putDouble(first, Double.NaN);
    }
    // Find farthest object each.
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        double maxdist = Double.NEGATIVE_INFINITY;
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double dsum = prev + distQ.distance(prevmean, it);
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, dsum);
            }
            if (dsum > maxdist) {
                maxdist = dsum;
                best.set(it);
            }
        }
        // Add new mean (and drop the initial mean when desired)
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean = relation.get(best);
        means.add(prevmean);
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return unboxVectors(means);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 7 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class SameSizeKMeansAlgorithm method refineResult.

/**
 * Perform k-means style iterations to improve the clustering result.
 *
 * Each iteration refreshes the per-object metadata via
 * {@code updateDistances}, sorts {@code tids} with a priority comparator, and
 * then tries to move every object to another cluster: either as a pairwise
 * swap with an object waiting in the target cluster's transfer list, or as a
 * single move when the cluster sizes permit. Iteration stops once a pass
 * performs no transfers, or after {@code maxiter} passes when
 * {@code maxiter > 0} (a value {@code <= 0} means no iteration limit).
 *
 * @param relation Data relation
 * @param means Means list
 * @param clusters Cluster list
 * @param metas Metadata storage
 * @param tids DBIDs array
 * @return final means
 */
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
    NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
    // Our desired cluster size:
    // rounded down
    final int minsize = tids.size() / k;
    // rounded up
    final int maxsize = (tids.size() + k - 1) / k;
    // Comparator: sort by largest gain by transfer
    // NOTE(review): this orders by ascending priority(); whether that puts the
    // largest gain first depends on how Meta.priority() is defined - confirm.
    final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {

        @Override
        public int compare(DBIDRef o1, DBIDRef o2) {
            Meta c1 = metas.get(o1), c2 = metas.get(o2);
            return Double.compare(c1.priority(), c2.priority());
        }
    };
    // List for sorting cluster preferences
    final int[] preferences = MathUtil.sequence(0, k);
    // Comparator for this list.
    final PreferenceComparator pcomp = new PreferenceComparator();
    // Initialize transfer lists:
    // one list per cluster, holding objects that would prefer to leave it.
    ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
    for (int i = 0; i < k; i++) {
        transfers[i] = DBIDUtil.newArray();
    }
    // Single iterator over tids, rewound with seek(0) in every pass.
    DBIDArrayIter id = tids.iter();
    for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
        updateDistances(relation, means, metas, df);
        tids.sort(comp);
        // Track if anything has changed
        int active = 0;
        for (id.seek(0); id.valid(); id.advance()) {
            Meta c = metas.get(id);
            // Re-sort the cluster indexes by this object's preference.
            IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
            ModifiableDBIDs source = clusters.get(c.primary);
            assert (source.contains(id));
            tloop: for (int i : preferences) {
                if (i == c.primary) {
                    // Already assigned here
                    continue;
                }
                ModifiableDBIDs dest = clusters.get(i);
                // Can we pair this transfer?
                final double gain = c.gain(i);
                for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
                    Meta c2 = metas.get(other);
                    // Swap only if the combined gain of both moves is positive.
                    if (gain + c2.gain(c.primary) > 0) {
                        transfer(metas, c2, dest, source, other, c.primary);
                        transfer(metas, c, source, dest, id, i);
                        active += 2;
                        // last, as this invalidates the reference!
                        other.remove();
                        // We are assigned here now.
                        source = dest;
                        // Can try another transfer, with next cluster.
                        continue tloop;
                    }
                }
                // If cluster sizes allow, move a single object.
                if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
                    transfer(metas, c, source, dest, id, i);
                    active += 1;
                    // We are assigned here now.
                    source = dest;
                    continue tloop;
                }
            }
            // If the object is still not in its most preferred cluster, and that
            // cluster is strictly closer, queue it on its current cluster's
            // transfer list.
            if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
                transfers[c.primary].add(id);
            }
        }
        // TODO: try to get more transfers out of the transfer lists done by
        // considering more than one object?
        int pending = 0;
        // Clear transfer lists for next iteration.
        for (int i = 0; i < k; i++) {
            pending += transfers[i].size();
            transfers[i].clear();
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
        }
        // Converged: this pass moved nothing.
        if (active <= 0) {
            break;
        }
        // Recompute means after reassignment
        means = means(clusters, means, relation);
    }
    return means;
}
Also used : DBIDMIter(de.lmu.ifi.dbs.elki.database.ids.DBIDMIter) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) Comparator(java.util.Comparator) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 8 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class CLINK method clinkstep4567.

/**
 * Fourth to seventh step of CLINK: find best insertion
 *
 * Scans the objects at offsets {@code n - 1} down to the start of
 * {@code ids} to locate the insertion position {@code a}, links {@code a} to
 * the current object, and then, if {@code a} is not the object at offset
 * {@code n - 1}, rewires the parent chain that started at the old
 * {@code pi[a]}.
 *
 * @param id Current object
 * @param ids All objects
 * @param it Iterator
 * @param n Index threshold
 * @param pi Parent data store
 * @param lambda Height data store
 * @param m Distance data store
 */
private void clinkstep4567(DBIDRef id, ArrayDBIDs ids, DBIDArrayIter it, int n, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableDoubleDataStore m) {
    // step 4: a = n
    // (a is a separate iterator, positioned at offset n - 1)
    DBIDArrayIter a = ids.iter().seek(n - 1);
    // step 5:
    {
        // Reusable variable for pi[it]; presumably from() loads the stored
        // DBID and returns the variable itself.
        DBIDVar p_i = DBIDUtil.newVar();
        // Walk backwards from offset n - 1.
        for (it.seek(n - 1); it.valid(); it.retract()) {
            double l_i = lambda.doubleValue(it);
            double mp_i = m.doubleValue(p_i.from(pi, it));
            if (l_i >= mp_i) {
                // Candidate: keep the one with the smallest m value in a.
                if (m.doubleValue(it) < m.doubleValue(a)) {
                    a.seek(it.getOffset());
                }
            } else {
                // Not a candidate: overwrite its m value with +infinity.
                m.putDouble(it, Double.POSITIVE_INFINITY);
            }
        }
    }
    // step 6
    // b = pi[a]
    DBIDVar b = DBIDUtil.newVar().from(pi, a);
    // c = lambda[a], remembered before it is overwritten below
    double c = lambda.doubleValue(a);
    // pi[a] = current object, lambda[a] = m[a]
    pi.putDBID(a, id);
    lambda.putDouble(a, m.doubleValue(a));
    // step 7
    if (a.getOffset() < n - 1) {
        // Used below
        // (the object at offset n - 1; note this repositions "it")
        DBIDRef last = DBIDUtil.newVar(it.seek(n - 1));
        DBIDVar d = DBIDUtil.newVar();
        // if b < n: (then goto 7)
        // Follow the old parent chain until it reaches the current object.
        while (!DBIDUtil.equal(b, id)) {
            if (DBIDUtil.equal(b, last)) {
                // Reached the object at offset n - 1: link it to the current
                // object with the carried height and stop.
                pi.putDBID(b, id);
                lambda.putDouble(b, c);
                break;
            }
            // d = pi[b]
            d.from(pi, b);
            // pi[b] = n + 1
            pi.putDBID(b, id);
            // c = old l[b], l[b] = c
            c = lambda.putDouble(b, c);
            // b = d = old pi[b]
            b.set(d);
        }
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 9 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class FarthestPointsInitialMeans method chooseInitialMeans.

/**
 * Pick initial means by a greedy farthest-point traversal.
 *
 * A random object serves as the seed. In every round, each not-yet-chosen
 * object's distance to its nearest chosen mean is refreshed, and the object
 * with the largest such distance becomes the next mean. When
 * {@code dropfirst} is set, the seed only serves to find the first real mean
 * and its slot in the result is overwritten.
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function
 * @return array of k means
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    final DistanceQuery<T> query = database.getDistanceQuery(relation, distanceFunction);
    final DBIDs candidates = relation.getDBIDs();
    // Distance of every object to its nearest chosen mean; NaN = already chosen.
    final WritableDoubleDataStore nearest = DataStoreUtil.makeDoubleStorage(candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
    final double[][] result = new double[k][];

    // Random seed object; occupies slot 0 unless it is dropped below.
    final DBIDRef seed = DBIDUtil.randomSample(candidates, rnd);
    T latest = relation.get(seed);
    result[0] = latest.toArray();

    final DBIDVar winner = DBIDUtil.newVar(seed);
    int slot = dropfirst ? 0 : 1;
    while (slot < k) {
        // Distances to the seed are not recorded when the seed gets dropped.
        final boolean record = slot > 0;
        double widest = Double.NEGATIVE_INFINITY;
        for (DBIDIter cur = candidates.iter(); cur.valid(); cur.advance()) {
            final double known = nearest.doubleValue(cur);
            if (!Double.isNaN(known)) {
                final double closest = Math.min(known, query.distance(latest, cur));
                if (record) {
                    nearest.putDouble(cur, closest);
                }
                if (closest > widest) {
                    widest = closest;
                    winner.set(cur);
                }
            }
        }
        // Flag the winner as chosen so later rounds skip it.
        nearest.putDouble(winner, Double.NaN);
        latest = relation.get(winner);
        result[slot] = latest.toArray();
        slot++;
    }
    // Release the temporary storage.
    nearest.destroy();
    return result;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 10 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class FarthestSumPointsInitialMeans method chooseInitialMedoids.

/**
 * Choose initial medoids by a greedy "farthest sum" traversal: starting from
 * a randomly sampled object, each round picks the not-yet-chosen object of
 * {@code ids} whose summed distance to all previously chosen medoids is
 * largest.
 *
 * When {@code dropfirst} is set, the random starting object is only used to
 * find the first real medoid and is then discarded (it stays eligible for
 * later rounds). Otherwise the starting object is kept as the first medoid
 * and is excluded from all later rounds.
 *
 * If fewer unchosen objects remain than medoids are requested, the inner
 * scan finds no candidate and the previously chosen object is added again.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects to choose from
 * @param distQ Distance query
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    // Per-object running sum of distances to the chosen medoids; NaN marks
    // objects that have already been chosen.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    means.add(first);
    if (!dropfirst) {
        // The first medoid is part of the result: exclude it from later rounds,
        // otherwise its growing distance sum could get it selected a second time.
        store.putDouble(first, Double.NaN);
    }
    DBIDVar prevmean = DBIDUtil.newVar(first);
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object:
        double maxdist = Double.NEGATIVE_INFINITY;
        // Only scan the candidate set: the store is allocated for ids, and
        // medoids must be chosen from ids, not from the whole relation.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double dsum = prev + distQ.distance(prevmean, it);
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, dsum);
            }
            if (dsum > maxdist) {
                maxdist = dsum;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)11 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)6 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)5 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)3 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)3 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 IntegerComparator (de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator)2 ArrayList (java.util.ArrayList)2 Comparator (java.util.Comparator)2 Random (java.util.Random)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)1 DBIDMIter (de.lmu.ifi.dbs.elki.database.ids.DBIDMIter)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1