Search in sources :

Example 1 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class FarthestPointsInitialMeans method chooseInitialMedoids.

/**
 * Chooses {@code k} initial medoids with a farthest-point strategy: starting from a
 * randomly sampled object, each further medoid is the candidate whose minimum distance
 * to the previously chosen medoids is largest.
 *
 * If {@code dropfirst} is set, the random starting object is only used to find the first
 * real medoid and is not part of the result.
 *
 * @param k number of medoids to choose
 * @param ids candidate objects; only these are examined and returned
 * @param distQ distance query used to compare objects
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    // Per candidate: minimum distance to the medoids chosen so far. NaN marks chosen objects.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    DBIDVar prevmean = DBIDUtil.newVar(first);
    means.add(first);
    if (!dropfirst) {
        // The starting object is kept as a medoid, so it must not be chosen a second time.
        store.putDouble(first, Double.NaN);
    }
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object:
        double maxdist = Double.NEGATIVE_INFINITY;
        // Iterate the candidate ids only: the store was allocated for exactly these objects.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double val = Math.min(prev, distQ.distance(prevmean, it));
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, val);
            }
            if (val > maxdist) {
                maxdist = val;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMeans.

/**
 * Chooses {@code k} initial means by weighted random sampling: the first mean is drawn
 * uniformly at random, every further mean is drawn with probability proportional to its
 * current weight, as maintained by {@code initialWeights} and {@code updateWeights}.
 *
 * @param database database used to obtain the distance query
 * @param relation data relation to sample from
 * @param k number of means to choose
 * @param distanceFunction distance function used for the weights
 * @return the chosen means, converted by {@code unboxVectors}
 * @throws AbortException if the relation contains at most {@code k} objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    // Validate before allocating temporary storage, so nothing is left behind on failure.
    if (ids.size() <= k) {
        throw new AbortException("Don't use k-means with k >= data set size.");
    }
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // Chose first mean
    List<NumberVector> means = new ArrayList<>(k);
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    T firstvec = relation.get(first);
    means.add(firstvec);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, firstvec, distQ);
    // Loop condition guards k <= 1, where the first mean alone already suffices.
    while (means.size() < k) {
        if (weightsum > Double.MAX_VALUE) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
        }
        if (weightsum < Double.MIN_NORMAL) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
        }
        double r = random.nextDouble() * weightsum, s = 0.;
        DBIDIter it = ids.iter();
        // Stop ON the object whose cumulative weight reaches r; advancing past it would
        // select its successor instead (which may have weight 0 or not exist at all).
        while (it.valid()) {
            s += weights.doubleValue(it);
            // Negated form also terminates when r is NaN.
            if (!(s < r)) {
                break;
            }
            it.advance();
        }
        if (!it.valid()) {
            // Rare case, but happens due to floating math
            // Decrease
            weightsum -= (r - s);
            // Retry
            continue;
        }
        // Add new mean:
        final T newmean = relation.get(it);
        means.add(newmean);
        if (means.size() >= k) {
            // Done; skip the weight update that nobody would read.
            break;
        }
        // Update weights:
        weights.putDouble(it, 0.);
        // Choose optimized version for double distances, if applicable.
        weightsum = updateWeights(weights, ids, newmean, distQ);
    }
    // Explicitly destroy temporary data.
    weights.destroy();
    return unboxVectors(means);
}
Also used : Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.

/**
 * Chooses {@code k} initial medoids by weighted random sampling: the first medoid is drawn
 * uniformly at random, every further medoid is drawn with probability proportional to its
 * current weight, as maintained by {@code initialWeights} and {@code updateWeights}.
 *
 * @param k number of medoids to choose
 * @param ids candidate objects
 * @param distQ distance query used for the weights
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    @SuppressWarnings("unchecked") final Relation<O> rel = (Relation<O>) distQ.getRelation();
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    means.add(first);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
    // Loop condition guards k <= 1, where the first medoid alone already suffices.
    while (means.size() < k) {
        if (weightsum > Double.MAX_VALUE) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
        }
        if (weightsum < Double.MIN_NORMAL) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
        }
        double r = random.nextDouble() * weightsum;
        while (r <= 0 && weightsum > Double.MIN_NORMAL) {
            // Try harder to not choose 0.
            r = random.nextDouble() * weightsum;
        }
        DBIDIter it = ids.iter();
        // Stop ON the object that consumes the remaining r; advancing past it would
        // select its successor instead (which may have weight 0 or not exist at all).
        while (it.valid()) {
            r -= weights.doubleValue(it);
            // Negated form also terminates when r is NaN.
            if (!(r > 0.)) {
                break;
            }
            it.advance();
        }
        if (!it.valid()) {
            // Rare case caused by floating point rounding: the weights summed to less than
            // weightsum. Shrink the sum by the unconsumed remainder and retry.
            weightsum -= r;
            continue;
        }
        // Add new mean:
        means.add(it);
        if (means.size() >= k) {
            // Done; skip the weight update that nobody would read.
            break;
        }
        // Update weights:
        weights.putDouble(it, 0.);
        weightsum = updateWeights(weights, ids, rel.get(it), distQ);
    }
    // Explicitly destroy temporary data.
    weights.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class ArrayDBIDStore method clear.

/**
 * Resets the store: every existing slot is replaced by the invalid DBID, so the
 * number of slots stays the same.
 */
@Override
public void clear() {
    // Remember the slot count before wiping the backing array.
    final int slots = data.size();
    final DBIDRef placeholder = DBIDUtil.invalid();
    data.clear();
    // Refill with as many placeholders as there were entries.
    int remaining = slots;
    while (remaining > 0) {
        data.add(placeholder);
        remaining--;
    }
}
Also used : DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef)

Example 5 with DBIDRef

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.

the class SameSizeKMeansAlgorithm method initialAssignment.

/**
 * Assigns every object to a cluster while capping each cluster at a common maximum size.
 *
 * Unassigned objects are sorted by descending {@code Meta.priority()} and added, in that
 * order, to the cluster at index {@code primary} of their metadata. As soon as a cluster
 * reaches the maximum size, the objects still waiting that preferred this cluster get a
 * new {@code primary} among the clusters that still have room, and the remaining objects
 * are sorted again.
 *
 * @param clusters cluster member sets, indexed by cluster number; filled in place
 * @param metas per-object metadata; read for every object, and written back for objects
 *        whose preferred cluster had to change
 * @param ids objects to assign
 * @return an array copy of {@code ids}, ordered by the sequence in which objects were assigned
 */
protected ArrayModifiableDBIDs initialAssignment(List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, DBIDs ids) {
    // Build a sorted list of objects, by descending distance delta
    ArrayModifiableDBIDs tids = DBIDUtil.newArray(ids);
    // Our desired cluster size:
    // rounded up (integer ceiling of size / k; k is declared outside this method)
    final int maxsize = (tids.size() + k - 1) / k;
    // Comparator: sort by largest benefit of assigning to preferred cluster.
    // The negation turns the ascending Double.compare into a descending order.
    final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {

        @Override
        public int compare(DBIDRef o1, DBIDRef o2) {
            Meta c1 = metas.get(o1), c2 = metas.get(o2);
            return -Double.compare(c1.priority(), c2.priority());
        }
    };
    // We will use this iterator below. It allows seeking!
    DBIDArrayIter id = tids.iter();
    // Initialization phase:
    // 'start' is the index of the first unassigned object; it only grows inside the inner loop.
    for (int start = 0; start < tids.size(); ) {
        // Re-sort only the unassigned tail; positions before 'start' keep their order.
        tids.sort(start, tids.size(), comp);
        for (id.seek(start); id.valid(); id.advance()) {
            Meta c = metas.get(id);
            // Assigning to best cluster - which cannot be full yet!
            ModifiableDBIDs cluster = clusters.get(c.primary);
            assert (cluster.size() <= maxsize);
            cluster.add(id);
            start++;
            // Now the cluster may have become completely filled:
            if (cluster.size() == maxsize) {
                final int full = c.primary;
                // Refresh the not yet assigned objects where necessary:
                // This reuses the same iterator and runs it to the end of the array.
                for (id.advance(); id.valid(); id.advance()) {
                    Meta ca = metas.get(id);
                    if (ca.primary == full) {
                        // Update the best index:
                        for (int i = 0; i < k; i++) {
                            // Skip the cluster that just filled up and any other full cluster.
                            if (i == full || clusters.get(i).size() >= maxsize) {
                                continue;
                            }
                            // The first open cluster replaces the full one unconditionally;
                            // later ones only if their distance entry is smaller.
                            if (ca.primary == full || ca.dists[i] < ca.dists[ca.primary]) {
                                ca.primary = i;
                            }
                        }
                        // Changed.
                        metas.put(id, ca);
                    }
                }
                // not really necessary - iterator is at end anyway.
                break;
            }
        }
    // Note: we expect Candidate.a == cluster the object is assigned to!
    }
    return tids;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) IntegerComparator(de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator) Comparator(java.util.Comparator)

Aggregations

DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)11 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)6 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)5 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)3 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)3 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 IntegerComparator (de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator)2 ArrayList (java.util.ArrayList)2 Comparator (java.util.Comparator)2 Random (java.util.Random)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)1 DBIDMIter (de.lmu.ifi.dbs.elki.database.ids.DBIDMIter)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1