Search in sources :

Example 1 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class Eclat method mergeJoin.

private DBIDs mergeJoin(DBIDs first, DBIDs second) {
    assert (!(first instanceof HashSetDBIDs));
    assert (!(second instanceof HashSetDBIDs));
    ArrayModifiableDBIDs ids = DBIDUtil.newArray();
    DBIDIter i1 = first.iter(), i2 = second.iter();
    while (i1.valid() && i2.valid()) {
        int c = DBIDUtil.compare(i1, i2);
        if (c < 0) {
            i1.advance();
        } else if (c > 0) {
            i2.advance();
        } else {
            ids.add(i1);
            i1.advance();
            i2.advance();
        }
    }
    return ids;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) HashSetDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class FarthestPointsInitialMeans method chooseInitialMedoids.

@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    @SuppressWarnings("unchecked") final Relation<O> relation = (Relation<O>) distQ.getRelation();
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    DBIDVar prevmean = DBIDUtil.newVar(first);
    means.add(first);
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object:
        double maxdist = Double.NEGATIVE_INFINITY;
        for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double val = Math.min(prev, distQ.distance(prevmean, it));
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, val);
            }
            if (val > maxdist) {
                maxdist = val;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class FirstKInitialMeans method chooseInitialMedoids.

@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distanceFunction) {
    DBIDIter iter = ids.iter();
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    for (int i = 0; i < k && iter.valid(); i++, iter.advance()) {
        means.add(iter);
    }
    return means;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.

@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    @SuppressWarnings("unchecked") final Relation<O> rel = (Relation<O>) distQ.getRelation();
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    means.add(first);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
    while (true) {
        if (weightsum > Double.MAX_VALUE) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
        }
        if (weightsum < Double.MIN_NORMAL) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
        }
        double r = random.nextDouble() * weightsum;
        while (r <= 0 && weightsum > Double.MIN_NORMAL) {
            // Try harder to not choose 0.
            r = random.nextDouble() * weightsum;
        }
        DBIDIter it = ids.iter();
        for (; r > 0. && it.valid(); it.advance()) {
            r -= weights.doubleValue(it);
        }
        // Add new mean:
        means.add(it);
        if (means.size() >= k) {
            break;
        }
        // Update weights:
        weights.putDouble(it, 0.);
        weightsum = updateWeights(weights, ids, rel.get(it), distQ);
    }
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class ParallelLloydKMeans method run.

@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    DBIDs ids = relation.getDBIDs();
    // Choose initial means
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Store for current cluster assignment.
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    KMeansProcessor<V> kmm = new KMeansProcessor<>(relation, distanceFunction, assignment, varsum);
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        kmm.nextIteration(means);
        ParallelExecutor.run(ids, kmm);
        // Stop if no cluster assignment changed.
        if (!kmm.changed()) {
            break;
        }
        means = kmm.getMeans();
    }
    LOG.setCompleted(prog);
    // Wrap result
    ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < clusters.length; i++) {
        DBIDs cids = clusters[i];
        if (cids.size() == 0) {
            continue;
        }
        KMeansModel model = new KMeansModel(means[i], varsum[i]);
        result.addToplevelCluster(new Cluster<>(cids, model));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)

Aggregations

ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)49 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)23 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)12 ArrayList (java.util.ArrayList)11 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)10 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)9 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)7 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)5 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)5 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)4 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)4 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)4 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)4 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)4 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)4