Search in sources :

Example 16 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class FirstKInitialMeans method chooseInitialMeans.

/**
 * Use the first {@code k} objects of the relation, in iteration order, as
 * initial means.
 *
 * If the relation yields fewer than {@code k} objects, the remaining rows of
 * the returned array are left {@code null}.
 *
 * @param database Database (not used by this strategy)
 * @param relation Relation to take the vectors from
 * @param k Number of means to choose
 * @param distanceFunction Distance function (not used by this strategy)
 * @return Array with {@code k} rows, one copied vector per chosen object
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    final DBIDIter cursor = relation.iterDBIDs();
    final double[][] result = new double[k][];
    int filled = 0;
    while (filled < k && cursor.valid()) {
        // Copy the vector the cursor currently points at into the next free row.
        result[filled] = relation.get(cursor).toArray();
        filled++;
        cursor.advance();
    }
    return result;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 17 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class FirstKInitialMeans method chooseInitialMedoids.

/**
 * Use the first {@code k} ids, in iteration order, as initial medoids.
 *
 * If {@code ids} yields fewer than {@code k} entries, the result contains
 * only as many ids as were available.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate ids
 * @param distanceFunction Distance query (not used by this strategy)
 * @return The chosen ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distanceFunction) {
    final DBIDIter cursor = ids.iter();
    final ArrayModifiableDBIDs chosen = DBIDUtil.newArray(k);
    int taken = 0;
    while (taken < k && cursor.valid()) {
        chosen.add(cursor);
        taken++;
        cursor.advance();
    }
    return chosen;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 18 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMeans.

/**
 * Choose initial means by weighted random sampling (k-means++ style): the
 * first mean is a uniformly sampled object, every further mean is drawn with
 * probability proportional to its current weight in the weight store.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation to choose the means from
 * @param k Number of means to choose
 * @param distanceFunction Distance function used for weighting
 * @return The chosen means, one row per mean
 * @throws AbortException if the relation does not contain more than
 *         {@code k} objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    // Validate before allocating the temporary weight storage, so that nothing
    // is left allocated when we abort.
    if (ids.size() <= k) {
        throw new AbortException("Don't use k-means with k >= data set size.");
    }
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    try {
        // Choose first mean
        List<NumberVector> means = new ArrayList<>(k);
        Random random = rnd.getSingleThreadedRandom();
        DBIDRef first = DBIDUtil.randomSample(ids, random);
        T firstvec = relation.get(first);
        means.add(firstvec);
        // Initialize weights
        double weightsum = initialWeights(weights, ids, firstvec, distQ);
        // Loop condition guards k == 1: the first mean already suffices then.
        while (means.size() < k) {
            if (weightsum > Double.MAX_VALUE) {
                LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
            }
            if (weightsum < Double.MIN_NORMAL) {
                LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
            }
            double r = random.nextDouble() * weightsum, s = 0.;
            DBIDIter it = ids.iter();
            for (; it.valid(); it.advance()) {
                s += weights.doubleValue(it);
                // Stop ON the object whose cumulative weight reaches r, without
                // advancing past it. Negated comparison also stops on NaN.
                if (!(s < r)) {
                    break;
                }
            }
            if (!it.valid()) {
                // Rare case, but happens due to floating math: the stored
                // weights sum up to less than weightsum.
                // Decrease
                weightsum -= (r - s);
                // Retry
                continue;
            }
            // Add new mean:
            final T newmean = relation.get(it);
            means.add(newmean);
            if (means.size() >= k) {
                // Done; skip the weight update that nobody would read.
                break;
            }
            // Update weights:
            weights.putDouble(it, 0.);
            // Choose optimized version for double distances, if applicable.
            weightsum = updateWeights(weights, ids, newmean, distQ);
        }
        return unboxVectors(means);
    } finally {
        // Explicitly destroy temporary data, also on exceptions.
        weights.destroy();
    }
}
Also used : Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 19 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.

/**
 * Choose initial medoids by weighted random sampling (k-means++ style): the
 * first medoid is a uniformly sampled id, every further medoid is drawn with
 * probability proportional to its current weight in the weight store.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate ids
 * @param distQ Distance query; its relation is used to fetch the objects
 * @return The chosen ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    @SuppressWarnings("unchecked") final Relation<O> rel = (Relation<O>) distQ.getRelation();
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    try {
        Random random = rnd.getSingleThreadedRandom();
        DBIDRef first = DBIDUtil.randomSample(ids, random);
        means.add(first);
        // Initialize weights
        double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
        // Loop condition guards k == 1: the first medoid already suffices then.
        while (means.size() < k) {
            if (weightsum > Double.MAX_VALUE) {
                LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
            }
            if (weightsum < Double.MIN_NORMAL) {
                LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
            }
            double r = random.nextDouble() * weightsum;
            while (r <= 0 && weightsum > Double.MIN_NORMAL) {
                // Try harder to not choose 0.
                r = random.nextDouble() * weightsum;
            }
            DBIDIter it = ids.iter();
            for (; it.valid(); it.advance()) {
                r -= weights.doubleValue(it);
                // Stop ON the object that consumes the remaining r, without
                // advancing past it. Negated comparison also stops on NaN.
                if (!(r > 0.)) {
                    break;
                }
            }
            if (!it.valid()) {
                // Floating point rounding: the stored weights sum up to less
                // than weightsum. Shrink by the unconsumed remainder and retry
                // instead of adding an exhausted iterator.
                weightsum -= r;
                continue;
            }
            // Add new mean:
            means.add(it);
            if (means.size() >= k) {
                // Done; skip the weight update that nobody would read.
                break;
            }
            // Update weights:
            weights.putDouble(it, 0.);
            weightsum = updateWeights(weights, ids, rel.get(it), distQ);
        }
        return means;
    } finally {
        // Explicitly destroy temporary data, also on exceptions.
        weights.destroy();
    }
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 20 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method updateWeights.

/**
 * Refresh the weights after a new center was added, and sum them up.
 *
 * Entries whose stored weight is not positive are skipped and do not
 * contribute to the sum. For every other entry, the stored weight is lowered
 * to the distance from {@code latest} when that distance is smaller.
 *
 * @param <T> Object type
 * @param weights Weight store, updated in place
 * @param ids IDs to process
 * @param latest Most recently added object
 * @param distQ Distance query
 * @return Sum of the weights of all entries that were not skipped
 */
protected <T> double updateWeights(WritableDoubleDataStore weights, DBIDs ids, T latest, DistanceQuery<? super T> distQ) {
    double total = 0.;
    DBIDIter cursor = ids.iter();
    while (cursor.valid()) {
        final double current = weights.doubleValue(cursor);
        // Non-positive weight: duplicate, or already chosen.
        if (!(current <= 0.)) {
            final double candidate = distQ.distance(latest, cursor);
            if (candidate < current) {
                // The new center is closer: store and count the smaller value.
                weights.putDouble(cursor, candidate);
                total += candidate;
            } else {
                total += current;
            }
        }
        cursor.advance();
    }
    return total;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21