use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class FirstKInitialMeans method chooseInitialMeans.
/**
 * Use the first {@code k} objects of the relation, in iteration order, as
 * initial means.
 *
 * If the relation yields fewer than {@code k} objects, the remaining rows of
 * the returned array are left {@code null}.
 *
 * @param database Database (not used by this method)
 * @param relation Relation to take the vectors from
 * @param k Number of means to choose
 * @param distanceFunction Distance function (not used by this method)
 * @return Array with {@code k} rows, filled from the first objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  final DBIDIter cursor = relation.iterDBIDs();
  final double[][] result = new double[k][];
  int filled = 0;
  while (filled < k && cursor.valid()) {
    // Copy the vector of the current object into the next free row.
    result[filled] = relation.get(cursor).toArray();
    filled++;
    cursor.advance();
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class FirstKInitialMeans method chooseInitialMedoids.
/**
 * Use the first {@code k} ids, in iteration order, as initial medoids.
 *
 * If {@code ids} yields fewer than {@code k} objects, the result contains
 * only those that were available.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate ids
 * @param distanceFunction Distance query (not used by this method)
 * @return The chosen ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distanceFunction) {
  final DBIDIter cursor = ids.iter();
  final ArrayModifiableDBIDs chosen = DBIDUtil.newArray(k);
  int remaining = k;
  while (remaining > 0 && cursor.valid()) {
    chosen.add(cursor);
    remaining--;
    cursor.advance();
  }
  return chosen;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method chooseInitialMeans.
/**
 * Choose {@code k} initial means by weighted random sampling: the first mean
 * is a uniformly sampled object, every further mean is drawn with probability
 * proportional to the weight currently stored for each object (as maintained
 * by {@code initialWeights} and {@code updateWeights}).
 *
 * Changes compared to the previous implementation:
 * <ul>
 * <li>The sampling loop now stops <em>on</em> the object whose cumulative
 * weight first exceeds the random threshold. Previously the iterator was
 * advanced once more before the loop condition was re-checked, so the object
 * <em>after</em> the sampled one was used.</li>
 * <li>Exactly {@code k} means are returned; previously {@code k == 1}
 * produced two means because a second one was added before the size
 * check.</li>
 * <li>The temporary weight storage is released on every exit path, and the
 * size check runs before it is allocated.</li>
 * </ul>
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation holding the vectors
 * @param k Number of means to choose
 * @param distanceFunction Distance function
 * @return The chosen means, converted by {@code unboxVectors}
 * @throws AbortException if the relation does not contain more than
 *         {@code k} objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  DBIDs ids = relation.getDBIDs();
  // Validate before allocating the temporary storage, so nothing leaks on abort.
  if (ids.size() <= k) {
    throw new AbortException("Don't use k-means with k >= data set size.");
  }
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  try {
    // Choose first mean
    List<NumberVector> means = new ArrayList<>(k);
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    T firstvec = relation.get(first);
    means.add(firstvec);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, firstvec, distQ);
    while (means.size() < k) {
      if (weightsum > Double.MAX_VALUE) {
        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
      }
      if (weightsum < Double.MIN_NORMAL) {
        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
      }
      DBIDIter it = ids.iter();
      // Only sample when the sum is a usable positive finite number (this
      // comparison is also false for NaN). Otherwise fall through with the
      // iterator still on the first object, as a best-effort choice.
      if (weightsum > 0. && weightsum <= Double.MAX_VALUE) {
        double r = random.nextDouble() * weightsum, s = 0.;
        for (; it.valid(); it.advance()) {
          s += weights.doubleValue(it);
          // Strict comparison: objects with weight 0 never cross the
          // threshold, so they cannot be selected here.
          if (s > r) {
            break;
          }
        }
        if (!it.valid()) {
          // Rare case, but happens due to floating math: the weights add up
          // to less than (or exactly) r. Continue with the sum actually
          // observed and retry.
          weightsum = s;
          continue;
        }
      }
      // Add new mean:
      final T newmean = relation.get(it);
      means.add(newmean);
      if (means.size() >= k) {
        break;
      }
      // Update weights; the chosen object gets weight 0.
      weights.putDouble(it, 0.);
      weightsum = updateWeights(weights, ids, newmean, distQ);
    }
    return unboxVectors(means);
  }
  finally {
    // Explicitly destroy temporary data, also when aborting with an exception.
    weights.destroy();
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.
@Override
/**
 * Choose {@code k} initial medoids by weighted random sampling: the first
 * medoid is a uniformly sampled id, every further medoid is drawn with
 * probability proportional to the weight currently stored for each object
 * (as maintained by {@code initialWeights} and {@code updateWeights}).
 *
 * Changes compared to the previous implementation:
 * <ul>
 * <li>The sampling loop now stops <em>on</em> the object whose cumulative
 * weight first exceeds the random threshold. Previously the iterator was
 * advanced once more before the loop condition was re-checked, so the object
 * <em>after</em> the sampled one was used.</li>
 * <li>An exhausted iterator is never added to the result; sampling is
 * retried instead.</li>
 * <li>Exactly {@code k} medoids are returned when sampling succeeds;
 * previously {@code k == 1} produced two medoids.</li>
 * <li>The temporary weight storage is released on every exit path.</li>
 * </ul>
 *
 * @param k Number of medoids to choose
 * @param ids Candidate ids
 * @param distQ Distance query; its relation is used to fetch the objects
 * @return The chosen medoid ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked")
  final Relation<O> rel = (Relation<O>) distQ.getRelation();
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  try {
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    means.add(first);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
    while (means.size() < k) {
      if (weightsum > Double.MAX_VALUE) {
        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
      }
      if (weightsum < Double.MIN_NORMAL) {
        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
      }
      // NOTE(review): assumes ids is non-empty here, since a first medoid
      // was sampled from it above - confirm randomSample's contract.
      DBIDIter it = ids.iter();
      // Only sample when the sum is a usable positive finite number (this
      // comparison is also false for NaN). Otherwise fall through with the
      // iterator still on the first object, as a best-effort choice.
      if (weightsum > 0. && weightsum <= Double.MAX_VALUE) {
        double r = random.nextDouble() * weightsum, s = 0.;
        while (r <= 0 && weightsum > Double.MIN_NORMAL) {
          // Try harder to not choose 0.
          r = random.nextDouble() * weightsum;
        }
        for (; it.valid(); it.advance()) {
          s += weights.doubleValue(it);
          // Strict comparison: objects with weight 0 never cross the
          // threshold, so they cannot be selected here.
          if (s > r) {
            break;
          }
        }
        if (!it.valid()) {
          // Floating point rounding: the weights add up to less than (or
          // exactly) r. Continue with the sum actually observed and retry.
          weightsum = s;
          continue;
        }
      }
      // Add new mean:
      means.add(it);
      if (means.size() >= k) {
        break;
      }
      // Update weights; the chosen object gets weight 0.
      weights.putDouble(it, 0.);
      weightsum = updateWeights(weights, ids, rel.get(it), distQ);
    }
    return means;
  }
  finally {
    // Explicitly destroy temporary data.
    weights.destroy();
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method updateWeights.
/**
 * Refresh the per-object weights after a new center was added, and compute
 * their sum.
 *
 * Objects whose stored weight is {@code <= 0} are skipped entirely. For every
 * other object, the stored weight is lowered to the distance to the new
 * center if that distance is smaller.
 *
 * @param weights Weight storage, updated in place
 * @param ids IDs to process
 * @param latest Most recently added center object
 * @param distQ Distance query
 * @return Sum of the weights of all objects that were not skipped
 * @param <T> Object type
 */
protected <T> double updateWeights(WritableDoubleDataStore weights, DBIDs ids, T latest, DistanceQuery<? super T> distQ) {
  double total = 0.;
  DBIDIter cursor = ids.iter();
  while (cursor.valid()) {
    final double current = weights.doubleValue(cursor);
    // Negated "<=" rather than ">", so a NaN weight is processed, not skipped.
    if (!(current <= 0.)) {
      final double candidate = distQ.distance(latest, cursor);
      if (candidate < current) {
        // The new center is closer: store and count the smaller value.
        weights.putDouble(cursor, candidate);
        total += candidate;
      }
      else {
        total += current;
      }
    }
    // else: duplicate, or already chosen.
    cursor.advance();
  }
  return total;
}
Aggregations