Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in the ELKI project:
class FarthestPointsInitialMeans, method chooseInitialMedoids.
/**
 * Choose k initial medoids by farthest-point sampling: start from a random
 * point, then repeatedly add the object with the largest minimum distance to
 * the medoids chosen so far.
 *
 * If {@code dropfirst} is set, the random seed point is only used to find the
 * true farthest point and is then discarded (loop starts at i = 0 and the
 * means list is cleared in the first iteration).
 *
 * @param k Number of medoids to choose
 * @param ids Candidate object ids
 * @param distQ Distance query to measure with
 * @return k chosen medoid ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked")
  final Relation<O> relation = (Relation<O>) distQ.getRelation();
  // Per-object minimum distance to the chosen medoids; +Infinity = "not seen yet",
  // NaN = "already chosen as a medoid" (sentinel tested via prev != prev below).
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  // Random seed point; may be dropped again below when dropfirst is set.
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  DBIDVar prevmean = DBIDUtil.newVar(first);
  means.add(first);
  DBIDVar best = DBIDUtil.newVar(first);
  for(int i = (dropfirst ? 0 : 1); i < k; i++) {
    // Find farthest object:
    double maxdist = Double.NEGATIVE_INFINITY;
    for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final double prev = store.doubleValue(it);
      if(prev != prev) {
        // NaN: already chosen!
        continue;
      }
      // Only the distance to the newest medoid needs to be computed;
      // the minimum over the older ones is cached in the store.
      double val = Math.min(prev, distQ.distance(prevmean, it));
      // Don't store distance to first mean, when it will be dropped below.
      if(i > 0) {
        store.putDouble(it, val);
      }
      if(val > maxdist) {
        maxdist = val;
        best.set(it);
      }
    }
    // Add new mean:
    if(i == 0) {
      // Remove temporary first element (dropfirst mode).
      means.clear();
    }
    // So it won't be chosen twice.
    store.putDouble(best, Double.NaN);
    prevmean.set(best);
    means.add(best);
  }
  return means;
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in the ELKI project:
class KMeansPlusPlusInitialMeans, method chooseInitialMeans.
/**
 * Choose k initial means using k-means++ weighted sampling: the first mean is
 * chosen uniformly at random, each further mean is drawn with probability
 * proportional to its (squared) distance to the nearest mean chosen so far.
 *
 * Fix over the previous version: the sampling loop advanced past the element
 * whose weight made the running sum reach the threshold r, so the NEXT element
 * was chosen instead — which could even be an already-chosen, zero-weight
 * point. The loop below stops ON the crossing element.
 *
 * @param database Database to obtain the distance query from
 * @param relation Relation holding the vectors
 * @param k Number of means to choose (must be less than the data set size)
 * @param distanceFunction Distance function (ideally squared Euclidean)
 * @return the k chosen means as a double[][] matrix
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  DBIDs ids = relation.getDBIDs();
  // Validate before allocating the temporary weight store.
  if(ids.size() <= k) {
    throw new AbortException("Don't use k-means with k >= data set size.");
  }
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Choose first mean uniformly at random.
  List<NumberVector> means = new ArrayList<>(k);
  Random random = rnd.getSingleThreadedRandom();
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  T firstvec = relation.get(first);
  means.add(firstvec);
  // Initialize weights as distances to the first mean.
  double weightsum = initialWeights(weights, ids, firstvec, distQ);
  while(true) {
    if(weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if(weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too few data points?");
    }
    // Weighted sampling: select the element whose weight makes the running
    // sum s reach the random threshold r.
    double r = random.nextDouble() * weightsum, s = 0.;
    DBIDIter it = ids.iter();
    while(it.valid()) {
      s += weights.doubleValue(it);
      if(s >= r) {
        break; // 'it' is the chosen element.
      }
      it.advance();
    }
    if(!it.valid()) {
      // Rare case: floating-point rounding made the total fall short of r.
      // Tighten the weight sum to the actually observed total and retry.
      weightsum = s;
      continue;
    }
    // Add new mean:
    final T newmean = relation.get(it);
    means.add(newmean);
    if(means.size() >= k) {
      break;
    }
    // Mark as chosen: weight 0 means it cannot be sampled again.
    weights.putDouble(it, 0.);
    // Choose optimized version for double distances, if applicable.
    weightsum = updateWeights(weights, ids, newmean, distQ);
  }
  // Explicitly destroy temporary data.
  weights.destroy();
  return unboxVectors(means);
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in the ELKI project:
class KMeansPlusPlusInitialMeans, method chooseInitialMedoids.
/**
 * Choose k initial medoids using k-means++ weighted sampling on object ids.
 *
 * Fixes over the previous version: (1) the sampling loop advanced past the
 * element that exhausted the threshold r, selecting the NEXT element instead;
 * (2) when floating-point rounding made the iterator run off the end, an
 * invalid reference was added to the result — now we retry instead (matching
 * chooseInitialMeans); (3) the temporary weight store is now destroyed.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate object ids
 * @param distQ Distance query to measure with
 * @return k chosen medoid ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked")
  final Relation<O> rel = (Relation<O>) distQ.getRelation();
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  Random random = rnd.getSingleThreadedRandom();
  // First medoid: uniformly at random.
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  means.add(first);
  // Initialize weights as distances to the first medoid.
  double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
  while(true) {
    if(weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if(weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too few unique data points?");
    }
    double r = random.nextDouble() * weightsum;
    while(r <= 0 && weightsum > Double.MIN_NORMAL) {
      // Try harder to not choose 0, which would pick a zero-weight element.
      r = random.nextDouble() * weightsum;
    }
    // Weighted sampling: select the element whose weight makes the running
    // sum s reach the random threshold r.
    DBIDIter it = ids.iter();
    double s = 0.;
    while(it.valid()) {
      s += weights.doubleValue(it);
      if(s >= r) {
        break; // 'it' is the chosen element.
      }
      it.advance();
    }
    if(!it.valid()) {
      // Rare case: floating-point rounding made the total fall short of r.
      // Tighten the weight sum to the actually observed total and retry.
      weightsum = s;
      continue;
    }
    // Add new mean:
    means.add(it);
    if(means.size() >= k) {
      break;
    }
    // Mark as chosen: weight 0 means it cannot be sampled again.
    weights.putDouble(it, 0.);
    weightsum = updateWeights(weights, ids, rel.get(it), distQ);
  }
  // Explicitly destroy temporary data.
  weights.destroy();
  return means;
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in the ELKI project:
class ArrayDBIDStore, method clear.
/**
 * Reset the store: replace every entry with the invalid-DBID placeholder
 * while keeping the store's size unchanged.
 */
@Override
public void clear() {
  // Remember the size, empty the backing array, then refill with sentinels.
  final int oldsize = data.size();
  data.clear();
  final DBIDRef sentinel = DBIDUtil.invalid();
  int remaining = oldsize;
  while(remaining-- > 0) {
    data.add(sentinel);
  }
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in the ELKI project:
class SameSizeKMeansAlgorithm, method initialAssignment.
/**
 * Perform the initial assignment of objects to clusters under the same-size
 * constraint: objects are processed in order of decreasing benefit of getting
 * their preferred cluster; whenever a cluster fills up, the remaining
 * unassigned objects that preferred it are re-routed to their best non-full
 * cluster, and the not-yet-assigned tail is re-sorted.
 *
 * @param clusters Output cluster member lists (one per cluster, filled here)
 * @param metas Per-object metadata holding distances and the preferred
 *        cluster index ({@code primary}) — updated in place
 * @param ids Objects to assign
 * @return the ids as a sorted array, in the order they were assigned
 */
protected ArrayModifiableDBIDs initialAssignment(List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, DBIDs ids) {
  // Build a sorted list of objects, by descending distance delta
  ArrayModifiableDBIDs tids = DBIDUtil.newArray(ids);
  // Our desired cluster size:
  // rounded up
  final int maxsize = (tids.size() + k - 1) / k;
  // Comparator: sort by largest benefit of assigning to preferred cluster.
  final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {
    @Override
    public int compare(DBIDRef o1, DBIDRef o2) {
      Meta c1 = metas.get(o1), c2 = metas.get(o2);
      // Negated: descending priority order.
      return -Double.compare(c1.priority(), c2.priority());
    }
  };
  // We will use this iterator below. It allows seeking!
  DBIDArrayIter id = tids.iter();
  // Initialization phase: 'start' marks the boundary between assigned
  // (prefix) and unassigned (tail) objects in tids.
  for(int start = 0; start < tids.size();) {
    // Re-sort only the unassigned tail; priorities may have changed.
    tids.sort(start, tids.size(), comp);
    for(id.seek(start); id.valid(); id.advance()) {
      Meta c = metas.get(id);
      // Assigning to best cluster - which cannot be full yet!
      ModifiableDBIDs cluster = clusters.get(c.primary);
      assert (cluster.size() <= maxsize);
      cluster.add(id);
      start++;
      // Now the cluster may have become completely filled:
      if(cluster.size() == maxsize) {
        final int full = c.primary;
        // Refresh the not yet assigned objects where necessary:
        for(id.advance(); id.valid(); id.advance()) {
          Meta ca = metas.get(id);
          if(ca.primary == full) {
            // Update the best index: scan all non-full clusters.
            for(int i = 0; i < k; i++) {
              if(i == full || clusters.get(i).size() >= maxsize) {
                continue;
              }
              // First candidate is taken unconditionally (primary still
              // points at the full cluster); later ones only if closer.
              if(ca.primary == full || ca.dists[i] < ca.dists[ca.primary]) {
                ca.primary = i;
              }
            }
            // Changed.
            metas.put(id, ca);
          }
        }
        // Break to re-sort the tail with the updated priorities.
        // not really necessary - iterator is at end anyway.
        break;
      }
    }
    // Note: we expect Candidate.a == cluster the object is assigned to!
  }
  return tids;
}
Aggregations