use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
the class Eclat method mergeJoin.
/**
 * Intersects two DBID collections with a single synchronized pass over both.
 * <p>
 * Both iterators are advanced according to {@code DBIDUtil.compare}, so the
 * result is only a complete intersection if both inputs iterate in ascending
 * order with respect to that comparison (assumed to hold for the callers;
 * hash sets are rejected by the assertions).
 *
 * @param first First DBID collection (must not be a hash set)
 * @param second Second DBID collection (must not be a hash set)
 * @return New array containing the DBIDs present in both inputs
 */
private DBIDs mergeJoin(DBIDs first, DBIDs second) {
  assert (!(first instanceof HashSetDBIDs));
  assert (!(second instanceof HashSetDBIDs));
  final ArrayModifiableDBIDs common = DBIDUtil.newArray();
  final DBIDIter left = first.iter();
  final DBIDIter right = second.iter();
  while (left.valid() && right.valid()) {
    final int order = DBIDUtil.compare(left, right);
    if (order == 0) {
      // Present on both sides: keep it and step both cursors.
      common.add(left);
      left.advance();
      right.advance();
      continue;
    }
    // Step whichever side is currently behind.
    if (order < 0) {
      left.advance();
    } else {
      right.advance();
    }
  }
  return common;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
the class FarthestPointsInitialMeans method chooseInitialMedoids.
/**
 * Chooses initial medoids by farthest-point traversal: starting from a random
 * object, repeatedly pick the candidate whose distance to its nearest
 * already-chosen medoid is largest.
 * <p>
 * If {@code dropfirst} is set, the random starting object is only used to find
 * the first real medoid and is then discarded from the result.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects; all medoids are taken from this set
 * @param distQ Distance query used to compare candidates with medoids
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  // Per candidate: smallest distance to any medoid chosen so far.
  // NaN marks candidates that have already been chosen.
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  DBIDVar prevmean = DBIDUtil.newVar(first);
  means.add(first);
  DBIDVar best = DBIDUtil.newVar(first);
  for (int i = (dropfirst ? 0 : 1); i < k; i++) {
    // Find farthest object:
    double maxdist = Double.NEGATIVE_INFINITY;
    // Scan the candidate set itself (not the whole relation): the store is
    // only allocated for ids, and medoids must come from ids.
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final double prev = store.doubleValue(it);
      if (prev != prev) {
        // NaN: already chosen!
        continue;
      }
      double val = Math.min(prev, distQ.distance(prevmean, it));
      // Don't store distance to first mean, when it will be dropped below.
      if (i > 0) {
        store.putDouble(it, val);
      }
      if (val > maxdist) {
        maxdist = val;
        best.set(it);
      }
    }
    // Add new mean:
    if (i == 0) {
      // Remove temporary first element.
      means.clear();
    }
    // So it won't be chosen twice.
    store.putDouble(best, Double.NaN);
    prevmean.set(best);
    means.add(best);
  }
  return means;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
the class FirstKInitialMeans method chooseInitialMedoids.
/**
 * Uses the first {@code k} objects of {@code ids}, in iteration order, as
 * initial medoids. Fewer than {@code k} are returned if {@code ids} is
 * exhausted earlier.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects
 * @param distanceFunction Distance query (not used by this strategy)
 * @return The first (up to) {@code k} candidates
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distanceFunction) {
  final DBIDIter cursor = ids.iter();
  final ArrayModifiableDBIDs chosen = DBIDUtil.newArray(k);
  int remaining = k;
  while (remaining > 0 && cursor.valid()) {
    chosen.add(cursor);
    cursor.advance();
    remaining--;
  }
  return chosen;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.
/**
 * Chooses initial medoids by k-means++ style weighted random sampling.
 * <p>
 * The first medoid is a uniform random sample of {@code ids}. Every further
 * medoid is drawn with probability proportional to its current weight, as
 * maintained by {@code initialWeights} and {@code updateWeights} (presumably
 * the squared distance to the nearest medoid chosen so far; see those helpers
 * to confirm).
 *
 * @param k Number of medoids to choose; for {@code k <= 1} only the initial
 *        random sample is returned
 * @param ids Candidate objects
 * @param distQ Distance query; also provides the relation used to look up
 *        the objects
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked")
  final Relation<O> rel = (Relation<O>) distQ.getRelation();
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  Random random = rnd.getSingleThreadedRandom();
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  means.add(first);
  if (k <= 1) {
    // Nothing more to sample; the loop below always adds at least one medoid.
    return means;
  }
  // Initialize weights
  double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
  while (true) {
    if (weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if (weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
    }
    double r = random.nextDouble() * weightsum;
    while (r <= 0 && weightsum > Double.MIN_NORMAL) {
      // Try harder to not choose 0.
      r = random.nextDouble() * weightsum;
    }
    // Roulette-wheel selection: walk the candidates, consuming r by each
    // weight, and stop ON the element that exhausts it. The iterator must not
    // be advanced past that element, otherwise its successor is selected.
    DBIDIter it = ids.iter();
    while (it.valid()) {
      r -= weights.doubleValue(it);
      if (r <= 0.) {
        break;
      }
      it.advance();
    }
    // NOTE(review): if rounding leaves r > 0 after the last candidate, the
    // iterator is exhausted at this point; this case is not handled here.
    // Add new mean:
    means.add(it);
    if (means.size() >= k) {
      break;
    }
    // Update weights:
    weights.putDouble(it, 0.);
    weightsum = updateWeights(weights, ids, rel.get(it), distQ);
  }
  return means;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
the class ParallelLloydKMeans method run.
/**
 * Runs Lloyd-style k-means iterations via {@code ParallelExecutor} and wraps
 * the final assignment as a clustering.
 * <p>
 * Iteration stops as soon as a pass changes no assignment, or after
 * {@code maxiter} passes when {@code maxiter} is positive. Clusters that end
 * up empty are omitted from the result.
 *
 * @param database Database, passed on to the initialization strategy
 * @param relation Relation to cluster
 * @return Clustering with one top-level cluster per non-empty partition
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  // Starting means from the configured initialization strategy.
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Cluster index per object; -1 until an object has been assigned.
  final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  final double[] varsum = new double[k];
  final KMeansProcessor<V> processor = new KMeansProcessor<>(relation, distanceFunction, assignment, varsum);

  IndefiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new IndefiniteProgress("K-Means iteration", LOG);
  }
  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(progress);
    processor.nextIteration(means);
    ParallelExecutor.run(ids, processor);
    if (!processor.changed()) {
      // Converged: no object switched clusters in this pass.
      break;
    }
    means = processor.getMeans();
    iteration++;
  }
  LOG.setCompleted(progress);

  // Build the result, one cluster per non-empty partition.
  final ArrayModifiableDBIDs[] partitions = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
  final Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < partitions.length; c++) {
    final DBIDs members = partitions[c];
    if (members.size() != 0) {
      result.addToplevelCluster(new Cluster<>(members, new KMeansModel(means[c], varsum[c])));
    }
  }
  return result;
}
Aggregations