Usage example of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the ELKI project: method computeCoreDists of class AbstractHDBSCAN.
/**
 * Compute the core distances for all objects.
 *
 * The core distance of an object is its kNN distance for k = minPts, i.e.
 * the distance to the minPts-nearest neighbor, as used by HDBSCAN.
 *
 * @param ids Objects to process
 * @param knnQ kNN query to use
 * @param minPts Minimum neighborhood size
 * @return Data store with the core distance of each object
 */
protected WritableDoubleDataStore computeCoreDists(DBIDs ids, KNNQuery<O> knnQ, int minPts) {
final Logging LOG = getLogger();
final WritableDoubleDataStore coredists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
// Fix: the progress label previously said "core sizes", but this method
// computes core *distances*.
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Computing core distances", ids.size(), LOG) : null;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
// Core distance = kNN distance for the minPts-nearest neighbor.
coredists.put(iter, knnQ.getKNNForDBID(iter, minPts).getKNNDistance());
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
return coredists;
}
Usage example of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the ELKI project: method greedy of class PROCLUS.
/**
 * Returns a piercing set of m medoids from the specified sample set.
 *
 * The first medoid is drawn at random; each subsequent medoid is the sample
 * point farthest from all previously chosen medoids (greedy farthest-point
 * heuristic), tracking the minimum distance to any chosen medoid per point.
 *
 * @param distFunc the distance function
 * @param sampleSet the sample set
 * @param m the number of medoids to be returned
 * @param random random number generator
 * @return a piercing set of m medoids from the specified sample set
 */
private ArrayDBIDs greedy(DistanceQuery<V> distFunc, DBIDs sampleSet, int m, Random random) {
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(m);
ArrayModifiableDBIDs s = DBIDUtil.newArray(sampleSet);
DBIDArrayIter iter = s.iter();
DBIDVar m_i = DBIDUtil.newVar();
int size = s.size();
// Move a random element to the end, then pop() it as the first medoid.
s.swap(random.nextInt(size), --size);
medoids.add(s.pop(m_i));
if (LOG.isDebugging()) {
LOG.debugFiner("medoids " + medoids.toString());
}
// To track the current worst (= farthest) element:
int worst = -1;
double worstd = Double.NEGATIVE_INFINITY;
// compute distances between each point in S and m_i
WritableDoubleDataStore distances = DataStoreUtil.makeDoubleStorage(s, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for (iter.seek(0); iter.getOffset() < size; iter.advance()) {
final double dist = distFunc.distance(iter, m_i);
distances.putDouble(iter, dist);
if (dist > worstd) {
worstd = dist;
worst = iter.getOffset();
}
}
for (int i = 1; i < m; i++) {
// choose medoid m_i to be far from previous medoids
s.swap(worst, --size);
medoids.add(s.pop(m_i));
// compute distances of each point to closest medoid; track worst.
worst = -1;
worstd = Double.NEGATIVE_INFINITY;
for (iter.seek(0); iter.getOffset() < size; iter.advance()) {
double dist_new = distFunc.distance(iter, m_i);
double dist_old = distances.doubleValue(iter);
// Minimum distance to any medoid chosen so far.
double dist = (dist_new < dist_old) ? dist_new : dist_old;
distances.putDouble(iter, dist);
if (dist > worstd) {
worstd = dist;
worst = iter.getOffset();
}
}
if (LOG.isDebugging()) {
LOG.debugFiner("medoids " + medoids.toString());
}
}
// Fix: explicitly release the temporary (HINT_TEMP) distance storage.
distances.destroy();
return medoids;
}
Usage example of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the ELKI project: method run of class KMeansHamerly.
/**
 * Run Hamerly's accelerated k-means on the given relation.
 *
 * Per point, one upper bound (on the distance to its assigned center) and
 * one lower bound (on the distance to the other centers — per Hamerly's
 * algorithm, presumably the second-nearest; confirm against the helper
 * methods) are maintained to skip distance computations that provably
 * cannot change the assignment.
 *
 * @param database Database used for initialization of the means
 * @param relation Relation of vectors to cluster
 * @return Clustering with one {@link KMeansModel} per non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
// Empty input: return an empty clustering result.
if (relation.size() <= 0) {
return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
// Presize assuming roughly balanced clusters, with a factor 2 of slack.
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
// Hamerly bounds: upper bound on the distance to the own center,
// lower bound on the distance to other centers.
WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
WritableDoubleDataStore lower = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
// Storage for updated means:
final int dim = means[0].length;
double[][] sums = new double[k][dim];
// Separation of means / distance moved (reused for both purposes).
double[] sep = new double[k];
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(KEY + ".reassignments") : null;
int iteration = 0;
// Main loop; maxiter <= 0 means no iteration limit.
for (; maxiter <= 0 || iteration < maxiter; iteration++) {
LOG.incrementProcessed(prog);
int changed;
if (iteration == 0) {
// First pass: assign every point and initialize the bounds.
changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
} else {
// Refresh center separations, then reassign using the bounds.
recomputeSeperation(means, sep);
changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, upper, lower);
}
if (rstat != null) {
rstat.setLong(changed);
LOG.statistics(rstat);
}
// Stop if no cluster assignment changed.
if (changed == 0) {
break;
}
// Recompute means: sums currently hold per-cluster coordinate sums,
// so dividing by the cluster size turns them into the new means.
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
timesEquals(sums[i], s > 0 ? 1. / s : 1.);
}
// delta is the largest distance any center moved; sep receives the
// per-center movement, used to loosen the bounds accordingly.
double delta = maxMoved(means, sums, sep);
updateBounds(relation, assignment, upper, lower, sep, delta);
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
// sums[i] now holds the new mean; install it.
System.arraycopy(sums[i], 0, means[i], 0, dim);
// Restore to sum for next iteration
timesEquals(sums[i], s > 0 ? s : 1.);
}
}
LOG.setCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
}
// Release the temporary per-point bound storages.
upper.destroy();
lower.destroy();
// Wrap result
double totalvariance = 0.;
Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
// Skip empty clusters.
if (ids.size() == 0) {
continue;
}
double[] mean = means[i];
double varsum = 0.;
if (varstat) {
// Optionally compute the variance sum: total distance of members to the mean.
DoubleVector mvec = DoubleVector.wrap(mean);
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
varsum += distanceFunction.distance(mvec, relation.get(it));
}
totalvariance += varsum;
}
KMeansModel model = new KMeansModel(mean, varsum);
result.addToplevelCluster(new Cluster<>(ids, model));
}
if (LOG.isStatistics() && varstat) {
LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
}
return result;
}
Usage example of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the ELKI project: method chooseInitialMeans of class FarthestPointsInitialMeans.
/**
 * Choose k initial means by farthest-point sampling: start from a random
 * object, then repeatedly add the object with the largest minimum distance
 * to all means chosen so far. If dropfirst is set, the random seed object is
 * replaced by the first farthest point.
 *
 * @param database Database to obtain the distance query from
 * @param relation Relation of vectors
 * @param k Number of means to choose
 * @param distanceFunction Distance function to use
 * @return k initial means as primitive double arrays
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
// Acquire a distance query for the relation.
DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
DBIDs ids = relation.getDBIDs();
// Per-object minimum distance to any chosen mean; NaN marks chosen objects.
WritableDoubleDataStore mindists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
double[][] means = new double[k][];
// The first mean is sampled at random.
DBIDRef first = DBIDUtil.randomSample(ids, rnd);
T lastmean = relation.get(first);
means[0] = lastmean.toArray();
DBIDVar farthest = DBIDUtil.newVar(first);
// Greedily add the object farthest from all current means.
for (int i = dropfirst ? 0 : 1; i < k; i++) {
double bestdist = Double.NEGATIVE_INFINITY;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double known = mindists.doubleValue(it);
if (known != known) {
// NaN: this object was already selected as a mean.
continue;
}
double d = Math.min(known, distQ.distance(lastmean, it));
// Do not persist distances to the first mean when it will be dropped
// (i == 0 only happens with dropfirst).
if (i > 0) {
mindists.putDouble(it, d);
}
if (d > bestdist) {
bestdist = d;
farthest.set(it);
}
}
// Mark the winner as chosen, so it cannot be picked twice.
mindists.putDouble(farthest, Double.NaN);
lastmean = relation.get(farthest);
means[i] = lastmean.toArray();
}
// Explicitly destroy temporary data.
mindists.destroy();
return means;
}
Usage example of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in the ELKI project: method chooseInitialMedoids of class PAMInitialMeans.
/**
 * Choose the initial medoids following the PAM BUILD heuristic.
 *
 * The first medoid minimizes the total distance to all other objects; each
 * subsequent medoid greedily minimizes the sum over all objects of the
 * distance to their nearest chosen medoid. Requires O(k * n^2) distance
 * computations.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects
 * @param distQ Distance query
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
ArrayModifiableDBIDs medids = DBIDUtil.newArray(k);
DBIDVar bestid = DBIDUtil.newVar();
// We need three temporary storage arrays:
// mindist: distance to the nearest already-chosen medoid,
// bestd: distances of the best candidate seen in this round,
// tempd: scratch space for the candidate currently being evaluated.
WritableDoubleDataStore mindist, bestd, tempd;
mindist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
bestd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
tempd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// First mean is chosen by having the smallest distance sum to all others.
{
double best = Double.POSITIVE_INFINITY;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial mean", ids.size(), LOG) : null;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double sum = 0, d;
for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
sum += d = distQ.distance(iter, iter2);
tempd.putDouble(iter2, d);
}
if (sum < best) {
best = sum;
bestid.set(iter);
// Keep the new best distances by swapping mindist and tempd:
WritableDoubleDataStore temp = mindist;
mindist = tempd;
tempd = temp;
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
medids.add(bestid);
}
assert (mindist != null);
// Subsequent means optimize the full criterion.
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial centers", k, LOG) : null;
// First one was just chosen.
LOG.incrementProcessed(prog);
for (int i = 1; i < k; i++) {
double best = Double.POSITIVE_INFINITY;
bestid.unset();
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
// Already a medoid: not a candidate.
if (medids.contains(iter)) {
continue;
}
double sum = 0., v;
for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
sum += v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2));
// Fix: use putDouble consistently with all other writes in this method
// (put(DBIDRef, double) is the legacy boxing-prone variant).
tempd.putDouble(iter2, v);
}
if (sum < best) {
best = sum;
bestid.set(iter);
// Keep the candidate's distances by swapping bestd and tempd:
WritableDoubleDataStore temp = bestd;
bestd = tempd;
tempd = temp;
}
}
if (!bestid.isSet()) {
throw new AbortException("No median found that improves the criterion function?!? Too many infinite distances.");
}
medids.add(bestid);
// Accept the chosen medoid's distances by swapping bestd and mindist:
WritableDoubleDataStore temp = bestd;
bestd = mindist;
mindist = temp;
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
// Explicitly destroy all temporary storages.
mindist.destroy();
bestd.destroy();
tempd.destroy();
return medids;
}
Aggregations