Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the elki project (by elki-project).
From the class SameSizeKMeansAlgorithm, method refineResult.
/**
 * Perform k-means style iterations to improve the clustering result,
 * while maintaining the cluster size constraints via single moves and
 * pairwise transfers between clusters.
 *
 * @param relation Data relation
 * @param means Means list
 * @param clusters Cluster list
 * @param metas Metadata storage (per-object distances and current assignment)
 * @param tids DBIDs array of all objects; re-sorted each iteration
 * @return final means
 */
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
  NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
  // Our desired cluster size bounds:
  // rounded down
  final int minsize = tids.size() / k;
  // rounded up
  final int maxsize = (tids.size() + k - 1) / k;
  // Comparator: sort by largest gain by transfer.
  // NOTE(review): the comparison is ascending on Meta.priority(); confirm that
  // priority() is defined so that ascending order means "largest gain first".
  final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {
    @Override
    public int compare(DBIDRef o1, DBIDRef o2) {
      Meta c1 = metas.get(o1), c2 = metas.get(o2);
      return Double.compare(c1.priority(), c2.priority());
    }
  };
  // List for sorting cluster preferences of the current object.
  final int[] preferences = MathUtil.sequence(0, k);
  // Comparator for this list.
  final PreferenceComparator pcomp = new PreferenceComparator();
  // Initialize transfer lists: transfers[i] holds objects waiting to leave cluster i.
  ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
  for (int i = 0; i < k; i++) {
    transfers[i] = DBIDUtil.newArray();
  }
  DBIDArrayIter id = tids.iter();
  // Main refinement loop; maxiter <= 0 means no iteration limit.
  for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
    updateDistances(relation, means, metas, df);
    tids.sort(comp);
    // Track if anything has changed
    int active = 0;
    for (id.seek(0); id.valid(); id.advance()) {
      Meta c = metas.get(id);
      // Order the candidate clusters by this object's preference.
      IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
      ModifiableDBIDs source = clusters.get(c.primary);
      assert (source.contains(id));
      tloop: for (int i : preferences) {
        if (i == c.primary) {
          // Already assigned here
          continue;
        }
        ModifiableDBIDs dest = clusters.get(i);
        // Can we pair this transfer with an object waiting to leave cluster i?
        final double gain = c.gain(i);
        for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
          Meta c2 = metas.get(other);
          // A pairwise swap is performed when the combined gain is positive.
          if (gain + c2.gain(c.primary) > 0) {
            transfer(metas, c2, dest, source, other, c.primary);
            transfer(metas, c, source, dest, id, i);
            active += 2;
            // last, as this invalidates the reference!
            other.remove();
            // We are assigned here now.
            source = dest;
            // Can try another transfer, with next cluster.
            continue tloop;
          }
        }
        // If cluster sizes allow, move a single object without a swap partner.
        if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
          transfer(metas, c, source, dest, id, i);
          active += 1;
          // We are assigned here now.
          source = dest;
          continue tloop;
        }
      }
      // Not in its preferred cluster: enqueue on the current cluster's
      // transfer list, so a later object may pair-swap with it.
      if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
        transfers[c.primary].add(id);
      }
    }
    // TODO: try to get more transfers out of the transfer lists done by
    // considering more than one object?
    int pending = 0;
    // Clear transfer lists for next iteration.
    for (int i = 0; i < k; i++) {
      pending += transfers[i].size();
      transfers[i].clear();
    }
    if (LOG.isDebuggingFine()) {
      LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
    }
    // Converged: no transfers were performed in this iteration.
    if (active <= 0) {
      break;
    }
    // Recompute means after reassignment
    means = means(clusters, means, relation);
  }
  return means;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the elki project (by elki-project).
From the class SameSizeKMeansAlgorithm, method run.
/**
 * Run k-means with cluster size constraints.
 *
 * @param database Database
 * @param relation relation to use
 * @return result
 */
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  // All objects to process.
  final DBIDs ids = relation.getDBIDs();
  // Pick the starting means via the configured initializer.
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // One hash set per cluster, sized for a balanced assignment.
  final int expected = relation.size() / k + 2;
  List<ModifiableDBIDs> clusters = new ArrayList<>(k);
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet(expected));
  }
  // Per-object metadata storage.
  final WritableDataStore<Meta> metas = initializeMeta(relation, means);
  // Initial size-constrained assignment, then update the means.
  ArrayModifiableDBIDs tids = initialAssignment(clusters, metas, ids);
  means = means(clusters, means, relation);
  // Improve the assignment with k-means-like refinement iterations.
  means = refineResult(relation, means, clusters, metas, tids);
  // Wrap everything into the clustering result.
  Clustering<MeanModel> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
  int c = 0;
  for (ModifiableDBIDs cluster : clusters) {
    result.addToplevelCluster(new Cluster<>(cluster, new MeanModel(means[c++])));
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the elki project (by elki-project).
From the class AbstractBiclustering, method rowsBitsetToIDs.
/**
 * Convert a bitset into integer row ids.
 *
 * @param rows bitset of selected rows, packed 64 rows per long (BitsUtil layout)
 * @return integer row ids
 */
protected ArrayDBIDs rowsBitsetToIDs(long[] rows) {
  // Preallocate the output to the number of set bits.
  // Note: this local shadows the field this.rowIDs (the full row id list).
  ArrayModifiableDBIDs rowIDs = DBIDUtil.newArray(BitsUtil.cardinality(rows));
  DBIDArrayIter iter = this.rowIDs.iter();
  outer: for (int rlpos = 0; rlpos < rows.length; ++rlpos) {
    long rlong = rows[rlpos];
    // Fast skip blocks of 64 masked values.
    if (rlong == 0L) {
      iter.advance(Long.SIZE);
      continue;
    }
    // Walk the 64 bits of this word, advancing the row iterator in lock step.
    for (int i = 0; i < Long.SIZE; ++i, rlong >>>= 1, iter.advance()) {
      if (!iter.valid()) {
        // Ran past the last row; remaining bits cannot refer to valid rows.
        break outer;
      }
      if ((rlong & 1L) == 1L) {
        // Bit is set: include this row's id.
        rowIDs.add(iter);
      }
    }
  }
  return rowIDs;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the elki project (by elki-project).
From the class NaiveMeanShiftClustering, method run.
/**
 * Run the mean-shift clustering algorithm.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeq = database.getRangeQuery(distq);
  final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // Stopping threshold, scaled relative to the bandwidth.
  final double threshold = bandwidth * 1E-10;
  // Result store: one (mode vector, member ids) pair per discovered cluster.
  ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
  ModifiableDBIDs noise = DBIDUtil.newArray();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Initial position: the object itself.
    V position = relation.get(iter);
    iterations: for (int j = 1; j <= MAXITER; j++) {
      // Compute new position as the kernel-weighted mean of in-range neighbors:
      V newvec = null;
      {
        DoubleDBIDList neigh = rangeq.getRangeForObject(position, bandwidth);
        // On the first iteration, require a neighbor beyond the point itself.
        boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
        if (okay) {
          Centroid newpos = new Centroid(dim);
          for (DoubleDBIDListIter niter = neigh.iter(); niter.valid(); niter.advance()) {
            // Kernel weight based on the normalized distance.
            final double weight = kernel.density(niter.doubleValue() / bandwidth);
            newpos.put(relation.get(niter), weight);
          }
          newvec = factory.newNumberVector(newpos.getArrayRef());
          // TODO: detect 0 weight!
        }
        if (!okay) {
          // No usable neighborhood: the point cannot shift, so flag it as noise.
          noise.add(iter);
          break iterations;
        }
      }
      // Test if we are close to one of the known cluster modes:
      double bestd = Double.POSITIVE_INFINITY;
      Pair<V, ModifiableDBIDs> bestp = null;
      for (Pair<V, ModifiableDBIDs> pair : clusters) {
        final double merged = distq.distance(newvec, pair.first);
        if (merged < bestd) {
          bestd = merged;
          bestp = pair;
        }
      }
      // Check for convergence:
      double delta = distq.distance(position, newvec);
      if (bestd < 10 * threshold || bestd * 2 < delta) {
        // Close enough to an existing mode: join that cluster.
        // (bestp is non-null here: bestd is finite, so at least one cluster exists.)
        bestp.second.add(iter);
        break iterations;
      }
      if (j == MAXITER) {
        LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
      }
      if (Double.isNaN(delta)) {
        LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
        break iterations;
      }
      if (j == MAXITER || delta < threshold) {
        // Converged (or iteration budget exhausted): open a new cluster at this mode.
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
        }
        ArrayModifiableDBIDs cids = DBIDUtil.newArray();
        cids.add(iter);
        clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
        break iterations;
      }
      // Not yet converged: continue shifting from the new position.
      position = newvec;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Wrap into the final clustering; noise becomes a flagged noise cluster.
  ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
  for (Pair<V, ModifiableDBIDs> pair : clusters) {
    cs.add(new Cluster<>(pair.second, new MeanModel(pair.first.toArray())));
  }
  if (noise.size() > 0) {
    cs.add(new Cluster<MeanModel>(noise, true));
  }
  Clustering<MeanModel> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
  return c;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the elki project (by elki-project).
From the class PROCLUS, method greedy.
/**
 * Returns a piercing set of m medoids from the specified sample set,
 * chosen by greedy farthest-point selection: each new medoid maximizes
 * the distance to its closest previously chosen medoid.
 *
 * @param distFunc the distance function
 * @param sampleSet the sample set
 * @param m the number of medoids to be returned
 * @param random random number generator
 * @return a piercing set of m medoids from the specified sample set
 */
private ArrayDBIDs greedy(DistanceQuery<V> distFunc, DBIDs sampleSet, int m, Random random) {
  ArrayModifiableDBIDs medoids = DBIDUtil.newArray(m);
  // Working copy; chosen medoids are swapped to the end and popped off.
  ArrayModifiableDBIDs s = DBIDUtil.newArray(sampleSet);
  DBIDArrayIter iter = s.iter();
  DBIDVar m_i = DBIDUtil.newVar();
  int size = s.size();
  // First medoid: move a random element to the end, then pop()
  s.swap(random.nextInt(size), --size);
  medoids.add(s.pop(m_i));
  // Fixed: guard matched the FINE level while logging at FINER, so at level
  // FINE the guard passed but the message was filtered out.
  if (LOG.isDebuggingFiner()) {
    LOG.debugFiner("medoids " + medoids.toString());
  }
  // To track the current worst element (farthest from all chosen medoids):
  int worst = -1;
  double worstd = Double.NEGATIVE_INFINITY;
  // compute distances between each point in S and m_i
  WritableDoubleDataStore distances = DataStoreUtil.makeDoubleStorage(s, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  for (iter.seek(0); iter.getOffset() < size; iter.advance()) {
    final double dist = distFunc.distance(iter, m_i);
    distances.putDouble(iter, dist);
    if (dist > worstd) {
      worstd = dist;
      worst = iter.getOffset();
    }
  }
  for (int i = 1; i < m; i++) {
    // choose medoid m_i to be far from previous medoids
    s.swap(worst, --size);
    medoids.add(s.pop(m_i));
    // compute distances of each point to closest medoid; track worst.
    worst = -1;
    worstd = Double.NEGATIVE_INFINITY;
    for (iter.seek(0); iter.getOffset() < size; iter.advance()) {
      double dist_new = distFunc.distance(iter, m_i);
      double dist_old = distances.doubleValue(iter);
      // Distance to the closest of all medoids chosen so far.
      double dist = (dist_new < dist_old) ? dist_new : dist_old;
      distances.putDouble(iter, dist);
      if (dist > worstd) {
        worstd = dist;
        worst = iter.getOffset();
      }
    }
    if (LOG.isDebuggingFiner()) {
      LOG.debugFiner("medoids " + medoids.toString());
    }
  }
  // Release the temporary (HINT_TEMP) distance storage.
  distances.destroy();
  return medoids;
}
Aggregations