use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.
the class FarthestSumPointsInitialMeans method chooseInitialMeans.
/**
 * Choose {@code k} initial means by repeatedly picking the object whose sum
 * of distances to all previously chosen means is largest.
 * <p>
 * The first mean is a random sample. When {@code dropfirst} is set, this
 * random object only serves as the starting point of the search and is
 * replaced by the object farthest from it; otherwise it is kept as a mean.
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function
 * @return the chosen means, converted by {@code unboxVectors}
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  // Get a distance query
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  DBIDs ids = relation.getDBIDs();
  // Per object: running sum of distances to the chosen means.
  // NaN marks objects that already are a mean.
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Choose first mean
  List<T> means = new ArrayList<>(k);
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  T prevmean = relation.get(first);
  means.add(prevmean);
  if (!dropfirst) {
    // The random object stays in the result, so it must not be picked again.
    // Without this mark its distance sum keeps growing and it can win a
    // later round, yielding a duplicate mean.
    store.putDouble(first, Double.NaN);
  }
  // Find farthest object each.
  DBIDVar best = DBIDUtil.newVar(first);
  for (int i = (dropfirst ? 0 : 1); i < k; i++) {
    double maxdist = Double.NEGATIVE_INFINITY;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final double prev = store.doubleValue(it);
      if (prev != prev) {
        // NaN: already chosen!
        continue;
      }
      double dsum = prev + distQ.distance(prevmean, it);
      // Don't store distance to first mean, when it will be dropped below.
      if (i > 0) {
        store.putDouble(it, dsum);
      }
      if (dsum > maxdist) {
        maxdist = dsum;
        best.set(it);
      }
    }
    // Add new mean (and drop the initial mean when desired)
    if (i == 0) {
      // Remove temporary first element.
      means.clear();
    }
    // So it won't be chosen twice.
    store.putDouble(best, Double.NaN);
    prevmean = relation.get(best);
    means.add(prevmean);
  }
  // Explicitly destroy temporary data.
  store.destroy();
  return unboxVectors(means);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.
the class SameSizeKMeansAlgorithm method refineResult.
/**
* Perform k-means style iterations to improve the clustering result.
* <p>
* Each iteration calls {@code updateDistances}, sorts the objects with a
* comparator on {@code Meta.priority()}, and then tries to move every object
* to another cluster: either by swapping it with an object waiting on that
* cluster's transfer list, or by a single move when the size bounds
* ({@code minsize} / {@code maxsize}) allow it. The loop stops when an
* iteration performs no transfer, or after {@code maxiter} iterations (a
* non-positive {@code maxiter} disables this limit).
*
* @param relation Data relation
* @param means Means list
* @param clusters Cluster list (handed to {@code transfer}, which presumably
*        moves objects between the member sets - confirm against transfer)
* @param metas Metadata storage
* @param tids DBIDs array (re-sorted in place in every iteration)
* @return final means
*/
protected double[][] refineResult(Relation<V> relation, double[][] means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
NumberVectorDistanceFunction<? super V> df = getDistanceFunction();
// Our desired cluster size:
// rounded down
final int minsize = tids.size() / k;
// rounded up
final int maxsize = (tids.size() + k - 1) / k;
// Comparator: orders objects by Meta.priority(), ascending as defined by
// Double.compare. (Original note: sort by largest gain by transfer -
// presumably priority() encodes that gain; verify against Meta.)
final Comparator<DBIDRef> comp = new Comparator<DBIDRef>() {
@Override
public int compare(DBIDRef o1, DBIDRef o2) {
Meta c1 = metas.get(o1), c2 = metas.get(o2);
return Double.compare(c1.priority(), c2.priority());
}
};
// List for sorting cluster preferences; its entries are used as cluster
// indexes below and it is re-sorted for every object.
final int[] preferences = MathUtil.sequence(0, k);
// Comparator for this list.
final PreferenceComparator pcomp = new PreferenceComparator();
// Initialize transfer lists: one list per cluster, holding objects that are
// waiting for a swap partner.
ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
for (int i = 0; i < k; i++) {
transfers[i] = DBIDUtil.newArray();
}
// A single iterator is reused for all passes over tids.
DBIDArrayIter id = tids.iter();
// A non-positive maxiter means: iterate until no transfer happens.
for (int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
updateDistances(relation, means, metas, df);
tids.sort(comp);
// Track if anything has changed
int active = 0;
for (id.seek(0); id.valid(); id.advance()) {
Meta c = metas.get(id);
// Order the cluster indexes using the comparator selected for this object.
IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
ModifiableDBIDs source = clusters.get(c.primary);
assert (source.contains(id));
tloop: for (int i : preferences) {
if (i == c.primary) {
// Already assigned here
continue;
}
ModifiableDBIDs dest = clusters.get(i);
// Can we pair this transfer?
final double gain = c.gain(i);
for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
Meta c2 = metas.get(other);
// Swap only if the combined gain of both moves is strictly positive.
if (gain + c2.gain(c.primary) > 0) {
// The waiting object goes dest -> source, the current one source -> dest.
// NOTE(review): the later checks on c.primary presumably rely on
// transfer() updating Meta.primary - confirm.
transfer(metas, c2, dest, source, other, c.primary);
transfer(metas, c, source, dest, id, i);
active += 2;
// last, as this invalidates the reference!
other.remove();
// We are assigned here now.
source = dest;
// Can try another transfer, with next cluster.
continue tloop;
}
}
// If cluster sizes allow, move a single object.
if (gain > 0 && (dest.size() < maxsize && source.size() > minsize)) {
transfer(metas, c, source, dest, id, i);
active += 1;
// We are assigned here now.
source = dest;
continue tloop;
}
}
// Not in the top-ranked cluster, and the stored dists entry for the own
// cluster is strictly larger than for the top-ranked one: queue the object
// on its current cluster's transfer list so that a later object can swap
// with it.
if (c.primary != preferences[0] && c.dists[c.primary] > c.dists[preferences[0]]) {
transfers[c.primary].add(id);
}
}
// TODO: try to get more transfers out of the transfer lists done by
// considering more than one object?
// Objects still queued were not paired in this iteration; they are only
// counted for the debug message.
int pending = 0;
// Clear transfer lists for next iteration.
for (int i = 0; i < k; i++) {
pending += transfers[i].size();
transfers[i].clear();
}
if (LOG.isDebuggingFine()) {
LOG.debugFine("Iteration #" + iter + ": performed " + active + " transfers skipped " + pending);
}
// No transfer was performed: stop, the means need no recomputation.
if (active <= 0) {
break;
}
// Recompute means after reassignment
means = means(clusters, means, relation);
}
return means;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.
the class CLINK method clinkstep4567.
/**
* Fourth to seventh step of CLINK: find best insertion
* <p>
* Scans the objects at offsets {@code n - 1} down to {@code 0} to select the
* position {@code a}, links {@code a} to the current object, and then
* re-links the chain of former parents starting at the old {@code pi[a]}.
* <p>
* Side effects: {@code pi}, {@code lambda} and {@code m} are modified, and
* the passed iterator {@code it} is repositioned by this method.
*
* @param id Current object
* @param ids All objects
* @param it Iterator
* @param n Index threshold
* @param pi Parent data store
* @param lambda Height data store
* @param m Distance data store
*/
private void clinkstep4567(DBIDRef id, ArrayDBIDs ids, DBIDArrayIter it, int n, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableDoubleDataStore m) {
// step 4: a = n
// (a is a separate iterator over ids, starting at offset n - 1)
DBIDArrayIter a = ids.iter().seek(n - 1);
// step 5:
{
// Reusable variable holding pi[it] for the current object.
DBIDVar p_i = DBIDUtil.newVar();
// Walk backwards from offset n - 1 until the iterator becomes invalid.
for (it.seek(n - 1); it.valid(); it.retract()) {
double l_i = lambda.doubleValue(it);
// m[pi[it]]; from() is used here as "load pi[it] into p_i and return it".
double mp_i = m.doubleValue(p_i.from(pi, it));
if (l_i >= mp_i) {
// Keep the position with the smallest m value seen so far.
if (m.doubleValue(it) < m.doubleValue(a)) {
a.seek(it.getOffset());
}
} else {
// Overwrite m[it] with infinity, so it cannot win the "<" test above.
m.putDouble(it, Double.POSITIVE_INFINITY);
}
}
}
// step 6
// b = pi[a]
DBIDVar b = DBIDUtil.newVar().from(pi, a);
// c = old lambda[a]; saved before it is overwritten below.
double c = lambda.doubleValue(a);
pi.putDBID(a, id);
lambda.putDouble(a, m.doubleValue(a));
// step 7
// Only needed when a is not the object at offset n - 1.
if (a.getOffset() < n - 1) {
// Used below
// (the object at offset n - 1, captured via a new DBIDVar)
DBIDRef last = DBIDUtil.newVar(it.seek(n - 1));
DBIDVar d = DBIDUtil.newVar();
// if b < n: (then goto 7)
while (!DBIDUtil.equal(b, id)) {
// Reached the object at offset n - 1: link it to id and stop.
if (DBIDUtil.equal(b, last)) {
pi.putDBID(b, id);
lambda.putDouble(b, c);
break;
}
// d = pi[b]
d.from(pi, b);
// pi[b] = n + 1
pi.putDBID(b, id);
// c = old l[b], l[b] = c
// (relies on putDouble returning the previously stored value)
c = lambda.putDouble(b, c);
// b = d = old pi[b]
b.set(d);
}
}
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.
the class FarthestPointsInitialMeans method chooseInitialMeans.
/**
 * Farthest-first selection of {@code k} initial means: starting from a
 * random object, each round picks the object whose smallest distance to the
 * means chosen so far is largest.
 * <p>
 * With {@code dropfirst} set, the random start object is only used to find
 * the first real mean and is overwritten in slot 0 of the result.
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function
 * @return array holding the chosen means
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  final DistanceQuery<T> query = database.getDistanceQuery(relation, distanceFunction);
  final DBIDs candidates = relation.getDBIDs();
  // Per object: smallest distance to any chosen mean; NaN flags a chosen object.
  final WritableDoubleDataStore nearest = DataStoreUtil.makeDoubleStorage(candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);

  // Slot 0 initially holds the random start object.
  final double[][] result = new double[k][];
  final DBIDRef seed = DBIDUtil.randomSample(candidates, rnd);
  T reference = relation.get(seed);
  result[0] = reference.toArray();

  final DBIDVar farthest = DBIDUtil.newVar(seed);
  // Round 0 only exists when the start object is to be replaced.
  final int firstRound = dropfirst ? 0 : 1;
  for (int round = firstRound; round < k; round++) {
    // Distances to the dropped start object must not be remembered.
    final boolean remember = round > 0;
    double widest = Double.NEGATIVE_INFINITY;
    DBIDIter cursor = candidates.iter();
    while (cursor.valid()) {
      final double known = nearest.doubleValue(cursor);
      // Skip objects that already are a mean.
      if (!Double.isNaN(known)) {
        final double gap = Math.min(known, query.distance(reference, cursor));
        if (remember) {
          nearest.putDouble(cursor, gap);
        }
        if (gap > widest) {
          widest = gap;
          farthest.set(cursor);
        }
      }
      cursor.advance();
    }
    // Flag the winner so that later rounds ignore it, then store it.
    nearest.putDouble(farthest, Double.NaN);
    reference = relation.get(farthest);
    result[round] = reference.toArray();
  }
  // Release the temporary storage explicitly.
  nearest.destroy();
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRef in project elki by elki-project.
the class FarthestSumPointsInitialMeans method chooseInitialMedoids.
/**
 * Choose {@code k} initial medoids from {@code ids} by repeatedly picking
 * the object whose sum of distances to all previously chosen medoids is
 * largest.
 * <p>
 * The first medoid is a random sample of {@code ids}. When {@code dropfirst}
 * is set, this random object only serves as the starting point of the search
 * and is replaced by the object farthest from it; otherwise it is kept.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects; only these are scanned and returned
 * @param distQ Distance query
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  // Per object: running sum of distances to the chosen medoids.
  // NaN marks objects that already are a medoid.
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  means.add(first);
  if (!dropfirst) {
    // The random object stays in the result, so it must not be picked again.
    // Without this mark its distance sum keeps growing and it can win a
    // later round, yielding a duplicate medoid.
    store.putDouble(first, Double.NaN);
  }
  DBIDVar prevmean = DBIDUtil.newVar(first);
  DBIDVar best = DBIDUtil.newVar(first);
  for (int i = (dropfirst ? 0 : 1); i < k; i++) {
    // Find farthest object:
    double maxdist = Double.NEGATIVE_INFINITY;
    // Scan the candidate set only: the store was allocated for ids, and the
    // result must be a subset of ids, not of the whole relation.
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final double prev = store.doubleValue(it);
      if (prev != prev) {
        // NaN: already chosen!
        continue;
      }
      double dsum = prev + distQ.distance(prevmean, it);
      // Don't store distance to first mean, when it will be dropped below.
      if (i > 0) {
        store.putDouble(it, dsum);
      }
      if (dsum > maxdist) {
        maxdist = dsum;
        best.set(it);
      }
    }
    // Add new mean:
    if (i == 0) {
      // Remove temporary first element.
      means.clear();
    }
    // So it won't be chosen twice.
    store.putDouble(best, Double.NaN);
    prevmean.set(best);
    means.add(best);
  }
  // Explicitly destroy temporary data.
  store.destroy();
  return means;
}
Aggregations