Use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
The class NaiveMeanShiftClustering, method run:
/**
* Run the mean-shift clustering algorithm.
*
* @param database Database
* @param relation Data relation
* @return Clustering result
*/
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeq = database.getRangeQuery(distq);
  final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // Stopping threshold
  final double threshold = bandwidth * 1E-10;
  // Result store:
  ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
  ModifiableDBIDs noise = DBIDUtil.newArray();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Initial position:
    V position = relation.get(iter);
    iterations: for (int j = 1; j <= MAXITER; j++) {
      // Compute new position:
      V newvec = null;
      {
        DoubleDBIDList neigh = rangeq.getRangeForObject(position, bandwidth);
        boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
        if (okay) {
          Centroid newpos = new Centroid(dim);
          for (DoubleDBIDListIter niter = neigh.iter(); niter.valid(); niter.advance()) {
            final double weight = kernel.density(niter.doubleValue() / bandwidth);
            newpos.put(relation.get(niter), weight);
          }
          newvec = factory.newNumberVector(newpos.getArrayRef());
          // TODO: detect 0 weight!
        }
        if (!okay) {
          noise.add(iter);
          break iterations;
        }
      }
      // Test if we are close to one of the known clusters:
      double bestd = Double.POSITIVE_INFINITY;
      Pair<V, ModifiableDBIDs> bestp = null;
      for (Pair<V, ModifiableDBIDs> pair : clusters) {
        final double merged = distq.distance(newvec, pair.first);
        if (merged < bestd) {
          bestd = merged;
          bestp = pair;
        }
      }
      // Check for convergence:
      double delta = distq.distance(position, newvec);
      if (bestd < 10 * threshold || bestd * 2 < delta) {
        bestp.second.add(iter);
        break iterations;
      }
      if (j == MAXITER) {
        LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
      }
      if (Double.isNaN(delta)) {
        LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
        break iterations;
      }
      if (j == MAXITER || delta < threshold) {
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
        }
        ArrayModifiableDBIDs cids = DBIDUtil.newArray();
        cids.add(iter);
        clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
        break iterations;
      }
      position = newvec;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
  for (Pair<V, ModifiableDBIDs> pair : clusters) {
    cs.add(new Cluster<>(pair.second, new MeanModel(pair.first.toArray())));
  }
  if (noise.size() > 0) {
    cs.add(new Cluster<MeanModel>(noise, true));
  }
  Clustering<MeanModel> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
  return c;
}
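The core of the inner loop is a kernel-weighted centroid: every neighbor returned by the range query contributes with weight kernel.density(distance / bandwidth). Below is a minimal self-contained sketch of that update in plain Java, using raw double[] vectors instead of the ELKI Relation/Centroid types and assuming a Gaussian kernel; meanShiftStep is a hypothetical name, not an ELKI method.

static double[] meanShiftStep(double[][] data, double[] position, double bandwidth) {
  double[] sum = new double[position.length];
  double wsum = 0;
  for (double[] x : data) {
    // Euclidean distance to the current position:
    double d = 0;
    for (int i = 0; i < x.length; i++) {
      final double diff = x[i] - position[i];
      d += diff * diff;
    }
    d = Math.sqrt(d);
    if (d > bandwidth) {
      continue; // outside the range-query window
    }
    // Gaussian kernel weight, standing in for kernel.density(d / bandwidth):
    final double u = d / bandwidth;
    final double w = Math.exp(-0.5 * u * u);
    for (int i = 0; i < sum.length; i++) {
      sum[i] += w * x[i];
    }
    wsum += w;
  }
  // Mirrors the "TODO: detect 0 weight!" above: wsum == 0 yields NaNs,
  // which the calling loop catches via the Double.isNaN(delta) check.
  for (int i = 0; i < sum.length; i++) {
    sum[i] /= wsum;
  }
  return sum;
}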
Use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
The class RdKNNTree, method bulkReverseKNNQueryForID:
public List<ModifiableDoubleDBIDList> bulkReverseKNNQueryForID(DBIDs ids, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
  checkDistanceFunction(distanceFunction);
  if (k > settings.k_max) {
    throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
  }
  // get candidates
  Map<DBID, ModifiableDoubleDBIDList> candidateMap = new HashMap<>();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    DBID id = DBIDUtil.deref(iter);
    candidateMap.put(id, DBIDUtil.newDistanceDBIDList());
  }
  doBulkReverseKNN(getRoot(), ids, candidateMap);
  if (k == settings.k_max) {
    List<ModifiableDoubleDBIDList> resultList = new ArrayList<>();
    for (ModifiableDoubleDBIDList candidates : candidateMap.values()) {
      candidates.sort();
      resultList.add(candidates);
    }
    return resultList;
  }
  // refinement of candidates, if k < k_max
  // perform a knn query for the candidates
  ArrayModifiableDBIDs candidateIDs = DBIDUtil.newArray();
  for (ModifiableDoubleDBIDList candidates : candidateMap.values()) {
    candidateIDs.addDBIDs(candidates);
  }
  candidateIDs.sort();
  List<? extends KNNList> knnLists = knnQuery.getKNNForBulkDBIDs(candidateIDs, k);
  // and add candidate c to the result if o is a knn of c
  List<ModifiableDoubleDBIDList> resultList = new ArrayList<>();
  for (DBID id : candidateMap.keySet()) {
    ModifiableDoubleDBIDList candidates = candidateMap.get(id);
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    for (DoubleDBIDListIter candidate = candidates.iter(); candidate.valid(); candidate.advance()) {
      int pos = candidateIDs.binarySearch(candidate);
      assert (pos >= 0);
      for (DoubleDBIDListIter qr = knnLists.get(pos).iter(); qr.valid(); qr.advance()) {
        if (DBIDUtil.equal(id, qr)) {
          result.add(qr.doubleValue(), candidate);
          break;
        }
      }
    }
    resultList.add(result);
  }
  return resultList;
}
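The refinement step implements a simple equivalence: a candidate c belongs to the reverse-kNN result of query q exactly when q appears in c's own kNN list, and the reported distance is the one stored in that kNN entry. A minimal sketch of just that membership test, with plain int identifiers instead of DBIDs (isReverseKNN is a hypothetical helper, not ELKI API):

static boolean isReverseKNN(int q, int[] knnOfCandidate) {
  for (int neighbor : knnOfCandidate) {
    if (neighbor == q) {
      return true; // q is among c's k nearest neighbors, so c is a RkNN of q
    }
  }
  return false;
}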
Use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
The class SimplifiedCoverTree, method bulkConstruct:
/**
* Bulk-load the cover tree.
*
* This bulk-load is slightly simpler than the one used in the original
* cover-tree source: We do not look back into the "far" set of candidates.
*
* @param cur Current routing object
* @param maxScale Maximum scale
* @param elems Candidates
* @return Root node of subtree
*/
protected Node bulkConstruct(DBIDRef cur, int maxScale, ModifiableDoubleDBIDList elems) {
  assert (!elems.contains(cur));
  final double max = maxDistance(elems);
  final int scale = Math.min(distToScale(max) - 1, maxScale);
  final int nextScale = scale - 1;
  // elements remaining:
  if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
    return new Node(cur, max, elems);
  }
  // Find neighbors in the cover of the current object:
  ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
  excludeNotCovered(elems, scaleToDist(scale), candidates);
  // If all elements were within the cover, build a compact tree:
  if (candidates.size() == 0) {
    LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
    return bulkConstruct(cur, nextScale, elems);
  }
  // We will have at least one other child, so build the parent:
  Node node = new Node(cur, max);
  // Routing element now is a singleton:
  final boolean curSingleton = elems.size() == 0;
  if (!curSingleton) {
    // Add node for the routing object:
    node.children.add(bulkConstruct(cur, nextScale, elems));
  }
  final double fmax = scaleToDist(nextScale);
  // Build additional cover nodes:
  for (DoubleDBIDListIter it = candidates.iter(); it.valid();) {
    assert (it.getOffset() == 0);
    DBID t = DBIDUtil.deref(it);
    // Recycle.
    elems.clear();
    collectByCover(it, candidates, fmax, elems);
    assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
    if (elems.size() == 0) {
      // Singleton
      node.singletons.add(it);
    } else {
      // Build a full child node:
      node.children.add(bulkConstruct(it, nextScale, elems));
    }
    candidates.removeSwap(0);
  }
  assert (candidates.size() == 0);
  // Routing object is not yet handled:
  if (curSingleton) {
    if (node.isLeaf()) {
      // First in leaf is enough.
      node.children = null;
    } else {
      // Add as regular singleton.
      node.singletons.add(cur);
    }
  }
  // TODO: improve recycling of lists?
  return node;
}
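bulkConstruct relies on a monotone mapping between integer scales and cover radii: the cover radius at scale s is base^s, so moving to nextScale = scale - 1 shrinks the radius by one factor of the base. A minimal sketch of that mapping, assuming the common expansion constant of 1.3 (the constant actually configured in ELKI may differ):

static final double EXPANSION = 1.3; // assumed expansion constant

// Cover radius at the given scale.
static double scaleToDist(int scale) {
  return Math.pow(EXPANSION, scale);
}

// Smallest scale whose cover radius is at least dist (dist must be positive).
static int distToScale(double dist) {
  return (int) Math.ceil(Math.log(dist) / Math.log(EXPANSION));
}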
Use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
The class CacheDoubleDistanceRangeQueries, method run:
@Override
public void run() {
  database.initialize();
  Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
  DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
  RangeQuery<O> rangeQ = database.getRangeQuery(distanceQuery, radius, DatabaseQuery.HINT_HEAVY_USE);
  LOG.verbose("Performing range queries with radius " + radius);
  // open file.
  try (RandomAccessFile file = new RandomAccessFile(out, "rw");
      FileChannel channel = file.getChannel();
      // and acquire a file write lock
      FileLock lock = channel.lock()) {
    // write magic header
    file.writeInt(RANGE_CACHE_MAGIC);
    // write the query radius.
    file.writeDouble(radius);
    // Initial size, enough for 100 neighbors.
    int bufsize = 100 * 12 * 2 + 10;
    ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing range queries", relation.size(), LOG) : null;
    ModifiableDoubleDBIDList nn = DBIDUtil.newDistanceDBIDList();
    DoubleDBIDListIter ni = nn.iter();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      nn.clear();
      rangeQ.getRangeForDBID(it, radius, nn);
      nn.sort();
      final int nnsize = nn.size();
      // Grow the buffer when needed:
      if (nnsize * 12 + 10 > bufsize) {
        while (nnsize * 12 + 10 > bufsize) {
          bufsize <<= 1;
        }
        LOG.verbose("Resizing buffer to " + bufsize + " to store " + nnsize + " results:");
        buffer = ByteBuffer.allocateDirect(bufsize);
      }
      buffer.clear();
      ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
      ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
      int c = 0;
      for (ni.seek(0); ni.valid(); ni.advance(), c++) {
        ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
        buffer.putDouble(ni.doubleValue());
      }
      if (c != nn.size()) {
        throw new AbortException("Sizes did not agree. Cache is invalid.");
      }
      buffer.flip();
      channel.write(buffer);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    lock.release();
  } catch (IOException e) {
    LOG.exception(e);
  }
  // FIXME: close!
}
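The resulting file has a simple layout: a 4-byte magic number, the 8-byte query radius, then one record per object consisting of a varint object index, a varint neighbor count, and that many (varint neighbor index, 8-byte distance) pairs. The reader below is an illustration only, not part of ELKI: readUnsignedVarint is a local helper assuming the usual 7-bits-per-byte encoding with the high bit as continuation flag, and all fixed-width values are big-endian, matching RandomAccessFile and the ByteBuffer default.

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

public class RangeCacheReader {
  static int readUnsignedVarint(DataInputStream in) throws IOException {
    int value = 0, shift = 0, b;
    do {
      b = in.readUnsignedByte();
      value |= (b & 0x7F) << shift;
      shift += 7;
    } while ((b & 0x80) != 0);
    return value;
  }

  public static void main(String[] args) throws IOException {
    try (DataInputStream in = new DataInputStream(new FileInputStream(args[0]))) {
      final int magic = in.readInt(); // should equal RANGE_CACHE_MAGIC
      final double radius = in.readDouble();
      System.out.println("magic=" + magic + " radius=" + radius);
      while (in.available() > 0) { // adequate for a regular file
        final int query = readUnsignedVarint(in); // internal index of the query object
        final int count = readUnsignedVarint(in); // number of neighbors in range
        for (int i = 0; i < count; i++) {
          final int neighbor = readUnsignedVarint(in);
          final double dist = in.readDouble();
          // consume (query, neighbor, dist) here
        }
      }
    }
  }
}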
Use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
The class CacheDoubleDistanceKNNLists, method run:
@Override
public void run() {
  database.initialize();
  Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
  DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
  KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);
  // open file.
  try (RandomAccessFile file = new RandomAccessFile(out, "rw");
      FileChannel channel = file.getChannel();
      // and acquire a file write lock
      FileLock lock = channel.lock()) {
    // write magic header
    file.writeInt(KNN_CACHE_MAGIC);
    // Initial size, enough for 2 kNN.
    int bufsize = k * 12 * 2 + 10;
    ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final KNNList nn = knnQ.getKNNForDBID(it, k);
      final int nnsize = nn.size();
      // Grow the buffer when needed:
      if (nnsize * 12 + 10 > bufsize) {
        while (nnsize * 12 + 10 > bufsize) {
          bufsize <<= 1;
        }
        buffer = ByteBuffer.allocateDirect(bufsize);
      }
      buffer.clear();
      ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
      ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
      int c = 0;
      for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) {
        ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
        buffer.putDouble(ni.doubleValue());
      }
      if (c != nn.size()) {
        throw new AbortException("Sizes did not agree. Cache is invalid.");
      }
      buffer.flip();
      channel.write(buffer);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    lock.release();
  } catch (IOException e) {
    LOG.exception(e);
  }
  // FIXME: close!
}
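Both cache tools grow their direct buffer with the same doubling strategy: since ByteBuffer.allocateDirect is comparatively expensive, reallocation happens only when a result no longer fits, and the capacity doubles until it does. The pattern could be factored into a small helper like this (ensureCapacity is hypothetical, not ELKI API):

static ByteBuffer ensureCapacity(ByteBuffer buffer, int needed) {
  int size = buffer.capacity();
  if (needed <= size) {
    return buffer; // current buffer is large enough
  }
  while (size < needed) {
    size <<= 1; // double until the record fits
  }
  return ByteBuffer.allocateDirect(size);
}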