Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
From the class NaiveAgglomerativeHierarchicalClustering2, method run:
/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public Result run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  if (size > 0x10000) {
    throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
  }
  LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  // Compute the initial (lower triangular) distance matrix.
  double[] scratch = new double[triangleSize(size)];
  DBIDArrayIter ix = ids.iter(), iy = ids.iter();
  // Position counter - must agree with computeOffset!
  int pos = 0;
  for (int x = 0; ix.valid(); x++, ix.advance()) {
    iy.seek(0);
    for (int y = 0; y < x; y++, iy.advance()) {
      scratch[pos] = dq.distance(ix, iy);
      pos++;
    }
  }
  // Initialize space for result:
  double[] height = new double[size];
  Arrays.fill(height, Double.POSITIVE_INFINITY);
  // Parent node, to track merges
  // have every object point to itself initially
  ArrayModifiableDBIDs parent = DBIDUtil.newArray(ids);
  // Active clusters, when not trivial.
  Int2ReferenceMap<ModifiableDBIDs> clusters = new Int2ReferenceOpenHashMap<>();
  // Repeat until everything merged, except the desired number of clusters:
  final int stop = size - numclusters;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
  for (int i = 0; i < stop; i++) {
    double min = Double.POSITIVE_INFINITY;
    int minx = -1, miny = -1;
    for (int x = 0; x < size; x++) {
      if (height[x] < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int xbase = triangleSize(x);
      for (int y = 0; y < x; y++) {
        if (height[y] < Double.POSITIVE_INFINITY) {
          continue;
        }
        final int idx = xbase + y;
        if (scratch[idx] < min) {
          min = scratch[idx];
          minx = x;
          miny = y;
        }
      }
    }
    assert (minx >= 0 && miny >= 0);
    // Avoid allocating memory, by reusing existing iterators:
    ix.seek(minx);
    iy.seek(miny);
    // Perform merge in data structure: x -> y
    // Since y < x, prefer keeping y, dropping x.
    height[minx] = min;
    parent.set(minx, iy);
    // Merge into cluster
    ModifiableDBIDs cx = clusters.get(minx);
    ModifiableDBIDs cy = clusters.get(miny);
    if (cy == null) {
      cy = DBIDUtil.newHashSet();
      cy.add(iy);
    }
    if (cx == null) {
      cy.add(ix);
    } else {
      cy.addDBIDs(cx);
      clusters.remove(minx);
    }
    clusters.put(miny, cy);
    // Update distance matrix. Note: miny < minx
    final int xbase = triangleSize(minx), ybase = triangleSize(miny);
    // Write to (y, j), with j < y
    for (int j = 0; j < miny; j++) {
      if (height[j] < Double.POSITIVE_INFINITY) {
        continue;
      }
      scratch[ybase + j] = Math.min(scratch[xbase + j], scratch[ybase + j]);
    }
    // Write to (j, y), with y < j < x
    for (int j = miny + 1; j < minx; j++) {
      if (height[j] < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int jbase = triangleSize(j);
      scratch[jbase + miny] = Math.min(scratch[xbase + j], scratch[jbase + miny]);
    }
    // Write to (j, y), with y < x < j
    for (int j = minx + 1; j < size; j++) {
      if (height[j] < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int jbase = triangleSize(j);
      scratch[jbase + miny] = Math.min(scratch[jbase + minx], scratch[jbase + miny]);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Build the clustering result
  final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
  // Only the surviving cluster roots still have height == infinity;
  // merged-away entries were assigned a finite merge height above.
  for (int x = 0; x < size; x++) {
    if (height[x] == Double.POSITIVE_INFINITY) {
      DBIDs cids = clusters.get(x);
      if (cids == null) {
        // A singleton that never participated in a merge:
        ix.seek(x);
        cids = DBIDUtil.deref(ix);
      }
      Cluster<Model> cluster = new Cluster<>("Cluster", cids);
      dendrogram.addToplevelCluster(cluster);
    }
  }
  return dendrogram;
}
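The triangleSize helper referenced above is not part of this excerpt. A minimal sketch, assuming the standard packed lower-triangular layout implied by the xbase + y indexing (a hypothetical reconstruction, not verbatim ELKI source):

protected static int triangleSize(int x) {
  // Offset of row x in the packed lower triangle: 0 + 1 + ... + (x - 1).
  // The distance of x and y (with y < x) is then found at scratch[triangleSize(x) + y].
  return (x * (x - 1)) >>> 1;
}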
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
From the class MiniMaxAnderberg, method run:
/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = DatabaseUtil.precomputedDistanceQuery(db, relation, getDistanceFunction(), LOG);
  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  // Initialize space for result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>();
  // Compute the initial (lower triangular) distance matrix.
  MatrixParadigm mat = new MatrixParadigm(ids);
  ArrayModifiableDBIDs prots = DBIDUtil.newArray(MatrixParadigm.triangleSize(size));
  DBIDArrayMIter protiter = prots.iter();
  MiniMax.initializeMatrices(mat, prots, dq);
  // Arrays used for caching:
  double[] bestd = new double[size];
  int[] besti = new int[size];
  initializeNNCache(mat.matrix, bestd, besti);
  // Repeat until everything merged into 1 cluster
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  DBIDArrayIter ix = mat.ix;
  for (int i = 1, end = size; i < size; i++) {
    end = AGNES.shrinkActiveSet(ix, builder, end, findMerge(end, mat, protiter, builder, clusters, bestd, besti, dq));
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return (PointerPrototypeHierarchyRepresentationResult) builder.complete();
}
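The initializeNNCache and findMerge helpers are not shown here. A minimal sketch of the nearest-neighbor cache initialization, assuming the same packed lower-triangular layout as MatrixParadigm.triangleSize; the array names follow the caller, but the body is a hypothetical reconstruction:

private static void initializeNNCache(double[] matrix, double[] bestd, int[] besti) {
  final int size = bestd.length;
  Arrays.fill(bestd, Double.POSITIVE_INFINITY);
  Arrays.fill(besti, -1);
  // Row x of the packed lower triangle stores d(x, y) for all y < x,
  // so bestd[x]/besti[x] cache the best merge candidate among smaller indexes.
  for (int x = 1, p = 0; x < size; x++) {
    for (int y = 0; y < x; y++, p++) {
      if (matrix[p] < bestd[x]) {
        bestd[x] = matrix[p];
        besti[x] = y;
      }
    }
  }
}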
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
From the class NNChain, method nnChainCore:
/**
 * Uses NNChain as in "Modern hierarchical, agglomerative clustering
 * algorithms" by Daniel Müllner
 *
 * @param mat Matrix view
 * @param builder Result builder
 */
private void nnChainCore(MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder) {
  final DBIDArrayIter ix = mat.ix;
  final double[] distances = mat.matrix;
  final int size = mat.size;
  // The maximum chain size = number of ids + 1
  IntegerArray chain = new IntegerArray(size + 1);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running NNChain", size - 1, LOG) : null;
  for (int k = 1, end = size; k < size; k++) {
    int a = -1, b = -1;
    if (chain.size() <= 3) {
      // Accessing two arbitrary not yet merged elements could be optimized
      // to work in O(1) like in Müllner; however this usually does not have
      // a huge impact (empirically just about 1/5000 of total performance).
      a = findUnlinked(0, end, ix, builder);
      b = findUnlinked(a + 1, end, ix, builder);
      chain.clear();
      chain.add(a);
    } else {
      // Chain is expected to look like (.... a, b, c, b) with b and c merged.
      int lastIndex = chain.size;
      int c = chain.get(lastIndex - 2);
      b = chain.get(lastIndex - 3);
      a = chain.get(lastIndex - 4);
      // Ensure we had a loop at the end:
      assert (chain.get(lastIndex - 1) == c || chain.get(lastIndex - 1) == b);
      // if c < b, then we merged b -> c, otherwise c -> b
      b = c < b ? c : b;
      // Cut the tail:
      chain.size -= 3;
    }
    // For ties, always prefer the second-last element b:
    double minDist = mat.get(a, b);
    do {
      int c = b;
      final int ta = MatrixParadigm.triangleSize(a);
      for (int i = 0; i < a; i++) {
        if (i != b && !builder.isLinked(ix.seek(i))) {
          double dist = distances[ta + i];
          if (dist < minDist) {
            minDist = dist;
            c = i;
          }
        }
      }
      for (int i = a + 1; i < size; i++) {
        if (i != b && !builder.isLinked(ix.seek(i))) {
          double dist = distances[MatrixParadigm.triangleSize(i) + a];
          if (dist < minDist) {
            minDist = dist;
            c = i;
          }
        }
      }
      b = a;
      a = c;
      chain.add(a);
    } while (chain.size() < 3 || a != chain.get(chain.size - 1 - 2));
    // We always merge the larger into the smaller index:
    if (a < b) {
      int tmp = a;
      a = b;
      b = tmp;
    }
    assert (minDist == mat.get(a, b));
    assert (b < a);
    merge(size, mat, builder, minDist, a, b);
    // Shrink working set
    end = AGNES.shrinkActiveSet(ix, builder, end, a);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
}
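findUnlinked is not included in this excerpt either. Based on how it is called above (scan forward from a start position, skipping already merged objects), a plausible minimal sketch:

private static int findUnlinked(int pos, int end, DBIDArrayIter ix, PointerHierarchyRepresentationBuilder builder) {
  // Return the first index in [pos, end) whose object was not merged yet.
  while (pos < end) {
    if (!builder.isLinked(ix.seek(pos))) {
      return pos;
    }
    ++pos;
  }
  return -1; // Everything merged; cannot happen while k < size.
}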
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
From the class SLINKHDBSCANLinearMemory, method run:
/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
  // We need array addressing later.
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  // Compute the core distances
  // minPts + 1: ignore query point.
  final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
  WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
  // Temporary storage for m.
  WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running HDBSCAN*-SLINK", ids.size(), LOG) : null;
  // Has to be an array for monotonicity reasons!
  ModifiableDBIDs processedIDs = DBIDUtil.newArray(ids.size());
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    // Steps 1, 3 and 4 are exactly as in SLINK
    step1(id, pi, lambda);
    // Step 2 is modified to use a different distance
    step2(id, processedIDs, distQ, coredists, m);
    step3(id, pi, lambda, processedIDs, m);
    step4(id, pi, lambda, processedIDs);
    processedIDs.add(id);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, distQ.getDistanceFunction().isSquared(), coredists);
}
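computeCoreDists is defined elsewhere. A minimal sketch consistent with the call site above, itself using the same FiniteProgress idiom; the storage hints, the progress label, and the exact k passed to the kNN query are assumptions, not the verbatim ELKI implementation:

protected WritableDoubleDataStore computeCoreDists(DBIDs ids, KNNQuery<O> knnQ, int minPts) {
  WritableDoubleDataStore coredists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Computing core distances", ids.size(), LOG) : null;
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    // ELKI kNN lists contain the query point itself, so the k-distance at
    // minPts covers the point plus its minPts - 1 nearest neighbors.
    coredists.putDouble(it, knnQ.getKNNForDBID(it, minPts).getKNNDistance());
    LOG.incrementProcessed(cprog);
  }
  LOG.ensureCompleted(cprog);
  return coredists;
}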
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
From the class LSDBC, method fillDensities:
/**
 * Collect all densities into an array for sorting.
 *
 * @param knnq kNN query
 * @param ids DBIDs to process
 * @param dens Density storage
 */
private void fillDensities(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore dens) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final KNNList neighbors = knnq.getKNNForDBID(iter, k);
    dens.putDouble(iter, neighbors.getKNNDistance());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
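All five snippets share the same null-safe progress idiom: the FiniteProgress object is only allocated when verbose logging is enabled, and the Logging methods accept null and then do nothing, as the unguarded calls above demonstrate. A minimal standalone sketch (the task name and totalSteps are placeholders):

FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("My task", totalSteps, LOG) : null;
for (int i = 0; i < totalSteps; i++) {
  // ... one unit of work ...
  LOG.incrementProcessed(prog); // No-op when prog is null.
}
LOG.ensureCompleted(prog); // Marks the progress as complete (null-safe).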