use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class NNChain method nnChainCore.
/**
 * Core loop of the NNChain algorithm, following "Modern hierarchical,
 * agglomerative clustering algorithms" by Daniel Müllner.
 * <p>
 * Performs {@code mat.size - 1} merges. In every round a chain of successive
 * nearest neighbors is extended until the newest element equals the element
 * two positions before it (a reciprocal nearest neighbor pair); that pair is
 * then merged.
 *
 * @param mat Matrix view
 * @param builder Result builder
 */
private void nnChainCore(MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder) {
  final DBIDArrayIter iter = mat.ix;
  final double[] dists = mat.matrix;
  final int n = mat.size;
  // Chain capacity: number of ids + 1
  final IntegerArray chain = new IntegerArray(n + 1);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running NNChain", n - 1, LOG) : null;
  int end = n;
  for (int round = 1; round < n; round++) {
    // tip: current end of the chain; prev: the element preceding it.
    int tip;
    int prev;
    if (chain.size() > 3) {
      // The chain ends in (..., a, b, c, b), where b and c were merged in the
      // previous round. Resume from a.
      final int len = chain.size;
      final int c = chain.get(len - 2);
      final int b = chain.get(len - 3);
      tip = chain.get(len - 4);
      // The chain must have ended in a loop:
      assert (chain.get(len - 1) == c || chain.get(len - 1) == b);
      // The merge keeps the smaller of the two indexes:
      prev = Math.min(c, b);
      // Drop the three tail entries:
      chain.size -= 3;
    } else {
      // Too short to resume: restart from two not yet merged elements.
      // Müllner obtains these in O(1); the original authors measured the
      // impact of this scan as about 1/5000 of the total runtime.
      tip = findUnlinked(0, end, iter, builder);
      prev = findUnlinked(tip + 1, end, iter, builder);
      chain.clear();
      chain.add(tip);
    }
    // Starting from d(tip, prev) and using strict "<" below means that on
    // ties the predecessor prev is preferred.
    double best = mat.get(tip, prev);
    do {
      int nearest = prev;
      // Entries (tip, i) with i < tip are in the row of tip:
      final int tipBase = MatrixParadigm.triangleSize(tip);
      for (int i = 0; i < tip; i++) {
        if (i == prev || builder.isLinked(iter.seek(i))) {
          continue;
        }
        final double d = dists[tipBase + i];
        if (d < best) {
          best = d;
          nearest = i;
        }
      }
      // Entries (i, tip) with i > tip are in the row of i:
      for (int i = tip + 1; i < n; i++) {
        if (i == prev || builder.isLinked(iter.seek(i))) {
          continue;
        }
        final double d = dists[MatrixParadigm.triangleSize(i) + tip];
        if (d < best) {
          best = d;
          nearest = i;
        }
      }
      // Advance the chain by one element.
      prev = tip;
      tip = nearest;
      chain.add(tip);
    } while (chain.size() < 3 || tip != chain.get(chain.size - 3));
    // The larger index is always merged into the smaller one:
    final int larger = Math.max(tip, prev);
    final int smaller = Math.min(tip, prev);
    assert (best == mat.get(larger, smaller));
    assert (smaller < larger);
    merge(n, mat, builder, best, larger, smaller);
    // Shrink working set
    end = AGNES.shrinkActiveSet(iter, builder, end, larger);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AGNES method initializeDistanceMatrix.
/**
 * Fill the matrix with the initial pairwise distances.
 * <p>
 * For every object at offset x, the distances to all objects at offsets
 * 0..x-1 are computed, passed through {@code linkage.initial}, and stored
 * consecutively, so that row x starts at
 * {@code MatrixParadigm.triangleSize(x)}.
 *
 * @param mat Matrix
 * @param dq Distance query
 * @param linkage Linkage method
 */
protected static void initializeDistanceMatrix(MatrixParadigm mat, DistanceQuery<?> dq, Linkage linkage) {
  final DBIDArrayIter row = mat.ix;
  final DBIDArrayIter col = mat.iy;
  final double[] cells = mat.matrix;
  final boolean squared = dq.getDistanceFunction().isSquared();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Distance matrix computation", cells.length, LOG) : null;
  int next = 0;
  row.seek(0);
  while (row.valid()) {
    final int r = row.getOffset();
    // The write position must be at the start of row r:
    assert (next == MatrixParadigm.triangleSize(r));
    col.seek(0);
    while (col.getOffset() < r) {
      cells[next] = linkage.initial(dq.distance(row, col), squared);
      next++;
      col.advance();
    }
    if (prog != null) {
      prog.setProcessed(next, LOG);
    }
    row.advance();
  }
  // The array may be larger than the number of cells written; report it as
  // fully processed so that completing the progress does not log an error.
  if (prog != null) {
    prog.setProcessed(cells.length, LOG);
  }
  LOG.ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AGNES method updateMatrix.
/**
 * Update the scratch distance matrix after merging x into y.
 * <p>
 * For every position j below {@code end} (other than x and y) that is not
 * linked yet, the stored distance between j and y is replaced by
 * {@code linkage.combine} of the old distances from j to x and from j to y.
 * Entries involving x are only read, never written.
 *
 * @param end Active set size
 * @param mat Matrix view
 * @param builder Hierarchy builder (to get cluster sizes)
 * @param mindist Distance that was used for merging
 * @param x First matrix position
 * @param y Second matrix position
 * @param sizex Old size of first cluster
 * @param sizey Old size of second cluster
 */
protected void updateMatrix(int end, MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y, final int sizex, final int sizey) {
  // Callers are expected to pass y < x.
  final double[] dist = mat.matrix;
  final DBIDArrayIter iter = mat.ix;
  final int rowX = MatrixParadigm.triangleSize(x);
  final int rowY = MatrixParadigm.triangleSize(y);
  // Phase 1: j < y, both cells are in the rows of x and y.
  int j = 0;
  while (j < y) {
    if (!builder.isLinked(iter.seek(j))) {
      // Only for j < y is rowY + j the right cell.
      assert (j < y);
      final int cell = rowY + j;
      dist[cell] = linkage.combine(sizex, dist[rowX + j], sizey, dist[cell], builder.getSize(iter), mindist);
    }
    j++;
  }
  // Step over y itself.
  j++;
  // Phase 2: y < j < x, so (j, y) is in the row of j and (x, j) in the row of x.
  // rowJ is kept equal to the start of row j by adding j on every step.
  int rowJ = MatrixParadigm.triangleSize(j);
  while (j < x) {
    if (!builder.isLinked(iter.seek(j))) {
      final int cell = rowJ + y;
      dist[cell] = linkage.combine(sizex, dist[rowX + j], sizey, dist[cell], builder.getSize(iter), mindist);
    }
    rowJ += j;
    j++;
  }
  // Step over x itself.
  rowJ += j;
  j++;
  // Phase 3: y < x < j, both cells are in the row of j.
  while (j < end) {
    if (!builder.isLinked(iter.seek(j))) {
      final int cell = rowJ + y;
      dist[cell] = linkage.combine(sizex, dist[rowJ + x], sizey, dist[cell], builder.getSize(iter), mindist);
    }
    rowJ += j;
    j++;
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AGNES method run.
/**
 * Run the algorithm: build the initial distance matrix, then perform
 * {@code size - 1} merges and return the resulting hierarchy.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  if (linkage instanceof SingleLinkage) {
    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  }
  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  // Initial (lower triangular) distance matrix:
  MatrixParadigm mat = new MatrixParadigm(ids);
  initializeDistanceMatrix(mat, dq, linkage);
  // Storage for the result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  DBIDArrayIter ix = mat.ix;
  // "end" virtually shrinks the matrix as trailing objects disappear.
  int end = size;
  // One merge per iteration until everything is in a single cluster.
  for (int remaining = size - 1; remaining > 0; remaining--) {
    final int merged = findMerge(end, mat, builder);
    end = shrinkActiveSet(ix, builder, end, merged);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return builder.complete();
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AnderbergHierarchicalClustering method run.
/**
 * Run the algorithm: build the initial distance matrix and the
 * nearest-neighbor cache arrays, then perform {@code size - 1} merges and
 * return the resulting hierarchy.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  if (linkage instanceof SingleLinkage) {
    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  }
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs ids = relation.getDBIDs();
  MatrixParadigm mat = new MatrixParadigm(ids);
  final int size = ids.size();
  // Fill the distance matrix using the shared AGNES routine:
  AGNES.initializeDistanceMatrix(mat, dq, linkage);
  // Cache arrays with one slot per object, filled by initializeNNCache:
  double[] bestd = new double[size];
  int[] besti = new int[size];
  initializeNNCache(mat.matrix, bestd, besti);
  // Storage for the result:
  PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids, dq.getDistanceFunction().isSquared());
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  DBIDArrayIter ix = mat.ix;
  int end = size;
  // One merge per iteration until everything is in a single cluster.
  for (int remaining = size - 1; remaining > 0; remaining--) {
    final int merged = findMerge(end, mat, bestd, besti, builder);
    end = AGNES.shrinkActiveSet(ix, builder, end, merged);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return builder.complete();
}
Aggregations