Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in the ELKI project: class NaiveAgglomerativeHierarchicalClustering4, method run().
/**
 * Run the algorithm: naive O(n^3) agglomerative hierarchical clustering on a
 * precomputed lower-triangular distance matrix, with linkage-based updates.
 *
 * @param db Database
 * @param relation Relation to cluster
 * @return Clustering hierarchy (pointer representation)
 */
public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  // Guard: the triangular matrix needs size*(size-1)/2 doubles; beyond 0x10000
  // objects that index computation overflows int (see error message).
  if (size > 0x10000) {
    throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
  }
  if (Linkage.SINGLE.equals(linkage)) {
    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  }
  // Compute the initial (lower triangular) distance matrix.
  double[] scratch = new double[triangleSize(size)];
  DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter();
  // Position counter - must agree with computeOffset!
  int pos = 0;
  // Ward linkage operates on variances, i.e. squared distances; square only if
  // the distance function is not already squared.
  boolean square = Linkage.WARD.equals(linkage) && !getDistanceFunction().isSquared();
  for (int x = 0; ix.valid(); x++, ix.advance()) {
    iy.seek(0);
    for (int y = 0; y < x; y++, iy.advance()) {
      scratch[pos] = dq.distance(ix, iy);
      // Ward uses variances -- i.e. squared values
      if (square) {
        scratch[pos] *= scratch[pos];
      }
      pos++;
    }
  }
  // Initialize space for result:
  // parent = pointer representation of the dendrogram; height = merge distance.
  // Convention: height == POSITIVE_INFINITY marks a still-active cluster root;
  // a finite height means the object has been merged away.
  WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    parent.put(it, it); // initially, every object is its own root
    height.put(it, Double.POSITIVE_INFINITY);
    csize.put(it, 1); // singleton clusters
  }
  // Repeat until everything merged, except the desired number of clusters:
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
  for (int i = 1; i < size; i++) {
    // Scan the triangular matrix for the closest pair of active clusters:
    double min = Double.POSITIVE_INFINITY;
    int minx = -1, miny = -1;
    for (ix.seek(0); ix.valid(); ix.advance()) {
      // Finite height = already merged away; skip stale rows.
      if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int xbase = triangleSize(ix.getOffset());
      for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
        if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
          continue;
        }
        final int idx = xbase + iy.getOffset();
        // Use <= so that later pairs win on ties.
        if (scratch[idx] <= min) {
          min = scratch[idx];
          minx = ix.getOffset();
          miny = iy.getOffset();
        }
      }
    }
    assert (minx >= 0 && miny >= 0);
    // Avoid allocating memory, by reusing existing iterators:
    ix.seek(minx);
    iy.seek(miny);
    // Perform merge in data structure: x -> y
    // Since y < x, prefer keeping y, dropping x.
    int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
    height.put(ix, min); // finite height: marks x as merged away
    parent.put(ix, iy);
    csize.put(iy, sizex + sizey);
    // Update distance matrix: combine row/column of x into y via the linkage.
    // Three loops cover the three index orderings. Note: miny < minx
    final int xbase = triangleSize(minx), ybase = triangleSize(miny);
    // Write to (y, j), with j < y
    for (ij.seek(0); ij.getOffset() < miny; ij.advance()) {
      if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int sizej = csize.intValue(ij);
      scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min);
    }
    // Write to (j, y), with y < j < x
    for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
      if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int jbase = triangleSize(ij.getOffset());
      final int sizej = csize.intValue(ij);
      scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min);
    }
    // Write to (j, y), with y < x < j
    for (ij.seek(minx + 1); ij.valid(); ij.advance()) {
      if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
        continue;
      }
      final int jbase = triangleSize(ij.getOffset());
      final int sizej = csize.intValue(ij);
      scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return new PointerHierarchyRepresentationResult(ids, parent, height, dq.getDistanceFunction().isSquared());
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in the ELKI project: class AbstractBiclustering, method rowsBitsetToIDs().
/**
 * Convert a bitset into integer row ids.
 *
 * Walks the row id iterator in lockstep with the bits of {@code rows}; every
 * set bit selects the corresponding row id. Empty 64-bit words are skipped in
 * a single step. Stops when the iterator is exhausted.
 *
 * @param rows Bitset (as {@code long[]}) of selected rows
 * @return integer row ids
 */
protected ArrayDBIDs rowsBitsetToIDs(long[] rows) {
  ArrayModifiableDBIDs result = DBIDUtil.newArray(BitsUtil.cardinality(rows));
  DBIDArrayIter pos = this.rowIDs.iter();
  for (int word = 0; word < rows.length && pos.valid(); ++word) {
    long bits = rows[word];
    if (bits == 0L) {
      // Whole 64-bit word is empty - skip all 64 positions at once.
      pos.advance(Long.SIZE);
      continue;
    }
    for (int b = 0; b < Long.SIZE && pos.valid(); ++b, bits >>>= 1, pos.advance()) {
      if ((bits & 1L) != 0L) {
        result.add(pos);
      }
    }
  }
  return result;
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in the ELKI project: class AGNES, method findMerge().
/**
 * Perform the next merge step in AGNES.
 *
 * Scans the lower triangular distance matrix for the smallest distance
 * between two still-active clusters, then delegates to
 * {@code merge(...)} to execute the merge.
 *
 * @param end Active set size
 * @param mat Matrix storage
 * @param builder Pointer representation builder
 * @return the index that has disappeared, for shrinking the working set
 */
protected int findMerge(int end, MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder) {
  assert (end > 0);
  final DBIDArrayIter ix = mat.ix, iy = mat.iy;
  final double[] distances = mat.matrix;
  double best = Double.POSITIVE_INFINITY;
  int bestx = -1, besty = -1;
  for (int ox = 0; ox < end; ox++) {
    // Rows of objects that already joined a cluster are stale - skip:
    if (builder.isLinked(ix.seek(ox))) {
      continue;
    }
    // Start of row ox in the packed lower triangular matrix.
    final int rowbase = MatrixParadigm.triangleSize(ox);
    for (int oy = 0; oy < ox; oy++) {
      // Skip columns of already-merged objects, too:
      if (builder.isLinked(iy.seek(oy))) {
        continue;
      }
      final double dist = distances[rowbase + oy];
      // Prefer later on ==, to truncate more often.
      if (dist <= best) {
        best = dist;
        bestx = ox;
        besty = oy;
      }
    }
  }
  assert (bestx >= 0 && besty >= 0);
  // We could swap otherwise, but this shouldn't arise.
  assert (besty < bestx);
  merge(end, mat, builder, best, bestx, besty);
  return bestx;
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in the ELKI project: class AGNES, method merge().
/**
 * Execute the cluster merge.
 *
 * Records the merge of x into y in the pointer hierarchy, and updates the
 * cluster size of the surviving cluster y before refreshing the matrix.
 *
 * @param end Active set size
 * @param mat Matrix paradigm
 * @param builder Hierarchy builder
 * @param mindist Distance that was used for merging
 * @param x First matrix position
 * @param y Second matrix position
 */
protected void merge(int end, MatrixParadigm mat, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) {
  // Reuse the shared iterators rather than allocating new ones:
  final DBIDArrayIter ix = mat.ix.seek(x);
  final DBIDArrayIter iy = mat.iy.seek(y);
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("Merging: " + DBIDUtil.toString(ix) + " -> " + DBIDUtil.toString(iy) + " " + mindist);
  }
  // Merge direction x -> y: since y < x, y survives and x is dropped.
  assert (y < x);
  builder.add(ix, linkage.restore(mindist, getDistanceFunction().isSquared()), iy);
  // The surviving cluster y absorbs the members of x:
  final int sx = builder.getSize(ix);
  final int sy = builder.getSize(iy);
  builder.setSize(iy, sx + sy);
  updateMatrix(end, mat, builder, mindist, x, y, sx, sy);
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in the ELKI project: class AnderbergHierarchicalClustering, method merge().
/**
 * Execute the cluster merge.
 *
 * Records the merge of x into y in the pointer hierarchy, updates the size of
 * the surviving cluster y, invalidates the nearest-neighbor cache entry of x,
 * and repairs the cache entry of y if it pointed at x.
 *
 * @param size Data set size
 * @param mat Matrix paradigm
 * @param bestd Best distance
 * @param besti Index of best distance
 * @param builder Hierarchy builder
 * @param mindist Distance that was used for merging
 * @param x First matrix position
 * @param y Second matrix position
 */
protected void merge(int size, MatrixParadigm mat, double[] bestd, int[] besti, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) {
  // Reuse the shared iterators rather than allocating new ones:
  final DBIDArrayIter ix = mat.ix.seek(x);
  final DBIDArrayIter iy = mat.iy.seek(y);
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("Merging: " + DBIDUtil.toString(ix) + " -> " + DBIDUtil.toString(iy) + " " + mindist);
  }
  // Merge direction x -> y: since y < x, y survives and x is dropped.
  assert (y < x);
  builder.add(ix, linkage.restore(mindist, getDistanceFunction().isSquared()), iy);
  // The surviving cluster y absorbs the members of x:
  final int sx = builder.getSize(ix);
  final int sy = builder.getSize(iy);
  builder.setSize(iy, sx + sy);
  // x no longer participates - invalidate its nearest-neighbor cache slot:
  besti[x] = -1;
  // Note: this changes iy.
  updateMatrix(size, mat.matrix, iy, bestd, besti, builder, mindist, x, y, sx, sy);
  // If y cached x as its nearest neighbor, that entry is stale - recompute:
  if (besti[y] == x) {
    findBest(size, mat.matrix, bestd, besti, y);
  }
}
End of aggregated DBIDArrayIter usage examples.