use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class BarnesHutTSNE method run.
/**
 * Compute the affinity matrix for the relation, optimize a random initial
 * embedding with t-SNE, and wrap the resulting coordinates into a new
 * relation.
 *
 * @param database Database (not referenced in this method body)
 * @param relation Input relation to project
 * @return Materialized relation holding one {@code dim}-dimensional vector
 *         per object
 */
public Relation<DoubleVector> run(Database database, Relation<O> relation) {
  final AffinityMatrix pij = affinity.computeAffinityMatrix(relation, EARLY_EXAGGERATION);
  final double[][] embedding = randomInitialSolution(pij.size(), dim, random.getSingleThreadedRandom());

  // Reset the counter before optimizing, then report it afterwards.
  projectedDistances.setLong(0L);
  optimizetSNE(pij, embedding);
  LOG.statistics(projectedDistances);

  // Remove the original (unprojected) data unless configured otherwise.
  removePreviousRelation(relation);

  final DBIDs ids = relation.getDBIDs();
  final int hints = DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED;
  final WritableDataStore<DoubleVector> store = DataStoreFactory.FACTORY.makeStorage(ids, hints, DoubleVector.class);

  // The iterator offset of each object indexes its row in the embedding.
  final DBIDArrayIter iter = pij.iterDBIDs();
  while (iter.valid()) {
    store.put(iter, DoubleVector.wrap(embedding[iter.getOffset()]));
    iter.advance();
  }

  final VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  return new MaterializedRelation<>("tSNE", "t-SNE", type, store, ids);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class CacheDoubleDistanceInOnDiskMatrix method run.
/**
 * Precompute all pairwise distances of the upper triangle (including the
 * diagonal) and write them into an on-disk matrix file.
 *
 * @throws AbortException when the matrix cannot be opened or a record cannot
 *         be written
 */
@Override
public void run() {
  database.initialize();
  final Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
  final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
  final DBIDRange range = DBIDUtil.assertRange(relation.getDBIDs());
  final int n = range.size();

  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    // Number of pairs (i, j) with i <= j, i.e. n * (n + 1) / 2.
    // NOTE(review): the narrowing cast wraps around once n reaches 65536.
    final int pairs = (int) (((n + 1) * (long) n) >>> 1);
    progress = new FiniteProgress("Precomputing distances", pairs, LOG);
  }

  try (OnDiskUpperTriangleMatrix matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedDoubleDistanceFunction.DOUBLE_CACHE_MAGIC, 0, ByteArrayUtil.SIZE_DOUBLE, n)) {
    final DBIDArrayIter row = range.iter(), col = range.iter();
    while (row.valid()) {
      final int rowOffset = row.getOffset();
      // Only columns at or after the current row: upper triangle.
      for (col.seek(rowOffset); col.valid(); col.advance()) {
        final double dist = distanceQuery.distance(row, col);
        // Optional debugging aid: compare against the reverse direction.
        if (debugExtraCheckSymmetry && Math.abs(dist - distanceQuery.distance(col, row)) > 0.0000001) {
          LOG.warning("Distance function doesn't appear to be symmetric!");
        }
        try {
          matrix.getRecordBuffer(rowOffset, col.getOffset()).putDouble(dist);
        } catch (IOException e) {
          throw new AbortException("Error writing distance record " + DBIDUtil.toString(row) + "," + DBIDUtil.toString(col) + " to matrix.", e);
        }
      }
      // This row contributed (n - rowOffset) distances.
      if (progress != null) {
        progress.setProcessed(progress.getProcessed() + (n - rowOffset), LOG);
      }
      row.advance();
    }
  } catch (IOException e) {
    throw new AbortException("Error precomputing distance matrix.", e);
  }
  LOG.ensureCompleted(progress);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AbstractBiclustering method rowsBitsetToIDs.
/**
 * Convert a bitset of row positions into the corresponding row ids.
 *
 * @param rows Bitset whose set bits are used as offsets into the row id
 *        array of this instance
 * @return the row ids found at the set positions, in ascending bit order
 */
protected ArrayDBIDs rowsBitsetToIDs(BitSet rows) {
  final ArrayModifiableDBIDs selected = DBIDUtil.newArray(rows.cardinality());
  final DBIDArrayIter cursor = this.rowIDs.iter();
  int bit = rows.nextSetBit(0);
  while (bit >= 0) {
    // Position the iterator on the row and copy the id it points to.
    cursor.seek(bit);
    selected.add(cursor);
    bit = rows.nextSetBit(bit + 1);
  }
  return selected;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AffinityPropagationClusteringAlgorithm method run.
/**
 * Perform affinity propagation clustering.
 *
 * Iteratively updates the responsibility matrix {@code r} and the
 * availability matrix {@code a} (both damped by {@code lambda}) until the
 * assignment of objects to exemplars has been stable for
 * {@code convergence} iterations, or {@code maxiter} iterations were
 * performed. Objects are then grouped by their exemplar; groups with at most
 * one member are collected into a single noise cluster.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  int[] assignment = new int[size];
  double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
  double[][] r = new double[size][size];
  double[][] a = new double[size][size];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
  MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
  int inactive = 0;
  for (int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
    // Update responsibility matrix:
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i], si = s[i];
      // Find the two largest values of a(i,k) + s(i,k), and the position of
      // the largest one.
      double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
      int maxk = -1;
      for (int k = 0; k < size; k++) {
        double val = ai[k] + si[k];
        if (val > max1) {
          max2 = max1;
          max1 = val;
          maxk = k;
        } else if (val > max2) {
          max2 = val;
        }
      }
      // With the maximum value known, update r. For the column holding the
      // maximum itself, the second largest value is subtracted instead.
      for (int k = 0; k < size; k++) {
        double val = si[k] - ((k != maxk) ? max1 : max2);
        ri[k] = ri[k] * lambda + val * (1. - lambda);
      }
    }
    // Update availability matrix
    for (int k = 0; k < size; k++) {
      // Compute sum of max(0, r_ik) for all i.
      // For r_kk, don't apply the max.
      double colposum = 0.;
      for (int i = 0; i < size; i++) {
        if (i == k || r[i][k] > 0.) {
          colposum += r[i][k];
        }
      }
      for (int i = 0; i < size; i++) {
        double val = colposum;
        // Adjust column sum by the one extra term.
        if (i == k || r[i][k] > 0.) {
          val -= r[i][k];
        }
        if (i != k && val > 0.) {
          // min(0, val) for off-diagonal entries
          val = 0.;
        }
        a[i][k] = a[i][k] * lambda + val * (1 - lambda);
      }
    }
    // Recompute the exemplar of each object and count how many changed.
    int changed = 0;
    for (int i = 0; i < size; i++) {
      double[] ai = a[i], ri = r[i];
      double max = Double.NEGATIVE_INFINITY;
      int maxj = -1;
      for (int j = 0; j < size; j++) {
        double v = ai[j] + ri[j];
        // On ties, the object itself (i == j) wins.
        if (v > max || (i == j && v >= max)) {
          max = v;
          maxj = j;
        }
      }
      if (assignment[i] != maxj) {
        changed += 1;
        assignment[i] = maxj;
      }
    }
    inactive = (changed > 0) ? 0 : (inactive + 1);
    LOG.incrementProcessed(prog);
    if (aprog != null) {
      aprog.setProcessed(size - changed, LOG);
    }
  }
  if (aprog != null) {
    aprog.setProcessed(aprog.getTotal(), LOG);
  }
  LOG.setCompleted(prog);
  // Cluster map, by lead object
  Int2ObjectOpenHashMap<ModifiableDBIDs> map = new Int2ObjectOpenHashMap<>();
  DBIDArrayIter i1 = ids.iter();
  for (int i = 0; i1.valid(); i1.advance(), i++) {
    int c = assignment[i];
    // Add to cluster members:
    ModifiableDBIDs cids = map.get(c);
    if (cids == null) {
      cids = DBIDUtil.newArray();
      map.put(c, cids);
    }
    cids.add(i1);
  }
  // If we stopped early, the cluster lead might be in a different cluster.
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    final int key = entry.getIntKey();
    int targetkey = key;
    ModifiableDBIDs tids = null;
    // Chase arrows: follow the lead's own assignment until we reach a lead
    // that still owns a cluster in the map, or one that is assigned to
    // itself. The condition must test tids (the lookup result); testing ids
    // was always false and left this merge step without any effect.
    // A cycle of assignments ends at the current entry, which is still in
    // the map, so the loop terminates.
    while (tids == null && assignment[targetkey] != targetkey) {
      targetkey = assignment[targetkey];
      tids = map.get(targetkey);
    }
    if (tids != null && targetkey != key) {
      tids.addDBIDs(entry.getValue());
      iter.remove();
    }
  }
  Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
  ModifiableDBIDs noise = DBIDUtil.newArray();
  for (ObjectIterator<Int2ObjectOpenHashMap.Entry<ModifiableDBIDs>> iter = map.int2ObjectEntrySet().fastIterator(); iter.hasNext(); ) {
    Int2ObjectOpenHashMap.Entry<ModifiableDBIDs> entry = iter.next();
    // Position the iterator on the lead object of this cluster.
    i1.seek(entry.getIntKey());
    if (entry.getValue().size() > 1) {
      MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
      clustering.addToplevelCluster(new Cluster<>(entry.getValue(), mod));
    } else {
      noise.add(i1);
    }
  }
  if (noise.size() > 0) {
    // The first noise object serves as the medoid of the noise cluster.
    MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
    clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
  }
  return clustering;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class SameSizeKMeansAlgorithm method initialAssignment.
/**
 * Assign every object to a cluster while keeping each cluster at or below
 * the size cap {@code ceil(n / k)}.
 *
 * Objects are processed in order of descending priority and added to their
 * current primary cluster. Whenever a cluster reaches the size cap, the
 * not yet assigned objects that preferred this cluster are redirected to
 * another cluster that still has room, and the remaining objects are sorted
 * again.
 *
 * @param clusters Cluster member lists, indexed by cluster number; filled
 *        by this method
 * @param metas Per-object meta information (primary cluster, distances,
 *        priority); updated for redirected objects
 * @param ids Objects to assign
 * @return array copy of {@code ids}, in the order the objects were assigned
 */
protected ArrayModifiableDBIDs initialAssignment(List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, DBIDs ids) {
  final ArrayModifiableDBIDs tids = DBIDUtil.newArray(ids);
  // Size cap per cluster: n / k, rounded up.
  final int maxsize = (tids.size() + k - 1) / k;
  // Order objects by descending priority.
  final Comparator<DBIDRef> byPriorityDesc = new Comparator<DBIDRef>() {
    @Override
    public int compare(DBIDRef o1, DBIDRef o2) {
      final Meta m1 = metas.get(o1), m2 = metas.get(o2);
      final double p1 = m1.priority(), p2 = m2.priority();
      return Double.compare(p2, p1);
    }
  };
  // A seekable iterator, shared by the assignment and the refresh loops.
  final DBIDArrayIter cursor = tids.iter();
  int assigned = 0;
  while (assigned < tids.size()) {
    // Only the not yet assigned tail needs to be (re-)sorted.
    tids.sort(assigned, tids.size(), byPriorityDesc);
    for (cursor.seek(assigned); cursor.valid(); cursor.advance()) {
      final Meta meta = metas.get(cursor);
      final int target = meta.primary;
      // The preferred cluster cannot be full yet.
      final ModifiableDBIDs cluster = clusters.get(target);
      assert (cluster.size() <= maxsize);
      cluster.add(cursor);
      ++assigned;
      if (cluster.size() != maxsize) {
        continue;
      }
      // The cluster just became full: redirect every remaining object that
      // still prefers it.
      for (cursor.advance(); cursor.valid(); cursor.advance()) {
        final Meta pending = metas.get(cursor);
        if (pending.primary != target) {
          continue;
        }
        for (int c = 0; c < k; c++) {
          final boolean hasRoom = c != target && clusters.get(c).size() < maxsize;
          // Take the first cluster with room, then any closer one.
          if (hasRoom && (pending.primary == target || pending.dists[c] < pending.dists[pending.primary])) {
            pending.primary = c;
          }
        }
        // Write back the changed meta information.
        metas.put(cursor, pending);
      }
      // The iterator is exhausted; restart with a fresh sort.
      break;
    }
  }
  return tids;
}
Aggregations