Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
The class MetricalIndexApproximationMaterializeKNNPreprocessor, method preprocess.
@Override
protected void preprocess() {
  final Logging log = getLogger();
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  MetricalIndexTree<O, N, E> index = getMetricalIndex(relation);
  createStorage();
  MeanVariance pagesize = new MeanVariance();
  MeanVariance ksize = new MeanVariance();
  if (log.isVerbose()) {
    log.verbose("Approximating nearest neighbor lists to database objects");
  }
  List<E> leaves = index.getLeaves();
  FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Processing leaf nodes", leaves.size(), log) : null;
  for (E leaf : leaves) {
    N node = index.getNode(leaf);
    int size = node.getNumEntries();
    pagesize.put(size);
    if (log.isDebuggingFinest()) {
      log.debugFinest("NumEntries = " + size);
    }
    // Collect the ids in this node.
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
    for (int i = 0; i < size; i++) {
      ids.add(((LeafEntry) node.getEntry(i)).getDBID());
    }
    Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 2);
    cache.defaultReturnValue(Double.NaN);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
      KNNHeap kNN = DBIDUtil.newHeap(k);
      for (DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) {
        DBIDPair key = DBIDUtil.newPair(id, id2);
        double d = cache.removeDouble(key);
        if (d == d) { // d == d fails only for NaN
          // consume the previously computed distance.
          kNN.insert(d, id2);
        } else {
          // compute a new distance.
          d = distanceQuery.distance(id, id2);
          kNN.insert(d, id2);
          // put it into the cache, but with the keys reversed,
          // so the symmetric visit can consume it later.
          key = DBIDUtil.newPair(id2, id);
          cache.put(key, d);
        }
      }
      ksize.put(kNN.size());
      storage.put(id, kNN.toKNNList());
    }
    if (log.isDebugging() && cache.size() > 0) {
      log.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
    }
    log.incrementProcessed(progress);
  }
  log.ensureCompleted(progress);
  if (log.isVerbose()) {
    log.verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
    log.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
  }
}
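The inner loop above computes each off-diagonal distance only once: the result is stored under the reversed key pair and consumed (removed) on the symmetric visit. Below is a minimal, JDK-only sketch of the same pattern; the class name and the placeholder distance function are illustrative and stand in for ELKI's DistanceQuery.

import java.util.HashMap;
import java.util.Map;

public class SymmetricCacheSketch {
  /** Placeholder metric, standing in for a real distance query. */
  static double distance(double a, double b) {
    return Math.abs(a - b);
  }

  public static void main(String[] args) {
    double[] data = { 3.0, 1.0, 4.0, 1.5, 5.0 };
    int n = data.length;
    // Cache key encodes the ordered index pair (i, j) in one long.
    Map<Long, Double> cache = new HashMap<>();
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        // Consume a previously computed distance, if present.
        Double d = cache.remove(((long) i << 32) | j);
        if (d == null) {
          d = distance(data[i], data[j]);
          if (i != j) {
            // Store under the reversed key (j, i), so the second,
            // symmetric visit finds it; the diagonal is never revisited.
            cache.put(((long) j << 32) | i, d);
          }
        }
        System.out.printf("d(%d,%d) = %.1f%n", i, j, d);
      }
    }
    // After a full pass over all ordered pairs the cache is empty again.
    System.out.println("leftover cache entries: " + cache.size());
  }
}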
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
The class DiSH, method logClusterSizes.
/**
 * Log cluster sizes in verbose mode.
 *
 * @param m Log message
 * @param dimensionality Dimensionality
 * @param clustersMap Cluster map
 */
private void logClusterSizes(String m, int dimensionality, Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
  if (LOG.isVerbose()) {
    final StringBuilder msg = new StringBuilder(1000).append(m).append('\n');
    for (ObjectIterator<Object2ObjectMap.Entry<long[], List<ArrayModifiableDBIDs>>> iter = clustersMap.object2ObjectEntrySet().fastIterator(); iter.hasNext();) {
      Object2ObjectMap.Entry<long[], List<ArrayModifiableDBIDs>> entry = iter.next();
      msg.append(BitsUtil.toStringLow(entry.getKey(), dimensionality)).append(" sizes:");
      for (ArrayModifiableDBIDs c : entry.getValue()) {
        msg.append(' ').append(c.size());
      }
      msg.append('\n');
    }
    LOG.verbose(msg.toString());
  }
}
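The map keys here are long[] preference vectors, which a plain HashMap would compare by reference identity; that is why ELKI builds the map with BitsUtil.FASTUTIL_HASH_STRATEGY. A sketch of an equivalent content-based strategy using fastutil's Hash.Strategy follows; it assumes fastutil on the classpath, and the demo class and mask values are illustrative.

import java.util.Arrays;
import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap;

public class LongArrayKeyDemo {
  /** Content-based hashing for long[] keys, analogous to BitsUtil.FASTUTIL_HASH_STRATEGY. */
  static final Hash.Strategy<long[]> LONG_ARRAY_STRATEGY = new Hash.Strategy<long[]>() {
    @Override
    public int hashCode(long[] o) {
      return Arrays.hashCode(o);
    }

    @Override
    public boolean equals(long[] a, long[] b) {
      return Arrays.equals(a, b);
    }
  };

  public static void main(String[] args) {
    Object2ObjectOpenCustomHashMap<long[], String> map =
        new Object2ObjectOpenCustomHashMap<>(LONG_ARRAY_STRATEGY);
    map.put(new long[] { 0b101L }, "subspace {0, 2}");
    // A distinct array with equal content finds the same entry.
    System.out.println(map.get(new long[] { 0b101L }));
  }
}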
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
The class DiSH, method findParent.
/**
 * Returns the parent of the specified cluster.
 *
 * @param relation the relation storing the objects
 * @param child the child to search the parent for
 * @param clustersMap the map containing the clusters
 * @return the parent of the specified cluster, or null if no parent exists
 */
private Pair<long[], ArrayModifiableDBIDs> findParent(Relation<V> relation, Pair<long[], ArrayModifiableDBIDs> child, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
  Centroid child_centroid = ProjectedCentroid.make(child.first, relation, child.second);
  Pair<long[], ArrayModifiableDBIDs> result = null;
  int resultCardinality = -1;
  long[] childPV = child.first;
  int childCardinality = BitsUtil.cardinality(childPV);
  for (long[] parentPV : clustersMap.keySet()) {
    int parentCardinality = BitsUtil.cardinality(parentPV);
    // a parent must span a proper subspace of the child
    if (parentCardinality >= childCardinality) {
      continue;
    }
    // keep only candidates better than the current best
    if (resultCardinality != -1 && parentCardinality <= resultCardinality) {
      continue;
    }
    long[] pv = BitsUtil.andCMin(childPV, parentPV);
    if (BitsUtil.equal(pv, parentPV)) {
      List<ArrayModifiableDBIDs> parentList = clustersMap.get(parentPV);
      for (ArrayModifiableDBIDs parent : parentList) {
        NumberVector parent_centroid = ProjectedCentroid.make(parentPV, relation, parent);
        double d = weightedDistance(child_centroid, parent_centroid, parentPV);
        if (d <= 2 * epsilon) {
          result = new Pair<>(parentPV, parent);
          resultCardinality = parentCardinality;
          break;
        }
      }
    }
  }
  return result;
}
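The test BitsUtil.equal(BitsUtil.andCMin(childPV, parentPV), parentPV) accepts a candidate only if the parent's preference vector is a subset of the child's, i.e. the parent spans a lower-dimensional subspace containing the child's. For masks that fit a single word, the same check in plain Java (class name and example masks are hypothetical):

public class SubspaceContainment {
  /** True iff every dimension set in parent is also set in child. */
  static boolean isParentSubspace(long childPV, long parentPV) {
    return (childPV & parentPV) == parentPV;
  }

  public static void main(String[] args) {
    long child = 0b1011L;   // dimensions {0, 1, 3}
    long parentA = 0b0011L; // dimensions {0, 1} -> candidate parent
    long parentB = 0b0100L; // dimension  {2}    -> unrelated subspace
    System.out.println(isParentSubspace(child, parentA)); // true
    System.out.println(isParentSubspace(child, parentB)); // false
    // findParent additionally prefers the candidate with the highest
    // cardinality below the child's, and requires the projected
    // centroid distance to be within 2 * epsilon.
  }
}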
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
The class DiSH, method extractClusters.
/**
 * Extracts the clusters from the cluster order.
 *
 * @param relation the database storing the objects
 * @param clusterOrder the cluster order to extract the clusters from
 * @return the extracted clusters
 */
private Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> extractClusters(Relation<V> relation, DiSHClusterOrder clusterOrder) {
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", relation.size(), LOG) : null;
  Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
  // Note: clusterOrder currently contains DBID objects anyway.
  WritableDataStore<Pair<long[], ArrayModifiableDBIDs>> entryToClusterMap = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Pair.class);
  for (DBIDIter iter = clusterOrder.iter(); iter.valid(); iter.advance()) {
    V object = relation.get(iter);
    long[] preferenceVector = clusterOrder.getCommonPreferenceVector(iter);
    // get the list of (parallel) clusters for the preference vector
    List<ArrayModifiableDBIDs> parallelClusters = clustersMap.get(preferenceVector);
    if (parallelClusters == null) {
      parallelClusters = new ArrayList<>();
      clustersMap.put(preferenceVector, parallelClusters);
    }
    // look for the proper cluster
    ArrayModifiableDBIDs cluster = null;
    for (ArrayModifiableDBIDs c : parallelClusters) {
      NumberVector c_centroid = ProjectedCentroid.make(preferenceVector, relation, c);
      // AND of the vector with itself; effectively a copy of preferenceVector
      long[] commonPreferenceVector = BitsUtil.andCMin(preferenceVector, preferenceVector);
      int subspaceDim = subspaceDimensionality(object, c_centroid, preferenceVector, preferenceVector, commonPreferenceVector);
      if (subspaceDim == clusterOrder.getCorrelationValue(iter)) {
        double d = weightedDistance(object, c_centroid, commonPreferenceVector);
        if (d <= 2 * epsilon) {
          cluster = c;
          break;
        }
      }
    }
    if (cluster == null) {
      cluster = DBIDUtil.newArray();
      parallelClusters.add(cluster);
    }
    cluster.add(iter);
    entryToClusterMap.put(iter, new Pair<>(preferenceVector, cluster));
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  if (LOG.isDebuggingFiner()) {
    int dim = RelationUtil.dimensionality(relation);
    StringBuilder msg = new StringBuilder("Step 0");
    for (Map.Entry<long[], List<ArrayModifiableDBIDs>> clusterList : clustersMap.entrySet()) {
      for (ArrayModifiableDBIDs c : clusterList.getValue()) {
        msg.append('\n').append(BitsUtil.toStringLow(clusterList.getKey(), dim)).append(" ids ").append(c.size());
      }
    }
    LOG.debugFiner(msg.toString());
  }
  // add the predecessor to the cluster
  DBIDVar cur = DBIDUtil.newVar(), pre = DBIDUtil.newVar();
  for (long[] pv : clustersMap.keySet()) {
    List<ArrayModifiableDBIDs> parallelClusters = clustersMap.get(pv);
    for (ArrayModifiableDBIDs cluster : parallelClusters) {
      if (cluster.isEmpty()) {
        continue;
      }
      cluster.assignVar(0, cur);
      clusterOrder.getPredecessor(cur, pre);
      if (!pre.isSet() || DBIDUtil.equal(pre, cur)) {
        continue;
      }
      // skip if the predecessor is in a parallel cluster
      if (BitsUtil.equal(clusterOrder.getCommonPreferenceVector(pre), clusterOrder.getCommonPreferenceVector(cur))) {
        continue;
      }
      if (clusterOrder.getCorrelationValue(pre) < clusterOrder.getCorrelationValue(cur) //
          || clusterOrder.getReachability(pre) < clusterOrder.getReachability(cur)) {
        continue;
      }
      // move the predecessor from its old cluster into this one
      Pair<long[], ArrayModifiableDBIDs> oldCluster = entryToClusterMap.get(pre);
      oldCluster.second.remove(pre);
      cluster.add(pre);
      entryToClusterMap.put(pre, new Pair<>(pv, cluster));
    }
  }
  return clustersMap;
}
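weightedDistance(object, centroid, pv) above measures the distance only in the dimensions selected by the preference vector. A plausible JDK-only sketch of such a projected Euclidean distance, assuming the mask fits into one long word (ELKI's actual implementation operates on long[] words):

public class ProjectedDistance {
  /**
   * Euclidean distance restricted to the dimensions set in pv;
   * a sketch of what DiSH's weightedDistance computes.
   */
  static double weightedDistance(double[] a, double[] b, long pv) {
    double sum = 0.0;
    for (int d = 0; d < a.length; d++) {
      if ((pv >>> d & 1L) == 1L) {
        double diff = a[d] - b[d];
        sum += diff * diff;
      }
    }
    return Math.sqrt(sum);
  }

  public static void main(String[] args) {
    double[] object = { 1.0, 5.0, 2.0 };
    double[] centroid = { 1.5, -3.0, 2.5 };
    // only dimensions 0 and 2 count; dimension 1 is ignored
    System.out.println(weightedDistance(object, centroid, 0b101L)); // ~0.707
  }
}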
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.
The class ComputeSimilarityMatrixImage, method computeSimilarityMatrixImage.
/**
 * Compute the actual similarity image.
 *
 * @param relation Relation
 * @param iter DBID iterator
 * @return result object
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
  ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
  for (; iter.valid(); iter.advance()) {
    order.add(iter);
  }
  if (order.size() != relation.size()) {
    throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
  }
  DistanceQuery<O> dq = distanceFunction.instantiate(relation);
  final int size = order.size();
  // With the logging in the outer loop, 2 * size steps provide enough
  // resolution (instead of size * (size + 1)).
  final int ltotal = 2 * size;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Similarity Matrix Image", ltotal, LOG) : null;
  // Note: we assume that we have an efficient distance cache available,
  // since we are using 2*O(n*n) distance computations.
  DoubleMinMax minmax = new DoubleMinMax();
  {
    DBIDArrayIter id1 = order.iter();
    DBIDArrayIter id2 = order.iter();
    for (; id1.valid(); id1.advance()) {
      id2.seek(id1.getOffset());
      for (; id2.valid(); id2.advance()) {
        final double dist = dq.distance(id1, id2);
        if (!Double.isNaN(dist) && !Double.isInfinite(dist)) {
          if (!skipzero || dist > 0.0) {
            minmax.put(dist);
          }
        }
      }
      LOG.incrementProcessed(prog);
    }
  }
  // linear scaling of [min, max] to [0, 1]
  double zoom = minmax.getMax() - minmax.getMin();
  if (zoom > 0.0) {
    zoom = 1. / zoom;
  }
  LinearScaling scale = new LinearScaling(zoom, -minmax.getMin() * zoom);
  BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
  {
    DBIDArrayIter id1 = order.iter();
    DBIDArrayIter id2 = order.iter();
    for (int x = 0; x < size && id1.valid(); x++, id1.advance()) {
      id2.seek(id1.getOffset());
      for (int y = x; y < size && id2.valid(); y++, id2.advance()) {
        double ddist = dq.distance(id1, id2);
        if (ddist > 0.0) {
          ddist = scale.getScaled(ddist);
        }
        // Apply extra scaling
        if (scaling != null) {
          ddist = scaling.getScaled(ddist);
        }
        // pack one gray value into all three RGB channels
        int dist = 0xFF & (int) (255 * ddist);
        int col = 0xff000000 | (dist << 16) | (dist << 8) | dist;
        img.setRGB(x, y, col);
        img.setRGB(y, x, col);
      }
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  return new SimilarityMatrix(img, relation, order);
}
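The pixel loop maps each scaled distance to an 8-bit gray value, replicates it into the three RGB channels, and mirrors the pixel across the diagonal since the matrix is symmetric. A self-contained sketch of that packing with synthetic, pre-scaled distances (the class name is illustrative):

import java.awt.image.BufferedImage;

public class GrayMatrixSketch {
  public static void main(String[] args) {
    int size = 4;
    // synthetic symmetric "distances", already scaled to [0, 1]
    double[][] dist = new double[size][size];
    for (int x = 0; x < size; x++) {
      for (int y = x; y < size; y++) {
        dist[x][y] = dist[y][x] = Math.abs(x - y) / (double) (size - 1);
      }
    }
    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    for (int x = 0; x < size; x++) {
      for (int y = x; y < size; y++) {
        // pack one 8-bit gray value into all three RGB channels
        int gray = 0xFF & (int) (255 * dist[x][y]);
        int col = 0xFF000000 | (gray << 16) | (gray << 8) | gray;
        img.setRGB(x, y, col);
        img.setRGB(y, x, col); // mirror across the diagonal
      }
    }
    System.out.printf("pixel (0,3) = 0x%08X%n", img.getRGB(0, 3));
  }
}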