Use of it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap in the project elki by elki-project.
From the class DiSH, method checkClusters.
/**
 * Removes the clusters with fewer than {@code mu} objects from the cluster
 * map and merges each of them into the parent returned by
 * {@code findParent}, or into the noise cluster when no parent is found.
 * <p>
 * All clusters stored under an all-zero preference vector are treated as
 * noise and collapsed into a single noise cluster. On return,
 * {@code clustersMap} has been rebuilt in place: it holds the surviving
 * clusters per preference vector plus exactly one noise cluster stored under
 * the all-zero preference vector.
 *
 * @param relation the relation storing the objects
 * @param clustersMap the map containing the clusters; modified in place
 */
private void checkClusters(Relation<V> relation, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  // The noise cluster is keyed by the all-zero preference vector.
  final long[] noiseVector = BitsUtil.zero(dimensionality);
  final ArrayModifiableDBIDs noiseIds = DBIDUtil.newArray();

  // Clusters below the mu threshold, remembered with their preference vector.
  List<Pair<long[], ArrayModifiableDBIDs>> notAssigned = new ArrayList<>();
  Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> newClustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);

  // Iterate over entries to avoid a second hash lookup per preference vector.
  for (Map.Entry<long[], List<ArrayModifiableDBIDs>> entry : clustersMap.entrySet()) {
    final long[] pv = entry.getKey();
    final List<ArrayModifiableDBIDs> parallelClusters = entry.getValue();
    if (BitsUtil.isZero(pv)) {
      // noise: no preferred dimension at all
      for (ArrayModifiableDBIDs c : parallelClusters) {
        noiseIds.addDBIDs(c);
      }
      continue;
    }
    // regular clusters: keep the large ones, set the small ones aside
    List<ArrayModifiableDBIDs> newParallelClusters = new ArrayList<>(parallelClusters.size());
    for (ArrayModifiableDBIDs c : parallelClusters) {
      if (c.size() < mu) {
        notAssigned.add(new Pair<>(pv, c));
      } else {
        newParallelClusters.add(c);
      }
    }
    newClustersMap.put(pv, newParallelClusters);
  }

  // Replace the map contents before searching for parents, so that only
  // surviving clusters are offered as parent candidates.
  clustersMap.clear();
  clustersMap.putAll(newClustersMap);

  for (Pair<long[], ArrayModifiableDBIDs> c : notAssigned) {
    if (c.second.isEmpty()) {
      continue;
    }
    Pair<long[], ArrayModifiableDBIDs> parent = findParent(relation, c, clustersMap);
    if (parent != null) {
      parent.second.addDBIDs(c.second);
    } else {
      noiseIds.addDBIDs(c.second);
    }
  }

  // The noise cluster is added last, so it was never a parent candidate.
  List<ArrayModifiableDBIDs> noiseList = new ArrayList<>(1);
  noiseList.add(noiseIds);
  clustersMap.put(noiseVector, noiseList);
}
Use of it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap in the project elki by elki-project.
From the class DiSH, method extractClusters.
/**
 * Builds the initial clusters by walking the cluster order.
 * <p>
 * Phase one visits every object in cluster order. The object joins the first
 * existing cluster registered under its common preference vector for which
 * the computed subspace dimensionality equals the object's correlation value
 * and the weighted distance to the cluster's projected centroid is at most
 * {@code 2 * epsilon}; if none qualifies, a new cluster is opened.
 * <p>
 * Phase two looks at the first object of every non-empty cluster and moves
 * its predecessor from the predecessor's current cluster into this one,
 * unless the predecessor is unset, is the object itself, has an equal common
 * preference vector, or has a smaller correlation value or reachability.
 *
 * @param relation the database storing the objects
 * @param clusterOrder the cluster order to extract the clusters from
 * @return the extracted clusters, keyed by preference vector
 */
private Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> extractClusters(Relation<V> relation, DiSHClusterOrder clusterOrder) {
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Extract Clusters", relation.size(), LOG);
  }
  Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
  // Remembers, per object, the (preference vector, cluster) it was put into.
  // Note clusterOrder currently contains DBID objects anyway.
  WritableDataStore<Pair<long[], ArrayModifiableDBIDs>> entryToClusterMap = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Pair.class);

  // Phase 1: assign every object to a matching or a fresh cluster.
  for (DBIDIter it = clusterOrder.iter(); it.valid(); it.advance()) {
    final V vec = relation.get(it);
    final long[] pv = clusterOrder.getCommonPreferenceVector(it);

    // all (parallel) clusters sharing this preference vector
    List<ArrayModifiableDBIDs> candidates = clustersMap.get(pv);
    if (candidates == null) {
      candidates = new ArrayList<>();
      clustersMap.put(pv, candidates);
    }

    // search for the first candidate that accepts the object
    ArrayModifiableDBIDs target = null;
    for (ArrayModifiableDBIDs candidate : candidates) {
      NumberVector centroid = ProjectedCentroid.make(pv, relation, candidate);
      long[] commonPv = BitsUtil.andCMin(pv, pv);
      int dim = subspaceDimensionality(vec, centroid, pv, pv, commonPv);
      if (dim != clusterOrder.getCorrelationValue(it)) {
        continue;
      }
      double dist = weightedDistance(vec, centroid, commonPv);
      if (dist <= 2 * epsilon) {
        target = candidate;
        break;
      }
    }
    if (target == null) {
      target = DBIDUtil.newArray();
      candidates.add(target);
    }
    target.add(it);
    entryToClusterMap.put(it, new Pair<>(pv, target));
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  if (LOG.isDebuggingFiner()) {
    final int dim = RelationUtil.dimensionality(relation);
    StringBuilder buf = new StringBuilder("Step 0");
    for (Map.Entry<long[], List<ArrayModifiableDBIDs>> entry : clustersMap.entrySet()) {
      for (ArrayModifiableDBIDs members : entry.getValue()) {
        buf.append('\n');
        buf.append(BitsUtil.toStringLow(entry.getKey(), dim));
        buf.append(" ids ");
        buf.append(members.size());
      }
    }
    LOG.debugFiner(buf.toString());
  }

  // Phase 2: pull the predecessor of each cluster's first object into it.
  DBIDVar first = DBIDUtil.newVar();
  DBIDVar pred = DBIDUtil.newVar();
  for (Map.Entry<long[], List<ArrayModifiableDBIDs>> entry : clustersMap.entrySet()) {
    final long[] pv = entry.getKey();
    for (ArrayModifiableDBIDs members : entry.getValue()) {
      if (members.isEmpty()) {
        continue;
      }
      members.assignVar(0, first);
      clusterOrder.getPredecessor(first, pred);
      // Skip when there is no distinct predecessor, when it belongs to a
      // parallel cluster (equal preference vector), or when it has a smaller
      // correlation value or reachability than the first object.
      if (!pred.isSet() || DBIDUtil.equal(pred, first) //
          || BitsUtil.equal(clusterOrder.getCommonPreferenceVector(pred), clusterOrder.getCommonPreferenceVector(first)) //
          || clusterOrder.getCorrelationValue(pred) < clusterOrder.getCorrelationValue(first) //
          || clusterOrder.getReachability(pred) < clusterOrder.getReachability(first)) {
        continue;
      }
      // move the predecessor out of its previous cluster into this one
      Pair<long[], ArrayModifiableDBIDs> previous = entryToClusterMap.get(pred);
      previous.second.remove(pred);
      members.add(pred);
      entryToClusterMap.put(pred, new Pair<>(pv, members));
    }
  }
  return clustersMap;
}
Aggregations