Search in sources :

Example 1 with Object2ObjectOpenCustomHashMap

use of it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap in project elki by elki-project.

In the class DiSH, method checkClusters.

/**
 * Prunes undersized clusters from the cluster map. Every cluster whose
 * preference vector has at least one bit set and which holds fewer than
 * {@code mu} objects is taken out of the map; its objects are then merged into
 * the parent returned by {@code findParent}, or into the noise cluster when no
 * parent is found.
 * <p>
 * Clusters stored under a preference vector with no bits set are merged into a
 * single noise cluster, which is re-inserted as the last step. The given map is
 * rewritten in place.
 *
 * @param relation the relation storing the objects
 * @param clustersMap the map containing the clusters; modified in place
 */
private void checkClusters(Relation<V> relation, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    // Key and member set of the single noise cluster that is rebuilt below.
    final long[] noiseVector = BitsUtil.zero(dimensionality);
    final ArrayModifiableDBIDs noiseIds = DBIDUtil.newArray();
    // Clusters that are too small and still need a new home.
    final List<Pair<long[], ArrayModifiableDBIDs>> undersized = new ArrayList<>();
    // Everything that survives the size check, keyed like the input map.
    final Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> retained = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);

    for (long[] pv : clustersMap.keySet()) {
        final List<ArrayModifiableDBIDs> members = clustersMap.get(pv);
        if (BitsUtil.cardinality(pv) == 0) {
            // No preferred dimension at all: these objects are noise.
            for (ArrayModifiableDBIDs ids : members) {
                noiseIds.addDBIDs(ids);
            }
            continue;
        }
        // Regular clusters: keep the large ones, queue the small ones.
        final List<ArrayModifiableDBIDs> kept = new ArrayList<>(members.size());
        for (ArrayModifiableDBIDs ids : members) {
            // NOTE(review): the isZero test looks redundant after the
            // cardinality check above - confirm against BitsUtil before removing.
            final boolean tooSmall = !BitsUtil.isZero(pv) && ids.size() < mu;
            if (tooSmall) {
                undersized.add(new Pair<>(pv, ids));
            } else {
                kept.add(ids);
            }
        }
        retained.put(pv, kept);
    }

    // Swap the map contents before the parent search, so that findParent only
    // sees the retained clusters (the noise cluster is not in the map yet).
    clustersMap.clear();
    clustersMap.putAll(retained);

    for (Pair<long[], ArrayModifiableDBIDs> small : undersized) {
        if (!small.second.isEmpty()) {
            final Pair<long[], ArrayModifiableDBIDs> parent = findParent(relation, small, clustersMap);
            // Without a parent the objects fall back to noise.
            final ArrayModifiableDBIDs target = (parent != null) ? parent.second : noiseIds;
            target.addDBIDs(small.second);
        }
    }

    // Finally register the (possibly empty) noise cluster.
    final List<ArrayModifiableDBIDs> noiseList = new ArrayList<>(1);
    noiseList.add(noiseIds);
    clustersMap.put(noiseVector, noiseList);
}
Also used : Object2ObjectOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 2 with Object2ObjectOpenCustomHashMap

use of it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap in project elki by elki-project.

In the class DiSH, method extractClusters.

/**
 * Builds the initial clusters by walking through the cluster order.
 * <p>
 * Each object is looked up under its common preference vector. It joins the
 * first existing cluster of that vector whose projected centroid yields the
 * same subspace dimensionality as the object's correlation value and lies
 * within a weighted distance of {@code 2 * epsilon}; otherwise a new cluster
 * is opened for it. In a second pass, the predecessor of each cluster's first
 * object is moved into that cluster, unless one of the skip conditions
 * documented inline applies.
 *
 * @param relation the database storing the objects
 * @param clusterOrder the cluster order to extract the clusters from
 * @return the extracted clusters, keyed by preference vector
 */
private Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> extractClusters(Relation<V> relation, DiSHClusterOrder clusterOrder) {
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Extract Clusters", relation.size(), LOG);
    }
    Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
    // Note clusterOrder currently contains DBID objects anyway.
    // Remembers, per object, the (preference vector, cluster) it was put into.
    WritableDataStore<Pair<long[], ArrayModifiableDBIDs>> entryToClusterMap = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Pair.class);

    // Pass 1: assign every object of the cluster order to a cluster.
    for (DBIDIter it = clusterOrder.iter(); it.valid(); it.advance()) {
        V vec = relation.get(it);
        long[] pv = clusterOrder.getCommonPreferenceVector(it);
        // All (parallel) clusters sharing this preference vector.
        List<ArrayModifiableDBIDs> candidates = clustersMap.get(pv);
        if (candidates == null) {
            candidates = new ArrayList<>();
            clustersMap.put(pv, candidates);
        }
        // Find the first candidate the object is compatible with.
        ArrayModifiableDBIDs match = null;
        for (ArrayModifiableDBIDs candidate : candidates) {
            NumberVector centroid = ProjectedCentroid.make(pv, relation, candidate);
            // Fresh vector per candidate; it is handed to the helpers below.
            long[] commonPv = BitsUtil.andCMin(pv, pv);
            int subspaceDim = subspaceDimensionality(vec, centroid, pv, pv, commonPv);
            if (subspaceDim != clusterOrder.getCorrelationValue(it)) {
                continue;
            }
            double dist = weightedDistance(vec, centroid, commonPv);
            if (dist <= 2 * epsilon) {
                match = candidate;
                break;
            }
        }
        // Nothing compatible: open a new parallel cluster.
        if (match == null) {
            match = DBIDUtil.newArray();
            candidates.add(match);
        }
        match.add(it);
        entryToClusterMap.put(it, new Pair<>(pv, match));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    if (LOG.isDebuggingFiner()) {
        final int dim = RelationUtil.dimensionality(relation);
        StringBuilder buf = new StringBuilder("Step 0");
        for (Map.Entry<long[], List<ArrayModifiableDBIDs>> entry : clustersMap.entrySet()) {
            for (ArrayModifiableDBIDs ids : entry.getValue()) {
                buf.append('\n').append(BitsUtil.toStringLow(entry.getKey(), dim)).append(" ids ").append(ids.size());
            }
        }
        LOG.debugFiner(buf.toString());
    }

    // Pass 2: add the predecessor to the cluster.
    DBIDVar cur = DBIDUtil.newVar(), pre = DBIDUtil.newVar();
    for (Map.Entry<long[], List<ArrayModifiableDBIDs>> entry : clustersMap.entrySet()) {
        final long[] pv = entry.getKey();
        for (ArrayModifiableDBIDs cluster : entry.getValue()) {
            if (cluster.isEmpty()) {
                continue;
            }
            // cur = first object of the cluster, pre = its predecessor.
            cluster.assignVar(0, cur);
            clusterOrder.getPredecessor(cur, pre);
            // No predecessor, or the object is its own predecessor.
            if (!pre.isSet() || DBIDUtil.equal(pre, cur)) {
                continue;
            }
            // Same preference vector on both sides: parallel cluster.
            if (BitsUtil.equal(clusterOrder.getCommonPreferenceVector(pre), clusterOrder.getCommonPreferenceVector(cur))) {
                continue;
            }
            // Skip if the predecessor has a smaller correlation value or a
            // smaller reachability than the cluster's first object.
            if (clusterOrder.getCorrelationValue(pre) < clusterOrder.getCorrelationValue(cur)) {
                continue;
            }
            if (clusterOrder.getReachability(pre) < clusterOrder.getReachability(cur)) {
                continue;
            }
            // Move the predecessor from its previous cluster into this one.
            Pair<long[], ArrayModifiableDBIDs> previous = entryToClusterMap.get(pre);
            previous.second.remove(pre);
            cluster.add(pre);
            entryToClusterMap.put(pre, new Pair<>(pv, cluster));
        }
    }
    return clustersMap;
}
Also used : Object2ObjectOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) List(java.util.List) ArrayList(java.util.ArrayList) Object2ObjectMap(it.unimi.dsi.fastutil.objects.Object2ObjectMap) Map(java.util.Map) Object2ObjectOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)2 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)2 Object2ObjectOpenCustomHashMap (it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 Object2ObjectMap (it.unimi.dsi.fastutil.objects.Object2ObjectMap)1 Map (java.util.Map)1