Search in sources :

Example 26 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class MetricalIndexApproximationMaterializeKNNPreprocessor method preprocess.

/**
 * Approximates the nearest neighbor lists leaf by leaf: for every leaf of the
 * metrical index, the neighbors of each object are searched only among the
 * objects stored in that same leaf, and the resulting list is written to
 * {@code storage}.
 * <p>
 * Within one leaf, every distance between two distinct objects is computed
 * once and handed over to the partner object through a temporary cache (this
 * relies on the distance being symmetric). Self-distances bypass the cache:
 * their reversed key is identical to the original key, so a cached
 * self-distance would never be consumed and would linger until the end of the
 * leaf, triggering the "cache should be empty" warning for every leaf.
 */
@Override
protected void preprocess() {
    final Logging log = getLogger();
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    MetricalIndexTree<O, N, E> index = getMetricalIndex(relation);
    createStorage();
    // Statistics that are only reported in verbose mode.
    MeanVariance pagesize = new MeanVariance();
    MeanVariance ksize = new MeanVariance();
    if (log.isVerbose()) {
        log.verbose("Approximating nearest neighbor lists to database objects");
    }
    List<E> leaves = index.getLeaves();
    FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Processing leaf nodes", leaves.size(), log) : null;
    for (E leaf : leaves) {
        N node = index.getNode(leaf);
        int size = node.getNumEntries();
        pagesize.put(size);
        if (log.isDebuggingFinest()) {
            log.debugFinest("NumEntires = " + size);
        }
        // Collect the ids in this node.
        ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
        for (int i = 0; i < size; i++) {
            ids.add(((LeafEntry) node.getEntry(i)).getDBID());
        }
        Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 2);
        // NaN marks "not cached"; it is detected below via d != d.
        cache.defaultReturnValue(Double.NaN);
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            KNNHeap kNN = DBIDUtil.newHeap(k);
            for (DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) {
                if (DBIDUtil.equal(id, id2)) {
                    // Self-distance: used exactly once, so never cache it.
                    kNN.insert(distanceQuery.distance(id, id2), id2);
                    continue;
                }
                DBIDPair key = DBIDUtil.newPair(id, id2);
                double d = cache.removeDouble(key);
                if (d == d) {
                    // Not NaN: consume the result stored by the partner object.
                    kNN.insert(d, id2);
                } else {
                    // Compute the distance now and remember it for the partner.
                    d = distanceQuery.distance(id, id2);
                    kNN.insert(d, id2);
                    // Stored with the keys reversed, i.e. as the partner will ask for it.
                    key = DBIDUtil.newPair(id2, id);
                    cache.put(key, d);
                }
            }
            ksize.put(kNN.size());
            storage.put(id, kNN.toKNNList());
        }
        if (log.isDebugging() && cache.size() > 0) {
            log.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
        log.incrementProcessed(progress);
    }
    log.ensureCompleted(progress);
    if (log.isVerbose()) {
        log.verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
        log.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) Object2DoubleOpenHashMap(it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDPair(de.lmu.ifi.dbs.elki.database.ids.DBIDPair) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)

Example 27 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class DiSH method logClusterSizes.

/**
 * Writes one verbose log message listing, for every key of the cluster map,
 * the sizes of the clusters stored under that key. Does nothing unless
 * verbose logging is enabled.
 *
 * @param m Headline placed on the first line of the message
 * @param dimensionality Dimensionality, passed on when formatting each key
 * @param clustersMap Map from key to the list of clusters stored under it
 */
private void logClusterSizes(String m, int dimensionality, Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
    if (!LOG.isVerbose()) {
        return;
    }
    final StringBuilder buf = new StringBuilder(1000);
    buf.append(m).append('\n');
    ObjectIterator<Object2ObjectMap.Entry<long[], List<ArrayModifiableDBIDs>>> it = clustersMap.object2ObjectEntrySet().fastIterator();
    while (it.hasNext()) {
        // One output line per key: the formatted key followed by all sizes.
        Object2ObjectMap.Entry<long[], List<ArrayModifiableDBIDs>> e = it.next();
        buf.append(BitsUtil.toStringLow(e.getKey(), dimensionality));
        buf.append(" sizes:");
        for (ArrayModifiableDBIDs members : e.getValue()) {
            buf.append(' ');
            buf.append(members.size());
        }
        buf.append('\n');
    }
    LOG.verbose(buf.toString());
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) Object2ObjectMap(it.unimi.dsi.fastutil.objects.Object2ObjectMap) List(java.util.List) ArrayList(java.util.ArrayList)

Example 28 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class DiSH method findParent.

/**
 * Searches the cluster map for a parent of the given cluster.
 * <p>
 * A key of the map qualifies if its cardinality is smaller than the child's,
 * larger than that of the parent found so far, and if
 * {@code BitsUtil.andCMin(childPV, parentPV)} equals the key itself. Among the
 * clusters stored under a qualifying key, the first one whose centroid lies
 * within a weighted distance of {@code 2 * epsilon} of the child's centroid is
 * taken as the new result.
 *
 * @param relation the relation storing the objects
 * @param child the child to search the parent for
 * @param clustersMap the map containing the clusters
 * @return the parent of the specified cluster, or {@code null} if no cluster
 *         qualified
 */
private Pair<long[], ArrayModifiableDBIDs> findParent(Relation<V> relation, Pair<long[], ArrayModifiableDBIDs> child, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
    final long[] childPV = child.first;
    final Centroid childCentroid = ProjectedCentroid.make(childPV, relation, child.second);
    final int childCardinality = BitsUtil.cardinality(childPV);
    Pair<long[], ArrayModifiableDBIDs> best = null;
    int bestCardinality = -1;
    for (long[] parentPV : clustersMap.keySet()) {
        final int parentCardinality = BitsUtil.cardinality(parentPV);
        // The candidate must have lower cardinality than the child, but higher
        // cardinality than the best parent found so far.
        final boolean notBelowChild = parentCardinality >= childCardinality;
        final boolean notAboveBest = bestCardinality != -1 && parentCardinality <= bestCardinality;
        if (notBelowChild || notAboveBest) {
            continue;
        }
        if (!BitsUtil.equal(BitsUtil.andCMin(childPV, parentPV), parentPV)) {
            continue;
        }
        for (ArrayModifiableDBIDs candidate : clustersMap.get(parentPV)) {
            NumberVector parentCentroid = ProjectedCentroid.make(parentPV, relation, candidate);
            double dist = weightedDistance(childCentroid, parentCentroid, parentPV);
            if (dist <= 2 * epsilon) {
                best = new Pair<>(parentPV, candidate);
                bestCardinality = parentCardinality;
                break;
            }
        }
    }
    return best;
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) ProjectedCentroid(de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectedCentroid) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector)

Example 29 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class DiSH method extractClusters.

/**
 * Builds the initial clusters by walking through the cluster order.
 * <p>
 * First pass: every object is put into a cluster stored under its common
 * preference vector. An existing cluster is reused if the subspace
 * dimensionality between the object and the cluster centroid equals the
 * object's correlation value and their weighted distance is at most
 * {@code 2 * epsilon}; otherwise a new cluster is opened.
 * <p>
 * Second pass: for every non-empty cluster, the predecessor of its first
 * member is moved into the cluster, unless one of the guards below applies.
 *
 * @param relation the database storing the objects
 * @param clusterOrder the cluster order to extract the clusters from
 * @return the extracted clusters, grouped by preference vector
 */
private Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> extractClusters(Relation<V> relation, DiSHClusterOrder clusterOrder) {
    final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", relation.size(), LOG) : null;
    final Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
    // Note clusterOrder currently contains DBID objects anyway.
    // Remembers for every object the (preference vector, cluster) it was assigned to.
    final WritableDataStore<Pair<long[], ArrayModifiableDBIDs>> entryToClusterMap = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Pair.class);
    for (DBIDIter it = clusterOrder.iter(); it.valid(); it.advance()) {
        final V vec = relation.get(it);
        final long[] prefVec = clusterOrder.getCommonPreferenceVector(it);
        // Fetch (or create) the list of parallel clusters of this preference vector.
        List<ArrayModifiableDBIDs> candidates = clustersMap.get(prefVec);
        if (candidates == null) {
            candidates = new ArrayList<>();
            clustersMap.put(prefVec, candidates);
        }
        // Try to find an existing cluster the object fits into.
        ArrayModifiableDBIDs target = null;
        for (ArrayModifiableDBIDs candidate : candidates) {
            NumberVector centroid = ProjectedCentroid.make(prefVec, relation, candidate);
            long[] commonPV = BitsUtil.andCMin(prefVec, prefVec);
            int subspaceDim = subspaceDimensionality(vec, centroid, prefVec, prefVec, commonPV);
            if (subspaceDim != clusterOrder.getCorrelationValue(it)) {
                continue;
            }
            double dist = weightedDistance(vec, centroid, commonPV);
            if (dist <= 2 * epsilon) {
                target = candidate;
                break;
            }
        }
        // No fitting cluster: start a new parallel cluster.
        if (target == null) {
            target = DBIDUtil.newArray();
            candidates.add(target);
        }
        target.add(it);
        entryToClusterMap.put(it, new Pair<>(prefVec, target));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isDebuggingFiner()) {
        final int dim = RelationUtil.dimensionality(relation);
        final StringBuilder buf = new StringBuilder("Step 0");
        for (Map.Entry<long[], List<ArrayModifiableDBIDs>> group : clustersMap.entrySet()) {
            for (ArrayModifiableDBIDs members : group.getValue()) {
                buf.append('\n');
                buf.append(BitsUtil.toStringLow(group.getKey(), dim));
                buf.append(" ids ").append(members.size());
            }
        }
        LOG.debugFiner(buf.toString());
    }
    // Add the predecessor of the first member to the cluster.
    final DBIDVar first = DBIDUtil.newVar(), pred = DBIDUtil.newVar();
    for (long[] pv : clustersMap.keySet()) {
        for (ArrayModifiableDBIDs cluster : clustersMap.get(pv)) {
            if (cluster.isEmpty()) {
                continue;
            }
            cluster.assignVar(0, first);
            clusterOrder.getPredecessor(first, pred);
            // No predecessor, or the object is its own predecessor.
            if (!pred.isSet() || DBIDUtil.equal(pred, first)) {
                continue;
            }
            // Parallel cluster: both share the same preference vector.
            if (BitsUtil.equal(clusterOrder.getCommonPreferenceVector(pred), clusterOrder.getCommonPreferenceVector(first))) {
                continue;
            }
            // Skip if the predecessor has a smaller correlation value or reachability.
            final boolean lowerCorrelation = clusterOrder.getCorrelationValue(pred) < clusterOrder.getCorrelationValue(first);
            if (lowerCorrelation || clusterOrder.getReachability(pred) < clusterOrder.getReachability(first)) {
                continue;
            }
            // Move the predecessor from its previous cluster into this one.
            Pair<long[], ArrayModifiableDBIDs> previous = entryToClusterMap.get(pred);
            previous.second.remove(pred);
            cluster.add(pred);
            entryToClusterMap.put(pred, new Pair<>(pv, cluster));
        }
    }
    return clustersMap;
}
Also used : Object2ObjectOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) List(java.util.List) ArrayList(java.util.ArrayList) Object2ObjectMap(it.unimi.dsi.fastutil.objects.Object2ObjectMap) Map(java.util.Map) Object2ObjectOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 30 with ArrayModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in project elki by elki-project.

the class ComputeSimilarityMatrixImage method computeSimilarityMatrixImage.

/**
 * Compute the actual similarity image.
 * <p>
 * The objects are collected in the order delivered by the iterator. A first
 * pass over the upper triangle of the distance matrix determines the minimum
 * and maximum finite distance (optionally ignoring zeros, see
 * {@code skipzero}); a second pass renders each distance as a gray level and
 * mirrors it to the lower triangle.
 * <p>
 * Gray levels are clamped to the range 0..255. Without clamping, a scaled
 * value slightly above 1 (or below 0), as an additional {@code scaling}
 * function may produce, would wrap around when reduced to 8 bits and be drawn
 * with the opposite brightness.
 *
 * @param relation Relation
 * @param iter DBID iterator
 * @return result object
 * @throws IllegalStateException if the iterator does not deliver exactly as
 *         many objects as the relation contains
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
    ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
    for (; iter.valid(); iter.advance()) {
        order.add(iter);
    }
    if (order.size() != relation.size()) {
        throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    DistanceQuery<O> dq = distanceFunction.instantiate(relation);
    final int size = order.size();
    // Progress is reported once per row in each of the two passes, so the
    // total is 2 * size (which provides enough resolution).
    final int ltotal = 2 * size;
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Similarity Matrix Image", ltotal, LOG) : null;
    // Note: we assume that we have an efficient distance cache available,
    // since we are using 2*O(n*n) distance computations.
    DoubleMinMax minmax = new DoubleMinMax();
    {
        // Pass 1: value range over the upper triangle (including the diagonal).
        DBIDArrayIter id1 = order.iter();
        DBIDArrayIter id2 = order.iter();
        for (; id1.valid(); id1.advance()) {
            id2.seek(id1.getOffset());
            for (; id2.valid(); id2.advance()) {
                final double dist = dq.distance(id1, id2);
                final boolean finite = !Double.isNaN(dist) && !Double.isInfinite(dist);
                if (finite && (!skipzero || dist > 0.0)) {
                    minmax.put(dist);
                }
            }
            LOG.incrementProcessed(prog);
        }
    }
    // Map [min, max] linearly onto [0, 1]; a degenerate range keeps its sign.
    double zoom = minmax.getMax() - minmax.getMin();
    if (zoom > 0.0) {
        zoom = 1. / zoom;
    }
    LinearScaling scale = new LinearScaling(zoom, -minmax.getMin() * zoom);
    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    {
        // Pass 2: render the upper triangle and mirror it.
        DBIDArrayIter id1 = order.iter();
        DBIDArrayIter id2 = order.iter();
        for (int x = 0; x < size && id1.valid(); x++, id1.advance()) {
            id2.seek(id1.getOffset());
            for (int y = x; y < size && id2.valid(); y++, id2.advance()) {
                double ddist = dq.distance(id1, id2);
                // Zero (and negative) distances are not rescaled.
                if (ddist > 0.0) {
                    ddist = scale.getScaled(ddist);
                }
                // Apply extra scaling
                if (scaling != null) {
                    ddist = scaling.getScaled(ddist);
                }
                // Saturate instead of wrapping around; NaN converts to 0.
                int dist = Math.max(0, Math.min(255, (int) (255 * ddist)));
                int col = 0xff000000 | (dist << 16) | (dist << 8) | dist;
                img.setRGB(x, y, col);
                img.setRGB(y, x, col);
            }
            LOG.incrementProcessed(prog);
        }
    }
    LOG.ensureCompleted(prog);
    return new SimilarityMatrix(img, relation, order);
}
Also used : LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) BufferedImage(java.awt.image.BufferedImage) ImageIO(javax.imageio.ImageIO)

Aggregations

ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)49 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)23 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)12 ArrayList (java.util.ArrayList)11 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)10 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)9 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)7 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)5 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)5 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)4 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)4 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)4 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)4 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)4 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)4