Search in sources :

Example 1 with LeafEntry

use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.

the class DeLiClu method run.

/**
 * Runs DeLiClu on the given relation and returns the cluster order it builds.
 *
 * Exactly one {@code DeLiCluTreeIndex} must be registered for the relation,
 * and the configured distance function must be a
 * {@code SpatialPrimitiveDistanceFunction}.
 *
 * @param database Database whose hierarchy is searched for the index
 * @param relation Relation to process
 * @return the cluster order containing every object of the relation
 * @throws MissingPrerequisitesException if not exactly one index was found
 * @throws IllegalArgumentException if the distance function is not spatial
 * @throws AbortException if the heap runs empty before all objects are
 *         handled
 */
public ClusterOrder run(Database database, Relation<NV> relation) {
    // Prerequisite 1: exactly one DeLiClu tree index for this relation.
    Collection<DeLiCluTreeIndex<NV>> candidates = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
    if (candidates.size() != 1) {
        throw new MissingPrerequisitesException("DeLiClu found " + candidates.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
    }
    final DeLiCluTreeIndex<NV> tree = candidates.iterator().next();

    // Prerequisite 2: a distance function that can handle spatial primitives.
    if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
        throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
    }
    @SuppressWarnings("unchecked")
    final SpatialPrimitiveDistanceFunction<NV> spatialDist = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();

    // The kNN join is computed up front.
    if (LOG.isVerbose()) {
        LOG.verbose("knnJoin...");
    }
    final Relation<KNNList> knnLists = knnJoin.run(relation);

    final DBIDs allIds = relation.getDBIDs();
    final int total = allIds.size();
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("DeLiClu", total, LOG);
    }
    final ClusterOrder order = new ClusterOrder(allIds, "DeLiClu Clustering", "deliclu-clustering");
    heap = new UpdatableHeap<>();

    // Seed: the first object opens the cluster order, and the pair
    // (root, root) is the first element of the priority queue.
    final DBID first = DBIDUtil.deref(allIds.iter());
    order.add(first, Double.POSITIVE_INFINITY, null);
    int handled = 1;
    tree.setHandled(first, relation.get(first));
    final SpatialDirectoryEntry root = (SpatialDirectoryEntry) tree.getRootEntry();
    heap.add(new SpatialObjectPair(0., root, root, true));

    while (handled < total) {
        if (heap.isEmpty()) {
            throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
        }
        final SpatialObjectPair next = heap.poll();
        if (next.isExpandable) {
            // A pair of nodes: expand it and fetch the next pair.
            expandNodes(tree, spatialDist, next, knnLists);
            continue;
        }
        // A pair of objects: mark the first one as handled in the index.
        final LeafEntry handledLeaf = (LeafEntry) next.entry1;
        final LeafEntry otherLeaf = (LeafEntry) next.entry2;
        final DBID handledId = handledLeaf.getDBID();
        final IndexTreePath<DeLiCluEntry> treePath = tree.setHandled(handledId, relation.get(handledId));
        if (treePath == null) {
            throw new RuntimeException("snh: parent(" + handledId + ") = null!!!");
        }
        // Append to the cluster order.
        order.add(handledId, next.distance, otherLeaf.getDBID());
        handled++;
        // Reinsert the expanded leafs along the returned path.
        reinsertExpanded(spatialDist, tree, treePath, knnLists);
        if (prog != null) {
            prog.setProcessed(handled, LOG);
        }
    }
    LOG.ensureCompleted(prog);
    return order;
}
Also used : DeLiCluEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluEntry) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MissingPrerequisitesException(de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException) SpatialDirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DeLiCluTreeIndex(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeIndex) SpatialPrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with LeafEntry

use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.

the class AbstractXTree method toString.

/**
 * Returns a string representation of this XTree.
 *
 * The summary lists the number of levels, the counts of directory nodes,
 * supernodes and data (leaf) nodes together with their capacity bounds, the
 * number of stored objects, the split settings and the storage quota, i.e.
 * the percentage of the summed node capacity that is occupied.
 *
 * @return a string representation of this XTree
 */
@Override
public String toString() {
    long dirNodes = 0;
    long superNodes = 0;
    long leafNodes = 0;
    long objects = 0;
    long maxSuperCapacity = -1;
    long minSuperCapacity = Long.MAX_VALUE;
    BigInteger totalCapacity = BigInteger.ZERO;

    // Count levels by following the first entry of each node down to a leaf.
    // The descent also stops at a non-leaf node without entries; the previous
    // form of this loop never advanced in that case and spun forever.
    int levels = 0;
    N node = getRoot();
    while (!node.isLeaf() && node.getNumEntries() > 0) {
        SpatialEntry entry = node.getEntry(0);
        node = getNode(entry);
        levels++;
    }

    // Visit every entry once to gather the node and object statistics.
    BreadthFirstEnumeration<N, SpatialEntry> enumeration = new BreadthFirstEnumeration<>(this, getRootPath());
    while (enumeration.hasNext()) {
        IndexTreePath<SpatialEntry> indexPath = enumeration.next();
        SpatialEntry entry = indexPath.getEntry();
        if (entry instanceof LeafEntry) {
            objects++;
            continue;
        }
        N current = getNode(entry);
        if (current.isLeaf()) {
            leafNodes++;
        } else if (current.isSuperNode()) {
            superNodes++;
            maxSuperCapacity = Math.max(maxSuperCapacity, current.getCapacity());
            minSuperCapacity = Math.min(minSuperCapacity, current.getCapacity());
        } else {
            dirNodes++;
        }
        totalCapacity = totalCapacity.add(BigInteger.valueOf(current.getCapacity()));
    }
    assert objects == num_elements : "objects=" + objects + ", size=" + num_elements;

    // Guard the division: BigInteger.divide throws ArithmeticException on a
    // zero divisor, and toString() should not fail on a degenerate tree.
    final String storageQuota = totalCapacity.signum() == 0 //
        ? "0" //
        : BigInteger.valueOf(objects + dirNodes + superNodes + leafNodes).multiply(BigInteger.valueOf(100)).divide(totalCapacity).toString();

    StringBuilder result = new StringBuilder(10000);
    result.append(getClass().getName()).append(" has ").append((levels + 1)).append(" levels.\n");
    result.append(dirNodes).append(" Directory Nodes (max = ").append(dirCapacity - 1).append(", min = ").append(dirMinimum).append(")\n");
    result.append(superNodes).append(" Supernodes (max = ").append(maxSuperCapacity - 1).append(", min = ").append(minSuperCapacity - 1).append(")\n");
    result.append(leafNodes).append(" Data Nodes (max = ").append(leafCapacity - 1).append(", min = ").append(leafMinimum).append(")\n");
    result.append(objects).append(' ').append(dimensionality).append("-dim. points in the tree \n");
    result.append("min_fanout = ").append(settings.min_fanout).append(", max_overlap = ").append(settings.max_overlap).append((settings.overlap_type == Overlap.DATA_OVERLAP ? " data overlap" : " volume overlap")).append(", \n");
    result.append("Storage Quota ").append(storageQuota).append("%\n");
    return result.toString();
}
Also used : BreadthFirstEnumeration(de.lmu.ifi.dbs.elki.index.tree.BreadthFirstEnumeration) SpatialPointLeafEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry) LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) BigInteger(java.math.BigInteger) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)

Example 3 with LeafEntry

use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.

the class DeLiClu method reinsertExpanded.

/**
 * Pairs the entries below {@code parentEntry} with the entry found at
 * position {@code pos} of {@code path} and adds the resulting pairs to the
 * heap.
 *
 * If the path entry is a directory entry, children of the parent that are
 * recorded as expanded for it are descended into with {@code pos - 1}; all
 * other children are added to the heap as expandable node pairs. If the
 * path entry is a leaf entry, every child not yet handled is added as an
 * object pair whose key is the larger of the minimum distance and the kNN
 * distance stored for the path entry's object.
 *
 * @param distFunction distance function used for the minimum distances
 * @param index the index the nodes are read from
 * @param path path whose entries are paired level by level
 * @param pos position in {@code path} of the entry to pair with
 * @param parentEntry entry whose node supplies the other pair members
 * @param knns kNN lists, looked up by DBID
 */
private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV> distFunction, DeLiCluTree index, List<IndexTreePath<DeLiCluEntry>> path, int pos, DeLiCluEntry parentEntry, Relation<KNNList> knns) {
    final DeLiCluNode parent = index.getNode(parentEntry);
    final SpatialEntry pathEntry = path.get(pos).getEntry();

    if (!(pathEntry instanceof LeafEntry)) {
        // Directory level: recurse where already expanded, enqueue otherwise.
        final IntSet expandedPages = index.getExpanded(pathEntry);
        for (int k = 0; k < parent.getNumEntries(); k++) {
            final DeLiCluDirectoryEntry child = (DeLiCluDirectoryEntry) parent.getEntry(k);
            if (expandedPages.contains(child.getPageID())) {
                reinsertExpanded(distFunction, index, path, pos - 1, child, knns);
            } else {
                final double minDist = distFunction.minDist(child, pathEntry);
                heap.add(new SpatialObjectPair(minDist, child, pathEntry, true));
            }
        }
        return;
    }

    // Leaf level: the path ends here.
    assert (pos == 0);
    for (int k = 0; k < parent.getNumEntries(); k++) {
        final DeLiCluEntry child = parent.getEntry(k);
        if (!child.hasHandled()) {
            final double minDist = distFunction.minDist(child, pathEntry);
            final double knnDist = knns.get(((LeafEntry) pathEntry).getDBID()).getKNNDistance();
            final double reachability = MathUtil.max(minDist, knnDist);
            heap.add(new SpatialObjectPair(reachability, child, pathEntry, false));
        }
    }
}
Also used : DeLiCluEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluEntry) LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) IntSet(it.unimi.dsi.fastutil.ints.IntSet) DeLiCluDirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluDirectoryEntry) DeLiCluNode(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluNode) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)

Example 4 with LeafEntry

use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.

the class XSplitter method countXingDataEntries.

/**
 * Recursively counts the data objects stored below the given entries and how
 * many of them intersect <code>mbr</code>, accumulating both counts in
 * <code>numOf</code>.
 *
 * @param entries entries to inspect; directory entries are descended into
 * @param mbr bounding box the data objects are tested against
 * @param numOf array of two integers: index 0 is incremented for every data
 *        object found, index 1 for every data object intersecting
 *        <code>mbr</code>
 * @return the same array instance <code>numOf</code>, updated in place
 */
private int[] countXingDataEntries(final Collection<SpatialEntry> entries, final HyperBoundingBox mbr, int[] numOf) {
    for (final SpatialEntry candidate : entries) {
        if (!(candidate instanceof LeafEntry)) {
            // Not a data object: load the referenced node and count below it.
            final N child = tree.getNode(((DirectoryEntry) candidate).getPageID());
            countXingDataEntries(child.getEntries(), mbr, numOf);
            continue;
        }
        numOf[0] += 1;
        if (SpatialUtil.intersects(mbr, candidate)) {
            numOf[1] += 1;
        }
    }
    return numOf;
}
Also used : LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)

Example 5 with LeafEntry

use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.

the class XSplitter method chooseMinimumOverlapSplit.

/**
 * Select the distribution with minimal intersection volume from a Collection
 * of distributions. If there are several equal minimum intersection volumes,
 * the distribution with the minimum volume is selected.
 *
 * As a side effect, {@code pastOverlap} is set: to
 * <code>Double.MAX_VALUE</code> if no split axis was given, to
 * <code>Double.NaN</code> if the overlap is not computed (data node, or a
 * maximum overlap of at least 1), and otherwise to the overlap ratio of the
 * best split according to the configured overlap strategy.
 *
 * @param splitAxis Split axis to be tested
 * @param minEntries The minimum number of entries to be tested; in case of a
 *        data node, this is the minimum leaf capacity, for directory nodes,
 *        this is either the minimum directory capacity or, if a topological
 *        split has already failed, the tree's minimum fan-out parameter.
 * @param maxEntries The maximum number of entries to be tested
 * @param revert if <code>maxEntries &lt; {@link #entries}.size()</code> and
 *        <code>revert</code> is <code>true</code>, the upper range of the
 *        sorting is tested, i.e. not
 *        <code>{minEntries, ..., maxEntries}</code> but
 *
 *        <code>{{@link #entries}.size() - maxEntries + 1, ..., {@link #entries}.size() - minEntries + 1}</code>
 * @return The distribution with the minimal intersection volume or
 *         <code>null</code>, if the minimum overlap split has a volume which
 *         is larger than the allowed <code>maxOverlap</code> ratio
 */
private SplitSorting chooseMinimumOverlapSplit(int splitAxis, int minEntries, int maxEntries, boolean revert) {
    if (splitAxis == -1) {
        pastOverlap = Double.MAX_VALUE;
        return null;
    }
    double optXVolume = Double.POSITIVE_INFINITY;
    double optVolume = Double.POSITIVE_INFINITY;
    SplitSorting optDistribution = null;
    HyperBoundingBox[] optMBRs = null;

    // Generate sortings for the mbr's extrema, starting from the identity.
    final int numEntries = node.getNumEntries();
    int[] lbSorting = new int[numEntries];
    for (int i = 0; i < numEntries; i++) {
        lbSorting[i] = i;
    }
    int[] ubSorting = Arrays.copyOf(lbSorting, numEntries);
    // Sort the two arrays that are evaluated below. Previously a scratch
    // array was passed for both arguments and then discarded, which left both
    // sortings as identity permutations.
    // NOTE(review): assumes sortEntriesForDimension orders its second argument
    // by lower bound and its third by upper bound -- confirm against its
    // definition.
    sortEntriesForDimension(splitAxis, lbSorting, ubSorting);

    if (revert && maxEntries < numEntries / 2) {
        // Test reverted sortings: reverse both arrays in place.
        for (int lo = 0, hi = numEntries - 1; lo < hi; lo++, hi--) {
            int tmp = lbSorting[lo];
            lbSorting[lo] = lbSorting[hi];
            lbSorting[hi] = tmp;
            tmp = ubSorting[lo];
            ubSorting[lo] = ubSorting[hi];
            ubSorting[hi] = tmp;
        }
    }

    // Test the lower-bound sorting first, then the upper-bound sorting.
    for (int[] entrySorting : new int[][] { lbSorting, ubSorting }) {
        for (int limit = minEntries; limit <= maxEntries; limit++) {
            HyperBoundingBox mbr1 = mbr(entrySorting, 0, limit);
            HyperBoundingBox mbr2 = mbr(entrySorting, limit, entrySorting.length);
            double xVolume = SpatialUtil.overlap(mbr1, mbr2);
            if (xVolume < optXVolume) {
                optXVolume = xVolume;
                optDistribution = generateSplitSorting(entrySorting, limit);
                optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
                // The summed volume is only computed once a tie requires it.
                optVolume = Double.NaN;
            } else if (xVolume == optXVolume) {
                double vol = SpatialUtil.volume(mbr1);
                vol += SpatialUtil.volume(mbr2);
                if (Double.isNaN(optVolume)) {
                    // calculate when necessary
                    optVolume = SpatialUtil.volume(optMBRs[0]);
                    optVolume += SpatialUtil.volume(optMBRs[1]);
                }
                if (vol < optVolume) {
                    optVolume = vol;
                    optDistribution = generateSplitSorting(entrySorting, limit);
                    // Keep the MBRs in step with the chosen distribution; the
                    // data-overlap test below would otherwise be evaluated on
                    // the MBRs of a different split.
                    optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
                }
            }
        }
    }
    if (node.getEntry(0) instanceof LeafEntry || tree.get_max_overlap() >= 1) {
        // overlap is not computed
        pastOverlap = Double.NaN;
        return optDistribution;
    }
    // test overlap
    switch(maxOverlapStrategy) {
        case DATA_OVERLAP:
            pastOverlap = getRatioOfDataInIntersectionVolume(generateDistribution(optDistribution), optMBRs);
            if (tree.get_max_overlap() < pastOverlap) {
                LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best data overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < node.getNumEntries() / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
                return null;
            }
            break;
        case VOLUME_OVERLAP:
            if (Double.isNaN(optVolume)) {
                optVolume = SpatialUtil.volume(optMBRs[0]);
                optVolume += SpatialUtil.volume(optMBRs[1]);
            }
            pastOverlap = optXVolume / optVolume;
            if (tree.get_max_overlap() < pastOverlap) {
                LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best volume overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < node.getNumEntries() / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
                return null;
            }
            break;
    }
    return optDistribution;
}
Also used : LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox)

Aggregations

LeafEntry (de.lmu.ifi.dbs.elki.index.tree.LeafEntry)8 SpatialEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)4 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)2 SpatialPointLeafEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry)2 DeLiCluEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluEntry)2 HyperBoundingBox (de.lmu.ifi.dbs.elki.data.HyperBoundingBox)1 ModifiableHyperBoundingBox (de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 SpatialPrimitiveDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction)1 BreadthFirstEnumeration (de.lmu.ifi.dbs.elki.index.tree.BreadthFirstEnumeration)1 SpatialDirectoryEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry)1 DeLiCluDirectoryEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluDirectoryEntry)1 DeLiCluNode (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluNode)1 DeLiCluTreeIndex (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeIndex)1 XTreeDirectoryEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.xtree.XTreeDirectoryEntry)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 MissingPrerequisitesException (de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException)1 IntIterator (it.unimi.dsi.fastutil.ints.IntIterator)1