Search in sources :

Example 6 with HyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.

the class CASHInterval method split.

/**
 * Divides this interval into (at most) two child intervals. Does nothing if
 * the interval already has children.
 * <p>
 * The split dimension is the one following {@code maxSplitDimension}; once
 * the last dimension has been reached it wraps around to dimension 0 and the
 * children are placed one level deeper. The interval is cut at the midpoint
 * of the split dimension. A child is only created when the split strategy
 * returns a non-null id set for the child's bounding box.
 */
public void split() {
    if (hasChildren()) {
        return;
    }
    final boolean wrapAround = maxSplitDimension >= getDimensionality() - 1;
    final int splitDim;
    final int childLevel;
    if (wrapAround) {
        // all dimensions have been split once: restart at 0, one level deeper
        splitDim = 0;
        childLevel = level + 1;
    } else {
        splitDim = maxSplitDimension + 1;
        childLevel = level;
    }
    final double lowerBound = getMin(splitDim);
    final double splitPoint = lowerBound + (getMax(splitDim) - lowerBound) * .5;

    // The upper half (right child) is processed first, then the lower half
    // (left child).
    for (boolean upperHalf : new boolean[] { true, false }) {
        // Bounds of this interval; per the original's note these are clones,
        // so they can be modified per child.
        final double[] childMin = SpatialUtil.getMin(this);
        final double[] childMax = SpatialUtil.getMax(this);
        if (upperHalf) {
            childMin[splitDim] = splitPoint;
        } else {
            childMax[splitDim] = splitPoint;
        }
        final ModifiableDBIDs ids = split.determineIDs(getIDs(), new HyperBoundingBox(childMin, childMax), d_min, d_max);
        if (ids == null) {
            // no child is created for this half
            continue;
        }
        final CASHInterval child = new CASHInterval(childMin, childMax, split, ids, splitDim, childLevel, d_min, d_max);
        if (upperHalf) {
            rightChild = child;
        } else {
            leftChild = child;
        }
    }

    if (!LOG.isDebuggingFine()) {
        return;
    }
    final StringBuilder report = new StringBuilder();
    report.append("Child level ").append(childLevel);
    report.append(",  split Dim   ").append(splitDim);
    if (leftChild != null) {
        report.append("\nleft   ").append(leftChild);
    }
    if (rightChild != null) {
        report.append("\nright   ").append(rightChild);
    }
    LOG.fine(report.toString());
}
Also used : ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox)

Example 7 with HyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.

the class CASHIntervalSplit method determineIDs.

/**
 * Determines the ids belonging to the given interval, i.e. the
 * parameterization functions whose value range over the interval overlaps
 * the distance range {@code [d_min, d_max]}.
 * <p>
 * The minimum and maximum function values are cached per interval and id, so
 * they are only computed the first time an id is seen for an interval.
 *
 * @param superSetIDs a superset of the ids to be determined
 * @param interval the hyper bounding box defining the interval of alpha
 *        values
 * @param d_min the minimum distance value for the interval
 * @param d_max the maximum distance value for the interval
 * @return the ids belonging to the given interval, or {@code null} if fewer
 *         than {@code minPts} ids belong to it
 * @throws IllegalArgumentException if for some id the computed minimum
 *         exceeds the computed maximum by more than
 *         {@code ParameterizationFunction.DELTA}
 */
public ModifiableDBIDs determineIDs(DBIDs superSetIDs, HyperBoundingBox interval, double d_min, double d_max) {
    // Debug output is only assembled when debugging is enabled.
    final StringBuilder trace = LOG.isDebugging() ? new StringBuilder() : null;
    if (trace != null) {
        trace.append("interval ").append(interval);
    }
    final ModifiableDBIDs accepted = DBIDUtil.newHashSet(superSetIDs.size());

    // Look up the per-interval caches; start fresh ones if either is missing.
    Map<DBID, Double> cachedMin = f_minima.get(interval);
    Map<DBID, Double> cachedMax = f_maxima.get(interval);
    if (cachedMin == null || cachedMax == null) {
        cachedMin = new HashMap<>();
        f_minima.put(interval, cachedMin);
        cachedMax = new HashMap<>();
        f_maxima.put(interval, cachedMax);
    }

    DBIDIter iter = superSetIDs.iter();
    while (iter.valid()) {
        final DBID id = DBIDUtil.deref(iter);
        Double lo = cachedMin.get(id);
        Double hi = cachedMax.get(id);
        if (lo == null) {
            // not cached yet: evaluate the function at its extremal alphas
            final ParameterizationFunction func = database.get(id);
            final HyperBoundingBox extremes = func.determineAlphaMinMax(interval);
            lo = func.function(SpatialUtil.getMin(extremes));
            hi = func.function(SpatialUtil.getMax(extremes));
            cachedMin.put(id, lo);
            cachedMax.put(id, hi);
        }
        if (trace != null) {
            trace.append("\n\nf_min ").append(lo);
            trace.append("\nf_max ").append(hi);
            trace.append("\nd_min ").append(d_min);
            trace.append("\nd_max ").append(d_max);
        }
        if (lo - hi > ParameterizationFunction.DELTA) {
            final String center = FormatUtil.format(SpatialUtil.centroid(interval));
            throw new IllegalArgumentException("Houston, we have a problem: f_min > f_max! " + "\nf_min[" + center + "] = " + lo + "\nf_max[" + center + "] = " + hi + "\nf " + database.get(id));
        }
        // keep the id if [f_min, f_max] overlaps [d_min, d_max]
        final boolean overlaps = lo <= d_max && hi >= d_min;
        if (overlaps) {
            accepted.add(id);
        }
        if (trace != null) {
            trace.append("\nid ").append(id).append(overlaps ? " appended" : " NOT appended");
        }
        iter.advance();
    }

    if (trace != null) {
        trace.append("\nchildIds ").append(accepted.size());
        LOG.debugFine(trace.toString());
    }
    return accepted.size() < minPts ? null : accepted;
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 8 with HyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.

the class XSplitter method chooseMinimumOverlapSplit.

/**
 * Select the distribution with minimal intersection volume from a Collection
 * of distributions. If there are several equal minimum intersection volumes,
 * the distribution with the minimum volume is selected.
 * <p>
 * As a side effect, {@code pastOverlap} is set: to
 * {@code Double.MAX_VALUE} if no split axis was given, to {@code NaN} if no
 * overlap ratio is computed (leaf entries, or a maximum overlap of at least
 * 1), and otherwise to the overlap ratio of the best distribution found.
 *
 * @param splitAxis Split axis to be tested
 * @param minEntries The minimum number of entries to be tested; in case of a
 *        data node, this is the minimum leaf capacity, for directory nodes,
 *        this is either the minimum directory capacity or, if a topological
 *        split has already failed, the tree's minimum fan-out parameter.
 * @param maxEntries The maximum number of entries to be tested
 * @param revert if <code>maxEntries &lt; {@link #entries}.size()</code> and
 *        <code>revert</code> is <code>true</code>, the upper range of the
 *        sorting is tested, i.e. not
 *        <code>{minEntries, ..., maxEntries}</code> but
 *
 *        <code>{{@link #entries}.size() - maxEntries + 1, ..., {@link #entries}.size() - minEntries + 1}</code>
 * @return The distribution with the minimal intersection volume or
 *         <code>null</code>, if the minimum overlap split has a volume which
 *         is larger than the allowed <code>maxOverlap</code> ratio
 */
private SplitSorting chooseMinimumOverlapSplit(int splitAxis, int minEntries, int maxEntries, boolean revert) {
    if (splitAxis == -1) {
        pastOverlap = Double.MAX_VALUE;
        return null;
    }
    double optXVolume = Double.POSITIVE_INFINITY;
    double optVolume = Double.POSITIVE_INFINITY;
    SplitSorting optDistribution = null;
    HyperBoundingBox[] optMBRs = null;

    // generate sortings for the mbr's extrema, starting from the identity
    // permutation
    final int numEntries = node.getNumEntries();
    int[] lbSorting = new int[numEntries];
    for (int i = 0; i < numEntries; i++) {
        lbSorting[i] = i;
    }
    int[] ubSorting = Arrays.copyOf(lbSorting, numEntries);
    // FIX: sort the two arrays that are actually evaluated below. The
    // previous code sorted a scratch array into itself and then discarded it,
    // so both passes ran on the unsorted identity order.
    // NOTE(review): assumes the helper orders its 2nd argument by lower
    // bound and its 3rd by upper bound - confirm against its definition.
    sortEntriesForDimension(splitAxis, lbSorting, ubSorting);

    if (revert && maxEntries < numEntries / 2) {
        // test reverted sortings: reverse both permutations in place
        for (int lo = 0, hi = numEntries - 1; lo < hi; lo++, hi--) {
            int tmp = lbSorting[lo];
            lbSorting[lo] = lbSorting[hi];
            lbSorting[hi] = tmp;
            tmp = ubSorting[lo];
            ubSorting[lo] = ubSorting[hi];
            ubSorting[hi] = tmp;
        }
    }

    // test the lower-bound sorting first, then the upper-bound sorting
    for (int[] entrySorting : new int[][] { lbSorting, ubSorting }) {
        for (int limit = minEntries; limit <= maxEntries; limit++) {
            HyperBoundingBox mbr1 = mbr(entrySorting, 0, limit);
            HyperBoundingBox mbr2 = mbr(entrySorting, limit, entrySorting.length);
            double xVolume = SpatialUtil.overlap(mbr1, mbr2);
            if (xVolume < optXVolume) {
                optXVolume = xVolume;
                optDistribution = generateSplitSorting(entrySorting, limit);
                optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
                // summed volume is computed lazily, only if a tie occurs
                optVolume = Double.NaN;
            } else if (xVolume == optXVolume) {
                double vol = SpatialUtil.volume(mbr1);
                vol += SpatialUtil.volume(mbr2);
                if (Double.isNaN(optVolume)) {
                    // calculate when necessary
                    optVolume = SpatialUtil.volume(optMBRs[0]);
                    optVolume += SpatialUtil.volume(optMBRs[1]);
                }
                if (vol < optVolume) {
                    optVolume = vol;
                    optDistribution = generateSplitSorting(entrySorting, limit);
                    // FIX: keep the MBRs in sync with the chosen distribution;
                    // they are used by the data-overlap test below.
                    optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
                }
            }
        }
    }
    if (node.getEntry(0) instanceof LeafEntry || tree.get_max_overlap() >= 1) {
        // overlap is not computed
        pastOverlap = Double.NaN;
        return optDistribution;
    }
    // test overlap
    switch(maxOverlapStrategy) {
        case DATA_OVERLAP:
            pastOverlap = getRatioOfDataInIntersectionVolume(generateDistribution(optDistribution), optMBRs);
            if (tree.get_max_overlap() < pastOverlap) {
                LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best data overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < numEntries / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
                return null;
            }
            break;
        case VOLUME_OVERLAP:
            if (Double.isNaN(optVolume)) {
                optVolume = SpatialUtil.volume(optMBRs[0]);
                optVolume += SpatialUtil.volume(optMBRs[1]);
            }
            // ratio of the intersection volume to the summed volume
            pastOverlap = optXVolume / optVolume;
            if (tree.get_max_overlap() < pastOverlap) {
                LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best volume overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < numEntries / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
                return null;
            }
            break;
    }
    return optDistribution;
}
Also used : LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox)

Example 9 with HyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.

the class XSplitter method getSurfaceSums4Sorting.

/**
 * Sums up the perimeters of the <code>2 * (maxEntries - minEntries + 1)</code>
 * split MBRs that result from the sorting <code>entrySorting</code>.
 * <p>
 * Instead of recomputing both MBRs for every split position, the bounds of
 * the first group are extended incrementally and the bounds of the second
 * group are tracked with one heap per dimension and bound.
 *
 * @param minEntries minimally allowed subgroup size
 * @param maxEntries maximally allowed subgroup size for the first entry set
 * @param entrySorting a permutation of the indices of {@link #entries}
 * @param dim the dimension of the tree
 * @return the sum of all first and second MBRs' surfaces for the tested entry
 *         distributions
 */
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
    final int total = entrySorting.length;

    // running bounds of the first group
    double[] firstUpper = new double[dim];
    double[] firstLower = new double[dim];
    Arrays.fill(firstUpper, Double.NEGATIVE_INFINITY);
    Arrays.fill(firstLower, Double.POSITIVE_INFINITY);

    // per-dimension heaps for the second group's bounds:
    // descending for the upper bounds, ascending for the lower bounds
    List<Heap<DoubleIntPair>> secondUpperHeaps = new ArrayList<>(dim);
    List<Heap<DoubleIntPair>> secondLowerHeaps = new ArrayList<>(dim);
    for (int d = 0; d < dim; d++) {
        secondUpperHeaps.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
        secondLowerHeaps.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
    }

    // the first group starts out with the first minEntries entries
    for (int pos = 0; pos < minEntries; pos++) {
        add2MBR(entrySorting, firstUpper, firstLower, pos);
    }
    // NOTE(review): mbr1 is expected to reflect later updates of
    // firstLower/firstUpper in the loop below - presumably HyperBoundingBox
    // keeps references to the given arrays; confirm.
    HyperBoundingBox mbr1 = new HyperBoundingBox(firstLower, firstUpper);

    // bounds of the tail of the second group (entries from maxEntries on),
    // which never leaves the second group
    double[] secondLower = new double[dim];
    double[] secondUpper = new double[dim];
    Arrays.fill(secondUpper, Double.NEGATIVE_INFINITY);
    Arrays.fill(secondLower, Double.POSITIVE_INFINITY);
    assert total - maxEntries == minEntries;
    for (int pos = maxEntries; pos < total; pos++) {
        add2MBR(entrySorting, secondUpper, secondLower, pos);
    }
    for (int d = 0; d < dim; d++) {
        // tagged with index entrySorting.length => never to be removed
        secondLowerHeaps.get(d).add(new DoubleIntPair(secondLower[d], total));
        secondUpperHeaps.get(d).add(new DoubleIntPair(secondUpper[d], total));
    }

    // the entries that will move to the first group later on
    for (int pos = minEntries; pos < maxEntries; pos++) {
        add2MBR(entrySorting, secondUpperHeaps, secondLowerHeaps, pos);
    }
    // initial bounds of the second group are the heap tops
    for (int d = 0; d < dim; d++) {
        secondLower[d] = secondLowerHeaps.get(d).peek().first;
        secondUpper[d] = secondUpperHeaps.get(d).peek().first;
    }
    ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(secondLower, secondUpper);

    double perimeterSum = SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
    // walk through the remaining distributions, moving one entry at a time
    int limit = minEntries;
    while (limit < maxEntries) {
        // extend first MBR by entry at position entrySorting[limit]
        add2MBR(entrySorting, firstUpper, firstLower, limit);
        // take entry at position entrySorting[limit] out of the second MBR
        removeFromMBR(secondUpperHeaps, secondLowerHeaps, limit, mbr2);
        perimeterSum += SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
        limit++;
    }
    return perimeterSum;
}
Also used : ArrayList(java.util.ArrayList) DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Example 10 with HyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.

the class AbstractXTree method choosePath.

/**
 * Chooses the best path of the specified subtree for insertion of the given
 * MBR at the specified level. The selection uses the following criteria:
 * <ol>
 * <li>Test on containment (<code>mbr</code> <em>is</em> within one of the
 * children)</li>
 * <li>If there are multiple containing children, the child with the minimum
 * volume is chosen.</li>
 * <li>Else, if the children point to leaf nodes, chooses the child with the
 * minimum multi-overlap increase.</li>
 * <li>Else, or the multi-overlap increase leads to ties, the child with the
 * minimum volume increase is selected.</li>
 * <li>If there are still ties, the child with the minimum volume is
 * chosen.</li>
 * </ol>
 *
 * @param subtree the subtree to be tested for insertion
 * @param mbr the MBR to be inserted
 * @param level the level at which the MBR should be inserted (level 1
 *        indicates leaf-level)
 * @param cur counter that is incremented by one on every recursive descent;
 *        it is not otherwise read by this method
 * @return the path of the appropriate subtree to insert the given
 *         <code>mbr</code>
 * @throws RuntimeException if the page file returns no node for the
 *         subtree's entry
 * @throws IllegalStateException if an overlap increase or a volume is
 *         infinite or NaN
 */
@Override
protected IndexTreePath<SpatialEntry> choosePath(IndexTreePath<SpatialEntry> subtree, SpatialComparable mbr, int level, int cur) {
    if (getLogger().isDebuggingFiner()) {
        getLogger().debugFiner("node " + subtree + ", level " + level);
    }
    N node = getNode(subtree.getEntry());
    if (node == null) {
        throw new RuntimeException("Page file did not return node for node id: " + getPageID(subtree.getEntry()));
    }
    if (node.isLeaf()) {
        return subtree;
    }
    // first test on containment
    IndexTreePath<SpatialEntry> newSubtree = containedTest(subtree, node, mbr);
    if (newSubtree != null) {
        if (height - subtree.getPathCount() == level) {
            return newSubtree;
        } else {
            return choosePath(newSubtree, mbr, level, cur + 1);
        }
    }
    int optEntry = -1;
    HyperBoundingBox optTestMBR = null;
    double optOverlapInc = 0;
    // test overlap increase?
    boolean isLeafContainer = false;
    // Overlap increase is tested when the children are leafs, unless this is
    // a supernode and supernodes are configured to be omitted.
    if ((!OMIT_OVERLAP_INCREASE_4_SUPERNODES || !node.isSuperNode()) && getNode(node.getEntry(0)).isLeaf()) {
        // children are leafs
        // overlap increase is to be tested
        optOverlapInc = Double.POSITIVE_INFINITY;
        isLeafContainer = true;
    }
    double optVolume = Double.POSITIVE_INFINITY;
    double optVolumeInc = Double.POSITIVE_INFINITY;
    double tempVolume, volume;
    for (int index = 0; index < node.getNumEntries(); index++) {
        SpatialEntry child = node.getEntry(index);
        SpatialComparable childMBR = child;
        // MBR the child would have after inserting mbr
        HyperBoundingBox testMBR = SpatialUtil.union(childMBR, mbr);
        double pairwiseOverlapInc;
        if (isLeafContainer) {
            pairwiseOverlapInc = calculateOverlapIncrease(node, child, testMBR);
            if (Double.isInfinite(pairwiseOverlapInc) || Double.isNaN(pairwiseOverlapInc)) {
                throw new IllegalStateException("an entry's MBR is too large to calculate its overlap increase: " + pairwiseOverlapInc + "; \nplease re-scale your data s.t. it can be dealt with");
            }
        } else {
            // no need to examine overlap increase?
            pairwiseOverlapInc = 0;
        }
        if (pairwiseOverlapInc <= optOverlapInc) {
            if (pairwiseOverlapInc == optOverlapInc) {
                // If there are multiple entries with the same overlap increase,
                // choose the one with the minimum volume increase.
                // If there are also multiple entries with the same volume increase
                // choose the one with the minimum volume.
                volume = SpatialUtil.volume(childMBR);
                if (Double.isInfinite(volume) || Double.isNaN(volume)) {
                    throw new IllegalStateException("an entry's MBR is too large to calculate its volume: " + volume + "; \nplease re-scale your data s.t. it can be dealt with");
                }
                tempVolume = SpatialUtil.volume(testMBR);
                if (Double.isInfinite(tempVolume) || Double.isNaN(tempVolume)) {
                    throw new IllegalStateException("an entry's MBR is too large to calculate its volume: " + tempVolume + "; \nplease re-scale your data s.t. it can be dealt with");
                }
                double volumeInc = tempVolume - volume;
                if (Double.isNaN(optVolumeInc)) {
                    // has not yet been calculated for the current optimum
                    optVolume = SpatialUtil.volume(node.getEntry(optEntry));
                    optVolumeInc = SpatialUtil.volume(optTestMBR) - optVolume;
                }
                if (volumeInc < optVolumeInc) {
                    optVolumeInc = volumeInc;
                    optVolume = volume;
                    optEntry = index;
                } else if (volumeInc == optVolumeInc && volume < optVolume) {
                    // TODO: decide whether to remove this option
                    // Reported through the logger instead of stdout, so that
                    // library users do not get unsolicited console output.
                    if (getLogger().isDebuggingFiner()) {
                        getLogger().debugFiner("####\nEQUAL VOLUME INCREASE: HAPPENS!\n####");
                    }
                    optVolumeInc = volumeInc;
                    optVolume = volume;
                    optEntry = index;
                }
            } else {
                // already better
                optOverlapInc = pairwiseOverlapInc;
                // NaN marks the volumes as "not yet calculated"
                optVolume = Double.NaN;
                optVolumeInc = Double.NaN;
                // for later calculations
                optTestMBR = testMBR;
                optEntry = index;
            }
        }
    }
    assert optEntry >= 0;
    newSubtree = new IndexTreePath<>(subtree, node.getEntry(optEntry), optEntry);
    if (height - subtree.getPathCount() == level) {
        return newSubtree;
    } else {
        return choosePath(newSubtree, mbr, level, cur + 1);
    }
}
Also used : SpatialComparable(de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)

Aggregations

HyperBoundingBox (de.lmu.ifi.dbs.elki.data.HyperBoundingBox)14 ModifiableHyperBoundingBox (de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)6 SpatialEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)4 SpatialComparable (de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable)3 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 SplitHistory (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.xtree.util.SplitHistory)2 TopBoundedHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)2 DoubleIntPair (de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 SimpleGaussianContinuousUncertainObject (de.lmu.ifi.dbs.elki.data.uncertain.SimpleGaussianContinuousUncertainObject)1 UniformContinuousUncertainObject (de.lmu.ifi.dbs.elki.data.uncertain.UniformContinuousUncertainObject)1 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 IndexTreePath (de.lmu.ifi.dbs.elki.index.tree.IndexTreePath)1 LeafEntry (de.lmu.ifi.dbs.elki.index.tree.LeafEntry)1 Heap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap)1 ArrayList (java.util.ArrayList)1