Search in sources :

Example 11 with ModifiableHyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.

the class XSplitter method getSurfaceSums4Sorting.

/**
 * Compute the surfaces of the <code>2 * (maxEntries - minEntries + 1)</code>
 * split MBRs resulting for the sorting <code>entrySorting</code>.
 * <p>
 * The method starts with the first <code>minEntries</code> positions of the
 * sorting in the first group and all remaining positions in the second group.
 * It then moves the positions <code>minEntries .. maxEntries - 1</code> one
 * by one from the second group to the first. For the initial distribution
 * and after each move, the value of <code>SpatialUtil.perimeter</code> of
 * both group MBRs is added to the result (so "surface" here is whatever
 * <code>SpatialUtil.perimeter</code> returns).
 * <p>
 * Precondition (checked by an <code>assert</code> only):
 * <code>entrySorting.length - maxEntries == minEntries</code>.
 *
 * @param minEntries minimally allowed subgroup size
 * @param maxEntries maximally allowed subgroup size for the first entry set
 * @param entrySorting a permutation of the indices of {@link #entries}
 * @param dim the dimension of the tree
 * @return the sum of all first and second MBRs' surfaces for the tested entry
 *         distributions
 */
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
    // avoid multiple MBR calculations by updating min/max-logs for the two
    // collections' bounds:
    // the first entries' maximum upper bounds
    // (starts at -infinity so that any real bound replaces it)
    double[] pqUBFirst = new double[dim];
    Arrays.fill(pqUBFirst, Double.NEGATIVE_INFINITY);
    // maintain the second entries' upper bounds
    // (one heap per dimension, because entries are removed again later on)
    List<Heap<DoubleIntPair>> pqUBSecond = new ArrayList<>(dim);
    for (int i = 0; i < dim; i++) {
        // Descending heap
        pqUBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
    }
    // the first entries' minimum lower bounds
    // (starts at +infinity so that any real bound replaces it)
    double[] pqLBFirst = new double[dim];
    Arrays.fill(pqLBFirst, Double.POSITIVE_INFINITY);
    // maintain the second entries' minimum lower bounds
    List<Heap<DoubleIntPair>> pqLBSecond = new ArrayList<>(dim);
    for (int i = 0; i < dim; i++) {
        // Ascending heap
        pqLBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
    }
    // initialize bounds for first entry collection
    for (int index = 0; index < minEntries; index++) {
        add2MBR(entrySorting, pqUBFirst, pqLBFirst, index);
    }
    // NOTE(review): mbr1 is never reassigned, yet its perimeter is re-read after
    // pqLBFirst/pqUBFirst are updated in the final loop. This presumably relies on
    // HyperBoundingBox keeping references to the given arrays (no defensive copy)
    // -- confirm against HyperBoundingBox.
    HyperBoundingBox mbr1 = new HyperBoundingBox(pqLBFirst, pqUBFirst);
    // fill bounding queues for the second entry collection
    double[] minSecond = new double[dim];
    double[] maxSecond = new double[dim];
    Arrays.fill(maxSecond, Double.NEGATIVE_INFINITY);
    Arrays.fill(minSecond, Double.POSITIVE_INFINITY);
    // the tail that always stays in the second group must hold exactly
    // minEntries entries
    assert entrySorting.length - maxEntries == minEntries;
    // initialize min/max entries of the second collections' tail
    for (int index = maxEntries; index < entrySorting.length; index++) {
        add2MBR(entrySorting, maxSecond, minSecond, index);
    }
    // the aggregated tail bounds enter each heap as one sentinel element
    for (int i = 0; i < dim; i++) {
        // with index entrySorting.length => never to be removed
        pqLBSecond.get(i).add(new DoubleIntPair(minSecond[i], entrySorting.length));
        pqUBSecond.get(i).add(new DoubleIntPair(maxSecond[i], entrySorting.length));
    }
    // add the entries to be removed later on
    for (int index = minEntries; index < maxEntries; index++) {
        add2MBR(entrySorting, pqUBSecond, pqLBSecond, index);
    }
    // the heap tops now give the bounds of the complete second group
    for (int i = 0; i < minSecond.length; i++) {
        minSecond[i] = pqLBSecond.get(i).peek().first;
        maxSecond[i] = pqUBSecond.get(i).peek().first;
    }
    ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(minSecond, maxSecond);
    // surface sum of the initial distribution (first group has minEntries entries)
    double surfaceSum = SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
    // generate the other distributions and file the surface sums
    for (int limit = minEntries; limit < maxEntries; limit++) {
        // extend first MBR by entry at position entrySorting[limit]:
        add2MBR(entrySorting, pqUBFirst, pqLBFirst, limit);
        // shrink entry at position entrySorting[limit] from second MBR:
        // (mbr2 is passed in, presumably so that the helper can update it in place)
        removeFromMBR(pqUBSecond, pqLBSecond, limit, mbr2);
        surfaceSum += SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
    }
    return surfaceSum;
}
Also used : ArrayList(java.util.ArrayList) DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Example 12 with ModifiableHyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.

the class AngTanLinearSplit method split.

/**
 * Linear split that distributes the entries along a single axis.
 * <p>
 * For every axis, each entry is marked as "closer to the upper edge" when its
 * distance to the lower edge of the overall MBR is at least as large as its
 * distance to the upper edge. The axis whose marking yields the most even
 * distribution is chosen. Ties are broken first by the smaller value of
 * <code>computeOverlap</code>, then by the shorter extent of the overall MBR
 * on that axis.
 * <p>
 * If every axis puts all entries on one side, a warning is logged and a
 * random mask with <code>num / 2</code> bits set is returned.
 *
 * @param entries the entries to split; must contain at least one entry
 * @param getter adapter used to access the entries
 * @param minEntries minimum group size; not used by this strategy
 * @return bit mask over the entry indices; a set bit marks an entry of the
 *         "upper edge" group of the chosen axis
 */
@Override
public <E extends SpatialComparable, A> long[] split(A entries, ArrayAdapter<E, A> getter, int minEntries) {
    final int num = getter.size(entries);
    // We need the overall MBR for computing edge preferences
    ModifiableHyperBoundingBox total = new ModifiableHyperBoundingBox(getter.get(entries, 0));
    for (int i = 1; i < num; i++) {
        total.extend(getter.get(entries, i));
    }
    final int dim = total.getDimensionality();
    // Prepare the axis lists: one bitset per axis, sized for num bits.
    // (Allocating long[num] per axis would reserve 64 * num bits each.)
    long[][] closer = new long[dim][];
    for (int d = 0; d < dim; d++) {
        closer[d] = BitsUtil.zero(num);
    }
    for (int i = 0; i < num; i++) {
        E e = getter.get(entries, i);
        for (int d = 0; d < dim; d++) {
            double low = e.getMin(d) - total.getMin(d);
            double hig = total.getMax(d) - e.getMax(d);
            if (low >= hig) {
                BitsUtil.setI(closer[d], i);
            }
        }
    }
    // Find the most even split
    int axis = -1;
    int bestcard = Integer.MAX_VALUE;
    long[] bestset = null;
    // Overlap of the current best candidate; computed lazily, NaN = not yet known
    double bestover = Double.NaN;
    for (int d = 0; d < dim; d++) {
        long[] cand = closer[d];
        int card = BitsUtil.cardinality(cand);
        // Size of the larger group; smaller is more even
        card = Math.max(card, num - card);
        if (card == num) {
            // All entries on one side: not a split at all
            continue;
        }
        if (card < bestcard) {
            axis = d;
            bestcard = card;
            bestset = cand;
            bestover = Double.NaN;
        } else if (card == bestcard) {
            // Tie handling
            if (Double.isNaN(bestover)) {
                bestover = computeOverlap(entries, getter, bestset);
            }
            double overlap = computeOverlap(entries, getter, cand);
            if (overlap < bestover) {
                axis = d;
                bestset = cand;
                bestover = overlap;
            } else if (overlap == bestover) {
                // Second tie breaker: prefer the axis with the shorter extent
                double bestlen = total.getMax(axis) - total.getMin(axis);
                double candlen = total.getMax(d) - total.getMin(d);
                if (candlen < bestlen) {
                    axis = d;
                    bestset = cand;
                    bestover = overlap;
                }
            }
        }
    }
    if (bestset == null) {
        LOG.warning("No Ang-Tan-Split found. Probably all points are the same? Returning random split.");
        return BitsUtil.random(num >> 1, num, new Random());
    }
    return bestset;
}
Also used : Random(java.util.Random) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)

Example 13 with ModifiableHyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.

the class GreeneSplit method split.

/**
 * Split strategy that picks two seed entries, chooses the axis with the best
 * normalized separation between the seeds, sorts all entries by their
 * minimum value on that axis and assigns the first half to the group marked
 * by set bits.
 * <p>
 * The seeds are the pair of entries with the largest dead space, i.e. the
 * volume of their union minus both individual volumes. If no pair has
 * positive dead space, the first <code>(num + 1) / 2</code> entries are
 * marked without sorting.
 *
 * @param entries the entries to split
 * @param getter adapter used to access the entries
 * @param minEntries minimum group size; not used by this strategy
 * @return bit mask over the entry indices; set bits mark the entries of one
 *         group, clear bits the entries of the other
 */
@Override
public <E extends SpatialComparable, A> long[] split(A entries, ArrayAdapter<E, A> getter, int minEntries) {
    final int num = getter.size(entries);
    final int half = (num + 1) >> 1;
    // Choose axis by best normalized separation
    int axis = -1;
    {
        // PickSeeds - find the two most distant rectangles
        double worst = Double.NEGATIVE_INFINITY;
        int w1 = 0, w2 = 0;
        // Compute individual areas. This must include the last entry: the pair
        // loop below reads areas[e2] for e2 up to num - 1.
        double[] areas = new double[num];
        for (int e1 = 0; e1 < num; e1++) {
            areas[e1] = SpatialUtil.volume(getter.get(entries, e1));
        }
        // Compute area increase
        for (int e1 = 0; e1 < num - 1; e1++) {
            final E e1i = getter.get(entries, e1);
            for (int e2 = e1 + 1; e2 < num; e2++) {
                final E e2i = getter.get(entries, e2);
                final double areaJ = SpatialUtil.volumeUnion(e1i, e2i);
                final double d = areaJ - areas[e1] - areas[e2];
                if (d > worst) {
                    worst = d;
                    w1 = e1;
                    w2 = e2;
                }
            }
        }
        if (worst <= 0) {
            // All objects are identical!
            // Put the first half into second node
            return BitsUtil.ones(half);
        }
        // The two seeds
        E m1 = getter.get(entries, w1);
        E m2 = getter.get(entries, w2);
        double bestsep = Double.NEGATIVE_INFINITY;
        double bestsep2 = Double.NEGATIVE_INFINITY;
        for (int d = 0; d < m1.getDimensionality(); d++) {
            // Gap between the seeds on this axis (negative if they overlap)
            final double s1 = m1.getMin(d) - m2.getMax(d);
            final double s2 = m2.getMin(d) - m1.getMax(d);
            final double sm = Math.max(s1, s2);
            // Normalize by the joint extent of both seeds
            final double no = Math.max(m1.getMax(d), m2.getMax(d)) - Math.min(m1.getMin(d), m2.getMin(d));
            final double sep = sm / no;
            // Ties in normalized separation are broken by the absolute gap
            if (sep > bestsep || (sep == bestsep && sm > bestsep2)) {
                bestsep = sep;
                bestsep2 = sm;
                axis = d;
            }
        }
    }
    // Sort by minimum value
    DoubleIntPair[] data = new DoubleIntPair[num];
    for (int i = 0; i < num; i++) {
        data[i] = new DoubleIntPair(getter.get(entries, i).getMin(axis), i);
    }
    Arrays.sort(data);
    // Object assignment
    final long[] assignment = BitsUtil.zero(num);
    // Put the first half into second node
    for (int i = 0; i < half; i++) {
        BitsUtil.setI(assignment, data[i].second);
    }
    // Tie handling
    // NOTE(review): Greene's description assigns a leftover middle entry when
    // the number of entries is odd; here the check is for an even count while
    // "half" is rounded up. Behavior kept as is -- confirm this is intended.
    if (num % 2 == 0) {
        // We need to compute the bounding boxes
        ModifiableHyperBoundingBox mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, data[0].second));
        for (int i = 1; i < half; i++) {
            mbr1.extend(getter.get(entries, data[i].second));
        }
        ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, data[num - 1].second));
        for (int i = half + 1; i < num - 1; i++) {
            mbr2.extend(getter.get(entries, data[i].second));
        }
        // Move the entry right after the first half if that enlarges the
        // first group's MBR less than the second group's MBR
        E e = getter.get(entries, data[half].second);
        double inc1 = SpatialUtil.volumeUnion(mbr1, e) - SpatialUtil.volume(mbr1);
        double inc2 = SpatialUtil.volumeUnion(mbr2, e) - SpatialUtil.volume(mbr2);
        if (inc1 < inc2) {
            BitsUtil.setI(assignment, data[half].second);
        }
    }
    return assignment;
}
Also used : DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)

Example 14 with ModifiableHyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.

the class RTreeQuadraticSplit method split.

/**
 * Quadratic split: pick the two entries that would waste the most volume if
 * put together as seeds of the two groups, then repeatedly assign the
 * remaining entry with the strongest preference for one group to that group.
 * <p>
 * As soon as one group needs all remaining entries to reach
 * <code>minEntries</code>, they are all assigned to it and the loop ends.
 *
 * @param entries the entries to split
 * @param getter adapter used to access the entries
 * @param minEntries minimum number of entries each group must receive
 * @return bit mask over the entry indices; set bits mark the entries of the
 *         second group, clear bits the entries of the first group
 */
@Override
public <E extends SpatialComparable, A> long[] split(A entries, ArrayAdapter<E, A> getter, int minEntries) {
    final int num = getter.size(entries);
    // Object assignment, and processed objects
    long[] assignment = BitsUtil.zero(num);
    long[] assigned = BitsUtil.zero(num);
    // MBRs and Areas of current assignments
    ModifiableHyperBoundingBox mbr1, mbr2;
    double area1 = 0, area2 = 0;
    // PickSeeds - find worst pair
    {
        double worst = Double.NEGATIVE_INFINITY;
        int w1 = 0, w2 = 0;
        // Compute individual areas. This must include the last entry: it is
        // read as areas[e2] in the pair loop and may become the seed area2.
        double[] areas = new double[num];
        for (int e1 = 0; e1 < num; e1++) {
            areas[e1] = SpatialUtil.volume(getter.get(entries, e1));
        }
        // Compute area increase
        for (int e1 = 0; e1 < num - 1; e1++) {
            final E e1i = getter.get(entries, e1);
            for (int e2 = e1 + 1; e2 < num; e2++) {
                final E e2i = getter.get(entries, e2);
                final double areaJ = SpatialUtil.volumeUnion(e1i, e2i);
                final double d = areaJ - areas[e1] - areas[e2];
                if (d > worst) {
                    worst = d;
                    w1 = e1;
                    w2 = e2;
                }
            }
        }
        // Data to keep
        // Mark both as used
        BitsUtil.setI(assigned, w1);
        BitsUtil.setI(assigned, w2);
        // Assign second to second set
        BitsUtil.setI(assignment, w2);
        // Initial mbrs and areas
        area1 = areas[w1];
        area2 = areas[w2];
        mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, w1));
        mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, w2));
    }
    // Second phase, QS2+QS3
    {
        int in1 = 1, in2 = 1;
        int remaining = num - 2;
        while (remaining > 0) {
            // Shortcut when minEntries must be fulfilled
            if (in1 + remaining <= minEntries) {
                // No need to updated assigned, no changes to assignment.
                // (clear bits already mean "first group")
                break;
            }
            if (in2 + remaining <= minEntries) {
                // Don't bother to update assigned, though
                for (int pos = BitsUtil.nextClearBit(assigned, 0); pos < num; pos = BitsUtil.nextClearBit(assigned, pos + 1)) {
                    BitsUtil.setI(assignment, pos);
                }
                break;
            }
            // PickNext
            double greatestPreference = Double.NEGATIVE_INFINITY;
            int best = -1;
            E best_i = null;
            boolean preferSecond = false;
            for (int pos = BitsUtil.nextClearBit(assigned, 0); pos < num; pos = BitsUtil.nextClearBit(assigned, pos + 1)) {
                // Cost of putting object into both mbrs
                final E pos_i = getter.get(entries, pos);
                final double d1 = SpatialUtil.volumeUnion(mbr1, pos_i) - area1;
                final double d2 = SpatialUtil.volumeUnion(mbr2, pos_i) - area2;
                // Preference
                final double preference = Math.abs(d1 - d2);
                if (preference > greatestPreference) {
                    greatestPreference = preference;
                    best = pos;
                    best_i = pos_i;
                    // Prefer smaller increase
                    preferSecond = (d2 < d1);
                }
            }
            // QS3: tie handling
            if (greatestPreference == 0) {
                // Prefer smaller area
                if (area1 != area2) {
                    preferSecond = (area2 < area1);
                } else {
                    // Prefer smaller group size
                    preferSecond = (in2 < in1);
                }
            }
            // Mark as used.
            BitsUtil.setI(assigned, best);
            remaining--;
            if (!preferSecond) {
                in1++;
                mbr1.extend(best_i);
                area1 = SpatialUtil.volume(mbr1);
            } else {
                in2++;
                BitsUtil.setI(assignment, best);
                mbr2.extend(best_i);
                area2 = SpatialUtil.volume(mbr2);
            }
            // Loop from QS2
        }
        // Note: "assigned" and "remaining" likely not updated!
    }
    return assignment;
}
Also used : ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)

Example 15 with ModifiableHyperBoundingBox

use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.

the class AbstractRStarTree method initializeCapacities.

/**
 * Determines the leaf and directory node capacities and minimum fill values.
 * <p>
 * Each capacity is found by serializing a dummy entry again and again into an
 * in-memory buffer until the buffer grows beyond the page size; the number of
 * entries written before the overflowing one is the capacity. The minimum
 * fill is the capacity times <code>settings.relativeMinFill</code>, rounded
 * down, but at least 1.
 *
 * @param exampleLeaf example entry; only its dimensionality is read
 * @throws AbortException if serializing a dummy entry fails
 * @throws IllegalArgumentException if a capacity is 2 or less
 */
@Override
protected void initializeCapacities(E exampleLeaf) {
    // Leaf pages: count how many point entries fit into one page.
    try {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        final ObjectOutputStream out = new ObjectOutputStream(buffer);
        final SpatialPointLeafEntry probe = new SpatialPointLeafEntry(DBIDUtil.importInteger(0), new double[exampleLeaf.getDimensionality()]);
        // Starts at -1 because the last entry written is the one that overflows.
        int fitting = -1;
        while (buffer.size() <= getPageSize()) {
            probe.writeExternal(out);
            out.flush();
            fitting++;
        }
        leafCapacity = fitting;
    } catch (IOException e) {
        throw new AbortException("Error determining page sizes.", e);
    }
    // Directory pages: same procedure with a directory entry.
    try {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        final ObjectOutputStream out = new ObjectOutputStream(buffer);
        final ModifiableHyperBoundingBox box = new ModifiableHyperBoundingBox(new double[exampleLeaf.getDimensionality()], new double[exampleLeaf.getDimensionality()]);
        final SpatialDirectoryEntry probe = new SpatialDirectoryEntry(0, box);
        int fitting = -1;
        while (buffer.size() <= getPageSize()) {
            probe.writeExternal(out);
            out.flush();
            fitting++;
        }
        dirCapacity = fitting;
    } catch (IOException e) {
        throw new AbortException("Error determining page sizes.", e);
    }

    // Validate the directory capacity first, then derive its minimum fill.
    if (dirCapacity <= 2) {
        throw new IllegalArgumentException("Node size of " + getPageSize() + " bytes is chosen too small!");
    }
    final Logging logger = getLogger();
    if (dirCapacity < 10) {
        logger.warning("Page size is choosen very small! Maximum number of entries in a directory node = " + dirCapacity);
    }
    dirMinimum = Math.max(1, (int) Math.floor(dirCapacity * settings.relativeMinFill));

    // Same checks for the leaf capacity.
    if (leafCapacity <= 2) {
        throw new IllegalArgumentException("Node size of " + getPageSize() + " bytes is chosen too small!");
    }
    if (leafCapacity < 10) {
        logger.warning("Page size is choosen very small! Maximum number of entries in a leaf node = " + leafCapacity);
    }
    leafMinimum = Math.max(1, (int) Math.floor(leafCapacity * settings.relativeMinFill));
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) SpatialPointLeafEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry) SpatialDirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) ObjectOutputStream(java.io.ObjectOutputStream) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

ModifiableHyperBoundingBox (de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)17 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SpatialDirectoryEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry)3 ArrayList (java.util.ArrayList)3 Random (java.util.Random)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 LPNormDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction)2 SpatialEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry)2 SpatialPointLeafEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry)2 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)2 DoubleIntPair (de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair)2 IOException (java.io.IOException)2 Test (org.junit.Test)2 HyperBoundingBox (de.lmu.ifi.dbs.elki.data.HyperBoundingBox)1 Logging (de.lmu.ifi.dbs.elki.logging.Logging)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 Heap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap)1 TopBoundedHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)1 BufferedImage (java.awt.image.BufferedImage)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1