Search in sources :

Example 1 with TopBoundedHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap in project elki by elki-project.

the class HiCS method calculateSubspaces.

/**
 * Identifies high contrast subspaces in a given full-dimensional database.
 * <p>
 * The search is level-wise: all two-dimensional subspaces are scored first,
 * then the retained subspaces of each level are joined pairwise to build the
 * candidates of the next higher dimensionality. The candidates of each level
 * are collected in a heap bounded by {@code cutoff}. A retained subspace is
 * dropped from the result again if any subspace generated on the next level
 * has a strictly higher contrast.
 *
 * @param relation the relation the HiCS should be evaluated for
 * @param subspaceIndex Subspace indexes
 * @param random random generator passed on to the contrast computation
 * @return a set of high contrast subspaces
 */
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
    final int dbdim = RelationUtil.dimensionality(relation);
    FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
    if (dprog != null) {
        // The progress total is dbdim; never report more than that (dbdim may
        // be smaller than 2).
        dprog.setProcessed(Math.min(2, dbdim), LOG);
    }
    // Result set, ordered by the subspace comparator.
    TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
    // Candidates of the level currently being generated, bounded by cutoff.
    TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
    // compute two-element sets of subspaces
    for (int i = 0; i < dbdim; i++) {
        for (int j = i + 1; j < dbdim; j++) {
            HiCSSubspace ts = new HiCSSubspace();
            ts.set(i);
            ts.set(j);
            calculateContrast(relation, ts, subspaceIndex, random);
            dDimensionalList.add(ts);
            LOG.incrementProcessed(prog);
        }
    }
    LOG.ensureCompleted(prog);
    IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
    // d is the dimensionality of the subspaces generated in this pass; the
    // heap holds the retained (d-1)-dimensional subspaces on entry.
    for (int d = 3; !dDimensionalList.isEmpty(); d++) {
        if (dprog != null) {
            // A final pass with d == dbdim + 1 is needed to move the
            // full-dimensional candidates into the result; clamp so the
            // reported value stays within the declared total.
            dprog.setProcessed(Math.min(d, dbdim), LOG);
        }
        // Move the retained (d-1)-dimensional subspaces into the result and
        // into the list used for candidate generation.
        ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
        for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
            subspaceList.add(it.get());
            candidateList.add(it.get());
        }
        dDimensionalList.clear();
        // candidateList now contains the retained (d-1)-dimensional subspaces
        Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
        // TODO: optimize APRIORI style, by not even computing the bit set or?
        for (int i = 0; i < candidateList.size() - 1; i++) {
            HiCSSubspace set1 = candidateList.get(i);
            for (int j = i + 1; j < candidateList.size(); j++) {
                HiCSSubspace set2 = candidateList.get(j);
                HiCSSubspace joinedSet = new HiCSSubspace();
                joinedSet.or(set1);
                joinedSet.or(set2);
                // Only unions with exactly d dimensions are candidates of
                // this level.
                if (joinedSet.cardinality() != d) {
                    continue;
                }
                calculateContrast(relation, joinedSet, subspaceIndex, random);
                dDimensionalList.add(joinedSet);
                LOG.incrementProcessed(qprog);
            }
        }
        // Prune: drop a lower-dimensional subspace from the result as soon as
        // one newly generated subspace has a strictly higher contrast.
        for (HiCSSubspace cand : candidateList) {
            for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
                if (it.get().contrast > cand.contrast) {
                    subspaceList.remove(cand);
                    break;
                }
            }
        }
    }
    LOG.setCompleted(qprog);
    if (dprog != null) {
        dprog.setProcessed(dbdim, LOG);
        dprog.ensureCompleted(LOG);
    }
    return subspaceList;
}
Also used : TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) TreeSet(java.util.TreeSet) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Example 2 with TopBoundedHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap in project elki by elki-project.

the class XSplitter method getSurfaceSums4Sorting.

/**
 * Sums the perimeters of both split MBRs over every tested distribution of
 * the sorting <code>entrySorting</code>: the first group starts with the
 * first <code>minEntries</code> entries and is grown one entry at a time
 * until it holds <code>maxEntries</code> entries, while the second group
 * always holds the remaining entries. This covers
 * <code>2 * (maxEntries - minEntries + 1)</code> MBRs in total.
 *
 * @param minEntries minimally allowed subgroup size
 * @param maxEntries maximally allowed subgroup size for the first entry set
 * @param entrySorting a permutation of the indices of {@link #entries}
 * @param dim the dimension of the tree
 * @return the sum of all first and second MBRs' surfaces for the tested entry
 *         distributions
 */
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
    // Bounds are maintained incrementally rather than recomputing each MBR.
    // The first group only grows, so plain running min/max arrays suffice.
    double[] firstUpper = new double[dim];
    double[] firstLower = new double[dim];
    Arrays.fill(firstUpper, Double.NEGATIVE_INFINITY);
    Arrays.fill(firstLower, Double.POSITIVE_INFINITY);
    // The second group shrinks, so its bounds are kept in one heap per
    // dimension: descending for upper bounds, ascending for lower bounds.
    List<Heap<DoubleIntPair>> secondUpper = new ArrayList<>(dim);
    List<Heap<DoubleIntPair>> secondLower = new ArrayList<>(dim);
    for (int d = 0; d < dim; d++) {
        secondUpper.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
        secondLower.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
    }
    // Seed the first group with the first minEntries entries.
    for (int pos = 0; pos < minEntries; pos++) {
        add2MBR(entrySorting, firstUpper, firstLower, pos);
    }
    HyperBoundingBox firstBox = new HyperBoundingBox(firstLower, firstUpper);
    // Bounds of the second group's tail, i.e. entries that never leave it.
    double[] secondMin = new double[dim];
    double[] secondMax = new double[dim];
    Arrays.fill(secondMax, Double.NEGATIVE_INFINITY);
    Arrays.fill(secondMin, Double.POSITIVE_INFINITY);
    assert entrySorting.length - maxEntries == minEntries;
    for (int pos = maxEntries; pos < entrySorting.length; pos++) {
        add2MBR(entrySorting, secondMax, secondMin, pos);
    }
    // Register the tail bounds under the index entrySorting.length, which is
    // never removed later on.
    final int tailMarker = entrySorting.length;
    for (int d = 0; d < dim; d++) {
        secondLower.get(d).add(new DoubleIntPair(secondMin[d], tailMarker));
        secondUpper.get(d).add(new DoubleIntPair(secondMax[d], tailMarker));
    }
    // Queue the entries that will be moved over to the first group.
    for (int pos = minEntries; pos < maxEntries; pos++) {
        add2MBR(entrySorting, secondUpper, secondLower, pos);
    }
    // The heap tops give the current bounds of the complete second group.
    for (int d = 0; d < dim; d++) {
        secondMin[d] = secondLower.get(d).peek().first;
        secondMax[d] = secondUpper.get(d).peek().first;
    }
    ModifiableHyperBoundingBox secondBox = new ModifiableHyperBoundingBox(secondMin, secondMax);
    double total = SpatialUtil.perimeter(firstBox) + SpatialUtil.perimeter(secondBox);
    // Walk through the remaining distributions, moving one entry per step.
    for (int moved = minEntries; moved < maxEntries; moved++) {
        // grow the first MBR by the entry at position entrySorting[moved]
        add2MBR(entrySorting, firstUpper, firstLower, moved);
        // and take the same entry out of the second MBR
        removeFromMBR(secondUpper, secondLower, moved, secondBox);
        total += SpatialUtil.perimeter(firstBox) + SpatialUtil.perimeter(secondBox);
    }
    return total;
}
Also used : ArrayList(java.util.ArrayList) DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Example 3 with TopBoundedHeap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap in project elki by elki-project.

the class ApproximativeLeastOverlapInsertionStrategy method choose.

/**
 * Chooses the option to insert {@code obj} into. When there are more than
 * {@code numCandidates} options, only a reduced candidate set collected by
 * area increase is examined; among those, the option with the least overlap
 * increase wins, with ties broken by area increase and then by area.
 */
@Override
public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) {
    final int size = getter.size(options);
    assert (size > 0) : "Choose from empty set?";
    // Few enough options: the heap would not reduce anything, so delegate.
    if (size <= numCandidates) {
        return super.choose(options, getter, obj, height, depth);
    }
    // Collect candidates keyed by the area increase caused by adding obj.
    TopBoundedHeap<DoubleIntPair> candidates = new TopBoundedHeap<>(numCandidates, Collections.reverseOrder());
    for (int idx = 0; idx < size; idx++) {
        SpatialComparable current = getter.get(options, idx);
        HyperBoundingBox extended = SpatialUtil.union(current, obj);
        final double areaIncrease = SpatialUtil.volume(extended) - SpatialUtil.volume(current);
        candidates.add(new DoubleIntPair(areaIncrease, idx));
    }
    // R*-Tree: overlap increase for leaves, evaluated on the reduced set.
    int best = -1;
    double bestOverlapInc = Double.POSITIVE_INFINITY;
    double bestAreaInc = Double.POSITIVE_INFINITY;
    double bestArea = Double.POSITIVE_INFINITY;
    while (!candidates.isEmpty()) {
        final DoubleIntPair cand = candidates.poll();
        final double areaIncrease = cand.first;
        final int candIndex = cand.second;
        // Existing object and extended rectangle:
        SpatialComparable current = getter.get(options, candIndex);
        HyperBoundingBox extended = SpatialUtil.union(current, obj);
        // Relative overlap with all other options, before and after extension.
        double overlapBefore = 0.0;
        double overlapAfter = 0.0;
        for (int other = 0; other < size; other++) {
            if (other == candIndex) {
                continue;
            }
            SpatialComparable neighbor = getter.get(options, other);
            overlapBefore += SpatialUtil.relativeOverlap(current, neighbor);
            overlapAfter += SpatialUtil.relativeOverlap(extended, neighbor);
        }
        final double overlapIncrease = overlapAfter - overlapBefore;
        // Strictly worse (or not comparable, e.g. NaN): cannot become best.
        if (!(overlapIncrease <= bestOverlapInc)) {
            continue;
        }
        final double area = SpatialUtil.volume(current);
        // Accept on smaller overlap increase; on a tie, prefer the smaller
        // area increase, then the smaller area.
        if (overlapIncrease < bestOverlapInc || areaIncrease < bestAreaInc || (areaIncrease == bestAreaInc && area < bestArea)) {
            bestOverlapInc = overlapIncrease;
            bestAreaInc = areaIncrease;
            bestArea = area;
            best = candIndex;
        }
    }
    assert (best > -1) : "No split found? Volume outside of double precision?";
    return best;
}
Also used : TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) SpatialComparable(de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable) DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox)

Aggregations

TopBoundedHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)3 HyperBoundingBox (de.lmu.ifi.dbs.elki.data.HyperBoundingBox)2 Heap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap)2 DoubleIntPair (de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair)2 ArrayList (java.util.ArrayList)2 ModifiableHyperBoundingBox (de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)1 SpatialComparable (de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)1 TreeSet (java.util.TreeSet)1