Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap in project elki by elki-project.
In class HiCS, method calculateSubspaces:
/**
* Identifies high contrast subspaces in a given full-dimensional database.
*
* @param relation the relation on which HiCS should be evaluated
* @param subspaceIndex Subspace indexes
* @param random Random generator used by the contrast computation
* @return a set of high contrast subspaces
*/
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int dbdim = RelationUtil.dimensionality(relation);
FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
if (dprog != null) {
dprog.setProcessed(2, LOG); // the two-element subsets generated below are the first (d = 2) level
}
TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
for (int i = 0; i < dbdim; i++) {
for (int j = i + 1; j < dbdim; j++) {
HiCSSubspace ts = new HiCSSubspace();
ts.set(i);
ts.set(j);
calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
LOG.incrementProcessed(prog);
}
}
LOG.ensureCompleted(prog);
IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
for (int d = 3; !dDimensionalList.isEmpty(); d++) {
if (dprog != null) {
dprog.setProcessed(d, LOG);
}
// dDimensionalList now holds the best (d-1)-dimensional subspace candidates from the previous round
ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
subspaceList.add(it.get());
candidateList.add(it.get());
}
dDimensionalList.clear();
// candidateList now contains the (at most cutoff) best (d-1)-dimensional sets
Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
// TODO: optimize APRIORI style, by not even computing the bit-set OR?
for (int i = 0; i < candidateList.size() - 1; i++) {
for (int j = i + 1; j < candidateList.size(); j++) {
HiCSSubspace set1 = candidateList.get(i);
HiCSSubspace set2 = candidateList.get(j);
HiCSSubspace joinedSet = new HiCSSubspace();
joinedSet.or(set1);
joinedSet.or(set2);
if (joinedSet.cardinality() != d) {
continue;
}
calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
LOG.incrementProcessed(qprog);
}
}
// Prune: drop candidates that are outperformed by one of the new d-dimensional subspaces
for (HiCSSubspace cand : candidateList) {
for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
if (it.get().contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
}
}
}
LOG.setCompleted(qprog);
if (dprog != null) {
dprog.setProcessed(dbdim, LOG);
dprog.ensureCompleted(LOG);
}
return subspaceList;
}
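The bounded heap is what keeps only the cutoff highest-contrast candidates per dimensionality: with the ascending contrast comparator, the lowest-contrast element sits on top of the heap and is the one evicted when a better candidate arrives. Below is a minimal standalone sketch of that retention behavior, as I read the usage above; the values are hypothetical and plain Double scores stand in for HiCSSubspace objects.

import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;

public class TopBoundedHeapSketch {
  public static void main(String[] args) {
    int cutoff = 3; // hypothetical bound, analogous to the HiCS cutoff parameter
    TopBoundedHeap<Double> best = new TopBoundedHeap<>(cutoff);
    for(double contrast : new double[] { 0.7, 0.1, 0.9, 0.4, 0.8 }) {
      best.add(contrast); // once the heap is full, only values above the current top are kept
    }
    // The heap now holds 0.7, 0.8 and 0.9; poll() returns the retained values
    // smallest-first, so the weakest survivor comes out first.
    while(!best.isEmpty()) {
      System.out.println(best.poll());
    }
  }
}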
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap in project elki by elki-project.
In class XSplitter, method getSurfaceSums4Sorting:
/**
* Compute the surfaces of the <code>2 * (maxEntries - minEntries + 1)</code>
* split MBRs resulting from the sorting <code>entrySorting</code>.
*
* @param minEntries minimally allowed subgroup size
* @param maxEntries maximally allowed subgroup size for the first entry set
* @param entrySorting a permutation of the indices of {@link #entries}
* @param dim the dimension of the tree
* @return the sum of all first and second MBRs' surfaces for the tested entry
* distributions
*/
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
// avoid multiple MBR calculations by updating min/max-logs for the two
// collections' bounds:
// the first entries' maximum upper bounds
double[] pqUBFirst = new double[dim];
Arrays.fill(pqUBFirst, Double.NEGATIVE_INFINITY);
// maintain the second entries' upper bounds
List<Heap<DoubleIntPair>> pqUBSecond = new ArrayList<>(dim);
for (int i = 0; i < dim; i++) {
// Descending heap
pqUBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
}
// the first entries' minimum lower bounds
double[] pqLBFirst = new double[dim];
Arrays.fill(pqLBFirst, Double.POSITIVE_INFINITY);
// maintain the second entries' minimum lower bounds
List<Heap<DoubleIntPair>> pqLBSecond = new ArrayList<>(dim);
for (int i = 0; i < dim; i++) {
// Ascending heap
pqLBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
}
// initialize bounds for first entry collection
for (int index = 0; index < minEntries; index++) {
add2MBR(entrySorting, pqUBFirst, pqLBFirst, index);
}
HyperBoundingBox mbr1 = new HyperBoundingBox(pqLBFirst, pqUBFirst);
// fill bounding queues for the second entry collection
double[] minSecond = new double[dim];
double[] maxSecond = new double[dim];
Arrays.fill(maxSecond, Double.NEGATIVE_INFINITY);
Arrays.fill(minSecond, Double.POSITIVE_INFINITY);
assert entrySorting.length - maxEntries == minEntries;
// initialize min/max entries of the second collections' tail
for (int index = maxEntries; index < entrySorting.length; index++) {
add2MBR(entrySorting, maxSecond, minSecond, index);
}
for (int i = 0; i < dim; i++) {
// with index entrySorting.length => never to be removed
pqLBSecond.get(i).add(new DoubleIntPair(minSecond[i], entrySorting.length));
pqUBSecond.get(i).add(new DoubleIntPair(maxSecond[i], entrySorting.length));
}
// add the entries to be removed later on
for (int index = minEntries; index < maxEntries; index++) {
add2MBR(entrySorting, pqUBSecond, pqLBSecond, index);
}
for (int i = 0; i < minSecond.length; i++) {
minSecond[i] = pqLBSecond.get(i).peek().first;
maxSecond[i] = pqUBSecond.get(i).peek().first;
}
ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(minSecond, maxSecond);
double surfaceSum = SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
// generate the other distributions and accumulate the surface sums
for (int limit = minEntries; limit < maxEntries; limit++) {
// extend first MBR by entry at position entrySorting[limit]:
add2MBR(entrySorting, pqUBFirst, pqLBFirst, limit);
// remove the entry at position entrySorting[limit] from the second MBR:
removeFromMBR(pqUBSecond, pqLBSecond, limit, mbr2);
surfaceSum += SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
}
return surfaceSum;
}
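The two heap lists exist so that the running maximum upper bound and minimum lower bound of the second group can be read off via peek() as entries migrate into the first group. A small standalone sketch of why peek() yields the maximum on the reverse-ordered heap and the minimum on the naturally ordered one; the values are hypothetical, and DoubleIntPair is assumed to be de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair (the imports are not shown above).

import java.util.Collections;

import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;

public class BoundTrackingSketch {
  public static void main(String[] args) {
    int maxEntries = 8; // hypothetical capacity, analogous to the parameter above
    // Descending heap: the largest key sits on top, so peek() is the maximum upper bound.
    TopBoundedHeap<DoubleIntPair> upper = new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder());
    // Ascending heap: the smallest key sits on top, so peek() is the minimum lower bound.
    TopBoundedHeap<DoubleIntPair> lower = new TopBoundedHeap<>(maxEntries);
    for(int index = 0; index < 5; index++) {
      double bound = 0.25 * index; // hypothetical coordinate of the entry at this index
      upper.add(new DoubleIntPair(bound, index));
      lower.add(new DoubleIntPair(bound, index));
    }
    System.out.println(upper.peek().first); // 1.0, the maximum
    System.out.println(lower.peek().first); // 0.0, the minimum
  }
}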
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap in project elki by elki-project.
In class ApproximativeLeastOverlapInsertionStrategy, method choose:
@Override
public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) {
final int size = getter.size(options);
assert (size > 0) : "Choose from empty set?";
if (size <= numCandidates) {
// Skip building the heap.
return super.choose(options, getter, obj, height, depth);
}
// Heap of candidates
TopBoundedHeap<DoubleIntPair> candidates = new TopBoundedHeap<>(numCandidates, Collections.reverseOrder());
for (int i = 0; i < size; i++) {
// Existing object and extended rectangle:
SpatialComparable entry = getter.get(options, i);
HyperBoundingBox mbr = SpatialUtil.union(entry, obj);
// Area increase
final double inc_area = SpatialUtil.volume(mbr) - SpatialUtil.volume(entry);
candidates.add(new DoubleIntPair(inc_area, i));
}
// R*-Tree: overlap increase for leaves.
int best = -1;
double least_overlap = Double.POSITIVE_INFINITY;
double least_areainc = Double.POSITIVE_INFINITY;
double least_area = Double.POSITIVE_INFINITY;
// least overlap increase, on reduced candidate set:
while (!candidates.isEmpty()) {
DoubleIntPair pair = candidates.poll();
final double inc_area = pair.first;
// Existing object and extended rectangle:
SpatialComparable entry = getter.get(options, pair.second);
HyperBoundingBox mbr = SpatialUtil.union(entry, obj);
// Compute relative overlap increase.
double overlap_wout = 0.0;
double overlap_with = 0.0;
for (int k = 0; k < size; k++) {
if (pair.second != k) {
SpatialComparable other = getter.get(options, k);
overlap_wout += SpatialUtil.relativeOverlap(entry, other);
overlap_with += SpatialUtil.relativeOverlap(mbr, other);
}
}
double inc_overlap = overlap_with - overlap_wout;
if (inc_overlap < least_overlap) {
final double area = SpatialUtil.volume(entry);
// Volume increase and overlap increase:
least_overlap = inc_overlap;
least_areainc = inc_area;
least_area = area;
best = pair.second;
} else if (inc_overlap == least_overlap) {
final double area = SpatialUtil.volume(entry);
if (inc_area < least_areainc || (inc_area == least_areainc && area < least_area)) {
least_overlap = inc_overlap;
least_areainc = inc_area;
least_area = area;
best = pair.second;
}
}
}
assert (best > -1) : "No split found? Volume outside of double precision?";
return best;
}
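Here the reverse-ordered, size-bounded heap performs the candidate reduction: it retains only the numCandidates entries with the smallest volume increase, and poll() hands them back worst-first, so the expensive overlap scan in the second loop touches only that shortlist. A standalone sketch of that pruning behavior with hypothetical area-increase values, again assuming the DoubleIntPair pair class used above.

import java.util.Collections;

import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;

public class CandidatePruningSketch {
  public static void main(String[] args) {
    int numCandidates = 2; // hypothetical bound, analogous to the parameter above
    TopBoundedHeap<DoubleIntPair> candidates = new TopBoundedHeap<DoubleIntPair>(numCandidates, Collections.reverseOrder());
    double[] areaIncrease = { 3.0, 0.5, 2.0, 0.1 }; // hypothetical volume increases per entry
    for(int i = 0; i < areaIncrease.length; i++) {
      candidates.add(new DoubleIntPair(areaIncrease[i], i));
    }
    // Only the two entries with the smallest increase survive; poll() returns the worse one first.
    while(!candidates.isEmpty()) {
      DoubleIntPair pair = candidates.poll();
      System.out.println("entry " + pair.second + " with volume increase " + pair.first);
    }
    // Expected output: entry 1 (0.5) followed by entry 3 (0.1).
  }
}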