Use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.
The class CASHInterval, method split().
/**
* Splits this interval into 2 children.
*/
public void split() {
  if (hasChildren()) {
    return;
  }
  final boolean issplit = (maxSplitDimension >= (getDimensionality() - 1));
  final int childLevel = issplit ? level + 1 : level;
  final int splitDim = issplit ? 0 : maxSplitDimension + 1;
  final double splitPoint = getMin(splitDim) + (getMax(splitDim) - getMin(splitDim)) * .5;
  // left and right child
  for (int i = 0; i < 2; i++) {
    // clone
    double[] min = SpatialUtil.getMin(this);
    // clone
    double[] max = SpatialUtil.getMax(this);
    if (i == 0) {
      // right child
      min[splitDim] = splitPoint;
    } else {
      // left child
      max[splitDim] = splitPoint;
    }
    ModifiableDBIDs childIDs = split.determineIDs(getIDs(), new HyperBoundingBox(min, max), d_min, d_max);
    if (childIDs != null) {
      if (i == 0) {
        // right child
        rightChild = new CASHInterval(min, max, split, childIDs, splitDim, childLevel, d_min, d_max);
      } else {
        // left child
        leftChild = new CASHInterval(min, max, split, childIDs, splitDim, childLevel, d_min, d_max);
      }
    }
  }
  if (LOG.isDebuggingFine()) {
    StringBuilder msg = new StringBuilder();
    msg.append("Child level ").append(childLevel).append(", split Dim ").append(splitDim);
    if (leftChild != null) {
      msg.append("\nleft ").append(leftChild);
    }
    if (rightChild != null) {
      msg.append("\nright ").append(rightChild);
    }
    LOG.fine(msg.toString());
  }
}
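To make the geometry of this split concrete, the following standalone sketch splits a plain min/max box at the midpoint of one dimension, mirroring the computation of splitPoint above. The class and its names are hypothetical illustrations, not part of ELKI.

import java.util.Arrays;

// Hypothetical sketch: split an axis-aligned box [min, max] at the midpoint of splitDim,
// producing a "left" child (upper bound lowered) and a "right" child (lower bound raised),
// analogous to the splitPoint logic in CASHInterval.split() above.
public class MidpointSplitSketch {
  public static void main(String[] args) {
    double[] min = { 0.0, -1.0 }, max = { 4.0, 1.0 };
    int splitDim = 0;
    double splitPoint = min[splitDim] + (max[splitDim] - min[splitDim]) * .5;

    double[] leftMax = max.clone();
    leftMax[splitDim] = splitPoint;  // left child: [0.0, 2.0] x [-1.0, 1.0]

    double[] rightMin = min.clone();
    rightMin[splitDim] = splitPoint; // right child: [2.0, 4.0] x [-1.0, 1.0]

    System.out.println("left:  " + Arrays.toString(min) + " - " + Arrays.toString(leftMax));
    System.out.println("right: " + Arrays.toString(rightMin) + " - " + Arrays.toString(max));
  }
}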
Use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.
The class CASHIntervalSplit, method determineIDs().
/**
* Determines the ids belonging to the given interval, i.e. the
* parameterization functions falling within the interval.
*
* @param superSetIDs a superset of the ids to be determined
* @param interval the hyper bounding box defining the interval of alpha
* values
* @param d_min the minimum distance value for the interval
* @param d_max the maximum distance value for the interval
* @return the ids belonging to the given interval if the number of ids is at
*         least minPts, null otherwise
*/
public ModifiableDBIDs determineIDs(DBIDs superSetIDs, HyperBoundingBox interval, double d_min, double d_max) {
  StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
  if (msg != null) {
    msg.append("interval ").append(interval);
  }
  ModifiableDBIDs childIDs = DBIDUtil.newHashSet(superSetIDs.size());
  Map<DBID, Double> minima = f_minima.get(interval);
  Map<DBID, Double> maxima = f_maxima.get(interval);
  if (minima == null || maxima == null) {
    minima = new HashMap<>();
    f_minima.put(interval, minima);
    maxima = new HashMap<>();
    f_maxima.put(interval, maxima);
  }
  for (DBIDIter iter = superSetIDs.iter(); iter.valid(); iter.advance()) {
    DBID id = DBIDUtil.deref(iter);
    Double f_min = minima.get(id);
    Double f_max = maxima.get(id);
    if (f_min == null) {
      ParameterizationFunction f = database.get(id);
      HyperBoundingBox minMax = f.determineAlphaMinMax(interval);
      f_min = f.function(SpatialUtil.getMin(minMax));
      f_max = f.function(SpatialUtil.getMax(minMax));
      minima.put(id, f_min);
      maxima.put(id, f_max);
    }
    if (msg != null) {
      msg.append("\n\nf_min ").append(f_min);
      msg.append("\nf_max ").append(f_max);
      msg.append("\nd_min ").append(d_min);
      msg.append("\nd_max ").append(d_max);
    }
    if (f_min - f_max > ParameterizationFunction.DELTA) {
      throw new IllegalArgumentException("Houston, we have a problem: f_min > f_max! " + "\nf_min[" + FormatUtil.format(SpatialUtil.centroid(interval)) + "] = " + f_min + "\nf_max[" + FormatUtil.format(SpatialUtil.centroid(interval)) + "] = " + f_max + "\nf " + database.get(id));
    }
    if (f_min <= d_max && f_max >= d_min) {
      childIDs.add(id);
      if (msg != null) {
        msg.append("\nid ").append(id).append(" appended");
      }
    } else {
      if (msg != null) {
        msg.append("\nid ").append(id).append(" NOT appended");
      }
    }
  }
  if (msg != null) {
    msg.append("\nchildIds ").append(childIDs.size());
    LOG.debugFine(msg.toString());
  }
  if (childIDs.size() < minPts) {
    return null;
  } else {
    return childIDs;
  }
}
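At its core, determineIDs keeps an id whenever the function range [f_min, f_max] over the interval intersects the distance range [d_min, d_max], and gives up if fewer than minPts ids remain. A minimal, self-contained sketch of that selection logic (the names are hypothetical, not the ELKI API):

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of the selection in determineIDs: keep an id when its
// function range [fMin, fMax] intersects the distance range [dMin, dMax];
// return null when fewer than minPts ids survive.
public class IntervalSelectionSketch {
  static List<Integer> selectIds(double[] fMin, double[] fMax, double dMin, double dMax, int minPts) {
    List<Integer> kept = new ArrayList<>();
    for (int id = 0; id < fMin.length; id++) {
      if (fMin[id] <= dMax && fMax[id] >= dMin) { // ranges intersect
        kept.add(id);
      }
    }
    return kept.size() < minPts ? null : kept;
  }

  public static void main(String[] args) {
    double[] fMin = { 0.1, 0.6, 0.3 };
    double[] fMax = { 0.4, 0.9, 0.7 };
    // the distance range [0.35, 0.5] intersects ids 0 and 2 only
    System.out.println(selectIds(fMin, fMax, 0.35, 0.5, 2)); // [0, 2]
    System.out.println(selectIds(fMin, fMax, 0.35, 0.5, 3)); // null
  }
}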
Use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.
The class XSplitter, method chooseMinimumOverlapSplit().
/**
* Select the distribution with minimal intersection volume from a Collection
* of distributions. If there are several equal minimum intersection volumes,
* the distribution with the minimum volume is selected.
*
* @param splitAxis Split axis to be tested
* @param minEntries The minimum number of entries to be tested; in case of a
* data node, this is the minimum leaf capacity, for directory nodes,
* this is either the minimum directory capacity or, if a topological
* split has already failed, the tree's minimum fan-out parameter.
* @param maxEntries The maximum number of entries to be tested
* @param revert if <code>maxEntries < {@link #entries}.size()</code> and
* <code>revert</code> is <code>true</code>, the upper range of the
* sorting is tested, i.e. not
* <code>{minEntries, ..., maxEntries}</code> but
*
* <code>{{@link #entries}.size() - maxEntries + 1, ..., {@link #entries}.size() - minEntries + 1}</code>
* @return The distribution with the minimal intersection volume or
* <code>null</code>, if the minimum overlap split has a volume which
* is larger than the allowed <code>maxOverlap</code> ratio
*/
private SplitSorting chooseMinimumOverlapSplit(int splitAxis, int minEntries, int maxEntries, boolean revert) {
  if (splitAxis == -1) {
    pastOverlap = Double.MAX_VALUE;
    return null;
  }
  double optXVolume = Double.POSITIVE_INFINITY;
  double optVolume = Double.POSITIVE_INFINITY;
  SplitSorting optDistribution = null;
  HyperBoundingBox[] optMBRs = null;
  // generate sortings for the mbr's extrema
  int[] entrySorting = new int[node.getNumEntries()];
  for (int i = 0; i < entrySorting.length; i++) {
    entrySorting[i] = i;
  }
  int[] lbSorting = Arrays.copyOf(entrySorting, entrySorting.length);
  int[] ubSorting = Arrays.copyOf(entrySorting, entrySorting.length);
  // sort the index copies by the entries' lower and upper bounds in the split dimension
  sortEntriesForDimension(splitAxis, lbSorting, ubSorting);
  if (revert && maxEntries < node.getNumEntries() / 2) {
    // test reverted sortings
    // temp array
    int[][] reverted = new int[2][node.getNumEntries()];
    for (int i = 0; i < lbSorting.length; i++) {
      reverted[0][reverted[0].length - 1 - i] = lbSorting[i];
      reverted[1][reverted[1].length - 1 - i] = ubSorting[i];
    }
    for (int i = 0; i < lbSorting.length; i++) {
      lbSorting[i] = reverted[0][i];
      ubSorting[i] = reverted[1][i];
    }
  }
  for (int i = 0; i < 2; i++) {
    // test lower and upper bound sortings
    if (i == 0) {
      // lower-bound sorting
      entrySorting = lbSorting;
    } else {
      // upper-bound sorting
      entrySorting = ubSorting;
    }
    for (int limit = minEntries; limit <= maxEntries; limit++) {
      HyperBoundingBox mbr1 = mbr(entrySorting, 0, limit);
      HyperBoundingBox mbr2 = mbr(entrySorting, limit, entrySorting.length);
      double xVolume = SpatialUtil.overlap(mbr1, mbr2);
      if (xVolume < optXVolume) {
        optXVolume = xVolume;
        optDistribution = generateSplitSorting(entrySorting, limit);
        optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
        optVolume = Double.NaN;
      } else if (xVolume == optXVolume) {
        double vol = SpatialUtil.volume(mbr1);
        vol += SpatialUtil.volume(mbr2);
        if (Double.isNaN(optVolume)) {
          // calculate when necessary
          optVolume = SpatialUtil.volume(optMBRs[0]);
          optVolume += SpatialUtil.volume(optMBRs[1]);
        }
        if (vol < optVolume) {
          optXVolume = xVolume;
          optVolume = vol;
          optDistribution = generateSplitSorting(entrySorting, limit);
        }
      }
    }
  }
  if (node.getEntry(0) instanceof LeafEntry || tree.get_max_overlap() >= 1) {
    // overlap is not computed
    pastOverlap = Double.NaN;
    return optDistribution;
  }
  // test overlap
  switch(maxOverlapStrategy) {
  case DATA_OVERLAP:
    pastOverlap = getRatioOfDataInIntersectionVolume(generateDistribution(optDistribution), optMBRs);
    if (tree.get_max_overlap() < pastOverlap) {
      LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best data overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < node.getNumEntries() / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
      return null;
    }
    break;
  case VOLUME_OVERLAP:
    if (Double.isNaN(optVolume)) {
      optVolume = SpatialUtil.volume(optMBRs[0]);
      optVolume += SpatialUtil.volume(optMBRs[1]);
    }
    pastOverlap = optXVolume / optVolume;
    if (tree.get_max_overlap() < pastOverlap) {
      LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best volume overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < node.getNumEntries() / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
      return null;
    }
    break;
  }
  return optDistribution;
}
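The selection rule above (minimize the intersection volume, break ties by total volume) can be reproduced on plain boxes. The following sketch shows the tie-break on three candidate split pairs; the volume and overlap helpers are simple stand-ins for illustration, not ELKI's SpatialUtil.

// Hypothetical sketch of the tie-breaking in chooseMinimumOverlapSplit:
// among candidate (box1, box2) pairs, pick the smallest overlap volume,
// and among equal overlaps the smallest total volume.
public class MinOverlapSketch {
  static double volume(double[][] b) { // b = { min[], max[] }
    double v = 1;
    for (int d = 0; d < b[0].length; d++) {
      v *= b[1][d] - b[0][d];
    }
    return v;
  }

  static double overlap(double[][] a, double[][] b) {
    double v = 1;
    for (int d = 0; d < a[0].length; d++) {
      double lo = Math.max(a[0][d], b[0][d]), hi = Math.min(a[1][d], b[1][d]);
      if (hi <= lo) {
        return 0;
      }
      v *= hi - lo;
    }
    return v;
  }

  public static void main(String[] args) {
    double[][][][] candidates = {
      { { { 0, 0 }, { 3, 2 } }, { { 2, 0 }, { 5, 2 } } }, // overlap 2, total volume 12
      { { { 0, 0 }, { 2, 2 } }, { { 2, 0 }, { 5, 2 } } }, // overlap 0, total volume 10
      { { { 0, 0 }, { 2, 3 } }, { { 2, 0 }, { 5, 3 } } }, // overlap 0, total volume 15
    };
    int best = -1;
    double bestOverlap = Double.POSITIVE_INFINITY, bestVolume = Double.POSITIVE_INFINITY;
    for (int i = 0; i < candidates.length; i++) {
      double ov = overlap(candidates[i][0], candidates[i][1]);
      double vol = volume(candidates[i][0]) + volume(candidates[i][1]);
      if (ov < bestOverlap || (ov == bestOverlap && vol < bestVolume)) {
        best = i;
        bestOverlap = ov;
        bestVolume = vol;
      }
    }
    System.out.println("best candidate: " + best); // 1 (zero overlap, smaller total volume)
  }
}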
Use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.
The class XSplitter, method getSurfaceSums4Sorting().
/**
* Compute the surfaces of the <code>2 * (maxEntries - minEntries + 1)</code>
* split MBRs resulting for the sorting <code>entrySorting</code>.
*
* @param minEntries minimally allowed subgroup size
* @param maxEntries maximally allowed subgroup size for the first entry set
* @param entrySorting a permutation of the indices of {@link #entries}
* @param dim the dimension of the tree
* @return the sum of all first and second MBRs' surfaces for the tested entry
* distributions
*/
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
  // avoid multiple MBR calculations by updating min/max-logs for the two
  // collections' bounds:
  // the first entries' maximum upper bounds
  double[] pqUBFirst = new double[dim];
  Arrays.fill(pqUBFirst, Double.NEGATIVE_INFINITY);
  // maintain the second entries' upper bounds
  List<Heap<DoubleIntPair>> pqUBSecond = new ArrayList<>(dim);
  for (int i = 0; i < dim; i++) {
    // Descending heap
    pqUBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
  }
  // the first entries' minimum lower bounds
  double[] pqLBFirst = new double[dim];
  Arrays.fill(pqLBFirst, Double.POSITIVE_INFINITY);
  // maintain the second entries' minimum lower bounds
  List<Heap<DoubleIntPair>> pqLBSecond = new ArrayList<>(dim);
  for (int i = 0; i < dim; i++) {
    // Ascending heap
    pqLBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
  }
  // initialize bounds for first entry collection
  for (int index = 0; index < minEntries; index++) {
    add2MBR(entrySorting, pqUBFirst, pqLBFirst, index);
  }
  HyperBoundingBox mbr1 = new HyperBoundingBox(pqLBFirst, pqUBFirst);
  // fill bounding queues for the second entry collection
  double[] minSecond = new double[dim];
  double[] maxSecond = new double[dim];
  Arrays.fill(maxSecond, Double.NEGATIVE_INFINITY);
  Arrays.fill(minSecond, Double.POSITIVE_INFINITY);
  assert entrySorting.length - maxEntries == minEntries;
  // initialize min/max entries of the second collections' tail
  for (int index = maxEntries; index < entrySorting.length; index++) {
    add2MBR(entrySorting, maxSecond, minSecond, index);
  }
  for (int i = 0; i < dim; i++) {
    // with index entrySorting.length => never to be removed
    pqLBSecond.get(i).add(new DoubleIntPair(minSecond[i], entrySorting.length));
    pqUBSecond.get(i).add(new DoubleIntPair(maxSecond[i], entrySorting.length));
  }
  // add the entries to be removed later on
  for (int index = minEntries; index < maxEntries; index++) {
    add2MBR(entrySorting, pqUBSecond, pqLBSecond, index);
  }
  for (int i = 0; i < minSecond.length; i++) {
    minSecond[i] = pqLBSecond.get(i).peek().first;
    maxSecond[i] = pqUBSecond.get(i).peek().first;
  }
  ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(minSecond, maxSecond);
  double surfaceSum = SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
  // generate the other distributions and file the surface sums
  for (int limit = minEntries; limit < maxEntries; limit++) {
    // extend first MBR by entry at position entrySorting[limit]:
    add2MBR(entrySorting, pqUBFirst, pqLBFirst, limit);
    // shrink entry at position entrySorting[limit] from second MBR:
    removeFromMBR(pqUBSecond, pqLBSecond, limit, mbr2);
    surfaceSum += SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
  }
  return surfaceSum;
}
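For reference, the surface sums that the heap-based bookkeeping above maintains incrementally can also be computed naively by rebuilding both group MBRs from scratch for every split position. The sketch below does exactly that on plain double arrays; all names are hypothetical and not part of ELKI, and the perimeter helper just sums side lengths.

// Hypothetical naive counterpart of getSurfaceSums4Sorting: for each split
// position "limit", recompute the two group MBRs and add their perimeters.
// The ELKI code above obtains the same totals incrementally via min/max heaps.
public class SurfaceSumSketch {
  // points[i] is the (degenerate) MBR of entry i; sorting is a permutation of indices
  static double surfaceSums(double[][] points, int[] sorting, int minEntries, int maxEntries) {
    double sum = 0;
    for (int limit = minEntries; limit <= maxEntries; limit++) {
      sum += perimeter(points, sorting, 0, limit) + perimeter(points, sorting, limit, sorting.length);
    }
    return sum;
  }

  static double perimeter(double[][] points, int[] sorting, int from, int to) {
    int dim = points[0].length;
    double[] min = new double[dim], max = new double[dim];
    java.util.Arrays.fill(min, Double.POSITIVE_INFINITY);
    java.util.Arrays.fill(max, Double.NEGATIVE_INFINITY);
    for (int i = from; i < to; i++) {
      for (int d = 0; d < dim; d++) {
        min[d] = Math.min(min[d], points[sorting[i]][d]);
        max[d] = Math.max(max[d], points[sorting[i]][d]);
      }
    }
    double p = 0;
    for (int d = 0; d < dim; d++) {
      p += max[d] - min[d]; // sum of side lengths; a constant factor does not affect axis comparison
    }
    return p;
  }

  public static void main(String[] args) {
    double[][] points = { { 0, 0 }, { 1, 3 }, { 2, 1 }, { 4, 2 } };
    int[] sorting = { 0, 1, 2, 3 };
    System.out.println(surfaceSums(points, sorting, 1, 3));
  }
}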
Use of de.lmu.ifi.dbs.elki.data.HyperBoundingBox in project elki by elki-project.
The class AbstractXTree, method choosePath().
/**
* Chooses the best path of the specified subtree for insertion of the given
* MBR at the specified level. The selection uses the following criteria:
* <ol>
* <li>Test on containment (<code>mbr</code> <em>is</em> within one of the
* children)</li>
* <li>If there are multiple containing children, the child with the minimum
* volume is chosen.</li>
* <li>Else, if the children point to leaf nodes, chooses the child with the
* minimum multi-overlap increase.</li>
* <li>Else, or if the multi-overlap increase leads to ties, the child with
* the minimum volume increase is selected.</li>
* <li>If there are still ties, the child with the minimum volume is
* chosen.</li>
* </ol>
*
* @param subtree the subtree to be tested for insertion
* @param mbr the MBR to be inserted
* @param level the level at which the MBR should be inserted (level 1
* indicates leaf-level)
* @return the path of the appropriate subtree to insert the given
* <code>mbr</code>
*/
@Override
protected IndexTreePath<SpatialEntry> choosePath(IndexTreePath<SpatialEntry> subtree, SpatialComparable mbr, int level, int cur) {
  if (getLogger().isDebuggingFiner()) {
    getLogger().debugFiner("node " + subtree + ", level " + level);
  }
  N node = getNode(subtree.getEntry());
  if (node == null) {
    throw new RuntimeException("Page file did not return node for node id: " + getPageID(subtree.getEntry()));
  }
  if (node.isLeaf()) {
    return subtree;
  }
  // first test on containment
  IndexTreePath<SpatialEntry> newSubtree = containedTest(subtree, node, mbr);
  if (newSubtree != null) {
    if (height - subtree.getPathCount() == level) {
      return newSubtree;
    } else {
      return choosePath(newSubtree, mbr, level, ++cur);
    }
  }
  int optEntry = -1;
  HyperBoundingBox optTestMBR = null;
  double optOverlapInc = 0;
  // test overlap increase?
  boolean isLeafContainer = false;
  if ((!OMIT_OVERLAP_INCREASE_4_SUPERNODES // also test supernodes
      || (OMIT_OVERLAP_INCREASE_4_SUPERNODES && !node.isSuperNode())) // don't
      && getNode(node.getEntry(0)).isLeaf()) {
    // children are leafs
    // overlap increase is to be tested
    optOverlapInc = Double.POSITIVE_INFINITY;
    isLeafContainer = true;
  }
  double optVolume = Double.POSITIVE_INFINITY;
  double optVolumeInc = Double.POSITIVE_INFINITY;
  double tempVolume, volume;
  for (int index = 0; index < node.getNumEntries(); index++) {
    SpatialEntry child = node.getEntry(index);
    SpatialComparable childMBR = child;
    HyperBoundingBox testMBR = SpatialUtil.union(childMBR, mbr);
    double pairwiseOverlapInc;
    if (isLeafContainer) {
      pairwiseOverlapInc = calculateOverlapIncrease(node, child, testMBR);
      if (Double.isInfinite(pairwiseOverlapInc) || Double.isNaN(pairwiseOverlapInc)) {
        throw new IllegalStateException("an entry's MBR is too large to calculate its overlap increase: " + pairwiseOverlapInc + "; \nplease re-scale your data s.t. it can be dealt with");
      }
    } else {
      // no need to examine overlap increase?
      pairwiseOverlapInc = 0;
    }
    if (pairwiseOverlapInc <= optOverlapInc) {
      if (pairwiseOverlapInc == optOverlapInc) {
        // If there are multiple entries with the same overlap increase,
        // choose the one with the minimum volume increase.
        // If there are also multiple entries with the same volume increase
        // choose the one with the minimum volume.
        volume = SpatialUtil.volume(childMBR);
        if (Double.isInfinite(volume) || Double.isNaN(volume)) {
          throw new IllegalStateException("an entry's MBR is too large to calculate its volume: " + volume + "; \nplease re-scale your data s.t. it can be dealt with");
        }
        tempVolume = SpatialUtil.volume(testMBR);
        if (Double.isInfinite(tempVolume) || Double.isNaN(tempVolume)) {
          throw new IllegalStateException("an entry's MBR is too large to calculate its volume: " + tempVolume + "; \nplease re-scale your data s.t. it can be dealt with");
        }
        double volumeInc = tempVolume - volume;
        if (Double.isNaN(optVolumeInc)) {
          // has not yet been calculated
          optVolume = SpatialUtil.volume(node.getEntry(optEntry));
          optVolumeInc = SpatialUtil.volume(optTestMBR) - optVolume;
        }
        if (volumeInc < optVolumeInc) {
          optVolumeInc = volumeInc;
          optVolume = volume;
          optEntry = index;
        } else if (volumeInc == optVolumeInc && volume < optVolume) {
          // TODO: decide whether to remove this option
          System.out.println("####\nEQUAL VOLUME INCREASE: HAPPENS!\n####");
          optVolumeInc = volumeInc;
          optVolume = volume;
          optEntry = index;
        }
      } else {
        // already better
        optOverlapInc = pairwiseOverlapInc;
        optVolume = Double.NaN;
        optVolumeInc = Double.NaN;
        // for later calculations
        optTestMBR = testMBR;
        optEntry = index;
      }
    }
  }
  assert optEntry >= 0;
  newSubtree = new IndexTreePath<>(subtree, node.getEntry(optEntry), optEntry);
  if (height - subtree.getPathCount() == level) {
    return newSubtree;
  } else {
    return choosePath(newSubtree, mbr, level, ++cur);
  }
}
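The decision cascade above (containment first, then overlap increase, then volume increase, then volume) can be illustrated on plain boxes. The simplified sketch below covers only the non-containment case for leaf children, i.e. criteria 3 to 5 of the list in the Javadoc; the union, overlap, and volume helpers are hypothetical stand-ins for illustration, not ELKI's SpatialUtil.

// Hypothetical sketch of the tie-break cascade in choosePath for leaf children:
// minimize overlap increase, then volume increase, then volume.
public class ChoosePathSketch {
  static int chooseChild(double[][][] children, double[][] mbr) {
    int best = -1;
    double bestOverlapInc = Double.POSITIVE_INFINITY;
    double bestVolumeInc = Double.POSITIVE_INFINITY;
    double bestVolume = Double.POSITIVE_INFINITY;
    for (int i = 0; i < children.length; i++) {
      double[][] enlarged = union(children[i], mbr);
      // overlap of child i with all siblings, after minus before enlargement
      double overlapInc = 0;
      for (int j = 0; j < children.length; j++) {
        if (j != i) {
          overlapInc += overlap(enlarged, children[j]) - overlap(children[i], children[j]);
        }
      }
      double volume = volume(children[i]);
      double volumeInc = volume(enlarged) - volume;
      if (overlapInc < bestOverlapInc
          || (overlapInc == bestOverlapInc && (volumeInc < bestVolumeInc
              || (volumeInc == bestVolumeInc && volume < bestVolume)))) {
        best = i;
        bestOverlapInc = overlapInc;
        bestVolumeInc = volumeInc;
        bestVolume = volume;
      }
    }
    return best;
  }

  static double[][] union(double[][] a, double[][] b) {
    int dim = a[0].length;
    double[][] u = { new double[dim], new double[dim] };
    for (int d = 0; d < dim; d++) {
      u[0][d] = Math.min(a[0][d], b[0][d]);
      u[1][d] = Math.max(a[1][d], b[1][d]);
    }
    return u;
  }

  static double overlap(double[][] a, double[][] b) {
    double v = 1;
    for (int d = 0; d < a[0].length; d++) {
      double lo = Math.max(a[0][d], b[0][d]), hi = Math.min(a[1][d], b[1][d]);
      if (hi <= lo) {
        return 0;
      }
      v *= hi - lo;
    }
    return v;
  }

  static double volume(double[][] b) {
    double v = 1;
    for (int d = 0; d < b[0].length; d++) {
      v *= b[1][d] - b[0][d];
    }
    return v;
  }

  public static void main(String[] args) {
    double[][][] children = { { { 0, 0 }, { 2, 2 } }, { { 3, 0 }, { 5, 2 } } };
    double[][] mbr = { { 4, 1 }, { 4.5, 1.5 } };
    System.out.println(chooseChild(children, mbr)); // prints 1: child 1 needs no enlargement
  }
}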