use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.
the class XSplitter method getSurfaceSums4Sorting.
/**
 * Accumulates the perimeters of both split MBRs over every distribution that
 * the sorting <code>entrySorting</code> admits, i.e. over
 * <code>maxEntries - minEntries + 1</code> distributions with two MBRs each.
 *
 * @param minEntries minimally allowed subgroup size
 * @param maxEntries maximally allowed subgroup size for the first entry set
 * @param entrySorting a permutation of the indices of {@link #entries}
 * @param dim the dimension of the tree
 * @return the sum of all first and second MBRs' surfaces for the tested entry
 *         distributions
 */
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
  // Avoid recomputing MBRs from scratch: keep running bounds for both groups.

  // The first group only grows, so plain running extrema are sufficient.
  double[] firstUpper = new double[dim];
  double[] firstLower = new double[dim];
  Arrays.fill(firstUpper, Double.NEGATIVE_INFINITY);
  Arrays.fill(firstLower, Double.POSITIVE_INFINITY);

  // The second group loses entries, so its bounds are kept in one heap per
  // dimension: descending for upper bounds, ascending for lower bounds.
  List<Heap<DoubleIntPair>> secondUpper = new ArrayList<>(dim);
  List<Heap<DoubleIntPair>> secondLower = new ArrayList<>(dim);
  for (int d = 0; d < dim; d++) {
    secondUpper.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
    secondLower.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
  }

  // Smallest admissible first group: positions [0, minEntries)
  for (int pos = 0; pos < minEntries; pos++) {
    add2MBR(entrySorting, firstUpper, firstLower, pos);
  }
  // NOTE(review): the loop at the end extends firstUpper/firstLower and then
  // re-reads this box, so HyperBoundingBox presumably keeps references to the
  // arrays it is given - confirm.
  HyperBoundingBox firstBox = new HyperBoundingBox(firstLower, firstUpper);

  // Bounds of the tail [maxEntries, length), which always stays in the second group
  double[] tailLower = new double[dim];
  double[] tailUpper = new double[dim];
  Arrays.fill(tailUpper, Double.NEGATIVE_INFINITY);
  Arrays.fill(tailLower, Double.POSITIVE_INFINITY);

  final int total = entrySorting.length;
  assert total - maxEntries == minEntries;
  for (int pos = maxEntries; pos < total; pos++) {
    add2MBR(entrySorting, tailUpper, tailLower, pos);
  }
  // File the tail bounds under the index "total", which never gets removed
  for (int d = 0; d < dim; d++) {
    secondLower.get(d).add(new DoubleIntPair(tailLower[d], total));
    secondUpper.get(d).add(new DoubleIntPair(tailUpper[d], total));
  }
  // Entries at [minEntries, maxEntries) start in the second group and are
  // removed from it one by one below
  for (int pos = minEntries; pos < maxEntries; pos++) {
    add2MBR(entrySorting, secondUpper, secondLower, pos);
  }
  // The heap tops now hold the bounds of the complete second group
  for (int d = 0; d < dim; d++) {
    tailLower[d] = secondLower.get(d).peek().first;
    tailUpper[d] = secondUpper.get(d).peek().first;
  }
  ModifiableHyperBoundingBox secondBox = new ModifiableHyperBoundingBox(tailLower, tailUpper);

  // First distribution
  double perimeterSum = SpatialUtil.perimeter(firstBox) + SpatialUtil.perimeter(secondBox);

  // Remaining distributions: move one entry at a time from second to first
  for (int moved = minEntries; moved < maxEntries; moved++) {
    // extend first MBR by entry at position entrySorting[moved]
    add2MBR(entrySorting, firstUpper, firstLower, moved);
    // shrink second MBR by the same entry
    removeFromMBR(secondUpper, secondLower, moved, secondBox);
    perimeterSum += SpatialUtil.perimeter(firstBox) + SpatialUtil.perimeter(secondBox);
  }
  return perimeterSum;
}
use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.
the class AngTanLinearSplit method split.
/**
 * Splits the entries along one axis: for every axis, each entry is filed by
 * which edge of the overall MBR it lies closer to, and the axis giving the
 * most even partition is chosen. Ties are broken by the smaller value of
 * {@code computeOverlap}, then by the shorter extent of the overall MBR.
 *
 * @param entries the entries to split
 * @param getter adapter used to access the entries
 * @param minEntries minimum group size (not consulted by this implementation)
 * @return bitset with one bit per entry; a set bit marks an entry that is at
 *         least as close to the upper edge as to the lower edge of the chosen
 *         axis. A random assignment is returned when no axis separates the
 *         entries.
 */
@Override
public <E extends SpatialComparable, A> long[] split(A entries, ArrayAdapter<E, A> getter, int minEntries) {
  final int num = getter.size(entries);
  // We need the overall MBR for computing edge preferences
  ModifiableHyperBoundingBox total = new ModifiableHyperBoundingBox(getter.get(entries, 0));
  for (int i = 1; i < num; i++) {
    total.extend(getter.get(entries, i));
  }
  final int dim = total.getDimensionality();
  // Prepare the axis lists: one bitset of num BITS per axis. Allocating
  // new long[dim][num] would reserve num 64-bit words per axis instead.
  long[][] closer = new long[dim][];
  for (int d = 0; d < dim; d++) {
    closer[d] = BitsUtil.zero(num);
  }
  for (int i = 0; i < num; i++) {
    E e = getter.get(entries, i);
    for (int d = 0; d < dim; d++) {
      // Distance to the lower and to the upper edge of the overall MBR
      double low = e.getMin(d) - total.getMin(d);
      double hig = total.getMax(d) - e.getMax(d);
      if (low >= hig) {
        BitsUtil.setI(closer[d], i);
      }
    }
  }
  // Find the most even split
  int axis = -1;
  int bestcard = Integer.MAX_VALUE;
  long[] bestset = null;
  // Overlap of the current best; computed lazily, only once a tie occurs
  double bestover = Double.NaN;
  for (int d = 0; d < dim; d++) {
    long[] cand = closer[d];
    int card = BitsUtil.cardinality(cand);
    // Size of the larger group; smaller means more balanced
    card = Math.max(card, num - card);
    if (card == num) {
      // All entries on one side: this axis does not split anything
      continue;
    }
    if (card < bestcard) {
      axis = d;
      bestcard = card;
      bestset = cand;
      bestover = Double.NaN;
    } else if (card == bestcard) {
      // Tie handling
      if (Double.isNaN(bestover)) {
        bestover = computeOverlap(entries, getter, bestset);
      }
      double overlap = computeOverlap(entries, getter, cand);
      if (overlap < bestover) {
        axis = d;
        bestset = cand;
        bestover = overlap;
      } else if (overlap == bestover) {
        // Still tied: prefer the axis with the shorter overall extent
        double bestlen = total.getMax(axis) - total.getMin(axis);
        double candlen = total.getMax(d) - total.getMin(d);
        if (candlen < bestlen) {
          axis = d;
          bestset = cand;
          bestover = overlap;
        }
      }
    }
  }
  if (bestset == null) {
    LOG.warning("No Ang-Tan-Split found. Probably all points are the same? Returning random split.");
    return BitsUtil.random(num >> 1, num, new Random());
  }
  return bestset;
}
use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.
the class GreeneSplit method split.
/**
 * Splits the entries by picking the two seed entries whose union wastes the
 * most volume, choosing the axis with the best normalized separation of these
 * seeds, sorting all entries by their minimum on that axis and cutting the
 * sorted sequence in half.
 *
 * @param entries the entries to split
 * @param getter adapter used to access the entries
 * @param minEntries minimum group size (not consulted by this implementation)
 * @return bitset with one bit per entry; set bits mark the entries of the
 *         lower part of the sorted sequence
 */
@Override
public <E extends SpatialComparable, A> long[] split(A entries, ArrayAdapter<E, A> getter, int minEntries) {
  final int num = getter.size(entries);
  // Choose axis by best normalized separation
  int axis = -1;
  {
    // PickSeeds - find the two most distant rectangles
    double worst = Double.NEGATIVE_INFINITY;
    int w1 = 0, w2 = 0;
    // Compute individual areas of ALL entries: the pair loop below reads
    // areas[e2] for e2 up to num - 1, so the last entry must be included.
    double[] areas = new double[num];
    for (int e = 0; e < num; e++) {
      areas[e] = SpatialUtil.volume(getter.get(entries, e));
    }
    // Compute area increase (volume wasted by the union) for every pair
    for (int e1 = 0; e1 < num - 1; e1++) {
      final E e1i = getter.get(entries, e1);
      for (int e2 = e1 + 1; e2 < num; e2++) {
        final E e2i = getter.get(entries, e2);
        final double areaJ = SpatialUtil.volumeUnion(e1i, e2i);
        final double d = areaJ - areas[e1] - areas[e2];
        if (d > worst) {
          worst = d;
          w1 = e1;
          w2 = e2;
        }
      }
    }
    if (worst > 0) {
      // Data to keep
      // Initial mbrs and areas
      E m1 = getter.get(entries, w1);
      E m2 = getter.get(entries, w2);
      double bestsep = Double.NEGATIVE_INFINITY;
      double bestsep2 = Double.NEGATIVE_INFINITY;
      for (int d = 0; d < m1.getDimensionality(); d++) {
        // Gap between the seeds (negative when they overlap on this axis)
        final double s1 = m1.getMin(d) - m2.getMax(d);
        final double s2 = m2.getMin(d) - m1.getMax(d);
        final double sm = Math.max(s1, s2);
        // Extent of the seeds' union, used to normalize the gap
        final double no = Math.max(m1.getMax(d), m2.getMax(d)) - Math.min(m1.getMin(d), m2.getMin(d));
        final double sep = sm / no;
        // On equal normalized separation, prefer the larger absolute gap
        if (sep > bestsep || (sep == bestsep && sm > bestsep2)) {
          bestsep = sep;
          bestsep2 = sm;
          axis = d;
        }
      }
    } else {
      // No pair wastes any volume (e.g. all objects are identical)
      final int half = (num + 1) >> 1;
      // Put the first half into second node
      return BitsUtil.ones(half);
    }
  }
  // Sort by minimum value
  DoubleIntPair[] data = new DoubleIntPair[num];
  for (int i = 0; i < num; i++) {
    data[i] = new DoubleIntPair(getter.get(entries, i).getMin(axis), i);
  }
  Arrays.sort(data);
  // Object assignment
  final long[] assignment = BitsUtil.zero(num);
  final int half = (num + 1) >> 1;
  // Put the first half into second node
  for (int i = 0; i < half; i++) {
    BitsUtil.setI(assignment, data[i].second);
  }
  // Tie handling
  // NOTE(review): this step runs for EVEN counts and may move data[half]
  // into the set group in addition to the first half; Greene's description
  // appears to apply it to the middle entry of an ODD count - confirm the
  // intended parity.
  if (num % 2 == 0) {
    // We need to compute the bounding boxes
    ModifiableHyperBoundingBox mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, data[0].second));
    for (int i = 1; i < half; i++) {
      mbr1.extend(getter.get(entries, data[i].second));
    }
    // Second box: last entry plus positions (half, num - 1), i.e. without data[half]
    ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, data[num - 1].second));
    for (int i = half + 1; i < num - 1; i++) {
      mbr2.extend(getter.get(entries, data[i].second));
    }
    E e = getter.get(entries, data[half].second);
    double inc1 = SpatialUtil.volumeUnion(mbr1, e) - SpatialUtil.volume(mbr1);
    double inc2 = SpatialUtil.volumeUnion(mbr2, e) - SpatialUtil.volume(mbr2);
    if (inc1 < inc2) {
      BitsUtil.setI(assignment, data[half].second);
    }
  }
  return assignment;
}
use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.
the class RTreeQuadraticSplit method split.
/**
 * Quadratic split: seeds the two groups with the pair of entries whose union
 * wastes the most volume, then repeatedly assigns the entry with the
 * strongest preference for one group (largest difference in volume
 * enlargement) to the group it enlarges less.
 *
 * @param entries the entries to split
 * @param getter adapter used to access the entries
 * @param minEntries minimum number of entries each group must receive
 * @return bitset with one bit per entry; set bits mark the entries of the
 *         second group
 */
@Override
public <E extends SpatialComparable, A> long[] split(A entries, ArrayAdapter<E, A> getter, int minEntries) {
  final int num = getter.size(entries);
  // Object assignment, and processed objects
  long[] assignment = BitsUtil.zero(num);
  long[] assigned = BitsUtil.zero(num);
  // MBRs and Areas of current assignments
  ModifiableHyperBoundingBox mbr1, mbr2;
  double area1 = 0, area2 = 0;
  // PickSeeds - find worst pair
  {
    double worst = Double.NEGATIVE_INFINITY;
    int w1 = 0, w2 = 0;
    // Compute individual areas of ALL entries: areas[e2] is read for e2 up
    // to num - 1 below, and areas[w2] seeds area2, so the last entry must
    // not be left at the array default of 0.
    double[] areas = new double[num];
    for (int e = 0; e < num; e++) {
      areas[e] = SpatialUtil.volume(getter.get(entries, e));
    }
    // Compute area increase (volume wasted by the union) for every pair
    for (int e1 = 0; e1 < num - 1; e1++) {
      final E e1i = getter.get(entries, e1);
      for (int e2 = e1 + 1; e2 < num; e2++) {
        final E e2i = getter.get(entries, e2);
        final double areaJ = SpatialUtil.volumeUnion(e1i, e2i);
        final double d = areaJ - areas[e1] - areas[e2];
        if (d > worst) {
          worst = d;
          w1 = e1;
          w2 = e2;
        }
      }
    }
    // Data to keep
    // Mark both as used
    BitsUtil.setI(assigned, w1);
    BitsUtil.setI(assigned, w2);
    // Assign second to second set
    BitsUtil.setI(assignment, w2);
    // Initial mbrs and areas
    area1 = areas[w1];
    area2 = areas[w2];
    mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, w1));
    mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, w2));
  }
  // Second phase, QS2+QS3
  {
    int in1 = 1, in2 = 1;
    int remaining = num - 2;
    while (remaining > 0) {
      // Shortcut when minEntries must be fulfilled
      if (in1 + remaining <= minEntries) {
        // All remaining entries go to the first group: their bits are
        // already clear, so no changes to assignment are needed.
        break;
      }
      if (in2 + remaining <= minEntries) {
        // All remaining entries go to the second group.
        // Don't bother to update assigned, though
        for (int pos = BitsUtil.nextClearBit(assigned, 0); pos < num; pos = BitsUtil.nextClearBit(assigned, pos + 1)) {
          BitsUtil.setI(assignment, pos);
        }
        break;
      }
      // PickNext
      double greatestPreference = Double.NEGATIVE_INFINITY;
      int best = -1;
      E best_i = null;
      boolean preferSecond = false;
      for (int pos = BitsUtil.nextClearBit(assigned, 0); pos < num; pos = BitsUtil.nextClearBit(assigned, pos + 1)) {
        // Cost of putting object into both mbrs
        final E pos_i = getter.get(entries, pos);
        final double d1 = SpatialUtil.volumeUnion(mbr1, pos_i) - area1;
        final double d2 = SpatialUtil.volumeUnion(mbr2, pos_i) - area2;
        // Preference
        final double preference = Math.abs(d1 - d2);
        if (preference > greatestPreference) {
          greatestPreference = preference;
          best = pos;
          best_i = pos_i;
          // Prefer smaller increase
          preferSecond = (d2 < d1);
        }
      }
      // QS3: tie handling
      if (greatestPreference == 0) {
        // Prefer smaller area
        if (area1 != area2) {
          preferSecond = (area2 < area1);
        } else {
          // Prefer smaller group size
          preferSecond = (in2 < in1);
        }
      }
      // Mark as used.
      BitsUtil.setI(assigned, best);
      remaining--;
      if (!preferSecond) {
        in1++;
        mbr1.extend(best_i);
        area1 = SpatialUtil.volume(mbr1);
      } else {
        in2++;
        BitsUtil.setI(assignment, best);
        mbr2.extend(best_i);
        area2 = SpatialUtil.volume(mbr2);
      }
      // Loop from QS2
    }
    // Note: "assigned" and "remaining" likely not updated!
  }
  return assignment;
}
use of de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox in project elki by elki-project.
the class AbstractRStarTree method initializeCapacities.
/**
 * Derives the leaf and directory node capacities by serializing sample
 * entries into an in-memory buffer until the page size is exceeded, and then
 * computes the minimum fill of both node types from
 * {@code settings.relativeMinFill}.
 *
 * @param exampleLeaf entry whose dimensionality determines the size of the
 *        simulated entries
 * @throws AbortException if serializing a sample entry fails
 * @throws IllegalArgumentException if a node could hold at most two entries
 */
@Override
protected void initializeCapacities(E exampleLeaf) {
  /* Simulate the creation of a leaf page to get the page capacity */
  try {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(buffer);
    SpatialPointLeafEntry sample = new SpatialPointLeafEntry(DBIDUtil.importInteger(0), new double[exampleLeaf.getDimensionality()]);
    int written = 0;
    for (; buffer.size() <= getPageSize(); written++) {
      sample.writeExternal(out);
      out.flush();
    }
    // the last one caused the page to overflow.
    leafCapacity = written - 1;
  } catch (IOException e) {
    throw new AbortException("Error determining page sizes.", e);
  }

  /* Simulate the creation of a directory page to get the capacity */
  try {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(buffer);
    ModifiableHyperBoundingBox box = new ModifiableHyperBoundingBox(new double[exampleLeaf.getDimensionality()], new double[exampleLeaf.getDimensionality()]);
    SpatialDirectoryEntry sample = new SpatialDirectoryEntry(0, box);
    int written = 0;
    for (; buffer.size() <= getPageSize(); written++) {
      sample.writeExternal(out);
      out.flush();
    }
    // as above, the last write overflowed the page
    dirCapacity = written - 1;
  } catch (IOException e) {
    throw new AbortException("Error determining page sizes.", e);
  }

  // Directory nodes: sanity checks, then minimum fill (at least one entry)
  if (dirCapacity <= 2) {
    throw new IllegalArgumentException("Node size of " + getPageSize() + " bytes is chosen too small!");
  }
  final Logging log = getLogger();
  if (dirCapacity < 10) {
    log.warning("Page size is choosen very small! Maximum number of entries in a directory node = " + dirCapacity);
  }
  dirMinimum = Math.max(1, (int) Math.floor(dirCapacity * settings.relativeMinFill));

  // Leaf nodes: same checks and minimum fill
  if (leafCapacity <= 2) {
    throw new IllegalArgumentException("Node size of " + getPageSize() + " bytes is chosen too small!");
  }
  if (leafCapacity < 10) {
    log.warning("Page size is choosen very small! Maximum number of entries in a leaf node = " + leafCapacity);
  }
  leafMinimum = Math.max(1, (int) Math.floor(leafCapacity * settings.relativeMinFill));
}
Aggregations