Use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.
Class DeLiClu, method run.
/**
 * Runs DeLiClu on the given relation and returns the resulting cluster order.
 * <p>
 * Exactly one {@code DeLiCluTreeIndex} must be registered for the relation, and
 * the configured distance function must be a
 * {@code SpatialPrimitiveDistanceFunction}. A kNN join is computed first; then
 * pairs of index entries are taken from the heap until every object has been
 * added to the cluster order.
 *
 * @param database Database whose result hierarchy is searched for the index
 * @param relation Relation to process
 * @return the cluster order named "DeLiClu Clustering"
 * @throws MissingPrerequisitesException if the number of DeLiCluTree indexes
 *         found is not exactly one
 * @throws IllegalArgumentException if the distance function is not a
 *         {@code SpatialPrimitiveDistanceFunction}
 * @throws AbortException if the heap runs empty before all objects are handled
 */
public ClusterOrder run(Database database, Relation<NV> relation) {
  final Collection<DeLiCluTreeIndex<NV>> candidates = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
  if (candidates.size() != 1) {
    throw new MissingPrerequisitesException("DeLiClu found " + candidates.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
  }
  final DeLiCluTreeIndex<NV> index = candidates.iterator().next();

  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
    throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  @SuppressWarnings("unchecked")
  final SpatialPrimitiveDistanceFunction<NV> distFunction = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();

  // Step 1: the kNN join supplies the kNN distance of every object.
  if (LOG.isVerbose()) {
    LOG.verbose("knnJoin...");
  }
  final Relation<KNNList> knns = knnJoin.run(relation);

  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  final FiniteProgress progress;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("DeLiClu", size, LOG);
  } else {
    progress = null;
  }
  final ClusterOrder clusterOrder = new ClusterOrder(ids, "DeLiClu Clustering", "deliclu-clustering");
  heap = new UpdatableHeap<>();

  // Step 2: seed the cluster order with the first object and the heap with
  // the (root, root) pair.
  final DBID startID = DBIDUtil.deref(ids.iter());
  clusterOrder.add(startID, Double.POSITIVE_INFINITY, null);
  int handled = 1;
  index.setHandled(startID, relation.get(startID));
  final SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) index.getRootEntry();
  heap.add(new SpatialObjectPair(0., rootEntry, rootEntry, true));

  // Step 3: process pairs until every object is part of the cluster order.
  while (handled < size) {
    if (heap.isEmpty()) {
      throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
    }
    final SpatialObjectPair next = heap.poll();
    if (next.isExpandable) {
      // Pair of nodes: expand it into pairs of their children.
      expandNodes(index, distFunction, next, knns);
      continue;
    }
    // Pair of objects: mark the first one as handled.
    final LeafEntry first = (LeafEntry) next.entry1;
    final LeafEntry second = (LeafEntry) next.entry2;
    final DBID firstId = first.getDBID();
    final IndexTreePath<DeLiCluEntry> path = index.setHandled(firstId, relation.get(firstId));
    if (path == null) {
      throw new RuntimeException("snh: parent(" + firstId + ") = null!!!");
    }
    // Append to the cluster order, with the second object as predecessor.
    clusterOrder.add(firstId, next.distance, second.getDBID());
    handled++;
    // Reinsert expanded leafs along the path of the newly handled object.
    reinsertExpanded(distFunction, index, path, knns);
    if (progress != null) {
      progress.setProcessed(handled, LOG);
    }
  }
  LOG.ensureCompleted(progress);
  return clusterOrder;
}
Use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.
Class AbstractXTree, method toString.
/**
 * Returns a string representation of this XTree.
 * <p>
 * The summary lists the number of levels, the counts of directory nodes,
 * supernodes, data nodes and stored objects, the fan-out and overlap settings,
 * and the storage quota (entries and nodes counted, as a percentage of the
 * summed node capacities).
 * <p>
 * If the tree contains no supernodes, the reported supernode max/min are the
 * initial sentinel values minus one.
 *
 * @return a string representation of this XTree
 */
@Override
public String toString() {
  long dirNodes = 0;
  long superNodes = 0;
  long leafNodes = 0;
  long objects = 0;
  long maxSuperCapacity = -1;
  long minSuperCapacity = Long.MAX_VALUE;
  BigInteger totalCapacity = BigInteger.ZERO;

  // Determine the height by following the first entry of each node downwards.
  int levels = 0;
  N node = getRoot();
  while (!node.isLeaf()) {
    if (node.getNumEntries() <= 0) {
      // A non-leaf node without entries has no child to descend into. Stop
      // here; without this guard the loop would never terminate.
      break;
    }
    SpatialEntry entry = node.getEntry(0);
    node = getNode(entry);
    levels++;
  }

  // Visit every entry once and classify it.
  BreadthFirstEnumeration<N, SpatialEntry> enumeration = new BreadthFirstEnumeration<>(this, getRootPath());
  while (enumeration.hasNext()) {
    IndexTreePath<SpatialEntry> indexPath = enumeration.next();
    SpatialEntry entry = indexPath.getEntry();
    if (entry instanceof LeafEntry) {
      objects++;
      continue;
    }
    node = getNode(entry);
    if (node.isLeaf()) {
      leafNodes++;
    } else if (node.isSuperNode()) {
      superNodes++;
      if (node.getCapacity() > maxSuperCapacity) {
        maxSuperCapacity = node.getCapacity();
      }
      if (node.getCapacity() < minSuperCapacity) {
        minSuperCapacity = node.getCapacity();
      }
    } else {
      dirNodes++;
    }
    totalCapacity = totalCapacity.add(BigInteger.valueOf(node.getCapacity()));
  }
  assert objects == num_elements : "objects=" + objects + ", size=" + num_elements;

  // BigInteger.divide throws ArithmeticException for a zero divisor; report
  // 0% instead of failing inside toString().
  final String storageQuota;
  if (totalCapacity.signum() == 0) {
    storageQuota = "0";
  } else {
    storageQuota = BigInteger.valueOf(objects + dirNodes + superNodes + leafNodes).multiply(BigInteger.valueOf(100)).divide(totalCapacity).toString();
  }

  return new StringBuilder(10000) //
      .append(getClass().getName()).append(" has ").append((levels + 1)).append(" levels.\n") //
      .append(dirNodes).append(" Directory Nodes (max = ").append(dirCapacity - 1).append(", min = ").append(dirMinimum).append(")\n") //
      .append(superNodes).append(" Supernodes (max = ").append(maxSuperCapacity - 1).append(", min = ").append(minSuperCapacity - 1).append(")\n") //
      .append(leafNodes).append(" Data Nodes (max = ").append(leafCapacity - 1).append(", min = ").append(leafMinimum).append(")\n") //
      .append(objects).append(' ').append(dimensionality).append("-dim. points in the tree \n") //
      .append("min_fanout = ").append(settings.min_fanout).append(", max_overlap = ").append(settings.max_overlap).append((settings.overlap_type == Overlap.DATA_OVERLAP ? " data overlap" : " volume overlap")).append(", \n") //
      .append("Storage Quota ").append(storageQuota).append("%\n") //
      .toString();
}
Use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.
Class DeLiClu, method reinsertExpanded.
/**
 * Pushes new pairs onto the heap for the entry found at position
 * {@code pos} of {@code path}, paired with the children of
 * {@code parentEntry}.
 * <p>
 * If the path entry is a leaf entry, every child that is not yet handled is
 * paired with it as an object pair; the pair's priority is the larger of the
 * minimum distance and the kNN distance of the path entry's object. Otherwise,
 * children not yet expanded against the path entry are added as node pairs,
 * and the method recurses with {@code pos - 1} into children that are already
 * expanded.
 *
 * @param distFunction Distance function used for the minimum distances
 * @param index Index tree providing the nodes and the expansion bookkeeping
 * @param path Path entries, indexed by {@code pos}
 * @param pos Position in {@code path} of the entry to pair against
 * @param parentEntry Entry whose node's children are paired
 * @param knns kNN lists of the objects
 */
private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV> distFunction, DeLiCluTree index, List<IndexTreePath<DeLiCluEntry>> path, int pos, DeLiCluEntry parentEntry, Relation<KNNList> knns) {
  final DeLiCluNode parent = index.getNode(parentEntry);
  final SpatialEntry pathEntry = path.get(pos).getEntry();

  if (!(pathEntry instanceof LeafEntry)) {
    // Directory level: pair nodes, or descend where already expanded.
    final IntSet alreadyExpanded = index.getExpanded(pathEntry);
    for (int k = 0; k < parent.getNumEntries(); k++) {
      final DeLiCluDirectoryEntry child = (DeLiCluDirectoryEntry) parent.getEntry(k);
      if (alreadyExpanded.contains(child.getPageID())) {
        // already expanded
        reinsertExpanded(distFunction, index, path, pos - 1, child, knns);
        continue;
      }
      // not yet expanded
      final double minDist = distFunction.minDist(child, pathEntry);
      heap.add(new SpatialObjectPair(minDist, child, pathEntry, true));
    }
    return;
  }

  // Leaf level: pair the path entry with every unhandled object.
  assert (pos == 0);
  final LeafEntry leaf = (LeafEntry) pathEntry;
  for (int k = 0; k < parent.getNumEntries(); k++) {
    final DeLiCluEntry child = parent.getEntry(k);
    if (!child.hasHandled()) {
      final double minDist = distFunction.minDist(child, pathEntry);
      final double reach = MathUtil.max(minDist, knns.get(leaf.getDBID()).getKNNDistance());
      heap.add(new SpatialObjectPair(reach, child, pathEntry, false));
    }
  }
}
Use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.
Class XSplitter, method countXingDataEntries.
/**
 * Recursively counts the data objects below the given entries and, among
 * them, those intersecting <code>mbr</code>. Both counts are accumulated into
 * <code>numOf</code>.
 *
 * @param entries entries to inspect; leaf entries are counted directly,
 *        any other entry is treated as a directory entry and its node's
 *        entries are counted recursively
 * @param mbr bounding box the data objects are tested against
 * @param numOf array of two integers: index 0 is incremented for every data
 *        object, index 1 for every data object intersecting <code>mbr</code>
 * @return the same array <code>numOf</code> that was passed in, after
 *         accumulation
 */
private int[] countXingDataEntries(final Collection<SpatialEntry> entries, final HyperBoundingBox mbr, int[] numOf) {
  for (final SpatialEntry child : entries) {
    if (!(child instanceof LeafEntry)) {
      // Not a data object: load the referenced node and descend into it.
      final N subtree = tree.getNode(((DirectoryEntry) child).getPageID());
      countXingDataEntries(subtree.getEntries(), mbr, numOf);
      continue;
    }
    // Data object: count it, and count it again if it intersects the MBR.
    numOf[0]++;
    if (SpatialUtil.intersects(mbr, child)) {
      numOf[1]++;
    }
  }
  return numOf;
}
Use of de.lmu.ifi.dbs.elki.index.tree.LeafEntry in project elki by elki-project.
Class XSplitter, method chooseMinimumOverlapSplit.
/**
 * Select the distribution with minimal intersection volume from a Collection
 * of distributions. If there are several equal minimum intersection volumes,
 * the distribution with the minimum volume is selected.
 * <p>
 * As a side effect, {@code pastOverlap} is set: to
 * <code>Double.MAX_VALUE</code> if no split axis is given, to
 * <code>Double.NaN</code> if the overlap is not computed (data node, or
 * maximum overlap &gt;= 1), otherwise to the overlap ratio of the best split.
 *
 * @param splitAxis Split axis to be tested; <code>-1</code> means no axis
 *        is available and <code>null</code> is returned
 * @param minEntries The minimum number of entries to be tested; in case of a
 *        data node, this is the minimum leaf capacity, for directory nodes,
 *        this is either the minimum directory capacity or, if a topological
 *        split has already failed, the tree's minimum fan-out parameter.
 * @param maxEntries The maximum number of entries to be tested
 * @param revert if <code>maxEntries &lt; {@link #entries}.size()</code> and
 *        <code>revert</code> is <code>true</code>, the upper range of the
 *        sorting is tested, i.e. not
 *        <code>{minEntries, ..., maxEntries}</code> but
 *
 *        <code>{{@link #entries}.size() - maxEntries + 1, ..., {@link #entries}.size() - minEntries + 1}</code>
 * @return The distribution with the minimal intersection volume or
 *         <code>null</code>, if the minimum overlap split has a volume which
 *         is larger than the allowed <code>maxOverlap</code> ratio
 */
private SplitSorting chooseMinimumOverlapSplit(int splitAxis, int minEntries, int maxEntries, boolean revert) {
  if (splitAxis == -1) {
    pastOverlap = Double.MAX_VALUE;
    return null;
  }
  double optXVolume = Double.POSITIVE_INFINITY;
  double optVolume = Double.POSITIVE_INFINITY;
  SplitSorting optDistribution = null;
  HyperBoundingBox[] optMBRs = null;
  // generate sortings for the mbr's extrema
  int[] entrySorting = new int[node.getNumEntries()];
  for (int i = 0; i < entrySorting.length; i++) {
    entrySorting[i] = i;
  }
  int[] lbSorting = Arrays.copyOf(entrySorting, entrySorting.length);
  int[] ubSorting = Arrays.copyOf(entrySorting, entrySorting.length);
  // Sort the two arrays that are actually evaluated below. Passing
  // entrySorting twice (as before) sorted an array that is overwritten before
  // it is ever read, leaving lbSorting/ubSorting as identity permutations.
  // NOTE(review): assumes the helper sorts its 2nd argument by lower bound and
  // its 3rd by upper bound, in place - confirm against sortEntriesForDimension.
  sortEntriesForDimension(splitAxis, lbSorting, ubSorting);
  if (revert && maxEntries < node.getNumEntries() / 2) {
    // test reverted sortings
    // temp array
    int[][] reverted = new int[2][node.getNumEntries()];
    for (int i = 0; i < lbSorting.length; i++) {
      reverted[0][reverted[0].length - 1 - i] = lbSorting[i];
      reverted[1][reverted[1].length - 1 - i] = ubSorting[i];
    }
    for (int i = 0; i < lbSorting.length; i++) {
      lbSorting[i] = reverted[0][i];
      ubSorting[i] = reverted[1][i];
    }
  }
  for (int i = 0; i < 2; i++) {
    // test lower and upper bound sortings
    if (i == 0) {
      // lower-bound sorting
      entrySorting = lbSorting;
    } else {
      // upper-bound sorting
      entrySorting = ubSorting;
    }
    for (int limit = minEntries; limit <= maxEntries; limit++) {
      HyperBoundingBox mbr1 = mbr(entrySorting, 0, limit);
      HyperBoundingBox mbr2 = mbr(entrySorting, limit, entrySorting.length);
      double xVolume = SpatialUtil.overlap(mbr1, mbr2);
      if (xVolume < optXVolume) {
        optXVolume = xVolume;
        optDistribution = generateSplitSorting(entrySorting, limit);
        optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
        // summed volume is computed lazily, only if a tie needs breaking
        optVolume = Double.NaN;
      } else if (xVolume == optXVolume) {
        double vol = SpatialUtil.volume(mbr1);
        vol += SpatialUtil.volume(mbr2);
        if (Double.isNaN(optVolume)) {
          // calculate when necessary
          optVolume = SpatialUtil.volume(optMBRs[0]);
          optVolume += SpatialUtil.volume(optMBRs[1]);
        }
        if (vol < optVolume) {
          optVolume = vol;
          optDistribution = generateSplitSorting(entrySorting, limit);
          // Keep the MBRs in sync with the chosen distribution; they are used
          // by the overlap test below.
          optMBRs = new HyperBoundingBox[] { mbr1, mbr2 };
        }
      }
    }
  }
  if (node.getEntry(0) instanceof LeafEntry || tree.get_max_overlap() >= 1) {
    // overlap is not computed
    pastOverlap = Double.NaN;
    return optDistribution;
  }
  // test overlap
  switch(maxOverlapStrategy) {
    case DATA_OVERLAP:
      pastOverlap = getRatioOfDataInIntersectionVolume(generateDistribution(optDistribution), optMBRs);
      if (tree.get_max_overlap() < pastOverlap) {
        LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best data overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < node.getNumEntries() / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
        return null;
      }
      break;
    case VOLUME_OVERLAP:
      if (Double.isNaN(optVolume)) {
        optVolume = SpatialUtil.volume(optMBRs[0]);
        optVolume += SpatialUtil.volume(optMBRs[1]);
      }
      pastOverlap = optXVolume / optVolume;
      if (tree.get_max_overlap() < pastOverlap) {
        LOG.finest(String.format(Locale.ENGLISH, "No %s split found%s; best volume overlap was %.3f", (minEntries == tree.get_min_fanout() ? "minimum overlap" : "topological"), (maxEntries < node.getNumEntries() / 2 ? " in " + (revert ? "second" : "first") + " range" : ""), pastOverlap));
        return null;
      }
      break;
  }
  return optDistribution;
}
Aggregations