Use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
The class FileBasedSparseDoubleDistanceFunctionTest, method testExternalDistance.
@Test
public void testExternalDistance() throws IOException {
  Database db = new ELKIBuilder<>(StaticArrayDatabase.class) //
      .with(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, DBIDRangeDatabaseConnection.class) //
      .with(DBIDRangeDatabaseConnection.Parameterizer.COUNT_ID, 4) //
      .build();
  db.initialize();
  FileBasedSparseDoubleDistanceFunction df = new FileBasedSparseDoubleDistanceFunction( //
      new AsciiDistanceParser(CSVReaderFormat.DEFAULT_FORMAT), null, Double.POSITIVE_INFINITY);
  // We need to read from a resource instead of a file.
  df.loadCache(4, FileUtil.openSystemFile(FILENAME));
  SLINK<DBID> slink = new SLINK<>(df);
  CutDendrogramByHeight clus = new CutDendrogramByHeight(slink, 0.5, false);
  Clustering<DendrogramModel> c = clus.run(db);
  testClusterSizes(c, new int[] { 2, 2 });
}
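For reference, AsciiDistanceParser reads one precomputed distance per line as id1 id2 distance triples. The FILENAME resource itself is not shown here, but a hypothetical matrix for the four points that would yield the two size-2 clusters at cut height 0.5 could look like this (all values are illustrative assumptions, not the actual test data):

0 1 0.1
0 2 0.9
0 3 0.9
1 2 0.9
1 3 0.9
2 3 0.2

Single-linkage merges (0,1) at 0.1 and (2,3) at 0.2; the next merge happens at 0.9, above the 0.5 cut.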
Use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
The class CoverTree, method bulkLoad.
/**
 * Bulk-load the index.
 *
 * @param ids IDs to load
 */
public void bulkLoad(DBIDs ids) {
  if (ids.size() == 0) {
    return;
  }
  assert (root == null) : "Tree already initialized.";
  DBIDIter it = ids.iter();
  DBID first = DBIDUtil.deref(it);
  // Compute distances from the first element to all others:
  ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(ids.size() - 1);
  for (it.advance(); it.valid(); it.advance()) {
    candidates.add(distance(first, it), it);
  }
  root = bulkConstruct(first, Integer.MAX_VALUE, 0., candidates);
}
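For context, this bulk-load is normally triggered once when the index is created; a minimal sketch of the surrounding lifecycle call, assuming the tree holds a relation field as ELKI index implementations typically do:

@Override
public void initialize() {
  // Build the tree from all object IDs in a single pass:
  bulkLoad(relation.getDBIDs());
}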
Use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
The class CoverTree, method bulkConstruct.
/**
 * Bulk-load the cover tree.
 *
 * This bulk-load is slightly simpler than the one used in the original
 * cover-tree source: we do not look back into the "far" set of candidates.
 *
 * @param cur Current routing object
 * @param maxScale Maximum scale
 * @param parentDist Distance of the routing object to its parent
 * @param elems Candidates
 * @return Root node of subtree
 */
protected Node bulkConstruct(DBIDRef cur, int maxScale, double parentDist, ModifiableDoubleDBIDList elems) {
  assert (!elems.contains(cur));
  final double max = maxDistance(elems);
  final int scale = Math.min(distToScale(max) - 1, maxScale);
  final int nextScale = scale - 1;
  // Leaf node, because points coincide, we are too deep, or have too few
  // elements remaining:
  if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
    return new Node(cur, max, parentDist, elems);
  }
  // Find neighbors in the cover of the current object:
  ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
  excludeNotCovered(elems, scaleToDist(scale), candidates);
  // If all elements are within the cover radius, build a compact tree:
  if (candidates.size() == 0) {
    LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
    return bulkConstruct(cur, nextScale, parentDist, elems);
  }
  // We will have at least one other child, so build the parent:
  Node node = new Node(cur, max, parentDist);
  // If no elements remain in the cover, the routing object is a singleton:
  final boolean curSingleton = elems.size() == 0;
  if (!curSingleton) {
    // Add a node for the routing object:
    node.children.add(bulkConstruct(cur, nextScale, 0, elems));
  }
  final double fmax = scaleToDist(nextScale);
  // Build additional cover nodes. Note: no advance here; removeSwap(0)
  // below replaces the head element and drives the iteration.
  for (DoubleDBIDListIter it = candidates.iter(); it.valid();) {
    assert (it.getOffset() == 0);
    DBID t = DBIDUtil.deref(it);
    elems.clear(); // Recycle the list.
    collectByCover(it, candidates, fmax, elems);
    assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
    if (elems.size() == 0) {
      // Singleton, no need for a full subtree:
      node.singletons.add(it.doubleValue(), it);
    } else {
      // Build a full child node:
      node.children.add(bulkConstruct(it, nextScale, it.doubleValue(), elems));
    }
    candidates.removeSwap(0);
  }
  assert (candidates.size() == 0);
  // The routing object itself is not yet handled:
  if (curSingleton) {
    if (node.isLeaf()) {
      // First element in a leaf is enough.
      node.children = null;
    } else {
      // Add as a regular singleton.
      node.singletons.add(parentDist, cur);
    }
  }
  // TODO: improve recycling of lists?
  return node;
}
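The scale arithmetic above assumes cover radii shrink geometrically with the level: scale s corresponds to a radius of roughly base^s. A sketch of the two conversion helpers this method relies on, assuming the expansion base is stored as logBase = Math.log(base) (the field name is an assumption):

// Radius covered at scale s: base^s
double scaleToDist(int s) {
  return Math.exp(s * logBase);
}

// Smallest scale whose radius covers distance d: ceil(log_base(d))
int distToScale(double d) {
  return (int) Math.ceil(Math.log(d) / logBase);
}

This is why nextScale = scale - 1 shrinks the cover radius by a constant factor at every recursion level.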
Use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
The class PartialVAFile, method initialize.
@Override
public void initialize() throws IllegalStateException {
  if (splitPartitions != null) {
    throw new IllegalStateException("Data already inserted.");
  }
  if (MathUtil.log2(partitions) != (int) MathUtil.log2(partitions)) {
    throw new IllegalArgumentException("Number of partitions must be a power of 2!");
  }
  final int dimensions = RelationUtil.dimensionality(relation);
  // Build one dimension-approximation file per dimension:
  splitPartitions = new double[dimensions][];
  daFiles = new ArrayList<>(dimensions);
  for (int d = 0; d < dimensions; d++) {
    final DAFile f = new DAFile(relation, d, partitions);
    splitPartitions[d] = f.getSplitPositions();
    daFiles.add(f);
  }
  // Quantize every vector into its full approximation:
  vectorApprox = new ArrayList<>();
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    DBID id = DBIDUtil.deref(iter);
    V dv = relation.get(id);
    VectorApproximation va = calculateFullApproximation(id, dv);
    vectorApprox.add(va);
  }
}
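calculateFullApproximation quantizes each dimension of a vector into one of the partitions cells delimited by the split positions computed above. A simplified sketch of that quantization (an assumption for illustration; the real method may use binary search and handle boundary values differently):

VectorApproximation calculateFullApproximation(DBID id, V dv) {
  final int dims = splitPartitions.length;
  int[] approximation = new int[dims];
  for (int d = 0; d < dims; d++) {
    final double val = dv.doubleValue(d);
    final double[] split = splitPartitions[d];
    // Largest cell index i with split[i] <= val, capped at the last cell:
    int i = 0;
    while (i < split.length - 2 && val >= split[i + 1]) {
      i++;
    }
    approximation[d] = i;
  }
  return new VectorApproximation(id, approximation);
}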
Use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
The class IndexPurity, method processNewResult.
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
  Database database = ResultUtil.findDatabase(hier);
  final ArrayList<SpatialIndexTree<?, ?>> indexes = ResultUtil.filterResults(hier, newResult, SpatialIndexTree.class);
  if (indexes == null || indexes.isEmpty()) {
    return;
  }
  Relation<String> lblrel = DatabaseUtil.guessLabelRepresentation(database);
  for (SpatialIndexTree<?, ?> index : indexes) {
    List<? extends SpatialEntry> leaves = index.getLeaves();
    MeanVariance mv = new MeanVariance();
    for (SpatialEntry e : leaves) {
      SpatialDirectoryEntry leaf = (SpatialDirectoryEntry) e;
      Node<?> n = index.getNode(leaf.getPageID());
      final int total = n.getNumEntries();
      // Count the occurrences of each label in this leaf:
      HashMap<String, Integer> map = new HashMap<>(total);
      for (int i = 0; i < total; i++) {
        DBID id = ((SpatialPointLeafEntry) n.getEntry(i)).getDBID();
        String label = lblrel.get(id);
        Integer val = map.get(label);
        map.put(label, val == null ? 1 : val + 1);
      }
      // Sum of squared label fractions (1.0 = pure leaf):
      double gini = 0.0;
      for (Entry<String, Integer> ent : map.entrySet()) {
        double rel = ent.getValue() / (double) total;
        gini += rel * rel;
      }
      mv.put(gini);
    }
    Collection<double[]> col = new ArrayList<>();
    col.add(new double[] { mv.getMean(), mv.getSampleStddev() });
    database.getHierarchy().add((Result) index, new CollectionResult<>("Gini coefficient of index", "index-gini", col));
  }
}
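As a quick worked example of the per-leaf statistic: a leaf containing labels {A, A, A, B} has fractions 0.75 and 0.25, so it scores 0.75^2 + 0.25^2 = 0.625, while a pure leaf scores 1.0 and a leaf split evenly over k labels scores 1/k. Note that despite the "Gini coefficient" result label, this is the sum of squared class fractions (a Simpson-style purity), i.e. one minus the Gini impurity.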