use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
the class FileBasedSparseFloatDistanceFunctionTest method testExternalDistance.
/**
 * Runs SLINK on a database built from a DBID range of count 4, using
 * distances loaded from the external sparse distance resource
 * {@code FILENAME}, cuts the dendrogram at height 0.5 and checks that the
 * resulting cluster sizes are {2, 2}.
 *
 * @throws IOException if the distance resource cannot be opened or read
 */
@Test
public void testExternalDistance() throws IOException {
  Database db = new ELKIBuilder<>(StaticArrayDatabase.class) //
      .with(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, DBIDRangeDatabaseConnection.class) //
      .with(DBIDRangeDatabaseConnection.Parameterizer.COUNT_ID, 4) //
      .build();
  db.initialize();
  FileBasedSparseFloatDistanceFunction df = new FileBasedSparseFloatDistanceFunction( //
      new AsciiDistanceParser(CSVReaderFormat.DEFAULT_FORMAT), null, Float.POSITIVE_INFINITY);
  // We need to read from a resource, instead of a file.
  // The stream is opened here, so it is also released here; this guarantees it
  // is closed even when loading the cache throws.
  try (java.io.InputStream in = FileUtil.openSystemFile(FILENAME)) {
    df.loadCache(4, in);
  }
  SLINK<DBID> slink = new SLINK<>(df);
  CutDendrogramByHeight clus = new CutDendrogramByHeight(slink, 0.5, false);
  Clustering<DendrogramModel> c = clus.run(db);
  testClusterSizes(c, new int[] { 2, 2 });
}
use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
the class SpacefillingMaterializeKNNPreprocessor method preprocess.
/**
 * Materializes kNN lists for all objects of the relation using space-filling
 * curves.
 *
 * Steps, as performed below:
 * 1. Build {@code curvegen.size() * variants} lists that each contain every
 * object of the relation.
 * 2. Sort each list with one of the curve generators, using a randomly
 * enlarged bounding box and a random dimension permutation per variant.
 * 3. Record, for every object, its index in every sorted list.
 * 4. For every object, collect the objects within {@code ceil(window * k)}
 * positions on each curve as candidates, compute the distance to each
 * candidate only, and store the resulting kNN list.
 *
 * Because only curve-neighborhood candidates are evaluated, the stored lists
 * are built from a subset of the data set. Construction time is logged when
 * statistics logging is enabled.
 */
@Override
protected void preprocess() {
// Prepare space filling curve:
// Start time is kept for the construction-time statistic logged at the end.
final long starttime = System.currentTimeMillis();
final int size = relation.size();
final int numgen = curvegen.size();
// One curve per (generator, variant) combination.
final int numcurves = numgen * variants;
List<List<SpatialPair<DBID, NumberVector>>> curves = new ArrayList<>(numcurves);
for (int i = 0; i < numcurves; i++) {
curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
}
// Fill every curve with the same (DBID, vector) pair objects; the pair
// instance is shared between all lists, only the list order will differ.
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final NumberVector v = relation.get(iditer);
SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), v);
for (List<SpatialPair<DBID, NumberVector>> curve : curves) {
curve.add(ref);
}
}
// Sort spatially
// All lists hold the same elements at this point, so the first one is
// sufficient for computing the bounds.
// NOTE(review): mms is used below as consecutive (min, max) pairs per
// dimension (even index = min, odd index = max) - confirm against
// SpatialSorter.computeMinMax.
final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
final double[] mmscratch = new double[mms.length];
final int numdim = mms.length >>> 1;
final int[] permutation = new int[numdim];
for (int j = 0; j < variants; j++) {
// Randomly enlarge the bounds: each side is moved outwards by a random
// fraction in [0, 1) of the extent in that dimension.
for (int i = 0; i < mms.length; i += 2) {
double len = mms[i + 1] - mms[i];
mmscratch[i] = mms[i] - len * random.nextDouble();
mmscratch[i + 1] = mms[i + 1] + len * random.nextDouble();
}
// Generate permutation:
// Reset to the identity before shuffling, so every variant starts equal.
for (int i = 0; i < numdim; i++) {
permutation[i] = i;
}
// Knuth / Fisher-Yates style shuffle
for (int i = numdim - 1; i > 0; i--) {
// Swap with a random preceding element (or with itself, as ri may equal i).
int ri = random.nextInt(i + 1);
int tmp = permutation[ri];
permutation[ri] = permutation[i];
permutation[i] = tmp;
}
// Curve index layout: generator i of variant j is stored at i + numgen * j.
for (int i = 0; i < numgen; i++) {
curvegen.get(i).sort(curves.get(i + numgen * j), 0, size, mmscratch, permutation);
}
}
// Build position index: DBID -> position of the object in each of the
// numcurves curves (one int per curve).
WritableDataStore<int[]> positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
for (int cnum = 0; cnum < numcurves; cnum++) {
Iterator<SpatialPair<DBID, NumberVector>> it = curves.get(cnum).iterator();
// i counts the position within the current curve while iterating.
for (int i = 0; it.hasNext(); i++) {
SpatialPair<DBID, NumberVector> r = it.next();
final int[] data;
// The per-object array is allocated while processing the first curve and
// reused for all later curves.
if (cnum == 0) {
data = new int[numcurves];
positions.put(r.first, data);
} else {
data = positions.get(r.first);
}
data[cnum] = i;
}
}
// Convert to final storage
// Half-width of the candidate window on each curve.
final int wsize = (int) Math.ceil(window * k);
storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
// Reused candidate set; sized for at most 2 * wsize entries per curve.
HashSetModifiableDBIDs cands = DBIDUtil.newHashSet(2 * wsize * numcurves);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// Get candidates.
cands.clear();
int[] posi = positions.get(iditer);
for (int i = 0; i < posi.length; i++) {
List<SpatialPair<DBID, NumberVector>> curve = curves.get(i);
// Window [posi[i] - wsize, posi[i] + wsize], clamped to the curve bounds.
// It includes the object's own position.
final int start = Math.max(0, posi[i] - wsize);
final int end = Math.min(posi[i] + wsize + 1, curve.size());
for (int pos = start; pos < end; pos++) {
cands.add(curve.get(pos).first);
}
}
// Refine: compute distances to the (deduplicated) candidates only, and
// count the number of distance computations.
int distc = 0;
KNNHeap heap = DBIDUtil.newHeap(k);
O vec = relation.get(iditer);
for (DBIDIter iter = cands.iter(); iter.valid(); iter.advance()) {
heap.insert(distanceQuery.distance(vec, iter), iter);
distc++;
}
storage.put(iditer, heap.toKNNList());
// Record the number of distance computations relative to k.
mean.put(distc / (double) k);
}
final long end = System.currentTimeMillis();
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
}
}
use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
the class MkMaxTree method reverseKNNQuery.
/**
 * Answers a reverse k-nearest neighbor query for the given object ID in two
 * phases: first, candidates are collected by a reverse kNN search over the
 * tree (corresponding to k = {@link #getKmax()}); second, unless k equals
 * {@link #getKmax()}, each candidate is kept only if the query object occurs
 * in the candidate's own k-nearest neighbor list.
 *
 * @param id the query object
 * @param k the number of neighbors; must not exceed {@link #getKmax()}
 * @return the reverse k-nearest neighbors, sorted
 * @throws IllegalArgumentException if k is larger than {@link #getKmax()}
 */
@Override
public DoubleDBIDList reverseKNNQuery(DBIDRef id, int k) {
  if (this.getKmax() < k) {
    throw new IllegalArgumentException("Parameter k has to be equal or less than " + "parameter k of the MkMax-Tree!");
  }

  // Phase 1: candidate collection, starting at the root.
  final ModifiableDoubleDBIDList found = DBIDUtil.newDistanceDBIDList();
  doReverseKNNQuery(id, getRoot(), null, found);

  // For k == kmax, the candidates are already the answer.
  if (this.getKmax() == k) {
    found.sort();
    return found;
  }

  // Phase 2: refinement. Copy the candidate IDs for the batch kNN lookup.
  final ModifiableDBIDs refineIDs = DBIDUtil.newArray(found.size());
  DBIDIter source = found.iter();
  while (source.valid()) {
    refineIDs.add(source);
    source.advance();
  }
  final Map<DBID, KNNList> neighborhoods = batchNN(getRoot(), refineIDs, k);

  final ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList();
  DBIDIter walker = refineIDs.iter();
  while (walker.valid()) {
    final DBID candidate = DBIDUtil.deref(walker);
    // Scan the candidate's kNN list up to the first occurrence of the query.
    DoubleDBIDListIter neighbor = neighborhoods.get(candidate).iter();
    while (neighbor.valid() && !DBIDUtil.equal(id, neighbor)) {
      neighbor.advance();
    }
    if (neighbor.valid()) {
      // The query is among the candidate's k nearest neighbors: keep it,
      // together with the distance stored in the kNN list.
      refined.add(neighbor.doubleValue(), candidate);
    }
    walker.advance();
  }
  // FIXME: re-add statistics.
  refined.sort();
  return refined;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
the class MkMaxTreeIndex method insertAll.
/**
 * Creates one new leaf entry per given ID (looking up the object in the
 * relation, with {@code Double.NaN} passed as the third argument) and hands
 * the complete list to the list-based {@code insertAll}.
 *
 * @param ids the IDs of the objects to insert
 */
@Override
public void insertAll(DBIDs ids) {
  final List<MkMaxEntry> leafEntries = new ArrayList<>(ids.size());
  DBIDIter cursor = ids.iter();
  while (cursor.valid()) {
    // Materialize the ID, as it is used both for the lookup and for the entry.
    final DBID current = DBIDUtil.deref(cursor);
    leafEntries.add(createNewLeafEntry(current, relation.get(current), Double.NaN));
    cursor.advance();
  }
  insertAll(leafEntries);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.
the class RandomSplit method split.
/**
 * Selects two distinct entries of the specified node at random and uses their
 * routing objects as the two objects to be promoted. The entries are then
 * distributed by {@code balancedPartition}.
 *
 * @param tree Tree to use
 * @param node the node to be split
 * @return the assignments returned by {@code balancedPartition} for the two
 *         randomly chosen routing objects
 * @throws IllegalArgumentException if the node has fewer than two entries
 */
@Override
public Assignments<E> split(AbstractMTree<O, N, E, ?> tree, N node) {
  final int n = node.getNumEntries();
  if (n < 2) {
    // Two distinct positions are required; fail with a descriptive message
    // instead of the generic bound error raised by the random generator.
    throw new IllegalArgumentException("Cannot split a node with fewer than two entries: " + n);
  }
  final int pos1 = random.nextInt(n);
  // Draw the second position from the remaining n - 1 slots, then shift it
  // past pos1 if necessary, so that pos2 != pos1 without rejection sampling.
  int pos2 = random.nextInt(n - 1);
  if (pos2 >= pos1) {
    ++pos2;
  }
  final DBID id1 = node.getEntry(pos1).getRoutingObjectID();
  final DBID id2 = node.getEntry(pos2).getRoutingObjectID();
  return balancedPartition(tree, node, id1, id2);
}
Aggregations