Search in sources :

Example 36 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

the class FileBasedSparseFloatDistanceFunctionTest method testExternalDistance.

/**
 * Runs SLINK on a four-object DBID range database using distances loaded
 * from an external sparse distance resource, cuts the dendrogram at height
 * 0.5 and verifies the resulting cluster sizes.
 *
 * @throws IOException if loading the distance resource fails
 */
@Test
public void testExternalDistance() throws IOException {
    // Object count, shared by the DBID range and the distance cache.
    final int numObjects = 4;

    Database database = new ELKIBuilder<>(StaticArrayDatabase.class)
        .with(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, DBIDRangeDatabaseConnection.class)
        .with(DBIDRangeDatabaseConnection.Parameterizer.COUNT_ID, numObjects)
        .build();
    database.initialize();

    AsciiDistanceParser parser = new AsciiDistanceParser(CSVReaderFormat.DEFAULT_FORMAT);
    FileBasedSparseFloatDistanceFunction distance = new FileBasedSparseFloatDistanceFunction(parser, null, Float.POSITIVE_INFINITY);
    // The distances are read from a resource rather than from a regular file.
    distance.loadCache(numObjects, FileUtil.openSystemFile(FILENAME));

    SLINK<DBID> linkage = new SLINK<>(distance);
    CutDendrogramByHeight extraction = new CutDendrogramByHeight(linkage, 0.5, false);
    Clustering<DendrogramModel> clustering = extraction.run(database);

    // Expect two clusters of two objects each.
    testClusterSizes(clustering, new int[] { 2, 2 });
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) Database(de.lmu.ifi.dbs.elki.database.Database) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) SLINK(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.SLINK) CutDendrogramByHeight(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByHeight) DendrogramModel(de.lmu.ifi.dbs.elki.data.model.DendrogramModel) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Example 37 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

the class SpacefillingMaterializeKNNPreprocessor method preprocess.

/**
 * Materializes approximate kNN lists using space filling curves.
 * <p>
 * All objects are placed on {@code numgen * variants} curves, each curve is
 * sorted by one of the curve generators using a randomly enlarged bounding
 * box and a random permutation of the dimensions, and for every object the
 * objects within a window around its position on each curve are collected as
 * candidates. Exact distances are then computed to the candidates only, and
 * the best {@code k} are stored as the object's kNN list.
 * <p>
 * Also records the construction time as a statistic when statistics logging
 * is enabled.
 */
@Override
protected void preprocess() {
    // Prepare space filling curve:
    final long starttime = System.currentTimeMillis();
    final int size = relation.size();
    final int numgen = curvegen.size();
    // One curve per (generator, variant) combination.
    final int numcurves = numgen * variants;
    List<List<SpatialPair<DBID, NumberVector>>> curves = new ArrayList<>(numcurves);
    for (int i = 0; i < numcurves; i++) {
        curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
    }
    // Every curve initially holds all objects; the same pair instance is
    // shared between the curves, only the order will differ after sorting.
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final NumberVector v = relation.get(iditer);
        SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), v);
        for (List<SpatialPair<DBID, NumberVector>> curve : curves) {
            curve.add(ref);
        }
    }
    // Sort spatially
    // NOTE(review): mms is presumably laid out as [min0, max0, min1, max1, ...]
    // given the stride-2 indexing below -- confirm against SpatialSorter.
    final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
    // Scratch copy of the bounds, re-randomized for each variant.
    final double[] mmscratch = new double[mms.length];
    // Half the length of mms, i.e. one entry per (min, max) pair.
    final int numdim = mms.length >>> 1;
    final int[] permutation = new int[numdim];
    for (int j = 0; j < variants; j++) {
        // Enlarge the bounds on both sides by a random fraction of the extent,
        // so that each variant sorts with a differently shifted box.
        for (int i = 0; i < mms.length; i += 2) {
            double len = mms[i + 1] - mms[i];
            mmscratch[i] = mms[i] - len * random.nextDouble();
            mmscratch[i + 1] = mms[i + 1] + len * random.nextDouble();
        }
        // Generate permutation:
        for (int i = 0; i < numdim; i++) {
            permutation[i] = i;
        }
        // Knuth / Fisher-Yates style shuffle
        for (int i = numdim - 1; i > 0; i--) {
            // Swap with a random preceding element (or with itself, as ri may equal i).
            int ri = random.nextInt(i + 1);
            int tmp = permutation[ri];
            permutation[ri] = permutation[i];
            permutation[i] = tmp;
        }
        // Each generator sorts its own curve of this variant; curves are laid
        // out variant-major, i.e. index = generator + numgen * variant.
        for (int i = 0; i < numgen; i++) {
            curvegen.get(i).sort(curves.get(i + numgen * j), 0, size, mmscratch, permutation);
        }
    }
    // Build position index, DBID -> position of the object on each curve
    WritableDataStore<int[]> positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
    for (int cnum = 0; cnum < numcurves; cnum++) {
        Iterator<SpatialPair<DBID, NumberVector>> it = curves.get(cnum).iterator();
        for (int i = 0; it.hasNext(); i++) {
            SpatialPair<DBID, NumberVector> r = it.next();
            final int[] data;
            // The per-object array is allocated while scanning the first curve
            // and looked up again for all later curves.
            if (cnum == 0) {
                data = new int[numcurves];
                positions.put(r.first, data);
            } else {
                data = positions.get(r.first);
            }
            data[cnum] = i;
        }
    }
    // Convert to final storage
    // Window half-width: candidates are taken up to wsize positions before and
    // after the object on every curve.
    final int wsize = (int) Math.ceil(window * k);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    // Candidate set, reused (cleared) for every object.
    HashSetModifiableDBIDs cands = DBIDUtil.newHashSet(2 * wsize * numcurves);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Get candidates.
        cands.clear();
        int[] posi = positions.get(iditer);
        for (int i = 0; i < posi.length; i++) {
            List<SpatialPair<DBID, NumberVector>> curve = curves.get(i);
            // Clamp the window [pos - wsize, pos + wsize] to the curve bounds;
            // end is exclusive.
            final int start = Math.max(0, posi[i] - wsize);
            final int end = Math.min(posi[i] + wsize + 1, curve.size());
            for (int pos = start; pos < end; pos++) {
                cands.add(curve.get(pos).first);
            }
        }
        // Number of distance computations performed for this object.
        int distc = 0;
        KNNHeap heap = DBIDUtil.newHeap(k);
        O vec = relation.get(iditer);
        // Refine: compute the actual distance to every candidate.
        for (DBIDIter iter = cands.iter(); iter.valid(); iter.advance()) {
            heap.insert(distanceQuery.distance(vec, iter), iter);
            distc++;
        }
        storage.put(iditer, heap.toKNNList());
        // Track the number of distance computations relative to k.
        mean.put(distc / (double) k);
    }
    final long end = System.currentTimeMillis();
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
    }
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) List(java.util.List)

Example 38 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

the class MkMaxTree method reverseKNNQuery.

/**
 * Answers a reverse k-nearest neighbor query for the given object in two
 * phases: first a candidate set is collected by a reverse kNN search on the
 * tree with k = {@link #getKmax()}; unless the requested k already equals
 * that maximum, each candidate is then refined by checking whether the query
 * object occurs in the candidate's own k nearest neighbors.
 *
 * @param id the query object
 * @param k the number of neighbors; must not exceed {@link #getKmax()}
 * @return the reverse k nearest neighbors with their distances, sorted
 * @throws IllegalArgumentException if {@code k} is larger than
 *         {@link #getKmax()}
 */
@Override
public DoubleDBIDList reverseKNNQuery(DBIDRef id, int k) {
    if (this.getKmax() < k) {
        throw new IllegalArgumentException("Parameter k has to be equal or less than " + "parameter k of the MkMax-Tree!");
    }

    // Phase 1: collect the candidates from the tree.
    ModifiableDoubleDBIDList coarse = DBIDUtil.newDistanceDBIDList();
    doReverseKNNQuery(id, getRoot(), null, coarse);
    if (this.getKmax() == k) {
        // The candidates are returned as-is, no refinement in this case.
        coarse.sort();
        return coarse;
    }

    // Phase 2: refinement. Copy the candidate IDs for the batch kNN search.
    ModifiableDBIDs coarseIDs = DBIDUtil.newArray(coarse.size());
    DBIDIter source = coarse.iter();
    while (source.valid()) {
        coarseIDs.add(source);
        source.advance();
    }
    Map<DBID, KNNList> neighborsOf = batchNN(getRoot(), coarseIDs, k);

    ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList();
    for (DBIDIter it = coarseIDs.iter(); it.valid(); it.advance()) {
        DBID candidate = DBIDUtil.deref(it);
        // Scan the candidate's kNN list for the first occurrence of the query.
        DoubleDBIDListIter neighbor = neighborsOf.get(candidate).iter();
        while (neighbor.valid() && !DBIDUtil.equal(id, neighbor)) {
            neighbor.advance();
        }
        if (neighbor.valid()) {
            // The query is among the candidate's kNN: keep it with that distance.
            refined.add(neighbor.doubleValue(), candidate);
        }
    }
    // FIXME: re-add statistics (result and candidate counts).
    refined.sort();
    return refined;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 39 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

the class MkMaxTreeIndex method insertAll.

/**
 * Inserts all objects with the given IDs: one new leaf entry is created per
 * ID, and the collected entries are then handed over to the entry-based
 * {@code insertAll} in a single call.
 *
 * @param ids the IDs of the objects to insert
 */
@Override
public void insertAll(DBIDs ids) {
    final List<MkMaxEntry> entries = new ArrayList<>(ids.size());
    DBIDIter cursor = ids.iter();
    while (cursor.valid()) {
        final DBID current = DBIDUtil.deref(cursor);
        // NOTE(review): the NaN argument is presumably a not-yet-known parent
        // distance -- confirm against createNewLeafEntry.
        entries.add(createNewLeafEntry(current, relation.get(current), Double.NaN));
        cursor.advance();
    }
    insertAll(entries);
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 40 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

the class RandomSplit method split.

/**
 * Selects two distinct entries of the specified node at random and promotes
 * their routing objects; the entries of the node are then distributed by
 * {@code balancedPartition} using these two routing objects.
 * <p>
 * No pairs are compared and no covering radii are evaluated for the
 * selection: both positions are drawn from the random generator.
 *
 * @param tree Tree to use
 * @param node the node to be split
 * @return the assignments computed by {@code balancedPartition} for the two
 *         randomly chosen routing objects
 * @throws IllegalArgumentException if the node has fewer than two entries,
 *         as two distinct entries cannot be chosen then
 */
@Override
public Assignments<E> split(AbstractMTree<O, N, E, ?> tree, N node) {
    final int n = node.getNumEntries();
    // Fail early with a meaningful message instead of an incidental failure
    // inside the random generator or the entry lookup below.
    if (n < 2) {
        throw new IllegalArgumentException("Cannot split a node with fewer than two entries, got: " + n);
    }
    int pos1 = random.nextInt(n);
    // Draw the second position from the remaining n - 1 slots and skip over
    // pos1, which guarantees pos2 != pos1 without a retry loop.
    int pos2 = random.nextInt(n - 1);
    if (pos2 >= pos1) {
        ++pos2;
    }
    DBID id1 = node.getEntry(pos1).getRoutingObjectID();
    DBID id2 = node.getEntry(pos2).getRoutingObjectID();
    return balancedPartition(tree, node, id1, id2);
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID)

Aggregations

DBID (de.lmu.ifi.dbs.elki.database.ids.DBID): 42, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 20, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 11, ArrayList (java.util.ArrayList): 10, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 9, ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 9, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 6, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 5, HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs): 5, Database (de.lmu.ifi.dbs.elki.database.Database): 4, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 4, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 4, HashMap (java.util.HashMap): 4, KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap): 3, MTreeEntry (de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry): 3, Test (org.junit.Test): 3, AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest): 2, SLINK (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.SLINK): 2, CutDendrogramByHeight (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByHeight): 2, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 2