Search in sources :

Example 16 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

The method testExternalDistance of the class FileBasedSparseDoubleDistanceFunctionTest.

/**
 * Loads a sparse distance matrix from a bundled resource, runs SLINK on a
 * database consisting only of a DBID range, and checks that cutting the
 * dendrogram at height 0.5 yields two clusters of two objects each.
 */
@Test
public void testExternalDistance() throws IOException {
    // The same object count is used for the DBID range and for the cache.
    final int numObjects = 4;
    Database database = new ELKIBuilder<>(StaticArrayDatabase.class) //
        .with(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, DBIDRangeDatabaseConnection.class) //
        .with(DBIDRangeDatabaseConnection.Parameterizer.COUNT_ID, numObjects) //
        .build();
    database.initialize();

    AsciiDistanceParser parser = new AsciiDistanceParser(CSVReaderFormat.DEFAULT_FORMAT);
    FileBasedSparseDoubleDistanceFunction distance = new FileBasedSparseDoubleDistanceFunction(parser, null, Double.POSITIVE_INFINITY);
    // The distances come from a classpath resource rather than a regular
    // file, so the cache is filled explicitly here.
    distance.loadCache(numObjects, FileUtil.openSystemFile(FILENAME));

    SLINK<DBID> linkage = new SLINK<>(distance);
    CutDendrogramByHeight extraction = new CutDendrogramByHeight(linkage, 0.5, false);
    Clustering<DendrogramModel> result = extraction.run(database);
    testClusterSizes(result, new int[] { 2, 2 });
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) Database(de.lmu.ifi.dbs.elki.database.Database) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) SLINK(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.SLINK) CutDendrogramByHeight(de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByHeight) DendrogramModel(de.lmu.ifi.dbs.elki.data.model.DendrogramModel) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Example 17 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

The method bulkLoad of the class CoverTree.

/**
 * Build the tree for the given objects in a single pass.
 *
 * The first object of {@code ids} is used as the routing object of the root;
 * all remaining objects are handed to {@link #bulkConstruct} together with
 * their distance to that first object. Nothing happens for an empty input.
 *
 * @param ids IDs to load
 */
public void bulkLoad(DBIDs ids) {
    final int count = ids.size();
    if (count == 0) {
        return;
    }
    assert (root == null) : "Tree already initialized.";
    DBIDIter iter = ids.iter();
    DBID pivot = DBIDUtil.deref(iter);
    // Every object except the pivot, paired with its distance to the pivot.
    ModifiableDoubleDBIDList others = DBIDUtil.newDistanceDBIDList(count - 1);
    iter.advance();
    while (iter.valid()) {
        others.add(distance(pivot, iter), iter);
        iter.advance();
    }
    root = bulkConstruct(pivot, Integer.MAX_VALUE, 0., others);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 18 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

The method bulkConstruct of the class CoverTree.

/**
 * Bulk-load the cover tree.
 *
 * This bulk-load is slightly simpler than the one used in the original
 * cover-tree source: We do not look back into the "far" set of candidates.
 *
 * Note that {@code elems} is modified by this method: it is handed to the
 * recursive calls and is cleared and reused as a scratch list while the
 * additional cover nodes are built. Callers must not rely on its contents
 * after the call.
 *
 * @param cur Current routing object
 * @param maxScale Maximum scale
 * @param parentDist Distance value stored in the created node for
 *        {@code cur}; the calls visible here pass 0 when the routing object
 *        is kept, and the candidate's stored distance otherwise (presumably
 *        the distance to the parent routing object - confirm against Node)
 * @param elems Candidates
 * @return Root node of subtree
 */
protected Node bulkConstruct(DBIDRef cur, int maxScale, double parentDist, ModifiableDoubleDBIDList elems) {
    assert (!elems.contains(cur));
    final double max = maxDistance(elems);
    // Scale derived from the largest distance, but never above maxScale.
    final int scale = Math.min(distToScale(max) - 1, maxScale);
    final int nextScale = scale - 1;
    // Stop recursing and put all remaining elements directly into one node
    // when all distances are zero, the bottom scale is reached, or there are
    // fewer elements left than the truncation threshold:
    if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
        return new Node(cur, max, parentDist, elems);
    }
    // Find neighbors in the cover of the current object:
    // (presumably moves the elements farther than scaleToDist(scale) from
    // elems into candidates - confirm against excludeNotCovered)
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
    excludeNotCovered(elems, scaleToDist(scale), candidates);
    // If candidates is empty (every element is within the cover), do not
    // create a node at this scale; retry at the next smaller scale instead:
    if (candidates.size() == 0) {
        LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
        return bulkConstruct(cur, nextScale, parentDist, elems);
    }
    // We will have at least one other child, so build the parent:
    Node node = new Node(cur, max, parentDist);
    // If nothing is left in elems, the routing object has no covered
    // elements of its own and is handled after the loop below:
    final boolean curSingleton = elems.size() == 0;
    if (!curSingleton) {
        // Add node for the routing object:
        node.children.add(bulkConstruct(cur, nextScale, 0, elems));
    }
    final double fmax = scaleToDist(nextScale);
    // Build additional cover nodes:
    // The iterator is never advanced; each pass handles the element at
    // offset 0, which is removed by removeSwap(0) at the end of the pass.
    for (DoubleDBIDListIter it = candidates.iter(); it.valid(); ) {
        assert (it.getOffset() == 0);
        DBID t = DBIDUtil.deref(it);
        // Recycle.
        // elems is reused as the output list for the elements collected for
        // the current first candidate.
        elems.clear();
        collectByCover(it, candidates, fmax, elems);
        assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
        if (elems.size() == 0) {
            // Singleton
            node.singletons.add(it.doubleValue(), it);
        } else {
            // Build a full child node:
            node.children.add(bulkConstruct(it, nextScale, it.doubleValue(), elems));
        }
        candidates.removeSwap(0);
    }
    assert (candidates.size() == 0);
    // Routing object is not yet handled:
    if (curSingleton) {
        if (node.isLeaf()) {
            // First in leaf is enough.
            node.children = null;
        } else {
            // Add as regular singleton.
            node.singletons.add(parentDist, cur);
        }
    }
    // TODO: improve recycling of lists?
    return node;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID)

Example 19 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

The method initialize of the class PartialVAFile.

/**
 * Build the per-dimension split positions and the approximation of every
 * vector in the relation.
 *
 * @throws IllegalStateException if the index was already initialized
 * @throws IllegalArgumentException if the number of partitions is not a
 *         power of 2
 */
@Override
public void initialize() throws IllegalStateException {
    if (splitPartitions != null) {
        throw new IllegalStateException("Data already inserted.");
    }
    // Exact integer test for "power of two": a positive value with a single
    // bit set. Comparing a floating-point log2 with its truncation can reject
    // valid powers of two because of rounding in the logarithm.
    if (partitions <= 0 || (partitions & (partitions - 1)) != 0) {
        throw new IllegalArgumentException("Number of partitions must be a power of 2!");
    }
    final int dimensions = RelationUtil.dimensionality(relation);
    splitPartitions = new double[dimensions][];
    daFiles = new ArrayList<>(dimensions);
    // One DAFile per dimension; its split positions are also kept locally.
    for (int d = 0; d < dimensions; d++) {
        final DAFile f = new DAFile(relation, d, partitions);
        splitPartitions[d] = f.getSplitPositions();
        daFiles.add(f);
    }
    // Compute the full approximation of every object in the relation.
    vectorApprox = new ArrayList<>();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        DBID id = DBIDUtil.deref(iter);
        V dv = relation.get(id);
        VectorApproximation va = calculateFullApproximation(id, dv);
        vectorApprox.add(va);
    }
}
Also used : DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 20 with DBID

use of de.lmu.ifi.dbs.elki.database.ids.DBID in project elki by elki-project.

The method processNewResult of the class IndexPurity.

/**
 * For every spatial index tree found in the new result, compute a label
 * purity value per leaf page (the sum of the squared relative label
 * frequencies of the entries on that page) and attach the mean and sample
 * standard deviation over all leaf pages to the index as a new result.
 *
 * @param hier Result hierarchy
 * @param newResult Newly added result to scan for index trees
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
    Database database = ResultUtil.findDatabase(hier);
    final ArrayList<SpatialIndexTree<?, ?>> trees = ResultUtil.filterResults(hier, newResult, SpatialIndexTree.class);
    if (trees == null || trees.isEmpty()) {
        return;
    }
    Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    for (SpatialIndexTree<?, ?> tree : trees) {
        List<? extends SpatialEntry> leafEntries = tree.getLeaves();
        MeanVariance stats = new MeanVariance();
        for (SpatialEntry entry : leafEntries) {
            Node<?> page = tree.getNode(((SpatialDirectoryEntry) entry).getPageID());
            final int size = page.getNumEntries();
            // Count how often each label occurs on this leaf page.
            HashMap<String, Integer> counts = new HashMap<>(size);
            int pos = 0;
            while (pos < size) {
                DBID id = ((SpatialPointLeafEntry) page.getEntry(pos)).getDBID();
                String label = labels.get(id);
                Integer seen = counts.get(label);
                counts.put(label, seen == null ? 1 : seen + 1);
                pos++;
            }
            // Sum of squared relative frequencies of the labels.
            double purity = 0.0;
            for (Integer count : counts.values()) {
                double share = count / (double) size;
                purity += share * share;
            }
            stats.put(purity);
        }
        Collection<double[]> summary = new ArrayList<>();
        summary.add(new double[] { stats.getMean(), stats.getSampleStddev() });
        database.getHierarchy().add((Result) tree, new CollectionResult<>("Gini coefficient of index", "index-gini", summary));
    }
}
Also used : SpatialPointLeafEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry) HashMap(java.util.HashMap) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) SpatialIndexTree(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) SpatialDirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry) Database(de.lmu.ifi.dbs.elki.database.Database)

Aggregations

DBID (de.lmu.ifi.dbs.elki.database.ids.DBID): 42, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 20, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 11, ArrayList (java.util.ArrayList): 10, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 9, ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 9, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 6, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 5, HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs): 5, Database (de.lmu.ifi.dbs.elki.database.Database): 4, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 4, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 4, HashMap (java.util.HashMap): 4, KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap): 3, MTreeEntry (de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry): 3, Test (org.junit.Test): 3, AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest): 2, SLINK (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.SLINK): 2, CutDendrogramByHeight (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByHeight): 2, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 2