Search in sources :

Example 6 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class InMemoryInvertedIndex method naiveQuerySparse.

/**
 * Sparse variant of the similarity query: visits only the entries stored in
 * the query vector and accumulates partial scores from the matching index
 * columns.
 *
 * @param obj Query object
 * @param scores Score storage; incremented for every index entry that shares
 *        a dimension with the query
 * @param cands Set collecting every object whose score was incremented
 * @return Euclidean length of the query vector (square root of the sum of its
 *         squared values), for final normalization
 */
private double naiveQuerySparse(SparseNumberVector obj, WritableDoubleDataStore scores, HashSetModifiableDBIDs cands) {
    // Sum of squared query values; the root of it is the query length.
    double sqsum = 0.;
    for (int pos = obj.iter(); obj.iterValid(pos); pos = obj.iterAdvance(pos)) {
        final int d = obj.iterDim(pos);
        final double v = obj.iterDoubleValue(pos);
        // Zero and NaN values contribute neither to the length nor to scores.
        if (v != 0. && !Double.isNaN(v)) {
            sqsum += v * v;
            // Dimensions beyond the index have no matching documents.
            if (d < index.size()) {
                final ModifiableDoubleDBIDList postings = index.get(d);
                DoubleDBIDListIter entry = postings.iter();
                while (entry.valid()) {
                    scores.increment(entry, entry.doubleValue() * v);
                    cands.add(entry);
                    entry.advance();
                }
            }
        }
    }
    return FastMath.sqrt(sqsum);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Example 7 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class InMemoryInvertedIndex method naiveQueryDense.

/**
 * Dense variant of the similarity query: visits every dimension of the query
 * vector and accumulates partial scores from the matching index columns.
 *
 * @param obj Query object
 * @param scores Score storage; incremented for every index entry that shares
 *        a dimension with the query
 * @param cands Set collecting every object whose score was incremented
 * @return Euclidean length of the query vector (square root of the sum of its
 *         squared values), for final normalization
 */
private double naiveQueryDense(NumberVector obj, WritableDoubleDataStore scores, HashSetModifiableDBIDs cands) {
    // Sum of squared query values; the root of it is the query length.
    double sqsum = 0.;
    final int dims = obj.getDimensionality();
    for (int d = 0; d < dims; d++) {
        final double v = obj.doubleValue(d);
        // Zero and NaN values contribute neither to the length nor to scores.
        if (v != 0. && !Double.isNaN(v)) {
            sqsum += v * v;
            // Dimensions beyond the index have no matching documents.
            if (d < index.size()) {
                final ModifiableDoubleDBIDList postings = index.get(d);
                DoubleDBIDListIter entry = postings.iter();
                while (entry.valid()) {
                    scores.increment(entry, entry.doubleValue() * v);
                    cands.add(entry);
                    entry.advance();
                }
            }
        }
    }
    return FastMath.sqrt(sqsum);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Example 8 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class InMemoryInvertedIndex method logStatistics.

/**
 * Log the sparsity of the index: the total number of stored entries divided
 * by (number of columns * number of objects in the relation).
 */
@Override
public void logStatistics() {
    // Total number of entries over all columns.
    long entries = 0L;
    for (ModifiableDoubleDBIDList col : index) {
        entries += col.size();
    }
    // Number of cells a fully dense index would hold.
    final double cells = index.size() * (double) relation.size();
    final String key = this.getClass().getName() + ".sparsity";
    LOG.statistics(new DoubleStatistic(key, entries / cells));
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Example 9 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class CoverTree method bulkLoad.

/**
 * Bulk-load the index.
 *
 * The first ID becomes the root routing object; all remaining IDs are handed
 * to {@code bulkConstruct} together with their distance to it. An empty ID
 * set leaves the tree untouched.
 *
 * @param ids IDs to load
 */
public void bulkLoad(DBIDs ids) {
    final int total = ids.size();
    if (total == 0) {
        return;
    }
    assert (root == null) : "Tree already initialized.";
    DBIDIter iter = ids.iter();
    // The first object serves as routing object of the root.
    DBID pivot = DBIDUtil.deref(iter);
    // Distances from the pivot to every other object:
    ModifiableDoubleDBIDList rest = DBIDUtil.newDistanceDBIDList(total - 1);
    iter.advance();
    while (iter.valid()) {
        rest.add(distance(pivot, iter), iter);
        iter.advance();
    }
    root = bulkConstruct(pivot, Integer.MAX_VALUE, 0., rest);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 10 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class CoverTree method bulkConstruct.

/**
 * Bulk-load the cover tree.
 *
 * This bulk-load is slightly simpler than the one used in the original
 * cover-tree source: We do not look back into the "far" set of candidates.
 *
 * Note that {@code elems} is modified by this method: it is passed to
 * {@code excludeNotCovered}, and later cleared and reused as the buffer for
 * every additional child.
 *
 * @param cur Current routing object
 * @param maxScale Maximum scale
 * @param parentDist Distance value stored in the created node; presumably the
 *        distance of {@code cur} to the parent routing object (TODO confirm
 *        against the Node constructor)
 * @param elems Candidates; must not contain {@code cur}
 * @return Root node of subtree
 */
protected Node bulkConstruct(DBIDRef cur, int maxScale, double parentDist, ModifiableDoubleDBIDList elems) {
    // The routing object must not be among its own candidates.
    assert (!elems.contains(cur));
    final double max = maxDistance(elems);
    // Scale derived from the largest distance, capped by the allowed maximum.
    final int scale = Math.min(distToScale(max) - 1, maxScale);
    final int nextScale = scale - 1;
    // Stop splitting: no positive distance, bottom scale reached, or fewer
    // than "truncate" elements remaining. Build the node directly from elems.
    if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
        return new Node(cur, max, parentDist, elems);
    }
    // Find neighbors in the cover of the current object:
    // (presumably moves the elements farther away than scaleToDist(scale) from
    // elems into candidates - confirm against excludeNotCovered)
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
    excludeNotCovered(elems, scaleToDist(scale), candidates);
    // Nothing ended up in candidates: retry at the next smaller scale instead
    // of adding a tree level here.
    if (candidates.size() == 0) {
        LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
        return bulkConstruct(cur, nextScale, parentDist, elems);
    }
    // We will have at least one other child, so build the parent:
    Node node = new Node(cur, max, parentDist);
    // Routing element now is a singleton if no elements remain in elems:
    final boolean curSingleton = elems.size() == 0;
    if (!curSingleton) {
        // Add node for the routing object (distance 0 to this node):
        node.children.add(bulkConstruct(cur, nextScale, 0, elems));
    }
    final double fmax = scaleToDist(nextScale);
    // Build additional cover nodes:
    // The iterator is never advanced. Each pass handles the entry at offset 0,
    // which is removed by removeSwap(0) at the end of the pass; the loop ends
    // once candidates is empty.
    for (DoubleDBIDListIter it = candidates.iter(); it.valid(); ) {
        assert (it.getOffset() == 0);
        DBID t = DBIDUtil.deref(it);
        // Recycle elems as the buffer handed to collectByCover.
        elems.clear();
        collectByCover(it, candidates, fmax, elems);
        assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
        if (elems.size() == 0) {
            // Singleton
            node.singletons.add(it.doubleValue(), it);
        } else {
            // Build a full child node:
            node.children.add(bulkConstruct(it, nextScale, it.doubleValue(), elems));
        }
        // Drop the entry that was just processed.
        candidates.removeSwap(0);
    }
    assert (candidates.size() == 0);
    // Routing object is not yet handled:
    if (curSingleton) {
        if (node.isLeaf()) {
            // First in leaf is enough.
            node.children = null;
        } else {
            // Add as regular singleton.
            node.singletons.add(parentDist, cur);
        }
    }
    // TODO: improve recycling of lists?
    return node;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID)

Aggregations

ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)53 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)16 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)9 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 ArrayList (java.util.ArrayList)5 Test (org.junit.Test)5 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 DoubleDBIDListMIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter)2