use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
the class InMemoryInvertedIndex method naiveQuerySparse.
/**
 * Accumulate similarity scores for a sparse query vector.
 *
 * Only the components stored in the sparse vector are visited. For each
 * component that is neither zero nor NaN, the index column of that dimension
 * (if the index has one) is scanned: every entry adds
 * {@code entryValue * queryValue} to the score of its object, and the object
 * is added to the candidate set.
 *
 * @param obj Query object
 * @param scores Score storage, incremented in place
 * @param cands Set receiving every object found in a visited column
 * @return Euclidean length of the query vector (zero and NaN components are
 *         ignored), for final normalization
 */
private double naiveQuerySparse(SparseNumberVector obj, WritableDoubleDataStore scores, HashSetModifiableDBIDs cands) {
  // Sum of squared query components; its square root is returned.
  double sqsum = 0.;
  for (int pos = obj.iter(); obj.iterValid(pos); pos = obj.iterAdvance(pos)) {
    final int d = obj.iterDim(pos);
    final double q = obj.iterDoubleValue(pos);
    // Zero and NaN components contribute nothing.
    if (Double.isNaN(q) || q == 0.) {
      continue;
    }
    sqsum += q * q;
    // Dimensions beyond the index have no matching documents.
    if (d < index.size()) {
      final DoubleDBIDListIter entry = index.get(d).iter();
      while (entry.valid()) {
        scores.increment(entry, entry.doubleValue() * q);
        cands.add(entry);
        entry.advance();
      }
    }
  }
  return FastMath.sqrt(sqsum);
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
the class InMemoryInvertedIndex method naiveQueryDense.
/**
 * Accumulate similarity scores for a dense query vector.
 *
 * Every dimension of the query is visited. For each component that is
 * neither zero nor NaN, the index column of that dimension (if the index has
 * one) is scanned: every entry adds {@code entryValue * queryValue} to the
 * score of its object, and the object is added to the candidate set.
 *
 * @param obj Query object
 * @param scores Score storage, incremented in place
 * @param cands Set receiving every object found in a visited column
 * @return Euclidean length of the query vector (zero and NaN components are
 *         ignored), for final normalization
 */
private double naiveQueryDense(NumberVector obj, WritableDoubleDataStore scores, HashSetModifiableDBIDs cands) {
  // Sum of squared query components; its square root is returned.
  double sqsum = 0.;
  final int dims = obj.getDimensionality();
  for (int d = 0; d < dims; d++) {
    final double q = obj.doubleValue(d);
    // Zero and NaN components contribute nothing.
    if (Double.isNaN(q) || q == 0.) {
      continue;
    }
    sqsum += q * q;
    // Dimensions beyond the index have no matching documents.
    if (d < index.size()) {
      final DoubleDBIDListIter entry = index.get(d).iter();
      while (entry.valid()) {
        scores.increment(entry, entry.doubleValue() * q);
        cands.add(entry);
        entry.advance();
      }
    }
  }
  return FastMath.sqrt(sqsum);
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
the class InMemoryInvertedIndex method logStatistics.
/**
 * Log the fill ratio of the index as a statistic named
 * {@code <class name>.sparsity}: the number of stored entries divided by
 * (number of columns * relation size).
 *
 * NOTE(review): the denominator is not guarded; with an empty index or an
 * empty relation the logged value is NaN or infinite.
 */
@Override
public void logStatistics() {
  // Entries actually stored, summed over all columns.
  long stored = 0L;
  for (ModifiableDoubleDBIDList col : index) {
    stored += col.size();
  }
  // Cell count of the full matrix, computed in double precision.
  final double capacity = index.size() * (double) relation.size();
  final String key = this.getClass().getName() + ".sparsity";
  LOG.statistics(new DoubleStatistic(key, stored / capacity));
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
the class CoverTree method bulkLoad.
/**
 * Bulk-load the index.
 *
 * The first id becomes the routing object of the root. Every remaining id is
 * stored together with its distance to that first object, and the resulting
 * list is handed to {@code bulkConstruct} to build the tree.
 *
 * @param ids IDs to load; if empty, the method returns without doing anything
 */
public void bulkLoad(DBIDs ids) {
  final int total = ids.size();
  if (total == 0) {
    return;
  }
  assert (root == null) : "Tree already initialized.";
  final DBIDIter cursor = ids.iter();
  final DBID pivot = DBIDUtil.deref(cursor);
  // Distances from the pivot to all other objects:
  final ModifiableDoubleDBIDList rest = DBIDUtil.newDistanceDBIDList(total - 1);
  cursor.advance();
  while (cursor.valid()) {
    rest.add(distance(pivot, cursor), cursor);
    cursor.advance();
  }
  root = bulkConstruct(pivot, Integer.MAX_VALUE, 0., rest);
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
the class CoverTree method bulkConstruct.
/**
* Bulk-load the cover tree.
*
* This bulk-load is slightly simpler than the one used in the original
* cover-tree source: We do not look back into the "far" set of candidates.
*
* Note: {@code elems} is modified by this method. It is either handed to the
* created node, or cleared and refilled as scratch storage while the child
* nodes are built.
*
* @param cur Current routing object
* @param maxScale Maximum scale
* @param parentDist Distance value passed on to the created node (callers in
*        this method pass 0 when the child reuses the same routing object,
*        and the candidate's stored distance otherwise)
* @param elems Candidates
* @return Root node of subtree
*/
protected Node bulkConstruct(DBIDRef cur, int maxScale, double parentDist, ModifiableDoubleDBIDList elems) {
assert (!elems.contains(cur));
// Presumably the largest distance stored in elems (helper not shown here).
final double max = maxDistance(elems);
// One below the scale derived from max, but never above maxScale.
final int scale = Math.min(distToScale(max) - 1, maxScale);
final int nextScale = scale - 1;
// Stop recursing: put all remaining elements directly into a single node.
if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
return new Node(cur, max, parentDist, elems);
}
// Find neighbors in the cover of the current object:
// (presumably moves elements outside the cover radius from elems into
// candidates; excludeNotCovered is not shown here)
ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
excludeNotCovered(elems, scaleToDist(scale), candidates);
// If candidates is empty, nothing was split off at this scale: warn and
// retry with the next smaller scale instead of creating a node.
if (candidates.size() == 0) {
LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
return bulkConstruct(cur, nextScale, parentDist, elems);
}
// We will have at least one other child, so build the parent:
Node node = new Node(cur, max, parentDist);
// Routing element now is a singleton:
final boolean curSingleton = elems.size() == 0;
if (!curSingleton) {
// Add node for the routing object:
node.children.add(bulkConstruct(cur, nextScale, 0, elems));
}
final double fmax = scaleToDist(nextScale);
// Build additional cover nodes:
// The loop header has no advance(): each pass handles the entry at offset 0
// and removes it with removeSwap(0) at the end, so the loop ends once
// candidates is empty.
for (DoubleDBIDListIter it = candidates.iter(); it.valid(); ) {
assert (it.getOffset() == 0);
DBID t = DBIDUtil.deref(it);
// Recycle.
elems.clear();
// Refill elems for the current first candidate (presumably with the
// candidates within fmax of it; collectByCover is not shown here).
collectByCover(it, candidates, fmax, elems);
assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
if (elems.size() == 0) {
// Singleton
node.singletons.add(it.doubleValue(), it);
} else {
// Build a full child node:
node.children.add(bulkConstruct(it, nextScale, it.doubleValue(), elems));
}
// Drop the candidate that was just handled.
candidates.removeSwap(0);
}
assert (candidates.size() == 0);
// Routing object is not yet handled:
// (when curSingleton is true, no child node was created for cur above)
if (curSingleton) {
if (node.isLeaf()) {
// First in leaf is enough.
node.children = null;
} else {
// Add as regular singleton.
node.singletons.add(parentDist, cur);
}
}
// TODO: improve recycling of lists?
return node;
}
Aggregations