use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class InMemoryInvertedIndex method naiveQueryDense.
/**
 * Query the most similar objects, dense version.
 *
 * @param obj Query object
 * @param scores Score storage
 * @param cands Non-zero objects set
 * @return Length of the query vector, for score normalization
 */
private double naiveQueryDense(NumberVector obj, WritableDoubleDataStore scores, HashSetModifiableDBIDs cands) {
  // Length of query object, for final normalization
  double len = 0.;
  for (int dim = 0, max = obj.getDimensionality(); dim < max; dim++) {
    final double val = obj.doubleValue(dim);
    if (val == 0. || val != val) {
      continue; // Skip zero and NaN values
    }
    len += val * val;
    // No matching documents in index:
    if (dim >= index.size()) {
      continue;
    }
    ModifiableDoubleDBIDList column = index.get(dim);
    for (DoubleDBIDListIter n = column.iter(); n.valid(); n.advance()) {
      scores.increment(n, n.doubleValue() * val);
      cands.add(n);
    }
  }
  return FastMath.sqrt(len);
}
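For context, the following is a minimal, self-contained sketch (not part of ELKI) of the iteration idiom used above: a ModifiableDoubleDBIDList acts as one posting list, a DoubleDBIDListIter walks it, and a WritableDoubleDataStore accumulates the partial dot products. The class name PostingListScoringSketch, the artificial DBID range, and all feature and query values are invented for illustration.

import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;

public class PostingListScoringSketch {
  public static void main(String[] args) {
    // Three artificial object IDs, standing in for indexed documents:
    DBIDRange ids = DBIDUtil.generateStaticDBIDRange(3);
    // One "posting list": pairs of (stored feature value, DBID) for a single dimension.
    ModifiableDoubleDBIDList column = DBIDUtil.newDistanceDBIDList();
    double stored = 0.25;
    for (DBIDArrayIter id = ids.iter(); id.valid(); id.advance()) {
      column.add(stored, id);
      stored += 0.25;
    }
    // Score accumulator and candidate set, in the roles of scores / cands above:
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    HashSetModifiableDBIDs cands = DBIDUtil.newHashSet();
    final double queryval = 2.0; // query value in this dimension
    for (DoubleDBIDListIter n = column.iter(); n.valid(); n.advance()) {
      scores.increment(n, n.doubleValue() * queryval); // partial dot product
      cands.add(n);
    }
    for (DoubleDBIDListIter n = column.iter(); n.valid(); n.advance()) {
      System.out.println(DBIDUtil.deref(n) + " -> " + scores.doubleValue(n));
    }
  }
}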
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class CoverTree method bulkConstruct.
/**
 * Bulk-load the cover tree.
 *
 * This bulk-load is slightly simpler than the one used in the original
 * cover-tree source: We do not look back into the "far" set of candidates.
 *
 * @param cur Current routing object
 * @param maxScale Maximum scale
 * @param parentDist Distance to the parent routing object
 * @param elems Candidates
 * @return Root node of subtree
 */
protected Node bulkConstruct(DBIDRef cur, int maxScale, double parentDist, ModifiableDoubleDBIDList elems) {
  assert (!elems.contains(cur));
  final double max = maxDistance(elems);
  final int scale = Math.min(distToScale(max) - 1, maxScale);
  final int nextScale = scale - 1;
  // Leaf node: points coincide, we are too deep, or too few elements remaining:
  if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
    return new Node(cur, max, parentDist, elems);
  }
  // Find neighbors in the cover of the current object:
  ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
  excludeNotCovered(elems, scaleToDist(scale), candidates);
  // If all elements are within the cover, build a compact tree:
  if (candidates.size() == 0) {
    LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
    return bulkConstruct(cur, nextScale, parentDist, elems);
  }
  // We will have at least one other child, so build the parent:
  Node node = new Node(cur, max, parentDist);
  // Routing element now is a singleton:
  final boolean curSingleton = elems.size() == 0;
  if (!curSingleton) {
    // Add node for the routing object:
    node.children.add(bulkConstruct(cur, nextScale, 0, elems));
  }
  final double fmax = scaleToDist(nextScale);
  // Build additional cover nodes:
  for (DoubleDBIDListIter it = candidates.iter(); it.valid();) {
    assert (it.getOffset() == 0);
    DBID t = DBIDUtil.deref(it);
    // Recycle the elems buffer:
    elems.clear();
    collectByCover(it, candidates, fmax, elems);
    assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
    if (elems.size() == 0) {
      // Singleton
      node.singletons.add(it.doubleValue(), it);
    } else {
      // Build a full child node:
      node.children.add(bulkConstruct(it, nextScale, it.doubleValue(), elems));
    }
    candidates.removeSwap(0);
  }
  assert (candidates.size() == 0);
  // Routing object is not yet handled:
  if (curSingleton) {
    if (node.isLeaf()) {
      // First in leaf is enough.
      node.children = null;
    } else {
      // Add as regular singleton.
      node.singletons.add(parentDist, cur);
    }
  }
  // TODO: improve recycling of lists?
  return node;
}
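Note that the candidate loop above never calls advance(): the iterator stays at offset 0, and removeSwap(0) brings a new element to the front on every pass. A stripped-down sketch of just that consumption pattern follows; the class name ConsumeCandidatesSketch, the DBID range, and the distances are made up for illustration.

import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;

public class ConsumeCandidatesSketch {
  public static void main(String[] args) {
    DBIDRange ids = DBIDUtil.generateStaticDBIDRange(4);
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
    double dist = 1.0;
    for (DBIDArrayIter id = ids.iter(); id.valid(); id.advance()) {
      candidates.add(dist, id); // hypothetical distances to the routing object
      dist += 1.0;
    }
    // Same pattern as the cover tree bulk-load: the iterator is never advanced;
    // removeSwap(0) swaps the last element to the front and shrinks the list.
    for (DoubleDBIDListIter it = candidates.iter(); it.valid();) {
      assert (it.getOffset() == 0);
      DBID current = DBIDUtil.deref(it);
      System.out.println("processing " + current + " at distance " + it.doubleValue());
      candidates.removeSwap(0);
    }
  }
}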
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class KMeansMinusMinus method meansWithTreshhold.
/**
 * Returns the mean vectors of the given clusters in the given database,
 * ignoring instances whose distance is at or above the given threshold.
 *
 * @param clusters the clusters to compute the means for
 * @param means the previous means, used as a fallback for empty clusters
 * @param database the database containing the vectors
 * @param tresh distance threshold; instances with a distance of tresh or more are excluded
 * @return the mean vectors of the given clusters in the given database
 */
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
  // TODO: use Kahan summation for better numerical precision?
  double[][] newMeans = new double[k][];
  for (int i = 0; i < k; i++) {
    DoubleDBIDList list = clusters.get(i);
    double[] raw = null;
    int count = 0;
    // Update with remaining instances
    for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
      // Skip instances at or beyond the noise threshold:
      if (iter.doubleValue() >= tresh) {
        continue;
      }
      NumberVector vec = database.get(iter);
      if (raw == null) {
        // Initialize the sum with the first vector:
        raw = vec.toArray();
        count = 1;
        continue;
      }
      for (int j = 0; j < raw.length; j++) {
        raw[j] += vec.doubleValue(j);
      }
      count++;
    }
    // Empty cluster: keep the previous mean.
    newMeans[i] = (raw != null) ? VMath.timesEquals(raw, 1.0 / count) : means[i];
  }
  return newMeans;
}
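The test iter.doubleValue() >= tresh is what treats far-away instances as noise. Below is a small standalone sketch of that filtering idiom over a DoubleDBIDList, without the Relation lookup; the class name ThresholdFilterSketch, the DBIDs, the distances, and the threshold value are all invented for illustration.

import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;

public class ThresholdFilterSketch {
  public static void main(String[] args) {
    DBIDRange ids = DBIDUtil.generateStaticDBIDRange(5);
    // Cluster member list: the stored double plays the role of the distance to the current mean.
    ModifiableDoubleDBIDList cluster = DBIDUtil.newDistanceDBIDList();
    double d = 0.5;
    for (DBIDArrayIter id = ids.iter(); id.valid(); id.advance()) {
      cluster.add(d, id);
      d *= 2; // 0.5, 1.0, 2.0, 4.0, 8.0
    }
    final double tresh = 3.0; // same role as the tresh parameter above
    double sum = 0;
    int count = 0;
    for (DoubleDBIDListIter iter = cluster.iter(); iter.valid(); iter.advance()) {
      if (iter.doubleValue() >= tresh) {
        continue; // treat far-away instances as noise
      }
      sum += iter.doubleValue();
      count++;
    }
    System.out.println("kept " + count + " of " + cluster.size() + " members, mean distance " + (count > 0 ? sum / count : Double.NaN));
  }
}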
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class ALIDEstimator method estimate.
@Override
public double estimate(KNNQuery<?> knnq, DBIDRef cur, int k) {
  int a = 0;
  double sum = 0;
  final KNNList kl = knnq.getKNNForDBID(cur, k);
  final double w = kl.getKNNDistance();
  final double halfw = 0.5 * w;
  for (DoubleDBIDListIter it = kl.iter(); it.valid(); it.advance()) {
    if (it.doubleValue() <= 0. || DBIDUtil.equal(cur, it)) {
      continue;
    }
    final double v = it.doubleValue();
    sum += v < halfw ? FastMath.log(v / w) : FastMath.log1p((v - w) / w);
    ++a;
    final double nw = w - v;
    final double halfnw = 0.5 * nw;
    for (DoubleDBIDListIter it2 = knnq.getKNNForDBID(it, k).iter(); it2.valid() && it2.doubleValue() <= nw; it2.advance()) {
      if (it2.doubleValue() <= 0. || DBIDUtil.equal(it, it2)) {
        continue;
      }
      final double v2 = it2.doubleValue();
      sum += v2 < halfnw ? FastMath.log(v2 / nw) : FastMath.log1p((v2 - nw) / nw);
      ++a;
    }
  }
  return -a / sum;
}
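Since KNNList is itself a DoubleDBIDList, the outer loop above is ordinary DoubleDBIDListIter iteration. The sketch below reproduces only that first level over a hand-built KNNHeap, omits the neighbor-of-neighbor expansion, and uses java.lang.Math instead of ELKI's FastMath, so it illustrates the iteration rather than the full ALID estimator; the class name KNNListIterationSketch and all distances are invented for illustration.

import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.KNNList;

public class KNNListIterationSketch {
  public static void main(String[] args) {
    DBIDRange ids = DBIDUtil.generateStaticDBIDRange(6);
    // Build a small k-nearest-neighbor list by hand (hypothetical distances);
    // normally this would come from knnq.getKNNForDBID(cur, k).
    KNNHeap heap = DBIDUtil.newHeap(5);
    double dist = 0.; // first entry: the query point itself, at distance 0
    for (DBIDArrayIter id = ids.iter(); id.valid(); id.advance()) {
      heap.insert(dist, id);
      dist += 0.3;
    }
    KNNList kl = heap.toKNNList();
    final double w = kl.getKNNDistance();
    final double halfw = 0.5 * w;
    // First-level aggregation only (the estimator above also expands each neighbor):
    int a = 0;
    double sum = 0;
    for (DoubleDBIDListIter it = kl.iter(); it.valid(); it.advance()) {
      if (it.doubleValue() <= 0.) {
        continue; // skip the query point and duplicates at distance zero
      }
      final double v = it.doubleValue();
      sum += v < halfw ? Math.log(v / w) : Math.log1p((v - w) / w);
      ++a;
    }
    System.out.println("first-level estimate (no neighbor expansion): " + (-a / sum));
  }
}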
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class ALIDEstimator method estimate.
@Override
public double estimate(RangeQuery<?> rnq, DBIDRef cur, double range) {
  int a = 0;
  double sum = 0;
  final double halfw = 0.5 * range;
  for (DoubleDBIDListIter it = rnq.getRangeForDBID(cur, range).iter(); it.valid(); it.advance()) {
    if (it.doubleValue() == 0. || DBIDUtil.equal(cur, it)) {
      continue;
    }
    final double v = it.doubleValue();
    sum += v < halfw ? FastMath.log(v / range) : FastMath.log1p((v - range) / range);
    ++a;
    final double nw = range - v;
    final double halfnw = 0.5 * nw;
    for (DoubleDBIDListIter it2 = rnq.getRangeForDBID(it, nw).iter(); it2.valid(); it2.advance()) {
      if (it2.doubleValue() <= 0. || DBIDUtil.equal(it, it2)) {
        continue;
      }
      final double v2 = it2.doubleValue();
      sum += v2 < halfnw ? FastMath.log(v2 / nw) : FastMath.log1p((v2 - nw) / nw);
      ++a;
    }
  }
  return -a / sum;
}
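Both estimate variants cut the inner iteration off by the stored distance (it2.doubleValue() <= nw in the kNN version), which relies on the neighbor list being sorted by distance. A short sketch of that cutoff idiom on an explicitly sorted ModifiableDoubleDBIDList follows; the class name SortedRangeCutoffSketch, the distances, and the cutoff radius are invented for illustration.

import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;

public class SortedRangeCutoffSketch {
  public static void main(String[] args) {
    DBIDRange ids = DBIDUtil.generateStaticDBIDRange(5);
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    // Hypothetical distances, inserted out of order:
    double[] dists = { 0.9, 0.2, 0.7, 0.4, 1.3 };
    int i = 0;
    for (DBIDArrayIter id = ids.iter(); id.valid(); id.advance()) {
      result.add(dists[i++], id);
    }
    // Sort ascending by the stored distance, so the loop condition can stop early:
    result.sort();
    final double cutoff = 0.8; // plays the role of the shrunken radius nw = range - v
    for (DoubleDBIDListIter it = result.iter(); it.valid() && it.doubleValue() <= cutoff; it.advance()) {
      System.out.println(DBIDUtil.deref(it) + " at distance " + it.doubleValue());
    }
  }
}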