Search in sources :

Example 36 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class InMemoryInvertedIndex method naiveQueryDense.

/**
 * Accumulate the scores of all indexed objects for a dense query vector.
 *
 * Every dimension of the query that holds a usable value (neither zero nor
 * NaN) contributes its square to the query length. If the index has a column
 * for that dimension, each entry of the column adds the product of its stored
 * value and the query value to the score of the referenced object, and the
 * object is recorded as a candidate.
 *
 * @param obj Query object
 * @param scores Score storage, incremented for every matching column entry
 * @param cands Set receiving every object that got a score contribution
 * @return Square root of the summed squares of the usable query values
 */
private double naiveQueryDense(NumberVector obj, WritableDoubleDataStore scores, HashSetModifiableDBIDs cands) {
    final int dimensionality = obj.getDimensionality();
    // Squared length of the query, the root is taken on return.
    double squaredLength = 0.;
    for (int d = 0; d < dimensionality; d++) {
        final double weight = obj.doubleValue(d);
        // Only non-zero, non-NaN query values contribute anything.
        if (weight != 0. && !Double.isNaN(weight)) {
            squaredLength += weight * weight;
            // Dimensions beyond the index have no matching documents.
            if (d < index.size()) {
                ModifiableDoubleDBIDList postings = index.get(d);
                DoubleDBIDListIter entry = postings.iter();
                while (entry.valid()) {
                    scores.increment(entry, entry.doubleValue() * weight);
                    cands.add(entry);
                    entry.advance();
                }
            }
        }
    }
    return FastMath.sqrt(squaredLength);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Example 37 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class CoverTree method bulkConstruct.

/**
 * Bulk-load the cover tree.
 *
 * This bulk-load is slightly simpler than the one used in the original
 * cover-tree source: We do not look back into the "far" set of candidates.
 *
 * The list {@code elems} is modified by this method: it is cleared and
 * refilled while the child nodes are built, so callers must not rely on its
 * contents afterwards.
 *
 * @param cur Current routing object
 * @param maxScale Maximum scale
 * @param parentDist Value stored in the created node as its parent distance
 *        (presumably the distance of {@code cur} to the routing object of the
 *        parent node; the recursive calls pass 0 for the routing object
 *        itself and the candidate's stored value otherwise)
 * @param elems Candidates
 * @return Root node of subtree
 */
protected Node bulkConstruct(DBIDRef cur, int maxScale, double parentDist, ModifiableDoubleDBIDList elems) {
    assert (!elems.contains(cur));
    final double max = maxDistance(elems);
    final int scale = Math.min(distToScale(max) - 1, maxScale);
    final int nextScale = scale - 1;
    // Stop recursing and keep all elements in a single node when the maximum
    // distance is not positive, the bottom scale is reached, or fewer than
    // "truncate" elements are remaining:
    if (max <= 0 || scale <= scaleBottom || elems.size() < truncate) {
        return new Node(cur, max, parentDist, elems);
    }
    // Find neighbors in the cover of the current object:
    // (presumably excludeNotCovered moves the elements outside the cover
    // radius from elems into candidates - confirm against the helper)
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
    excludeNotCovered(elems, scaleToDist(scale), candidates);
    // If no element ended up in the candidates list, this scale did not split
    // anything: retry with the same elements at the next smaller scale.
    if (candidates.size() == 0) {
        LOG.warning("Scale not chosen appropriately? " + max + " " + scaleToDist(scale));
        return bulkConstruct(cur, nextScale, parentDist, elems);
    }
    // We will have at least one other child, so build the parent:
    Node node = new Node(cur, max, parentDist);
    // If nothing remained in elems, the routing element is a singleton:
    final boolean curSingleton = elems.size() == 0;
    if (!curSingleton) {
        // Add node for the routing object:
        node.children.add(bulkConstruct(cur, nextScale, 0, elems));
    }
    final double fmax = scaleToDist(nextScale);
    // Build additional cover nodes:
    // The iterator is never advanced; it stays at offset 0 and the loop makes
    // progress by removing the first candidate at the end of each round.
    for (DoubleDBIDListIter it = candidates.iter(); it.valid(); ) {
        assert (it.getOffset() == 0);
        DBID t = DBIDUtil.deref(it);
        // Recycle.
        elems.clear();
        // Fill elems for the first candidate (presumably with the other
        // candidates within fmax of it - confirm against collectByCover).
        collectByCover(it, candidates, fmax, elems);
        assert (DBIDUtil.equal(t, it)) : "First element in candidates must not change!";
        if (elems.size() == 0) {
            // Singleton
            node.singletons.add(it.doubleValue(), it);
        } else {
            // Build a full child node:
            node.children.add(bulkConstruct(it, nextScale, it.doubleValue(), elems));
        }
        // Drop the candidate just handled; another entry takes position 0.
        candidates.removeSwap(0);
    }
    assert (candidates.size() == 0);
    // Routing object is not yet handled:
    if (curSingleton) {
        if (node.isLeaf()) {
            // First in leaf is enough.
            node.children = null;
        } else {
            // Add as regular singleton.
            node.singletons.add(parentDist, cur);
        }
    }
    // TODO: improve recycling of lists?
    return node;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID)

Example 38 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class KMeansMinusMinus method meansWithTreshhold.

/**
 * Returns the mean vectors of the given clusters in the given database,
 * using only the cluster members whose stored value is below the threshold.
 *
 * For each cluster, the vectors of all list entries with a value strictly
 * below {@code tresh} are averaged. A cluster without any such entry keeps
 * its previous mean from {@code means}.
 *
 * @param clusters the clusters to compute the means
 * @param means the recent means, used as fallback for clusters without any
 *        entry below the threshold
 * @param database the database containing the vectors
 * @param tresh threshold; entries whose stored value is greater than or
 *        equal to it are ignored (presumably the value is the distance to the
 *        cluster center - confirm against the caller)
 * @return the mean vectors of the given clusters in the given database
 */
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
    // TODO: use Kahan summation for better numerical precision?
    double[][] newMeans = new double[k][];
    for (int i = 0; i < k; i++) {
        DoubleDBIDList list = clusters.get(i);
        double[] raw = null;
        int count = 0;
        for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
            // Entries at or above the threshold do not contribute to the mean.
            if (iter.doubleValue() >= tresh) {
                continue;
            }
            NumberVector vec = database.get(iter);
            if (raw == null) {
                // Initialize the sum with the first accepted vector. It must
                // not be added a second time below, or it would be weighted
                // twice while being counted only once.
                raw = vec.toArray();
            } else {
                // Update with remaining instances
                for (int j = 0; j < raw.length; j++) {
                    raw[j] += vec.doubleValue(j);
                }
            }
            count++;
        }
        // Keep the previous mean when no entry was accepted (avoids 0/0).
        newMeans[i] = (raw != null) ? VMath.timesEquals(raw, 1.0 / count) : means[i];
    }
    return newMeans;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Example 39 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class ALIDEstimator method estimate.

/**
 * Estimate from the k nearest neighbors of an object and from the neighbors
 * of those neighbors.
 *
 * With w being the kNN distance of {@code cur}, every neighbor at a positive
 * distance v contributes log(v / w). For each such neighbor, its own
 * neighbors within the remaining radius w - v contribute in the same way,
 * relative to that remaining radius. The result is the negated number of
 * contributions divided by the sum of the logarithms.
 *
 * @param knnq kNN query
 * @param cur Object to estimate for
 * @param k Number of neighbors to query
 * @return Negated contribution count divided by the sum of log ratios
 */
@Override
public double estimate(KNNQuery<?> knnq, DBIDRef cur, int k) {
    final KNNList neighbors = knnq.getKNNForDBID(cur, k);
    final double w = neighbors.getKNNDistance();
    final double halfw = 0.5 * w;
    double sum = 0;
    int a = 0;
    for (DoubleDBIDListIter it = neighbors.iter(); it.valid(); it.advance()) {
        final double v = it.doubleValue();
        // Skip non-positive distances and the query object itself.
        if (v <= 0. || DBIDUtil.equal(cur, it)) {
            continue;
        }
        // Both forms evaluate log(v / w); log1p is used in the upper half.
        if (v < halfw) {
            sum += FastMath.log(v / w);
        } else {
            sum += FastMath.log1p((v - w) / w);
        }
        ++a;
        // Radius that remains around this neighbor.
        final double nw = w - v;
        final double halfnw = 0.5 * nw;
        final KNNList secondary = knnq.getKNNForDBID(it, k);
        for (DoubleDBIDListIter it2 = secondary.iter(); it2.valid() && it2.doubleValue() <= nw; it2.advance()) {
            final double v2 = it2.doubleValue();
            if (v2 <= 0. || DBIDUtil.equal(it, it2)) {
                continue;
            }
            if (v2 < halfnw) {
                sum += FastMath.log(v2 / nw);
            } else {
                sum += FastMath.log1p((v2 - nw) / nw);
            }
            ++a;
        }
    }
    return -a / sum;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 40 with DoubleDBIDListIter

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.

the class ALIDEstimator method estimate.

/**
 * Estimate from the range neighbors of an object and from the neighbors of
 * those neighbors.
 *
 * Every neighbor of {@code cur} at a positive distance v within
 * {@code range} contributes log(v / range). For each such neighbor, a second
 * range query with the remaining radius range - v is run, and its results
 * contribute in the same way, relative to that remaining radius. The result
 * is the negated number of contributions divided by the sum of the
 * logarithms.
 *
 * @param rnq Range query
 * @param cur Object to estimate for
 * @param range Query radius
 * @return Negated contribution count divided by the sum of log ratios
 */
@Override
public double estimate(RangeQuery<?> rnq, DBIDRef cur, double range) {
    int a = 0;
    double sum = 0;
    final double halfw = 0.5 * range;
    for (DoubleDBIDListIter it = rnq.getRangeForDBID(cur, range).iter(); it.valid(); it.advance()) {
        // Skip non-positive distances and the query object itself (same test
        // as the kNN variant of this estimator).
        if (it.doubleValue() <= 0. || DBIDUtil.equal(cur, it)) {
            continue;
        }
        final double v = it.doubleValue();
        // Both forms evaluate log(v / range); log1p is used in the upper half.
        sum += v < halfw ? FastMath.log(v / range) : FastMath.log1p((v - range) / range);
        ++a;
        // Radius that remains around this neighbor.
        final double nw = range - v;
        final double halfnw = 0.5 * nw;
        // The inner loop must be driven by its own iterator: testing and
        // advancing the outer one would consume the outer result list while
        // reading the same inner entry over and over.
        for (DoubleDBIDListIter it2 = rnq.getRangeForDBID(it, nw).iter(); it2.valid(); it2.advance()) {
            if (it2.doubleValue() <= 0. || DBIDUtil.equal(it, it2)) {
                continue;
            }
            final double v2 = it2.doubleValue();
            sum += v2 < halfnw ? FastMath.log(v2 / nw) : FastMath.log1p((v2 - nw) / nw);
            ++a;
        }
    }
    return -a / sum;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)

Aggregations

DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)69 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)38 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)20 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)19 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)12 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)11 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)11 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)11 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)11 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)11 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)10 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)9 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)6 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)5 DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)5 ArrayList (java.util.ArrayList)5 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4