Search in sources :

Example 6 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

From the class EvaluateDaviesBouldin, method withinGroupDistances.

/**
 * Compute, for each cluster, the average distance between its members and
 * the centroid stored at the same position in {@code centroids}.
 *
 * @param rel Relation the member vectors are read from
 * @param clusters Clusters to evaluate, in the same order as {@code centroids}
 * @param centroids One centroid per cluster; a {@code null} entry yields 0 for
 *        that cluster
 * @return Array with one average distance per cluster
 */
public double[] withinGroupDistances(Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, NumberVector[] centroids) {
    final double[] averages = new double[clusters.size()];
    int idx = 0;
    for (Cluster<?> members : clusters) {
        final NumberVector center = centroids[idx];
        if (center != null) {
            // Sum up the distances of all members to the centroid.
            double total = 0.;
            DBIDIter id = members.getIDs().iter();
            while (id.valid()) {
                total += distanceFunction.distance(center, rel.get(id));
                id.advance();
            }
            averages[idx] = total / members.size();
        } else {
            // No centroid (empty, noise or singleton cluster): contributes 0.
            averages[idx] = 0.;
        }
        idx++;
    }
    return averages;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 7 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

From the class APRIORI, method buildFrequentOneItemsets.

/**
 * Count how often each dimension occurs in the relation and return the
 * 1-itemsets whose count reaches the required support.
 *
 * @param relation Data relation
 * @param dim Maximum dimensionality (size of the counting array)
 * @param needed Minimum support needed
 * @return 1-itemsets with a count of at least {@code needed}, ordered by
 *         dimension index
 */
protected List<OneItemset> buildFrequentOneItemsets(final Relation<? extends SparseFeatureVector<?>> relation, final int dim, final int needed) {
    // TODO: use TIntList and prefill appropriately to avoid knowing "dim"
    // beforehand?
    final int[] support = new int[dim];
    // First pass: tally every dimension reported by each vector's iterator.
    DBIDIter id = relation.iterDBIDs();
    while (id.valid()) {
        final SparseFeatureVector<?> vec = relation.get(id);
        int pos = vec.iter();
        while (vec.iterValid(pos)) {
            support[vec.iterDim(pos)]++;
            pos = vec.iterAdvance(pos);
        }
        id.advance();
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(STAT + "1-items.candidates", dim));
    }
    // Second pass: keep only the dimensions that are frequent enough.
    final List<OneItemset> result = new ArrayList<>(dim);
    for (int item = 0; item < dim; item++) {
        final int count = support[item];
        if (count < needed) {
            continue;
        }
        result.add(new OneItemset(item, count));
    }
    return result;
}
Also used : LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 8 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

From the class APRIORI, method frequentItemsetsSparse.

/**
 * Counts the support of the given candidate itemsets over the given objects
 * and returns those candidates whose support reaches the minimum.
 * Optimized implementation for SparseItemset.
 *
 * Support is incremented in place on the candidate objects, so the
 * {@code candidates} list is mutated as a side effect.
 *
 * @param candidates the candidates to be evaluated
 * @param relation the database to evaluate the candidates on
 * @param needed Minimum support needed
 * @param ids Objects to process
 * @param survivors Output: objects for which more than {@code length}
 *        support increments were recorded (the "lives" counter below).
 * @param length Itemset length
 * @return Itemsets with sufficient support
 */
protected List<SparseItemset> frequentItemsetsSparse(List<SparseItemset> candidates, Relation<BitVector> relation, int needed, DBIDs ids, ArrayModifiableDBIDs survivors, int length) {
    // Current search interval:
    // NOTE(review): begin/end are declared outside the per-object loop and are
    // never reset, so the window narrowed for one object carries over to the
    // next one - confirm this is intended.
    int begin = 0, end = candidates.size();
    // scratchi backs the reusable search key "scratch"; iters holds the
    // per-position iteration state used by the search-itemset helpers.
    int[] scratchi = new int[length], iters = new int[length];
    SparseItemset scratch = new SparseItemset(scratchi);
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
        BitVector bv = relation.get(iditer);
        // Skip objects for which no initial search itemset can be set up.
        if (!initializeSearchItemset(bv, scratchi, iters)) {
            continue;
        }
        // Number of support increments caused by this object.
        int lives = 0;
        while (begin < end) {
            begin = binarySearch(candidates, scratch, begin, end);
            // NOTE(review): the "(-begin) - 1" decoding below suggests the
            // helper follows the Arrays.binarySearch return convention; if so,
            // a hit at index 0 is treated as a miss here (">" rather than
            // ">=") - verify against binarySearch.
            if (begin > 0) {
                candidates.get(begin).increaseSupport();
                ++lives;
            } else {
                // Not found: decode the encoded position to continue from.
                begin = (-begin) - 1;
            }
            // Stop when the window is exhausted or no further search itemset
            // can be produced for this object.
            if (begin >= end || !nextSearchItemset(bv, scratchi, iters)) {
                break;
            }
        }
        // NOTE(review): this linear scan increments the support of every
        // candidate contained in bv, independently of the hits counted by the
        // search loop above - possible double counting, confirm intent.
        for (Itemset candidate : candidates) {
            if (candidate.containedIn(bv)) {
                candidate.increaseSupport();
                ++lives;
            }
        }
        if (lives > length) {
            survivors.add(iditer);
        }
    }
    // Retain only those with minimum support:
    List<SparseItemset> frequent = new ArrayList<>(candidates.size());
    for (Iterator<SparseItemset> iter = candidates.iterator(); iter.hasNext(); ) {
        final SparseItemset candidate = iter.next();
        if (candidate.getSupport() >= needed) {
            frequent.add(candidate);
        }
    }
    return frequent;
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 9 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

From the class Eclat, method mergeJoin.

/**
 * Collect the ids present in both inputs by advancing two iterators in
 * lock-step, always moving the one that compares smaller.
 *
 * Presumably both inputs must iterate in ascending order with respect to
 * {@code DBIDUtil.compare}; the assertions only rule out hash sets.
 *
 * @param first First id collection (must not be a hash set)
 * @param second Second id collection (must not be a hash set)
 * @return Newly allocated array of the ids found in both inputs
 */
private DBIDs mergeJoin(DBIDs first, DBIDs second) {
    assert (!(first instanceof HashSetDBIDs));
    assert (!(second instanceof HashSetDBIDs));
    final ArrayModifiableDBIDs common = DBIDUtil.newArray();
    final DBIDIter left = first.iter();
    final DBIDIter right = second.iter();
    while (left.valid() && right.valid()) {
        final int order = DBIDUtil.compare(left, right);
        if (order == 0) {
            // Same id on both sides: keep it and move both iterators on.
            common.add(left);
            left.advance();
            right.advance();
            continue;
        }
        // Otherwise advance only the side that is behind.
        if (order < 0) {
            left.advance();
        } else {
            right.advance();
        }
    }
    return common;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) HashSetDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 10 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

From the class OPTICSCut, method makeOPTICSCut.

/**
 * Build a flat clustering from a cluster order by thresholding the
 * reachability values at {@code epsilon}.
 *
 * Consecutive entries with reachability at most {@code epsilon} form one
 * cluster, together with the entry directly preceding the run; all other
 * entries end up in a single noise cluster that is added last.
 *
 * @param co Cluster order result
 * @param epsilon Epsilon value for cut
 * @return New partitioning clustering
 */
public static <E extends ClusterOrder> Clustering<Model> makeOPTICSCut(E co, double epsilon) {
    // Result under construction
    Clustering<Model> result = new Clustering<>("OPTICS Cut Clustering", "optics-cut");
    // Ids currently considered noise
    ModifiableDBIDs noiseIds = DBIDUtil.newHashSet();
    // Reachability of the entry being processed; starts above any threshold
    double reach = Double.MAX_VALUE;
    // Members of the cluster currently being assembled
    ModifiableDBIDs members = DBIDUtil.newHashSet();
    // TODO: can we implement this more nicely with a 1-lookahead?
    DBIDVar predecessor = DBIDUtil.newVar();
    DBIDIter cur = co.iter();
    while (cur.valid()) {
        final double previousReach = reach;
        reach = co.getReachability(cur);
        if (reach <= epsilon) {
            // The entry right before the drop was filed as noise, but it
            // belongs to this cluster: move it over.
            if (previousReach > epsilon && predecessor.isSet()) {
                noiseIds.remove(predecessor);
                members.add(predecessor);
            }
            members.add(cur);
        } else {
            // Reachability is above the cut: close any open cluster.
            if (!members.isEmpty()) {
                // TODO: do we want a minpts restriction?
                // But only core points are guaranteed here anyway.
                result.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
                members = DBIDUtil.newHashSet();
            }
            // File the current entry as noise (for now).
            noiseIds.add(cur);
        }
        // Remember this entry as predecessor, then step forward.
        predecessor.set(cur);
        cur.advance();
    }
    // Flush a cluster that was still open at the end of the order
    if (!members.isEmpty()) {
        result.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
    }
    // The noise cluster always goes in, flagged as noise
    result.addToplevelCluster(new Cluster<Model>(noiseIds, true, ClusterModel.CLUSTER));
    return result;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) Model(de.lmu.ifi.dbs.elki.data.model.Model) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 329, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 78, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 76, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 72, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 70, ArrayList (java.util.ArrayList): 61, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 56, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 56, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 55, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 55, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 54, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 53, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 42, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 40, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 34, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 31, BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta): 30, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 25, ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList): 24, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 21