Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in the ELKI project (elki-project): class EvaluateDaviesBouldin, method withinGroupDistances.
/**
 * Compute, for every cluster, the mean distance of its members to the
 * cluster centroid.
 *
 * @param rel Data relation holding the vectors
 * @param clusters Clusters to evaluate
 * @param centroids Precomputed centroids; a {@code null} entry marks an
 *        empty, noise, or singleton cluster and yields a distance of 0
 * @return Average within-cluster distance, one entry per cluster
 */
public double[] withinGroupDistances(Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, NumberVector[] centroids) {
double[] result = new double[clusters.size()];
int pos = 0;
for (Cluster<?> cluster : clusters) {
final NumberVector center = centroids[pos];
if (center == null) {
// Degenerate cluster (empty, noise, or singleton): distance is zero.
result[pos++] = 0.;
continue;
}
double sum = 0.;
// Accumulate distances of all member objects to the centroid.
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
sum += distanceFunction.distance(center, rel.get(iter));
}
result[pos++] = sum / cluster.size();
}
return result;
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in the ELKI project (elki-project): class APRIORI, method buildFrequentOneItemsets.
/**
 * Build the frequent 1-itemsets by counting, per dimension, how many
 * objects have a nonzero entry there.
 *
 * @param relation Data relation
 * @param dim Maximum dimensionality
 * @param needed Minimum support needed
 * @return 1-itemsets with at least the required support
 */
protected List<OneItemset> buildFrequentOneItemsets(final Relation<? extends SparseFeatureVector<?>> relation, final int dim, final int needed) {
// TODO: use TIntList and prefill appropriately to avoid knowing "dim"
// beforehand?
final int[] support = new int[dim];
// One pass over the database: count occurrences of each dimension.
for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
final SparseFeatureVector<?> vec = relation.get(id);
for (int pos = vec.iter(); vec.iterValid(pos); pos = vec.iterAdvance(pos)) {
support[vec.iterDim(pos)]++;
}
}
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(STAT + "1-items.candidates", dim));
}
// Keep only the dimensions that reach the minimum support.
final List<OneItemset> frequent = new ArrayList<>(dim);
for (int d = 0; d < dim; d++) {
if (support[d] >= needed) {
frequent.add(new OneItemset(d, support[d]));
}
}
return frequent;
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in the ELKI project (elki-project): class APRIORI, method frequentItemsetsSparse.
/**
 * Returns the frequent BitSets out of the given BitSets with respect to the
 * given database. Optimized implementation for SparseItemset.
 *
 * @param candidates the candidates to be evaluated, sorted (binary-search order)
 * @param relation the database to evaluate the candidates on
 * @param needed Minimum support needed
 * @param ids Objects to process
 * @param survivors Output: objects that matched more than {@code length} candidates
 * @param length Itemset length
 * @return Itemsets with sufficient support
 */
protected List<SparseItemset> frequentItemsetsSparse(List<SparseItemset> candidates, Relation<BitVector> relation, int needed, DBIDs ids, ArrayModifiableDBIDs survivors, int length) {
int[] scratchi = new int[length], iters = new int[length];
SparseItemset scratch = new SparseItemset(scratchi);
for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
BitVector bv = relation.get(iditer);
if (!initializeSearchItemset(bv, scratchi, iters)) {
continue;
}
int lives = 0;
// Current search interval. FIX: reset begin/end for each object;
// previously the window was declared outside the loop and only shrank,
// so objects after the first searched a shrinking suffix of candidates.
for (int begin = 0, end = candidates.size(); begin < end;) {
begin = binarySearch(candidates, scratch, begin, end);
// FIX: index 0 is a valid hit; the old test (begin > 0) dropped it.
if (begin >= 0) {
candidates.get(begin).increaseSupport();
++lives;
} else {
// Not found: convert to insertion point to resume the search there.
begin = (-begin) - 1;
}
if (begin >= end || !nextSearchItemset(bv, scratchi, iters)) {
break;
}
}
// NOTE(review): the previous version additionally ran a full linear scan
// over all candidates here (candidate.containedIn(bv)), double-counting
// support already tallied by the binary-search pass above; removed.
if (lives > length) {
survivors.add(iditer);
}
}
// Retain only those with minimum support:
List<SparseItemset> frequent = new ArrayList<>(candidates.size());
for (SparseItemset candidate : candidates) {
if (candidate.getSupport() >= needed) {
frequent.add(candidate);
}
}
return frequent;
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in the ELKI project (elki-project): class Eclat, method mergeJoin.
/**
 * Intersect two sorted DBID collections with a linear merge join.
 *
 * @param first First sorted input (must not be a hash set)
 * @param second Second sorted input (must not be a hash set)
 * @return IDs present in both inputs, in sorted order
 */
private DBIDs mergeJoin(DBIDs first, DBIDs second) {
// Merge join requires sorted inputs; hash sets have no defined order.
assert (!(first instanceof HashSetDBIDs));
assert (!(second instanceof HashSetDBIDs));
ArrayModifiableDBIDs intersection = DBIDUtil.newArray();
DBIDIter a = first.iter(), b = second.iter();
while (a.valid() && b.valid()) {
final int cmp = DBIDUtil.compare(a, b);
if (cmp == 0) {
// Present in both: keep it and advance both cursors.
intersection.add(a);
a.advance();
b.advance();
} else if (cmp < 0) {
a.advance();
} else {
b.advance();
}
}
return intersection;
}
Example usage of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in the ELKI project (elki-project): class OPTICSCut, method makeOPTICSCut.
/**
 * Compute an OPTICS cut clustering
 *
 * Performs a single scan of the cluster order: consecutive points whose
 * reachability is at most epsilon form one cluster; when the reachability
 * plot rises above epsilon, the current cluster is closed and the point is
 * collected as noise (it may later be moved into the next cluster, see below).
 *
 * @param co Cluster order result
 * @param epsilon Epsilon value for cut
 * @return New partitioning clustering
 */
public static <E extends ClusterOrder> Clustering<Model> makeOPTICSCut(E co, double epsilon) {
// Clustering model we are building
Clustering<Model> clustering = new Clustering<>("OPTICS Cut Clustering", "optics-cut");
// Collects noise elements
ModifiableDBIDs noise = DBIDUtil.newHashSet();
// Reachability of the previous and current point; MAX_VALUE so the very
// first point is treated as following an "above epsilon" gap.
double lastDist = Double.MAX_VALUE;
double actDist = Double.MAX_VALUE;
// Current working set
ModifiableDBIDs current = DBIDUtil.newHashSet();
// TODO: can we implement this more nicely with a 1-lookahead?
// prev trails the iterator by one position: it is updated in the
// for-update clause below, before it.advance().
DBIDVar prev = DBIDUtil.newVar();
for (DBIDIter it = co.iter(); it.valid(); prev.set(it), it.advance()) {
lastDist = actDist;
actDist = co.getReachability(it);
if (actDist <= epsilon) {
// the last element before the plot drops belongs to the cluster
if (lastDist > epsilon && prev.isSet()) {
// So un-noise it
noise.remove(prev);
// Add it to the cluster
current.add(prev);
}
current.add(it);
} else {
// 'Finish' the previous cluster
if (!current.isEmpty()) {
// TODO: do we want a minpts restriction?
// But we get have only core points guaranteed anyway.
clustering.addToplevelCluster(new Cluster<Model>(current, ClusterModel.CLUSTER));
current = DBIDUtil.newHashSet();
}
// Add to noise
noise.add(it);
}
}
// Any unfinished cluster will also be added
if (!current.isEmpty()) {
clustering.addToplevelCluster(new Cluster<Model>(current, ClusterModel.CLUSTER));
}
// Add noise (flagged as the noise cluster)
clustering.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
return clustering;
}
Aggregations