Use of de.lmu.ifi.dbs.elki.data.BitVector in the elki project (elki-project):
the run method of the FPGrowth class.
/**
 * Execute the FP-Growth algorithm on a bit vector relation.
 *
 * @param db Database to process
 * @param relation Bit vector relation
 * @return Frequent patterns found
 */
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
  // TODO: implement with resizable array, to not need dim.
  final int rawdim = RelationUtil.dimensionality(relation);
  final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
  // Translate the relative minimum support into an absolute transaction count.
  final int minsupp = getMinimumSupport(relation.size());
  LOG.verbose("Finding item frequencies for ordering.");
  final int[] supports = countItemSupport(relation, rawdim);
  // Forward and backward indexes between original and frequency-ordered items.
  int[] inverse = new int[rawdim];
  final int[] forward = buildIndex(supports, inverse, minsupp);
  final int numfrequent = forward.length;
  LOG.statistics(new LongStatistic(STAT + "raw-items", rawdim));
  LOG.statistics(new LongStatistic(STAT + "raw-transactions", relation.size()));
  LOG.statistics(new DoubleStatistic(STAT + "minsupp-relative", minsupp / (double) relation.size()));
  LOG.statistics(new LongStatistic(STAT + "minsupp-absolute", minsupp));
  LOG.verbose("Building FP-Tree.");
  Duration buildtime = LOG.newDuration(STAT + "fp-tree.construction.time").begin();
  FPTree tree = buildFPTree(relation, inverse, numfrequent);
  if (LOG.isStatistics()) {
    tree.logStatistics();
  }
  if (LOG.isDebuggingFinest()) {
    StringBuilder buf = new StringBuilder(10000).append("FP-tree:\n");
    tree.appendTo(buf, new FPNode.Translator() {
      @Override
      public StringBuilder appendTo(StringBuilder buf, int i) {
        // Prefer the column label; fall back to the numeric item index.
        String label = meta.getLabel(forward[i]);
        return (label != null) ? buf.append(label) : buf.append(i);
      }
    });
    LOG.debugFinest(buf.toString());
  }
  // Reduce memory usage:
  tree.reduceMemory();
  LOG.statistics(buildtime.end());
  LOG.verbose("Extracting frequent patterns.");
  Duration extracttime = LOG.newDuration(STAT + "fp-growth.extraction.time").begin();
  final IndefiniteProgress progress = LOG.isVerbose() ? new IndefiniteProgress("Frequent itemsets", LOG) : null;
  final List<Itemset> solution = new ArrayList<>();
  // Start extraction with the least frequent items.
  tree.extract(minsupp, minlength, maxlength, true, new FPTree.Collector() {
    @Override
    public void collect(int support, int[] data, int start, int plen) {
      // Always translate the indexes back to the original values via 'forward'!
      if (plen - start == 1) {
        solution.add(new OneItemset(forward[data[start]], support));
        LOG.incrementProcessed(progress);
        return;
      }
      // Copy from the shared extraction buffer into permanent storage.
      final int len = plen - start;
      int[] items = new int[len];
      for (int j = 0; j < len; j++) {
        // Translate to original items.
        items[j] = forward[data[start + j]];
      }
      Arrays.sort(items);
      solution.add(new SparseItemset(items, support));
      LOG.incrementProcessed(progress);
    }
  });
  LOG.setCompleted(progress);
  Collections.sort(solution);
  LOG.statistics(extracttime.end());
  LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
  return new FrequentItemsetsResult("FP-Growth", "fp-growth", solution, meta, relation.size());
}
Use of de.lmu.ifi.dbs.elki.data.BitVector in the elki project (elki-project):
the run method of the Eclat class.
/**
 * Execute the Eclat algorithm on a bit vector relation.
 *
 * @param db Database to process
 * @param relation Bit vector relation
 * @return Frequent patterns found
 */
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
  // TODO: implement with resizable arrays, to not need dim.
  final int dimensionality = RelationUtil.dimensionality(relation);
  final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
  // Translate the relative minimum support into an absolute transaction count.
  final int minsupp = getMinimumSupport(relation.size());
  LOG.verbose("Build 1-dimensional transaction lists.");
  Duration transposetime = LOG.newDuration(STAT + "eclat.transposition.time").begin();
  // One DBID list per item that meets the minimum support.
  DBIDs[] transactions = buildIndex(relation, dimensionality, minsupp);
  LOG.statistics(transposetime.end());
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Building frequent itemsets", transactions.length, LOG) : null;
  Duration extracttime = LOG.newDuration(STAT + "eclat.extraction.time").begin();
  final List<Itemset> solution = new ArrayList<>();
  for (int item = 0; item < transactions.length; item++) {
    LOG.incrementProcessed(progress);
    extractItemsets(transactions, item, minsupp, solution);
  }
  LOG.ensureCompleted(progress);
  Collections.sort(solution);
  LOG.statistics(extracttime.end());
  LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
  return new FrequentItemsetsResult("Eclat", "eclat", solution, meta, relation.size());
}
Use of de.lmu.ifi.dbs.elki.data.BitVector in the elki project (elki-project):
the nextEvent method of the SimpleTransactionParser class.
/**
 * Produce the next streaming event: one {@code NEXT_OBJECT} per input line
 * (materialized into {@link #curvec}), then a final {@code META_CHANGED}
 * carrying the column labels, then {@code END_OF_STREAM}.
 *
 * @return the next stream event
 * @throws IllegalArgumentException if reading the input fails
 */
@Override
public Event nextEvent() {
  // Deliver an event queued by a previous call (END_OF_STREAM after META_CHANGED).
  if (nextevent != null) {
    Event ret = nextevent;
    nextevent = null;
    return ret;
  }
  try {
    while (reader.nextLineExceptComments()) {
      // Don't reuse bitsets, will not be copied by BitVector constructor.
      buf.clear();
      for (; /* initialized by nextLineExceptComments() */
      tokenizer.valid(); tokenizer.advance()) {
        String token = tokenizer.getSubstring();
        // Map each distinct token to a stable column index, assigned in
        // order of first appearance across the whole stream.
        int t = keymap.getInt(token);
        if (t < 0) {
          t = keymap.size();
          keymap.put(token, t);
        }
        // Set bit t in the long buffer: word index and bit offset within the word.
        final int word = t >>> 6;
        final int off = t & 0x3F;
        while (word >= buf.size()) {
          // Ensure size.
          buf.add(0L);
        }
        buf.set(word, buf.getLong(word) | (1L << off));
      }
      // Dimensionality grows as new tokens appear; use the current count.
      curvec = new BitVector(buf.toLongArray(), keymap.size());
      return Event.NEXT_OBJECT;
    }
    nextevent = Event.END_OF_STREAM;
    // Construct final metadata: invert the token->index map into column labels.
    meta = new BundleMeta(1);
    String[] colnames = new String[keymap.size()];
    for (ObjectIterator<Object2IntMap.Entry<String>> iter = keymap.object2IntEntrySet().fastIterator(); iter.hasNext(); ) {
      Object2IntMap.Entry<String> entry = iter.next();
      colnames[entry.getIntValue()] = entry.getKey();
    }
    meta.add(new VectorFieldTypeInformation<>(BitVector.FACTORY, colnames.length, colnames));
    // Force a final meta update.
    return Event.META_CHANGED;
  } catch (IOException e) {
    // Preserve the underlying IOException as the cause; the original code
    // discarded it, losing the root-cause stack trace.
    throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
  }
}
Aggregations