Usage of de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult in project elki (elki-project):
class FPGrowthTest, method testLarge.
@Test
public void testLarge() {
  // Load the 16401-transaction grocery data set used by the itemset mining tests.
  Database db = loadTransactions(UNITTEST + "itemsets/zutaten.txt.gz", 16401);
  // Configure FP-Growth with an absolute minimum support of 200 transactions.
  FPGrowth algorithm = new ELKIBuilder<>(FPGrowth.class) //
      .with(FPGrowth.Parameterizer.MINSUPP_ID, 200) //
      .build();
  FrequentItemsetsResult result = algorithm.run(db);
  // Exactly 184 itemsets are expected to meet the support threshold.
  assertEquals("Size not as expected.", 184, result.getItemsets().size());
}
Usage of de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult in project elki (elki-project):
class DiSHPreferenceVectorIndex, method determinePreferenceVectorByApriori.
/**
 * Determines the preference vector with the apriori strategy: each object's
 * per-dimension neighborhood memberships are encoded as one BitVector
 * "transaction", APRIORI is run on this transaction database, and the
 * largest frequent itemset found yields the preferred dimensions.
 *
 * @param relation the database storing the objects
 * @param neighborIDs the list of ids of the neighbors in each dimension
 * @param msg a string buffer for debug messages, or {@code null} to suppress debug output
 * @return the preference vector, as a bit set over the dimensions
 */
private long[] determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) {
int dimensionality = neighborIDs.length;
// Temporary transaction database for apriori: one BitVector per object,
// with bit d set iff the object lies in the neighborhood of dimension d.
UpdatableDatabase apriori_db = new HashmapDatabase();
SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.typeRequest(BitVector.class, dimensionality, dimensionality);
for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
long[] bits = BitsUtil.zero(dimensionality);
boolean allFalse = true;
for (int d = 0; d < dimensionality; d++) {
if (neighborIDs[d].contains(it)) {
BitsUtil.setI(bits, d);
allFalse = false;
}
}
// All-zero transactions cannot contribute to any frequent itemset; skip them.
if (!allFalse) {
SingleObjectBundle oaa = new SingleObjectBundle();
oaa.append(bitmeta, new BitVector(bits, dimensionality));
apriori_db.insert(oaa);
}
}
// Mine frequent dimension combinations; minpts serves as the minimum support.
APRIORI apriori = new APRIORI(minpts);
FrequentItemsetsResult aprioriResult = apriori.run(apriori_db);
// result of apriori
List<Itemset> frequentItemsets = aprioriResult.getItemsets();
if (msg != null) {
msg.append("\n Frequent itemsets: ").append(frequentItemsets);
}
// Pick the frequent itemset with maximum cardinality, tracking its support.
int maxSupport = 0;
int maxCardinality = 0;
long[] preferenceVector = BitsUtil.zero(dimensionality);
for (Itemset itemset : frequentItemsets) {
// NOTE(review): at equal cardinality the current best is replaced only on
// *equal* support (maxSupport == getSupport()); an itemset with strictly
// larger support does not replace it. Confirm this tie-breaking rule is
// intended and not a typo for maxSupport < itemset.getSupport().
if ((maxCardinality < itemset.length()) || (maxCardinality == itemset.length() && maxSupport == itemset.getSupport())) {
preferenceVector = Itemset.toBitset(itemset, BitsUtil.zero(dimensionality));
maxCardinality = itemset.length();
maxSupport = itemset.getSupport();
}
}
if (msg != null) {
// Emit the final preference vector to the debug log.
msg.append("\n preference ").append(//
BitsUtil.toStringLow(preferenceVector, dimensionality)).append('\n');
LOG.debugFine(msg.toString());
}
return preferenceVector;
}
Usage of de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult in project elki (elki-project):
class APRIORI, method run.
/**
 * Performs the APRIORI algorithm on the given database: frequent itemsets
 * are grown level-wise — frequent 1-itemsets first, then 2-itemsets, then
 * k-itemsets for k >= 3 — pruning candidates below the minimum support and
 * shrinking the transaction set to surviving IDs after each level.
 *
 * @param relation the Relation of BitVector transactions to process
 * @return the frequent itemsets found, as a {@code FrequentItemsetsResult}
 */
public FrequentItemsetsResult run(Relation<BitVector> relation) {
DBIDs ids = relation.getDBIDs();
List<Itemset> solution = new ArrayList<>();
final int size = ids.size();
// Absolute support threshold derived from the configured minimum support.
final int needed = getMinimumSupport(size);
// TODO: we don't strictly require a vector field.
// We could work with knowing just the maximum dimensionality beforehand.
VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
if (size > 0) {
final int dim = meta.getDimensionality();
// Level 1: frequent single items.
Duration timeone = LOG.newDuration(STAT + "1-items.time").begin();
List<OneItemset> oneitems = buildFrequentOneItemsets(relation, dim, needed);
LOG.statistics(timeone.end());
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(STAT + "1-items.frequent", oneitems.size()));
LOG.statistics(new LongStatistic(STAT + "1-items.transactions", ids.size()));
}
if (LOG.isDebuggingFine()) {
LOG.debugFine(debugDumpCandidates(new StringBuilder(), oneitems, meta));
}
// Only report levels within the configured [minlength, maxlength] range.
if (minlength <= 1) {
solution.addAll(oneitems);
}
if (oneitems.size() >= 2 && maxlength >= 2) {
// Level 2: frequent item pairs; also records which transactions survive.
Duration timetwo = LOG.newDuration(STAT + "2-items.time").begin();
ArrayModifiableDBIDs survivors = DBIDUtil.newArray(ids.size());
List<? extends Itemset> candidates = buildFrequentTwoItemsets(oneitems, relation, dim, needed, ids, survivors);
// Continue with reduced set of transactions.
ids = survivors;
LOG.statistics(timetwo.end());
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
LOG.statistics(new LongStatistic(STAT + "2-items.transactions", ids.size()));
}
if (LOG.isDebuggingFine()) {
LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
}
if (minlength <= 2) {
solution.addAll(candidates);
}
// Levels k >= 3: generate candidates by joining, then count and prune.
// Stops when fewer than k frequent (k-1)-itemsets remain to join.
for (int length = 3; length <= maxlength && candidates.size() >= length; length++) {
Duration timel = LOG.newDuration(STAT + length + "-items.time").begin();
// Join to get the new candidates
candidates = aprioriGenerate(candidates, length, dim);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
}
survivors = DBIDUtil.newArray(ids.size());
candidates = frequentItemsets(candidates, relation, needed, ids, survivors, length);
// Continue with reduced set of transactions.
ids = survivors;
LOG.statistics(timel.end());
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", ids.size()));
}
if (LOG.isDebuggingFine()) {
LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
}
solution.addAll(candidates);
}
}
}
return new FrequentItemsetsResult("APRIORI", "apriori", solution, meta, size);
}
Usage of de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult in project elki (elki-project):
class FPGrowth, method run.
/**
 * Run the FP-Growth algorithm: count single-item supports, build a compact
 * FP-tree over the frequent items (reordered by support), then recursively
 * extract all frequent patterns from the tree via a callback.
 *
 * @param db Database to process (not referenced in this method body —
 *        presumably kept for API symmetry with other algorithms; confirm)
 * @param relation Bit vector relation holding the transactions
 * @return Frequent patterns found, as a {@code FrequentItemsetsResult}
 */
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
// TODO: implement with resizable array, to not need dim.
final int dim = RelationUtil.dimensionality(relation);
final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
// Compute absolute minsupport
final int minsupp = getMinimumSupport(relation.size());
LOG.verbose("Finding item frequencies for ordering.");
final int[] counts = countItemSupport(relation, dim);
// Forward and backward indexes: iidx maps original item -> compact index,
// idx maps compact index -> original item; infrequent items are dropped.
int[] iidx = new int[dim];
final int[] idx = buildIndex(counts, iidx, minsupp);
final int items = idx.length;
LOG.statistics(new LongStatistic(STAT + "raw-items", dim));
LOG.statistics(new LongStatistic(STAT + "raw-transactions", relation.size()));
LOG.statistics(new DoubleStatistic(STAT + "minsupp-relative", minsupp / (double) relation.size()));
LOG.statistics(new LongStatistic(STAT + "minsupp-absolute", minsupp));
LOG.verbose("Building FP-Tree.");
Duration ctime = LOG.newDuration(STAT + "fp-tree.construction.time").begin();
FPTree tree = buildFPTree(relation, iidx, items);
if (LOG.isStatistics()) {
tree.logStatistics();
}
if (LOG.isDebuggingFinest()) {
// Dump the tree with item labels (falling back to compact indexes).
StringBuilder buf = new StringBuilder(10000).append("FP-tree:\n");
tree.appendTo(buf, new FPNode.Translator() {
@Override
public StringBuilder appendTo(StringBuilder buf, int i) {
String l = meta.getLabel(idx[i]);
return (l != null) ? buf.append(l) : buf.append(i);
}
});
LOG.debugFinest(buf.toString());
}
// Reduce memory usage:
tree.reduceMemory();
LOG.statistics(ctime.end());
LOG.verbose("Extracting frequent patterns.");
Duration etime = LOG.newDuration(STAT + "fp-growth.extraction.time").begin();
final IndefiniteProgress itemp = LOG.isVerbose() ? new IndefiniteProgress("Frequent itemsets", LOG) : null;
final List<Itemset> solution = new ArrayList<>();
// Start extraction with the least frequent items
tree.extract(minsupp, minlength, maxlength, true, new FPTree.Collector() {
@Override
public void collect(int support, int[] data, int start, int plen) {
// Always translate the indexes back to the original values via 'idx'!
// Single-item pattern: use the compact OneItemset representation.
if (plen - start == 1) {
solution.add(new OneItemset(idx[data[start]], support));
LOG.incrementProcessed(itemp);
return;
}
// Copy from buffer to a permanent storage
int[] indices = new int[plen - start];
for (int i = start, j = 0; i < plen; i++) {
// Translate to original items
indices[j++] = idx[data[i]];
}
// Sort the translated item indexes (translation breaks the tree order).
Arrays.sort(indices);
solution.add(new SparseItemset(indices, support));
LOG.incrementProcessed(itemp);
}
});
LOG.setCompleted(itemp);
Collections.sort(solution);
LOG.statistics(etime.end());
LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
return new FrequentItemsetsResult("FP-Growth", "fp-growth", solution, meta, relation.size());
}
Usage of de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult in project elki (elki-project):
class APRIORITest, method testLarge.
@Test
public void testLarge() {
  // Load the 16401-transaction grocery data set used by the itemset mining tests.
  Database db = loadTransactions(UNITTEST + "itemsets/zutaten.txt.gz", 16401);
  // Configure APRIORI with an absolute minimum support of 200 transactions.
  APRIORI algorithm = new ELKIBuilder<>(APRIORI.class) //
      .with(APRIORI.Parameterizer.MINSUPP_ID, 200) //
      .build();
  FrequentItemsetsResult result = algorithm.run(db);
  // Exactly 184 itemsets are expected to meet the support threshold.
  assertEquals("Size not as expected.", 184, result.getItemsets().size());
}
Aggregations (end of collected usage examples).