Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class FPGrowth, method run.
/**
* Run the FP-Growth algorithm
*
* @param db Database to process
* @param relation Bit vector relation
* @return Frequent patterns found
*/
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
  // TODO: implement with resizable array, to not need dim.
  final int dim = RelationUtil.dimensionality(relation);
  final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
  // Compute absolute minsupport
  final int minsupp = getMinimumSupport(relation.size());
  LOG.verbose("Finding item frequencies for ordering.");
  final int[] counts = countItemSupport(relation, dim);
  // Forward and backward indexes
  int[] iidx = new int[dim];
  final int[] idx = buildIndex(counts, iidx, minsupp);
  final int items = idx.length;
  LOG.statistics(new LongStatistic(STAT + "raw-items", dim));
  LOG.statistics(new LongStatistic(STAT + "raw-transactions", relation.size()));
  LOG.statistics(new DoubleStatistic(STAT + "minsupp-relative", minsupp / (double) relation.size()));
  LOG.statistics(new LongStatistic(STAT + "minsupp-absolute", minsupp));
  LOG.verbose("Building FP-Tree.");
  Duration ctime = LOG.newDuration(STAT + "fp-tree.construction.time").begin();
  FPTree tree = buildFPTree(relation, iidx, items);
  if (LOG.isStatistics()) {
    tree.logStatistics();
  }
  if (LOG.isDebuggingFinest()) {
    StringBuilder buf = new StringBuilder(10000).append("FP-tree:\n");
    tree.appendTo(buf, new FPNode.Translator() {
      @Override
      public StringBuilder appendTo(StringBuilder buf, int i) {
        String l = meta.getLabel(idx[i]);
        return (l != null) ? buf.append(l) : buf.append(i);
      }
    });
    LOG.debugFinest(buf.toString());
  }
  // Reduce memory usage:
  tree.reduceMemory();
  LOG.statistics(ctime.end());
  LOG.verbose("Extracting frequent patterns.");
  Duration etime = LOG.newDuration(STAT + "fp-growth.extraction.time").begin();
  final IndefiniteProgress itemp = LOG.isVerbose() ? new IndefiniteProgress("Frequent itemsets", LOG) : null;
  final List<Itemset> solution = new ArrayList<>();
  // Start extraction with the least frequent items
  tree.extract(minsupp, minlength, maxlength, true, new FPTree.Collector() {
    @Override
    public void collect(int support, int[] data, int start, int plen) {
      // Always translate the indexes back to the original values via 'idx'!
      if (plen - start == 1) {
        solution.add(new OneItemset(idx[data[start]], support));
        LOG.incrementProcessed(itemp);
        return;
      }
      // Copy from buffer to permanent storage
      int[] indices = new int[plen - start];
      for (int i = start, j = 0; i < plen; i++) {
        // Translate to original items
        indices[j++] = idx[data[i]];
      }
      Arrays.sort(indices);
      solution.add(new SparseItemset(indices, support));
      LOG.incrementProcessed(itemp);
    }
  });
  LOG.setCompleted(itemp);
  Collections.sort(solution);
  LOG.statistics(etime.end());
  LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
  return new FrequentItemsetsResult("FP-Growth", "fp-growth", solution, meta, relation.size());
}
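Both the construction and the extraction phase above are measured with the same begin/end idiom on Duration. A minimal self-contained sketch of that idiom, using only the Logging API visible in these snippets; the class name and statistic key are hypothetical:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.Duration;

public class TimingSketch {
  private static final Logging LOG = Logging.getLogger(TimingSketch.class);

  public void run() {
    // begin() records the start timestamp and returns the Duration, so it chains.
    Duration time = LOG.newDuration(TimingSketch.class.getName() + ".work.time").begin();
    work();
    // end() records the stop timestamp; logging the ended Duration emits the elapsed time.
    LOG.statistics(time.end());
  }

  private void work() {
    // Placeholder for the code section being measured.
  }
}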
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class Eclat, method run.
/**
* Run the Eclat algorithm
*
* @param db Database to process
* @param relation Bit vector relation
* @return Frequent patterns found
*/
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
  // TODO: implement with resizable arrays, to not need dim.
  final int dim = RelationUtil.dimensionality(relation);
  final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
  // Compute absolute minsupport
  final int minsupp = getMinimumSupport(relation.size());
  LOG.verbose("Build 1-dimensional transaction lists.");
  Duration ctime = LOG.newDuration(STAT + "eclat.transposition.time").begin();
  DBIDs[] idx = buildIndex(relation, dim, minsupp);
  LOG.statistics(ctime.end());
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Building frequent itemsets", idx.length, LOG) : null;
  Duration etime = LOG.newDuration(STAT + "eclat.extraction.time").begin();
  final List<Itemset> solution = new ArrayList<>();
  for (int i = 0; i < idx.length; i++) {
    LOG.incrementProcessed(prog);
    extractItemsets(idx, i, minsupp, solution);
  }
  LOG.ensureCompleted(prog);
  Collections.sort(solution);
  LOG.statistics(etime.end());
  LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
  return new FrequentItemsetsResult("Eclat", "eclat", solution, meta, relation.size());
}
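Note that Eclat creates its Duration objects unconditionally, unlike the isStatistics()-guarded snippets below. The skeleton it uses for a timed loop with progress reporting, reduced to its essentials (n and the loop body are placeholders):

FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing items", n, LOG) : null;
Duration etime = LOG.newDuration(STAT + "extraction.time").begin();
for (int i = 0; i < n; i++) {
  LOG.incrementProcessed(prog); // null-safe, as the snippet above relies on
  // ... per-item work ...
}
LOG.ensureCompleted(prog);
LOG.statistics(etime.end());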
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class MaterializeKNNPreprocessor, method preprocess.
/**
* The actual preprocessing step.
*/
@Override
protected void preprocess() {
  // Could be subclass
  final Logging log = getLogger();
  createStorage();
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  if (log.isStatistics()) {
    log.statistics(new LongStatistic(this.getClass().getName() + ".k", k));
  }
  Duration duration = log.isStatistics() ? log.newDuration(this.getClass().getName() + ".precomputation-time").begin() : null;
  FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), getLogger()) : null;
  // Try bulk
  List<? extends KNNList> kNNList = null;
  if (usebulk) {
    kNNList = knnQuery.getKNNForBulkDBIDs(ids, k);
    if (kNNList != null) {
      int i = 0;
      for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
        storage.put(id, kNNList.get(i));
        log.incrementProcessed(progress);
      }
    }
  } else {
    final boolean ismetric = getDistanceQuery().getDistanceFunction().isMetric();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      if (ismetric && storage.get(iter) != null) {
        log.incrementProcessed(progress);
        // Previously computed (duplicate point?)
        continue;
      }
      KNNList knn = knnQuery.getKNNForDBID(iter, k);
      storage.put(iter, knn);
      if (ismetric) {
        for (DoubleDBIDListIter it = knn.iter(); it.valid() && it.doubleValue() == 0.; it.advance()) {
          // Reuse
          storage.put(it, knn);
        }
      }
      log.incrementProcessed(progress);
    }
  }
  log.ensureCompleted(progress);
  if (duration != null) {
    log.statistics(duration.end());
  }
}
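Here the Duration is only allocated when statistics logging is enabled, so disabled statistics cost neither the object nor the timestamps. The guard pattern in isolation, with a hypothetical statistic key:

// Allocate and start the timer only if statistics output is enabled.
Duration duration = log.isStatistics() ? log.newDuration(getClass().getName() + ".precomputation-time").begin() : null;
// ... the measured work ...
if (duration != null) {
  log.statistics(duration.end());
}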
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class AlgorithmStep, method runAlgorithms.
/**
* Run algorithms.
*
* @param database Database
* @return Algorithm result
*/
public Result runAlgorithms(Database database) {
  ResultHierarchy hier = database.getHierarchy();
  if (LOG.isStatistics()) {
    boolean first = true;
    for (It<Index> it = hier.iterDescendants(database).filter(Index.class); it.valid(); it.advance()) {
      if (first) {
        LOG.statistics("Index statistics before running algorithms:");
        first = false;
      }
      it.get().logStatistics();
    }
  }
  stepresult = new BasicResult("Algorithm Step", "algorithm-step");
  for (Algorithm algorithm : algorithms) {
    Thread.currentThread().setName(algorithm.toString());
    Duration duration = LOG.isStatistics() ? LOG.newDuration(algorithm.getClass().getName() + ".runtime").begin() : null;
    Result res = algorithm.run(database);
    if (duration != null) {
      LOG.statistics(duration.end());
    }
    if (LOG.isStatistics()) {
      boolean first = true;
      for (It<Index> it = hier.iterDescendants(database).filter(Index.class); it.valid(); it.advance()) {
        if (first) {
          LOG.statistics("Index statistics after running algorithm " + algorithm.toString() + ":");
          first = false;
        }
        it.get().logStatistics();
      }
    }
    if (res != null) {
      // Make sure the result is attached, but usually this is a noop:
      hier.add(database, res);
    }
  }
  return stepresult;
}
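Deriving the statistic key from algorithm.getClass().getName() gives every algorithm in the loop its own ".runtime" entry. One caveat: the Duration is only ended when run() returns normally. A hedged variation, not how AlgorithmStep itself does it, that logs the elapsed time even if the algorithm throws:

Duration duration = LOG.isStatistics() ? LOG.newDuration(algorithm.getClass().getName() + ".runtime").begin() : null;
try {
  Result res = algorithm.run(database);
  // ... attach the result as above ...
} finally {
  if (duration != null) {
    LOG.statistics(duration.end()); // emitted even on an exception
  }
}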
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class InputStreamDatabaseConnection, method loadData.
@Override
public MultipleObjectsBundle loadData() {
  // Run parser
  if (LOG.isDebugging()) {
    LOG.debugFine("Invoking parsers.");
  }
  // Streaming parsers may yield to stream filters immediately.
  if (parser instanceof StreamingParser) {
    final StreamingParser streamParser = (StreamingParser) parser;
    streamParser.initStream(in);
    // normalize objects and transform labels
    if (LOG.isDebugging()) {
      LOG.debugFine("Parsing as stream.");
    }
    Duration duration = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".load").begin() : null;
    MultipleObjectsBundle objects = invokeStreamFilters(streamParser).asMultipleObjectsBundle();
    parser.cleanup();
    if (duration != null) {
      LOG.statistics(duration.end());
    }
    return objects;
  } else {
    // For non-streaming parsers, we first parse, then filter
    Duration duration = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".parse").begin() : null;
    MultipleObjectsBundle parsingResult = parser.parse(in);
    parser.cleanup();
    if (duration != null) {
      LOG.statistics(duration.end());
    }
    // normalize objects and transform labels
    if (LOG.isDebugging()) {
      LOG.debugFine("Invoking filters.");
    }
    Duration fduration = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".filter").begin() : null;
    MultipleObjectsBundle objects = invokeBundleFilters(parsingResult);
    if (fduration != null) {
      LOG.statistics(fduration.end());
    }
    return objects;
  }
}
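The non-streaming branch times parsing and filtering as two separate phases, each with its own Duration and key, so their costs appear as distinct statistics. The sequential-phase pattern distilled, with illustrative keys:

Duration parse = LOG.isStatistics() ? LOG.newDuration(getClass().getName() + ".parse").begin() : null;
MultipleObjectsBundle bundle = parser.parse(in);
if (parse != null) {
  LOG.statistics(parse.end());
}
// A second, independent Duration for the next phase:
Duration filter = LOG.isStatistics() ? LOG.newDuration(getClass().getName() + ".filter").begin() : null;
bundle = invokeBundleFilters(bundle);
if (filter != null) {
  LOG.statistics(filter.end());
}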