Search in sources :

Example 16 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The method run of the class FPGrowth.

/**
 * Execute FP-Growth on a relation of bit vectors: count item supports, build
 * the FP-tree over the frequent items, then extract all frequent itemsets.
 *
 * @param db Database to process
 * @param relation Bit vector relation
 * @return Frequent patterns found
 */
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
    // TODO: implement with resizable array, to not need dim.
    final int dim = RelationUtil.dimensionality(relation);
    final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
    // Minimum support as an absolute transaction count
    final int minsupp = getMinimumSupport(relation.size());
    LOG.verbose("Finding item frequencies for ordering.");
    final int[] support = countItemSupport(relation, dim);
    // 'inverse' is filled by buildIndex; 'forward' maps tree positions back to
    // the original item numbers.
    final int[] inverse = new int[dim];
    final int[] forward = buildIndex(support, inverse, minsupp);
    final int numItems = forward.length;
    LOG.statistics(new LongStatistic(STAT + "raw-items", dim));
    LOG.statistics(new LongStatistic(STAT + "raw-transactions", relation.size()));
    LOG.statistics(new DoubleStatistic(STAT + "minsupp-relative", minsupp / (double) relation.size()));
    LOG.statistics(new LongStatistic(STAT + "minsupp-absolute", minsupp));

    LOG.verbose("Building FP-Tree.");
    final Duration buildTime = LOG.newDuration(STAT + "fp-tree.construction.time").begin();
    final FPTree tree = buildFPTree(relation, inverse, numItems);
    if (LOG.isStatistics()) {
        tree.logStatistics();
    }
    if (LOG.isDebuggingFinest()) {
        final StringBuilder dump = new StringBuilder(10000).append("FP-tree:\n");
        // Prints the item label when the meta data has one, else the tree index.
        final FPNode.Translator labeler = new FPNode.Translator() {

            @Override
            public StringBuilder appendTo(StringBuilder out, int item) {
                final String label = meta.getLabel(forward[item]);
                if (label == null) {
                    return out.append(item);
                }
                return out.append(label);
            }
        };
        tree.appendTo(dump, labeler);
        LOG.debugFinest(dump.toString());
    }
    // Reduce memory usage:
    tree.reduceMemory();
    LOG.statistics(buildTime.end());

    LOG.verbose("Extracting frequent patterns.");
    final Duration extractTime = LOG.newDuration(STAT + "fp-growth.extraction.time").begin();
    final IndefiniteProgress itemp = LOG.isVerbose() ? new IndefiniteProgress("Frequent itemsets", LOG) : null;
    final List<Itemset> solution = new ArrayList<>();
    final FPTree.Collector collector = new FPTree.Collector() {

        @Override
        public void collect(int support, int[] data, int start, int plen) {
            // Tree positions must always be mapped back through 'forward'.
            final int len = plen - start;
            final Itemset found;
            if (len == 1) {
                found = new OneItemset(forward[data[start]], support);
            } else {
                // The buffer is reused by the caller, so copy (and translate).
                final int[] members = new int[len];
                for (int j = 0; j < len; j++) {
                    members[j] = forward[data[start + j]];
                }
                Arrays.sort(members);
                found = new SparseItemset(members, support);
            }
            solution.add(found);
            LOG.incrementProcessed(itemp);
        }
    };
    // Start extraction with the least frequent items
    tree.extract(minsupp, minlength, maxlength, true, collector);
    LOG.setCompleted(itemp);
    Collections.sort(solution);
    LOG.statistics(extractTime.end());
    LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
    return new FrequentItemsetsResult("FP-Growth", "fp-growth", solution, meta, relation.size());
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayList(java.util.ArrayList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) FrequentItemsetsResult(de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 17 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The method run of the class Eclat.

/**
 * Execute the Eclat algorithm: build per-item transaction lists, then extract
 * the frequent itemsets starting from each item in turn.
 *
 * @param db Database to process
 * @param relation Bit vector relation
 * @return Frequent patterns found
 */
public FrequentItemsetsResult run(Database db, final Relation<BitVector> relation) {
    // TODO: implement with resizable arrays, to not need dim.
    final int dim = RelationUtil.dimensionality(relation);
    final VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
    // Minimum support as an absolute transaction count
    final int minsupp = getMinimumSupport(relation.size());

    LOG.verbose("Build 1-dimensional transaction lists.");
    final Duration transposeTime = LOG.newDuration(STAT + "eclat.transposition.time").begin();
    final DBIDs[] tidLists = buildIndex(relation, dim, minsupp);
    LOG.statistics(transposeTime.end());

    final int numLists = tidLists.length;
    final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Building frequent itemsets", numLists, LOG) : null;
    final Duration extractTime = LOG.newDuration(STAT + "eclat.extraction.time").begin();
    final List<Itemset> solution = new ArrayList<>();
    int start = 0;
    while (start < numLists) {
        LOG.incrementProcessed(prog);
        extractItemsets(tidLists, start, minsupp, solution);
        start++;
    }
    LOG.ensureCompleted(prog);
    Collections.sort(solution);
    LOG.statistics(extractTime.end());
    LOG.statistics(new LongStatistic(STAT + "frequent-itemsets", solution.size()));
    return new FrequentItemsetsResult("Eclat", "eclat", solution, meta, relation.size());
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) FrequentItemsetsResult(de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult)

Example 18 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The method preprocess of the class MaterializeKNNPreprocessor.

/**
 * The actual preprocessing step: materialize the k nearest neighbors of every
 * object of the relation into the storage.
 *
 * If bulk querying is enabled, a single bulk kNN query is attempted first. When
 * bulk querying is disabled, or the bulk query yields no result
 * ({@code null}), each object is queried individually instead, so the storage
 * is never silently left empty.
 */
@Override
protected void preprocess() {
    // Could be subclass
    final Logging log = getLogger();
    createStorage();
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    if (log.isStatistics()) {
        log.statistics(new LongStatistic(this.getClass().getName() + ".k", k));
    }
    Duration duration = log.isStatistics() ? log.newDuration(this.getClass().getName() + ".precomputation-time").begin() : null;
    FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), log) : null;
    // Try bulk
    List<? extends KNNList> kNNList = usebulk ? knnQuery.getKNNForBulkDBIDs(ids, k) : null;
    if (kNNList != null) {
        // Bulk results are consumed positionally, in the iteration order of ids.
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            storage.put(id, kNNList.get(i));
            log.incrementProcessed(progress);
        }
    } else {
        // Bulk disabled, or no bulk result available: query object by object.
        final boolean ismetric = getDistanceQuery().getDistanceFunction().isMetric();
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            if (ismetric && storage.get(iter) != null) {
                log.incrementProcessed(progress);
                // Previously computed (duplicate point?)
                continue;
            }
            KNNList knn = knnQuery.getKNNForDBID(iter, k);
            storage.put(iter, knn);
            if (ismetric) {
                // Leading neighbors at distance 0 get the very same list.
                for (DoubleDBIDListIter it = knn.iter(); it.valid() && it.doubleValue() == 0.; it.advance()) {
                    // Reuse
                    storage.put(it, knn);
                }
            }
            log.incrementProcessed(progress);
        }
    }
    log.ensureCompleted(progress);
    if (duration != null) {
        log.statistics(duration.end());
    }
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 19 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The method runAlgorithms of the class AlgorithmStep.

/**
 * Run all configured algorithms on the database, one after another.
 *
 * When statistics logging is enabled, the statistics of every index found
 * below the database in the result hierarchy are logged before the first
 * algorithm and again after each algorithm, together with each algorithm's
 * runtime.
 *
 * @param database Database
 * @return Algorithm result
 */
public Result runAlgorithms(Database database) {
    final ResultHierarchy hierarchy = database.getHierarchy();
    if (LOG.isStatistics()) {
        // The header line is only printed if there is at least one index.
        boolean needHeader = true;
        It<Index> indexes = hierarchy.iterDescendants(database).filter(Index.class);
        while (indexes.valid()) {
            if (needHeader) {
                LOG.statistics("Index statistics before running algorithms:");
                needHeader = false;
            }
            indexes.get().logStatistics();
            indexes.advance();
        }
    }
    stepresult = new BasicResult("Algorithm Step", "algorithm-step");
    for (Algorithm algorithm : algorithms) {
        Thread.currentThread().setName(algorithm.toString());
        Duration runtime = null;
        if (LOG.isStatistics()) {
            runtime = LOG.newDuration(algorithm.getClass().getName() + ".runtime").begin();
        }
        final Result produced = algorithm.run(database);
        if (runtime != null) {
            LOG.statistics(runtime.end());
        }
        if (LOG.isStatistics()) {
            boolean needHeader = true;
            It<Index> indexes = hierarchy.iterDescendants(database).filter(Index.class);
            while (indexes.valid()) {
                if (needHeader) {
                    LOG.statistics("Index statistics after running algorithm " + algorithm.toString() + ":");
                    needHeader = false;
                }
                indexes.get().logStatistics();
                indexes.advance();
            }
        }
        if (produced == null) {
            continue;
        }
        // Make sure the result is attached, but usually this is a noop:
        hierarchy.add(database, produced);
    }
    return stepresult;
}
Also used : ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) Algorithm(de.lmu.ifi.dbs.elki.algorithm.Algorithm) AbstractAlgorithm(de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm) Result(de.lmu.ifi.dbs.elki.result.Result) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult)

Example 20 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The method loadData of the class InputStreamDatabaseConnection.

/**
 * Load the data from the input stream using the configured parser.
 *
 * A streaming parser is initialized on the stream and handed to the stream
 * filters directly; any other parser first parses the complete input, and the
 * bundle filters are applied to the parsing result afterwards. Timings are
 * logged when statistics logging is enabled.
 *
 * @return the loaded (and filtered) objects
 */
@Override
public MultipleObjectsBundle loadData() {
    // Run parser
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking parsers.");
    }
    if (!(parser instanceof StreamingParser)) {
        // For non-streaming parsers, we first parse, then filter
        Duration parseTime = null;
        if (LOG.isStatistics()) {
            parseTime = LOG.newDuration(this.getClass().getName() + ".parse").begin();
        }
        final MultipleObjectsBundle parsed = parser.parse(in);
        parser.cleanup();
        if (parseTime != null) {
            LOG.statistics(parseTime.end());
        }
        // normalize objects and transform labels
        if (LOG.isDebugging()) {
            LOG.debugFine("Invoking filters.");
        }
        Duration filterTime = null;
        if (LOG.isStatistics()) {
            filterTime = LOG.newDuration(this.getClass().getName() + ".filter").begin();
        }
        final MultipleObjectsBundle filtered = invokeBundleFilters(parsed);
        if (filterTime != null) {
            LOG.statistics(filterTime.end());
        }
        return filtered;
    }
    // Streaming parsers may yield to stream filters immediately.
    final StreamingParser streaming = (StreamingParser) parser;
    streaming.initStream(in);
    // normalize objects and transform labels
    if (LOG.isDebugging()) {
        LOG.debugFine("Parsing as stream.");
    }
    Duration loadTime = null;
    if (LOG.isStatistics()) {
        loadTime = LOG.newDuration(this.getClass().getName() + ".load").begin();
    }
    final MultipleObjectsBundle loaded = invokeStreamFilters(streaming).asMultipleObjectsBundle();
    parser.cleanup();
    if (loadTime != null) {
        LOG.statistics(loadTime.end());
    }
    return loaded;
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Aggregations

Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)20 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)13 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)5 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)4 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)4 BitVector (de.lmu.ifi.dbs.elki.data.BitVector)3 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)3 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)3 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)3 FrequentItemsetsResult (de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 ArrayList (java.util.ArrayList)3 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 Index (de.lmu.ifi.dbs.elki.index.Index)2 DoubleArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)2 IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)2 AbstractAlgorithm (de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm)1 Algorithm (de.lmu.ifi.dbs.elki.algorithm.Algorithm)1