
Example 11 with Duration

Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The class IntrinsicNearestNeighborAffinityMatrixBuilder, method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output array for the nearest-neighbor affinities
 * @param indices Output array for the neighbor indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    Mean mid = LOG.isStatistics() ? new Mean() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
        double beta = computeSigma(ix.getOffset(), dists, perplexity, logPerp, //
                pij[ix.getOffset()] = new double[dists.size()]);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[ix.getOffset()] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (mid != null) {
        LOG.statistics(new DoubleStatistic(getClass().getName() + ".average-original-id", mid.getMean()));
    }
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            if (j > i) {
                // Exploit symmetry.
                continue;
            }
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
            }
        }
    }
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry:
                if (i < j) {
                    // Symmetrize
                    final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found, so zero.
                pij_i[offi] = 0;
            }
        }
    }
    if (LOG.isStatistics()) {
        // timer != null, mv != null
        LOG.statistics(timer.end());
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)
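
The computeSigma and containsIndex calls above are helpers of the enclosing affinity-matrix builder and are not reproduced on this page. For orientation, here is a minimal sketch of what containsIndex presumably does (an assumption for illustration, not the actual ELKI implementation): a plain linear scan over the short neighbor index array.

// Hypothetical sketch only: returns the offset of value i in the neighbor
// index array, or -1 if i is not among the neighbors. A linear scan is
// assumed to be adequate because the arrays hold only a few dozen entries.
private static int containsIndex(int[] is, int i) {
    for (int off = 0; off < is.length; off++) {
        if (is[off] == i) {
            // Found: return the offset, as used for pij[j][offj] above.
            return off;
        }
    }
    // Not found: the pair (i, j) is not mutually connected.
    return -1;
}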

Example 12 with Duration

Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The class AbstractRangeQueryNeighborPredicate, method preprocess.

/**
 * Perform the preprocessing step.
 *
 * @param modelcls Class of models
 * @param relation Data relation
 * @param query Range query
 * @return Precomputed models
 */
public DataStore<M> preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query) {
    WritableDataStore<M> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, modelcls);
    Duration time = getLogger().newDuration(this.getClass().getName() + ".preprocessing-time").begin();
    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), getLogger()) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DoubleDBIDList neighbors = query.getRangeForDBID(iditer, epsilon);
        storage.put(iditer, computeLocalModel(iditer, neighbors, relation));
        getLogger().incrementProcessed(progress);
    }
    getLogger().ensureCompleted(progress);
    getLogger().statistics(time.end());
    return storage;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)
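
Stripped of the neighborhood logic, the Duration usage in this example reduces to the begin/end pattern sketched below. The key string and doPreprocessing() are placeholders for illustration, not actual ELKI identifiers; only newDuration(), begin(), end() and statistics() are taken from the example above.

// Minimal sketch of the timing pattern, assuming a hypothetical doPreprocessing():
Duration time = getLogger().newDuration(this.getClass().getName() + ".preprocessing-time").begin();
// ... the expensive work goes here, e.g. the range-query loop above ...
doPreprocessing();
// end() stops the timer; the logger records the elapsed time as a statistic.
getLogger().statistics(time.end());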

Example 13 with Duration

Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The class COPACNeighborPredicate, method instantiate.

/**
 * Full instantiation method.
 *
 * @param database Database
 * @param relation Vector relation
 * @return Instance
 */
public COPACNeighborPredicate.Instance instantiate(Database database, Relation<V> relation) {
    DistanceQuery<V> dq = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<V> knnq = database.getKNNQuery(dq, settings.k);
    WritableDataStore<COPACModel> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, COPACModel.class);
    Duration time = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), LOG) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DoubleDBIDList ref = knnq.getKNNForDBID(iditer, settings.k);
        storage.put(iditer, computeLocalModel(iditer, ref, relation));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    LOG.statistics(time.end());
    return new Instance(relation.getDBIDs(), storage);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 14 with Duration

Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The class ERiCNeighborPredicate, method instantiate.

/**
 * Full instantiation method.
 *
 * @param database Database
 * @param relation Relation
 * @return Instance
 */
public Instance instantiate(Database database, Relation<V> relation) {
    DistanceQuery<V> dq = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<V> knnq = database.getKNNQuery(dq, settings.k);
    WritableDataStore<PCAFilteredResult> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
    PCARunner pca = settings.pca;
    EigenPairFilter filter = settings.filter;
    Duration time = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), LOG) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DoubleDBIDList ref = knnq.getKNNForDBID(iditer, settings.k);
        PCAResult pcares = pca.processQueryResult(ref, relation);
        storage.put(iditer, new PCAFilteredResult(pcares.getEigenPairs(), filter.filter(pcares.getEigenvalues()), 1., 0.));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    LOG.statistics(time.end());
    return new Instance(relation.getDBIDs(), storage, relation);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 15 with Duration

Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

The class APRIORI, method run.

/**
 * Performs the APRIORI algorithm on the given database.
 *
 * @param relation the Relation to process
 * @return the FrequentItemsetsResult learned by this APRIORI
 */
public FrequentItemsetsResult run(Relation<BitVector> relation) {
    DBIDs ids = relation.getDBIDs();
    List<Itemset> solution = new ArrayList<>();
    final int size = ids.size();
    final int needed = getMinimumSupport(size);
    // TODO: we don't strictly require a vector field.
    // We could work with knowing just the maximum dimensionality beforehand.
    VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
    if (size > 0) {
        final int dim = meta.getDimensionality();
        Duration timeone = LOG.newDuration(STAT + "1-items.time").begin();
        List<OneItemset> oneitems = buildFrequentOneItemsets(relation, dim, needed);
        LOG.statistics(timeone.end());
        if (LOG.isStatistics()) {
            LOG.statistics(new LongStatistic(STAT + "1-items.frequent", oneitems.size()));
            LOG.statistics(new LongStatistic(STAT + "1-items.transactions", ids.size()));
        }
        if (LOG.isDebuggingFine()) {
            LOG.debugFine(debugDumpCandidates(new StringBuilder(), oneitems, meta));
        }
        if (minlength <= 1) {
            solution.addAll(oneitems);
        }
        if (oneitems.size() >= 2 && maxlength >= 2) {
            Duration timetwo = LOG.newDuration(STAT + "2-items.time").begin();
            ArrayModifiableDBIDs survivors = DBIDUtil.newArray(ids.size());
            List<? extends Itemset> candidates = buildFrequentTwoItemsets(oneitems, relation, dim, needed, ids, survivors);
            // Continue with reduced set of transactions.
            ids = survivors;
            LOG.statistics(timetwo.end());
            if (LOG.isStatistics()) {
                LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
                LOG.statistics(new LongStatistic(STAT + "2-items.transactions", ids.size()));
            }
            if (LOG.isDebuggingFine()) {
                LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
            }
            if (minlength <= 2) {
                solution.addAll(candidates);
            }
            for (int length = 3; length <= maxlength && candidates.size() >= length; length++) {
                Duration timel = LOG.newDuration(STAT + length + "-items.time").begin();
                // Join to get the new candidates
                candidates = aprioriGenerate(candidates, length, dim);
                if (LOG.isDebuggingFinest()) {
                    LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
                }
                survivors = DBIDUtil.newArray(ids.size());
                candidates = frequentItemsets(candidates, relation, needed, ids, survivors, length);
                // Continue with reduced set of transactions.
                ids = survivors;
                LOG.statistics(timel.end());
                if (LOG.isStatistics()) {
                    LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
                    LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", ids.size()));
                }
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
                }
                solution.addAll(candidates);
            }
        }
    }
    return new FrequentItemsetsResult("APRIORI", "apriori", solution, meta, size);
}
Also used : BitVector(de.lmu.ifi.dbs.elki.data.BitVector) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) FrequentItemsetsResult(de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)
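
Note the contrast with Example 11: APRIORI creates its per-level Duration objects unconditionally and only guards the extra LongStatistic counters with LOG.isStatistics(). A null-safe variant that allocates the timer only when statistics logging is enabled, distilled from these two examples (the loop-body comment stands in for the real work), could look as follows:

// Guarded variant: skip timer allocation entirely when statistics are disabled.
Duration timel = LOG.isStatistics() ? LOG.newDuration(STAT + length + "-items.time").begin() : null;
// ... candidate generation and support counting for this level ...
if (timel != null) {
    LOG.statistics(timel.end());
    LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
}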

Aggregations

Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration): 20 uses
FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 13 uses
DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 5 uses
DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 4 uses
LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic): 4 uses
MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 4 uses
BitVector (de.lmu.ifi.dbs.elki.data.BitVector): 3 uses
DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 3 uses
DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList): 3 uses
MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 3 uses
FrequentItemsetsResult (de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult): 3 uses
AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 3 uses
ArrayList (java.util.ArrayList): 3 uses
ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 2 uses
DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 2 uses
Index (de.lmu.ifi.dbs.elki.index.Index): 2 uses
DoubleArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray): 2 uses
IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray): 2 uses
AbstractAlgorithm (de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm): 1 use
Algorithm (de.lmu.ifi.dbs.elki.algorithm.Algorithm): 1 use