Search in sources:

Example 6 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

In class NearestNeighborAffinityMatrixBuilder, method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 * <p>
 * For every object, the nearest neighbors are queried, converted into the
 * scratch arrays, and {@code computeSigma} fills a freshly allocated row
 * {@code pij[offset]} (one entry per collected distance). The matching
 * neighbor offsets are stored in {@code indices[offset]}. Afterwards all
 * entries are scaled by {@code initialScale / (2 * sum)}, where {@code sum}
 * is the total of all row entries, and entries whose reverse entry exists
 * are symmetrized. Every resulting value is at least {@code MIN_PIJ}.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of distances
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    // Both statistics helpers are only allocated when statistics logging is
    // enabled at entry; everything below checks them for null.
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        // One more than requested; presumably because the query object itself
        // is part of the result and gets dropped in convertNeighbors (TODO confirm).
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds);
        final int off = ix.getOffset();
        // Allocate the output row with one slot per collected distance.
        final double[] row = pij[off] = new double[dists.size()];
        double beta = computeSigma(off, dists, perplexity, logPerp, row);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[off] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int j = 0; j < pij_i.length; j++) {
            sum += pij_i[j];
        }
    }
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            // Position of i in the neighbor list of j, negative if absent.
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry: the pair is handled once, from the smaller
                // index, and written to both rows.
                if (i < j) {
                    // Symmetrize
                    final double val = pij_i[offi] + pij[j][offj];
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found
                // TODO: the original code produces a symmetric matrix
                // And it will now not sum to EARLY_EXAGGERATION anymore.
                pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
            }
        }
    }
    // Guard on the objects themselves rather than re-querying the log level:
    // if statistics logging was switched on after the helpers were (not)
    // created, re-checking LOG.isStatistics() would dereference null.
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    if (mv != null) {
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)

Example 7 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

In class ClassifierHoldoutEvaluationTask, method run.

/**
 * Run the holdout evaluation: load the data, then for every partition build
 * a database from the training part, train the classifier, classify the
 * test part, and accumulate the outcome in a confusion matrix that is
 * logged at the end. Load time, total time and the per-fold phases are
 * logged as duration statistics.
 */
@Override
public void run() {
    // Loading and holdout initialization are timed separately from the rest.
    final Duration loadTime = LOG.newDuration("evaluation.time.load").begin();
    final MultipleObjectsBundle allData = databaseConnection.loadData();
    holdout.initialize(allData);
    LOG.statistics(loadTime.end());

    final Duration totalTime = LOG.newDuration("evaluation.time.total").begin();
    final ArrayList<ClassLabel> labels = holdout.getLabels();
    final int numLabels = labels.size();
    // First index: predicted label position, second index: true label position.
    final int[][] confusion = new int[numLabels][numLabels];
    final String keyBase = this.getClass().getName() + ".fold-";
    for (int fold = 0; fold < holdout.numberOfPartitions(); fold++) {
        final String foldKey = keyBase + (fold + 1);
        final TrainingAndTestSet partition = holdout.nextPartitioning();

        // Load the training data into a database structure (for indexing)
        Duration phase = LOG.newDuration(foldKey + ".init.time").begin();
        final Database db = new StaticArrayDatabase(new MultipleObjectsBundleDatabaseConnection(partition.getTraining()), indexFactories);
        db.initialize();
        LOG.statistics(phase.end());

        // Train the classifier on the class label relation
        phase = LOG.newDuration(foldKey + ".train.time").begin();
        final Relation<ClassLabel> labelRelation = db.getRelation(TypeUtil.CLASSLABEL);
        algorithm.buildClassifier(db, labelRelation);
        LOG.statistics(phase.end());

        // Evaluate the test set
        phase = LOG.newDuration(foldKey + ".evaluation.time").begin();
        // FIXME: this part is still a big hack, unfortunately!
        final MultipleObjectsBundle test = partition.getTest();
        final int labelColumn = AbstractHoldout.findClassLabelColumn(test);
        // The data column is taken to be whichever of the first two columns
        // is not the label column.
        final int dataColumn = (labelColumn == 0) ? 1 : 0;
        final int testSize = test.dataLength();
        for (int row = 0; row < testSize; ++row) {
            @SuppressWarnings("unchecked") O obj = (O) test.data(row, dataColumn);
            final ClassLabel trueLabel = (ClassLabel) test.data(row, labelColumn);
            final ClassLabel predictedLabel = algorithm.classify(obj);
            // NOTE(review): binarySearch yields a negative value for a label
            // missing from the list, which would fail on the array access
            // below - presumably labels is sorted and complete; verify.
            final int predicted = Collections.binarySearch(labels, predictedLabel);
            final int actual = Collections.binarySearch(labels, trueLabel);
            confusion[predicted][actual]++;
        }
        LOG.statistics(phase.end());
    }
    LOG.statistics(totalTime.end());
    final ConfusionMatrix matrix = new ConfusionMatrix(labels, confusion);
    LOG.statistics(matrix.toString());
}
Also used : ConfusionMatrix(de.lmu.ifi.dbs.elki.evaluation.classification.ConfusionMatrix) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) TrainingAndTestSet(de.lmu.ifi.dbs.elki.evaluation.classification.holdout.TrainingAndTestSet) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) Database(de.lmu.ifi.dbs.elki.database.Database) MultipleObjectsBundleDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.MultipleObjectsBundleDatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 8 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

In class StaticArrayDatabase, method initialize.

/**
 * Initialize the database by getting the initial data from the database
 * connection.
 * <p>
 * Does nothing when no database connection is set. Otherwise the data is
 * loaded, the connection reference is cleared (so the load runs at most
 * once), one relation is materialized per bundle column, matching index
 * factories are instantiated on each relation, and finally an insertion
 * event is fired for all ids.
 */
@Override
public void initialize() {
    if (databaseConnection == null) {
        // Nothing to load (e.g. already initialized).
        return;
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("Loading data from database connection.");
    }
    final MultipleObjectsBundle bundle = databaseConnection.loadData();
    // Run at most once.
    databaseConnection = null;

    // Find DBIDs for bundle: generate a fresh range when the bundle has
    // none, reuse static array ids directly, otherwise wrap as unmodifiable.
    final DBIDs bundleIds = bundle.getDBIDs();
    if (bundleIds == null) {
        this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    } else if (bundleIds instanceof ArrayStaticDBIDs) {
        this.ids = (ArrayStaticDBIDs) bundleIds;
    } else {
        this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bundleIds);
    }

    // Replace id representation (it would be nicer if we would not need
    // DBIDView at all)
    this.idrep = new DBIDView(this.ids);
    relations.add(this.idrep);
    getHierarchy().add(this, idrep);

    // One iterator is reused for all columns; it is rewound per column.
    final DBIDArrayIter iter = this.ids.iter();
    final int columns = bundle.metaLength();
    for (int col = 0; col < columns; col++) {
        @SuppressWarnings("unchecked") final SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) bundle.meta(col);
        final WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
        // Copy this column of the bundle into the store, row by row.
        iter.seek(0);
        while (iter.valid()) {
            store.put(iter, bundle.data(iter.getOffset(), col));
            iter.advance();
        }
        Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
        relations.add(relation);
        getHierarchy().add(this, relation);

        // Try to add indexes where appropriate
        for (IndexFactory<?, ?> factory : indexFactories) {
            if (!factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
                continue;
            }
            @SuppressWarnings("unchecked") final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
            @SuppressWarnings("unchecked") final Relation<Object> orep = (Relation<Object>) relation;
            final Index index = ofact.instantiate(orep);
            // Only the index initialization itself is timed, and only when
            // statistics logging is enabled.
            final Duration duration = LOG.isStatistics() ? LOG.newDuration(index.getClass().getName() + ".construction").begin() : null;
            index.initialize();
            if (duration != null) {
                LOG.statistics(duration.end());
            }
            getHierarchy().add(relation, index);
        }
    }
    // fire insertion event
    eventManager.fireObjectsInserted(ids);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) IndexFactory(de.lmu.ifi.dbs.elki.index.IndexFactory) DBIDView(de.lmu.ifi.dbs.elki.database.relation.DBIDView)

Example 9 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

In class ComputeKNNOutlierScores, method runForEachK.

/**
 * Iterate over the k range, running and timing the given runner for each
 * k, and passing every non-null result to the output function under a
 * label of the form {@code prefix-k} with k zero-padded to a fixed width.
 * Returns immediately (after a verbose message) if the prefix is disabled.
 *
 * @param prefix Prefix string
 * @param startk Start k
 * @param stepk Step k
 * @param maxk Max k
 * @param runner Runner to run
 * @param out Output function
 */
private void runForEachK(String prefix, int startk, int stepk, int maxk, IntFunction<OutlierResult> runner, BiConsumer<String, OutlierResult> out) {
    if (isDisabled(prefix)) {
        // Disabled
        LOG.verbose("Skipping (disabled): " + prefix);
        return;
    }
    LOG.verbose("Running " + prefix);
    // Pad k to the number of decimal digits derived from maxk.
    final int width = (int) FastMath.ceil(FastMath.log10(maxk + 1));
    final String labelFormat = "%s-%0" + width + "d";
    final String keyBase = this.getClass().getCanonicalName() + "." + prefix + ".k";
    for (int k = startk; k <= maxk; k += stepk) {
        final Duration runtime = LOG.newDuration(keyBase + k + ".runtime").begin();
        final OutlierResult result = runner.apply(k);
        LOG.statistics(runtime.end());
        if (result == null) {
            // Nothing to report for this k.
            continue;
        }
        final String label = String.format(Locale.ROOT, labelFormat, prefix, k);
        out.accept(label, result);
        // Detach the result from its hierarchy once it has been consumed.
        result.getHierarchy().removeSubtree(result);
    }
}
Also used : OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Example 10 with Duration

use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.

In class GaussianAffinityMatrixBuilder, method buildDistanceMatrix.

/**
 * Build a distance matrix of squared distances.
 * <p>
 * Only pairs with the second offset above the first are computed; each
 * value is mirrored into both halves of the matrix, and the diagonal keeps
 * its initial zeros. If the distance function does not already report
 * squared values, the distances are squared here.
 *
 * @param ids DBIDs
 * @param dq Distance query
 * @return Distance matrix
 */
protected double[][] buildDistanceMatrix(ArrayDBIDs ids, DistanceQuery<?> dq) {
    final int size = ids.size();
    final double[][] dmat = new double[size][size];
    // Square here only when the distance function has not done so already.
    final boolean square = !dq.getDistanceFunction().isSquared();
    // Progress counts pairs: size * (size - 1) / 2.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing distance matrix", (size * (size - 1)) >>> 1, LOG) : null;
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.distancematrix").begin() : null;
    DBIDArrayIter ix = ids.iter(), iy = ids.iter();
    for (ix.seek(0); ix.valid(); ix.advance()) {
        final int x = ix.getOffset();
        final double[] rowX = dmat[x];
        // Upper triangle only; mirror each value into the lower triangle.
        for (iy.seek(x + 1); iy.valid(); iy.advance()) {
            final int y = iy.getOffset();
            final double dist = dq.distance(ix, iy);
            final double value = square ? (dist * dist) : dist;
            rowX[y] = value;
            dmat[y][x] = value;
        }
        if (prog != null) {
            // Number of pairs finished once the first `row` rows are done.
            int row = x + 1;
            prog.setProcessed(row * size - ((row * (row + 1)) >>> 1), LOG);
        }
    }
    LOG.ensureCompleted(prog);
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    return dmat;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Aggregations

Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration): 20; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 13; DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic): 5; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 4; LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic): 4; MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 4; BitVector (de.lmu.ifi.dbs.elki.data.BitVector): 3; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 3; DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList): 3; MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 3; FrequentItemsetsResult (de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult): 3; AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 3; ArrayList (java.util.ArrayList): 3; ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 2; DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 2; Index (de.lmu.ifi.dbs.elki.index.Index): 2; DoubleArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray): 2; IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray): 2; AbstractAlgorithm (de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm): 1; Algorithm (de.lmu.ifi.dbs.elki.algorithm.Algorithm): 1