Search in sources:

Example 46 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class KNNBenchmarkAlgorithm method run.

/**
 * Run the kNN benchmark: issue one kNN query per sampled query point and log
 * summary statistics about the results.
 *
 * If no separate query set is configured ({@code queries == null}), the query
 * points are sampled from the relation itself. Otherwise the query data is
 * loaded from {@code queries}, and the first column that is compatible with
 * the distance function's input type restriction is used.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws IncompatibleDataException if the query input has no column that is
 *         compatible with the distance function
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
    // Accumulators shared by both query modes.
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    if (queries == null) {
        // No query set - use original database.
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            KNNList knns = knnQuery.getKNNForDBID(iditer, k);
            hash = Util.mixHashCodes(hash, knnChecksum(knns));
            mv.put(knns.size());
            mvdist.put(knns.getKNNDistance());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    } else {
        // Separate query set: find the first column usable with the distance function.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        // Workaround: sample from a static DBID range of the same length as the
        // bundle, then map each sampled id back to its offset in that range.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            KNNList knns = knnQuery.getKNNForObject(o, k);
            hash = Util.mixHashCodes(hash, knnChecksum(knns));
            mv.put(knns.size());
            mvdist.put(knns.getKNNDistance());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    logQueryStatistics(hash, mv, mvdist);
    return null;
}

/**
 * Sum the integer representations of all ids in a kNN result.
 *
 * @param knns kNN result
 * @return Sum of the integer ids (may wrap around on overflow)
 */
private int knnChecksum(KNNList knns) {
    int checksum = 0;
    for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
        checksum += DBIDUtil.asInteger(it);
    }
    return checksum;
}

/**
 * Log the benchmark summary, if statistics logging is enabled.
 *
 * @param hash Combined hash code of all query results
 * @param mv Statistics of the result sizes
 * @param mvdist Statistics of the k-distances
 */
private void logQueryStatistics(int hash, MeanVariance mv, MeanVariance mvdist) {
    if (!LOG.isStatistics()) {
        return;
    }
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    // Only report k-distances if at least one query was executed.
    if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange)

Example 47 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class CASH method run.

/**
 * Run CASH on the relation.
 *
 * Preprocesses the input, initializes the algorithm state (processed ids and
 * noise dimensionality), runs the clustering, and, if verbose logging is
 * enabled, logs a one-line summary for every resulting cluster.
 *
 * @param database Database
 * @param vrel Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<V> vrel) {
    // Order matters: the later initializations read the preprocessed database.
    fulldatabase = preprocess(database, vrel);
    processedIDs = DBIDUtil.newHashSet(fulldatabase.size());
    noiseDim = dimensionality(fulldatabase);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("CASH Clustering", fulldatabase.size(), LOG) : null;
    Clustering<Model> result = doRun(fulldatabase, progress);
    LOG.ensureCompleted(progress);
    if (LOG.isVerbose()) {
        StringBuilder msg = new StringBuilder(1000);
        for (Cluster<Model> c : result.getAllClusters()) {
            final Model model = c.getModel();
            // Append piecewise instead of concatenating into temporary strings.
            if (model instanceof LinearEquationModel) {
                LinearEquationModel s = (LinearEquationModel) model;
                msg.append("\n Cluster: Dim: ").append(s.getLes().subspacedim()) //
                    .append(" size: ").append(c.size());
            } else {
                msg.append("\n Cluster: ").append(model.getClass().getName()) //
                    .append(" size: ").append(c.size());
            }
        }
        LOG.verbose(msg.toString());
    }
    return result;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) LinearEquationModel(de.lmu.ifi.dbs.elki.data.model.LinearEquationModel) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model)

Example 48 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class BarnesHutTSNE method optimizetSNE.

/**
 * Run the iterative tSNE optimization loop.
 *
 * Each iteration computes the gradient and updates the solution in place.
 * Once the iteration counter equals {@code EARLY_EXAGGERATION_ITERATIONS},
 * the affinities are scaled back by {@code 1 / EARLY_EXAGGERATION}.
 *
 * @param pij Sparse initial affinity matrix
 * @param sol Solution output array (preinitialized)
 * @throws AbortException if the per-point meta data would not fit into a
 *         single Java array
 */
protected void optimizetSNE(AffinityMatrix pij, double[][] sol) {
    final int numPoints = pij.size();
    // Compute the required length in long arithmetic to detect int overflow.
    final long required = numPoints * 3L * dim;
    if (required > 0x7FFF_FFFAL) {
        throw new AbortException("Memory exceeds Java array size limit.");
    }
    // Per-point meta data in one flat array (for memory locality):
    // three consecutive segments of length dim per point, holding
    // gradient, momentum, and learning rate.
    final int stride = dim * 3;
    final int rateOffset = 2 * dim;
    double[] meta = new double[numPoints * 3 * dim];
    // The third segment of every point starts out as 1 (initial learning rate).
    for (int p = 0; p < numPoints; p++) {
        final int start = p * stride + rateOffset;
        Arrays.fill(meta, start, start + dim, 1.);
    }

    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Iterative Optimization", iterations, LOG);
    }
    Duration timer = null;
    if (LOG.isStatistics()) {
        timer = LOG.newDuration(this.getClass().getName() + ".runtime.optimization").begin();
    }

    // Main optimization loop.
    for (int iter = 0; iter < iterations; iter++) {
        computeGradient(pij, sol, meta);
        updateSolution(sol, meta, iter);
        if (iter == EARLY_EXAGGERATION_ITERATIONS) {
            // End of the early exaggeration phase: undo the scaling.
            pij.scale(1. / EARLY_EXAGGERATION);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);

    if (timer != null) {
        LOG.statistics(timer.end());
    }
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 49 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class GaussianAffinityMatrixBuilder method computePij.

/**
 * Compute the pij from the distance matrix.
 *
 * Each row is first filled by {@code computeH}; the matrix is then
 * symmetrized by adding the transposed entries, and scaled by
 * {@code initialScale / (2 * sum)}, where {@code sum} is the total of the
 * symmetrized entries below the diagonal. Scaled values are bounded from
 * below by {@code MIN_PIJ}. The diagonal is not touched by the
 * symmetrization and scaling steps.
 *
 * @param dist Distance matrix.
 * @param sigma Kernel bandwidth sigma
 * @param initialScale Initial scale
 * @return Affinity matrix pij
 */
protected static double[][] computePij(double[][] dist, double sigma, double initialScale) {
    final int size = dist.length;
    final double msigmasq = -.5 / (sigma * sigma);
    double[][] pij = new double[size][size];
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing affinities", size, LOG) : null;
    // Query the statistics flag only once, so that timer and mv are always
    // either both set or both null.
    final boolean stats = LOG.isStatistics();
    Duration timer = stats ? LOG.newDuration(GaussianAffinityMatrixBuilder.class.getName() + ".runtime.pijmatrix").begin() : null;
    MeanVariance mv = stats ? new MeanVariance() : null;
    for (int i = 0; i < size; i++) {
        double logP = computeH(i, dist[i], pij[i], msigmasq);
        if (mv != null) {
            mv.put(FastMath.exp(logP));
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Guard on the objects themselves rather than re-querying the logger: the
    // logging level may have changed since they were (not) created.
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    if (mv != null) {
        LOG.statistics(new DoubleStatistic(GaussianAffinityMatrixBuilder.class.getName() + ".perplexity.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(GaussianAffinityMatrixBuilder.class.getName() + ".perplexity.stddev", mv.getSampleStddev()));
    }
    // Scale pij to have the desired sum EARLY_EXAGGERATION
    double sum = 0.;
    for (int i = 1; i < size; i++) {
        final double[] pij_i = pij[i];
        for (int j = 0; j < i; j++) {
            // Iterate over one half of the matrix only, and symmetrize:
            // the lower triangle receives pij[i][j] + pij[j][i].
            sum += (pij_i[j] += pij[j][i]);
        }
    }
    // Scaling taken from original tSNE code:
    final double scale = initialScale / (2. * sum);
    for (int i = 1; i < size; i++) {
        final double[] pij_i = pij[i];
        for (int j = 0; j < i; j++) {
            // Write the scaled value to both triangles, bounded below by MIN_PIJ.
            pij_i[j] = pij[j][i] = MathUtil.max(pij_i[j] * scale, MIN_PIJ);
        }
    }
    return pij;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Example 50 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class NearestNeighborAffinityMatrixBuilder method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * For every id, a kNN query is performed, the neighbors are converted into
 * distance and index lists, and {@code computeSigma} fills the corresponding
 * row of {@code pij}. Afterwards, the rows are scaled by
 * {@code initialScale / (2 * sum)}, where {@code sum} is the total of all
 * entries; entries whose reverse entry also exists are symmetrized first.
 * Scaled values are bounded from below by {@code MIN_PIJ}.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of distances
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    // Query the statistics flag only once, so that timer and mv are always
    // either both set or both null.
    final boolean stats = LOG.isStatistics();
    Duration timer = stats ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = stats ? new MeanVariance() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        // One extra neighbor is requested; presumably because the query point
        // itself is part of the result (see convertNeighbors).
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds);
        final int off = ix.getOffset();
        // The output row is sized to the number of neighbors actually kept.
        final double[] row = new double[dists.size()];
        pij[off] = row;
        double beta = computeSigma(off, dists, perplexity, logPerp, row);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[off] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int j = 0; j < pij_i.length; j++) {
            sum += pij_i[j];
        }
    }
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            // Position of i in the neighbor list of j; negative if absent.
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry: handle each mutual pair only once.
                if (i < j) {
                    // Symmetrize
                    final double val = pij_i[offi] + pij[j][offj];
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found
                // TODO: the original code produces a symmetric matrix
                // And it will now not sum to EARLY_EXAGGERATION anymore.
                pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
            }
        }
    }
    // Guard on the objects themselves rather than re-querying the logger: the
    // logging level may have changed since they were (not) created.
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    if (mv != null) {
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12