Search in sources :

Example 6 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class AbstractDistributionTest method load.

/**
 * Load a gzip-compressed test data resource into {@code data}.
 *
 * Every line returned by the reader is expected to start with a key token,
 * followed by numeric tokens; the numbers are stored as a {@code double[]}
 * under that key. Lines matching {@code ^\s*#.*} are presumably treated as
 * comments by the reader (see {@code nextLineExceptComments}).
 *
 * @param name Resource name, looked up via {@code getClass().getResourceAsStream}
 */
protected void load(String name) {
    data = new HashMap<>();
    // getResourceAsStream returns null (rather than throwing) for a missing
    // resource; report that explicitly instead of failing with a bare
    // NullPointerException from the GZIPInputStream constructor.
    final InputStream raw = getClass().getResourceAsStream(name);
    if (raw == null) {
        fail("Cannot load data: resource not found: " + name);
        return;
    }
    // The raw stream is a resource of its own, so that it is closed even if
    // reading the gzip header fails.
    try (InputStream res = raw;
        InputStream in = new GZIPInputStream(res);
        TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
        Tokenizer t = reader.getTokenizer();
        // Reused buffer for the values of the current line.
        DoubleArray buf = new DoubleArray();
        reader.reset(in);
        while (reader.nextLineExceptComments()) {
            // The first token of each line is the key.
            assertTrue(t.valid());
            String key = t.getStrippedSubstring();
            buf.clear();
            // All remaining tokens on the line are values.
            for (t.advance(); t.valid(); t.advance()) {
                buf.add(t.getDouble());
            }
            data.put(key, buf.toArray());
        }
    } catch (IOException e) {
        // Keep the resource name and the cause in the failure message.
        fail("Cannot load data: " + name + ": " + e);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) IOException(java.io.IOException) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)

Example 7 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class IntrinsicNearestNeighborAffinityMatrixBuilder method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * For every object, a row of affinities is computed from its nearest
 * neighbors via {@code computeSigma}. Afterwards the rows are symmetrized:
 * pairs that are in each other's neighbor lists get the scaled geometric
 * mean {@code sqrt(p_ij * p_ji)} (bounded below by {@code MIN_PIJ}), all
 * other entries are set to zero.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output: sparse affinities; each row is allocated here
 * @param indices Output: neighbor indexes belonging to the entries of each
 *        row of {@code pij}
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    Mean mid = LOG.isStatistics() ? new Mean() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        // One more than requested; presumably because the query object itself
        // is part of its own kNN result - confirm against convertNeighbors.
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
        final int off = ix.getOffset();
        // Allocate the output row; computeSigma fills it in.
        final double[] row = pij[off] = new double[dists.size()];
        double beta = computeSigma(off, dists, perplexity, logPerp, row);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[off] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (mid != null) {
        // Use getName(): concatenating the Class object itself would prefix the
        // statistics key with "class ".
        LOG.statistics(new DoubleStatistic(getClass().getName() + ".average-original-id", mid.getMean()));
    }
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            if (j > i) {
                // Exploit symmetry.
                continue;
            }
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
            }
        }
    }
    // Each mutual pair was counted once above, hence the factor 2.
    // NOTE(review): if there is no mutual pair, sum is 0 and the scale is not
    // finite - confirm whether callers can run into this.
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry: both entries of a pair are written while
                // processing the smaller index, so nothing to do for i > j.
                if (i < j) {
                    // Symmetrize
                    final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found, so zero.
                pij_i[offi] = 0;
            }
        }
    }
    // Guard on the objects themselves rather than re-querying the logger, so
    // that a log level change during the computation cannot cause a
    // NullPointerException here.
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    if (mv != null) {
        // NOTE(review): these keys use NearestNeighborAffinityMatrixBuilder,
        // while the keys above use the runtime class - confirm this is intended.
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)

Aggregations

DoubleArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)7 TokenizedReader (de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader)3 Tokenizer (de.lmu.ifi.dbs.elki.utilities.io.Tokenizer)3 InputStream (java.io.InputStream)3 GZIPInputStream (java.util.zip.GZIPInputStream)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)2 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)2 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)2 IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)2 IOException (java.io.IOException)2 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 Mean (de.lmu.ifi.dbs.elki.math.Mean)1 AbstractDistributionTest (de.lmu.ifi.dbs.elki.math.statistics.distribution.AbstractDistributionTest)1 Test (org.junit.Test)1