Search in sources :

Example 1 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class AbstractDistributionEstimatorTest method load.

/**
 * Load a gzip-compressed, space-separated resource into {@code data}.
 * <p>
 * Every non-comment line is expected to start with a key token; all
 * following tokens on that line are parsed as doubles and stored under that
 * key. The test fails if the resource cannot be read.
 *
 * @param name Resource name, resolved relative to AbstractDistributionTest
 */
protected void load(String name) {
    data = new HashMap<>();
    final Pattern separator = Pattern.compile(" ");
    final Pattern commentLine = Pattern.compile("^\\s*#.*");
    try (InputStream stream = new GZIPInputStream(AbstractDistributionTest.class.getResourceAsStream(name));
        TokenizedReader lines = new TokenizedReader(separator, "\"", commentLine)) {
        final Tokenizer tok = lines.getTokenizer();
        lines.reset(stream);
        // Scratch buffer, reused for every line.
        final DoubleArray values = new DoubleArray();
        while (lines.nextLineExceptComments()) {
            // First token of the line is the key.
            assertTrue(tok.valid());
            final String label = tok.getStrippedSubstring();
            values.clear();
            // Remaining tokens are the numeric values.
            tok.advance();
            while (tok.valid()) {
                values.add(tok.getDouble());
                tok.advance();
            }
            data.put(label, values.toArray());
        }
    } catch (IOException e) {
        fail("Cannot load data.");
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) AbstractDistributionTest(de.lmu.ifi.dbs.elki.math.statistics.distribution.AbstractDistributionTest) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) IOException(java.io.IOException) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)

Example 2 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class XYCurve method areaUnderCurve.

/**
 * Integrate a curve using the trapezoidal rule.
 * <p>
 * The curve data is read as interleaved (x, y) pairs. The curve must be
 * <em>monotonously increasing in X</em>, must not start after the curve's
 * minimum X and must not end before the curve's maximum X. You might need to
 * relate the result to the total area of the chart.
 *
 * @param curve Curve
 * @return Area under curve.
 * @throws UnsupportedOperationException if the curve is not monotone or not
 *         complete on X
 */
public static double areaUnderCurve(XYCurve curve) {
    final DoubleArray points = curve.data;
    double lastX = points.get(0);
    double lastY = points.get(1);
    if (lastX > curve.minx) {
        throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
    }
    double total = 0.0;
    int idx = 2;
    while (idx < points.size()) {
        final double x = points.get(idx);
        final double y = points.get(idx + 1);
        if (lastX > x) {
            throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
        }
        // Trapezoid: width times the mean of the two Y values.
        total += (x - lastX) * (lastY + y) * .5;
        lastX = x;
        lastY = y;
        idx += 2;
    }
    if (lastX < curve.maxx) {
        throw new UnsupportedOperationException("Curves must be complete on X for areaUnderCurve to be valid.");
    }
    return total;
}
Also used : DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)

Example 3 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class NearestNeighborAffinityMatrixBuilder method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 * <p>
 * Works in three passes: (1) for every object, query its nearest neighbors
 * and let {@code computeSigma} fill one row of {@code pij}; (2) sum all
 * entries to derive a global scaling factor; (3) rescale the entries,
 * merging pij and pji where both directions are present in the sparse
 * structure.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of distances; each row is allocated here
 * @param indices Output of indexes; each row is allocated here
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    // Timing is only collected when statistics logging is enabled.
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    // Only tracks sigma statistics when statistics logging is enabled.
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        // One extra neighbor is requested; presumably because the query point
        // itself is part of the result - TODO confirm in convertNeighbors.
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds);
        // The pij row for this object is allocated inline (sized by the number
        // of collected distances) and handed to computeSigma as the last
        // argument.
        double beta = computeSigma(// 
        ix.getOffset(), // 
        dists, // 
        perplexity, // 
        logPerp, pij[ix.getOffset()] = new double[dists.size()]);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[ix.getOffset()] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int j = 0; j < pij_i.length; j++) {
            sum += pij_i[j];
        }
    }
    // Global scaling factor applied to every entry below.
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            // offi is the position within row i; j is the neighbor's index.
            int j = indices[i][offi];
            assert (i != j);
            // Look for the reverse entry: is i also listed as a neighbor of j?
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry:
                // each mutual pair is updated once, from the smaller index, and
                // both directions receive the same value.
                if (i < j) {
                    // Symmetrize
                    final double val = pij_i[offi] + pij[j][offj];
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found
                // TODO: the original code produces a symmetric matrix
                // And it will now not sum to EARLY_EXAGGERATION anymore.
                // Only the one-sided entry is rescaled (and bounded below).
                pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
            }
        }
    }
    if (LOG.isStatistics()) {
        // timer != null, mv != null
        LOG.statistics(timer.end());
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)

Example 4 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class AbstractIntrinsicDimensionalityEstimator method estimate.

/**
 * Estimate from the distances returned by a range query around {@code cur}.
 * Zero distances and the query object itself are skipped.
 *
 * @throws ArithmeticException if no non-zero distance remains
 */
@Override
public double estimate(RangeQuery<?> rnq, DBIDRef cur, double range) {
    final DoubleArray distances = new DoubleArray();
    int count = 0;
    DoubleDBIDListIter iter = rnq.getRangeForDBID(cur, range).iter();
    while (iter.valid()) {
        final double dist = iter.doubleValue();
        // Keep only non-zero distances to objects other than the query.
        if (dist != 0. && !DBIDUtil.equal(cur, iter)) {
            distances.add(dist);
            count++;
        }
        iter.advance();
    }
    if (count == 0) {
        throw new ArithmeticException("ID estimation requires non-zero distances.");
    }
    // The buffer is passed as both the data and its adapter.
    return estimate(distances, distances, count);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)

Example 5 with DoubleArray

use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.

the class KernelDensityFittingTest method testFitDoubleArray.

/**
 * The test will load the given data set and perform a Levenberg-Marquardt
 * fitting on a kernelized density estimation. The test evaluates the fitting
 * quality to ensure that the results remain stable and significantly better
 * than traditional estimation.
 * <p>
 * The fit is checked twice: once on the full (sorted) data, and once on the
 * prefix of values below the split value.
 *
 * @throws IOException if the data set cannot be read
 */
@Test
public final void testFitDoubleArray() throws IOException {
    DoubleArray data = new DoubleArray();
    try (InputStream in = new GZIPInputStream(getClass().getResourceAsStream(dataset));
        TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
        Tokenizer t = reader.getTokenizer();
        reader.reset(in);
        while (reader.nextLineExceptComments() && t.valid()) {
            // Read first column only
            data.add(t.getDouble());
        }
    }
    // verify data set size.
    assertEquals("Data set size doesn't match parameters.", realsize, data.size());
    double splitval = 0.5;
    double[] fulldata = data.toArray();
    Arrays.sort(fulldata);
    // Check that the initial parameters match what we were expecting from the
    // data.
    double[] fullparams = estimateInitialParameters(fulldata);
    assertEquals("Full Mean before fitting", 0.4446105, fullparams[0], 0.0001);
    assertEquals("Full Stddev before fitting", 1.4012001, fullparams[1], 0.0001);
    // Do a fit using the full data and check the results are right.
    double[] fullfit = run(fulldata, fullparams);
    assertEquals("Full Mean after fitting", 0.64505, fullfit[0], 0.01);
    assertEquals("Full Stddev after fitting", 1.5227889, fullfit[1], 0.01);
    // Find the first index whose value is not below the split value. The
    // bounds check must come first: otherwise the array is indexed at
    // fulldata.length when every value is below the split value.
    int splitpoint = 0;
    while (splitpoint < fulldata.length && fulldata[splitpoint] < splitval) {
        splitpoint++;
    }
    double[] halfdata = Arrays.copyOf(fulldata, splitpoint);
    // Check that the initial parameters match what we were expecting from the
    // data.
    double[] params = estimateInitialParameters(halfdata);
    assertEquals("Mean before fitting", -0.65723044, params[0], 0.0001);
    assertEquals("Stddev before fitting", 1.0112391, params[1], 0.0001);
    // Do a fit using only part of the data and check the results are right.
    double[] ps = run(halfdata, params);
    assertEquals("Mean after fitting", 0.45980, ps[0], 0.01);
    assertEquals("Stddev after fitting", 1.320427, ps[1], 0.01);
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) Test(org.junit.Test)

Aggregations

DoubleArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)7 TokenizedReader (de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader)3 Tokenizer (de.lmu.ifi.dbs.elki.utilities.io.Tokenizer)3 InputStream (java.io.InputStream)3 GZIPInputStream (java.util.zip.GZIPInputStream)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)2 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)2 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)2 IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)2 IOException (java.io.IOException)2 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 Mean (de.lmu.ifi.dbs.elki.math.Mean)1 AbstractDistributionTest (de.lmu.ifi.dbs.elki.math.statistics.distribution.AbstractDistributionTest)1 Test (org.junit.Test)1