Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.
The class AbstractDistributionEstimatorTest, method load.
protected void load(String name) {
  data = new HashMap<>();
  try (//
      InputStream in = new GZIPInputStream(AbstractDistributionTest.class.getResourceAsStream(name));
      TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
    Tokenizer t = reader.getTokenizer();
    DoubleArray buf = new DoubleArray();
    reader.reset(in);
    // Each non-comment line holds a key token followed by double values.
    while (reader.nextLineExceptComments()) {
      assertTrue(t.valid());
      String key = t.getStrippedSubstring();
      buf.clear();
      for (t.advance(); t.valid(); t.advance()) {
        buf.add(t.getDouble());
      }
      data.put(key, buf.toArray());
    }
  } catch (IOException e) {
    fail("Cannot load data.");
  }
}
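The method reuses a single DoubleArray as a scratch buffer: clear() resets the logical size without discarding the backing array, and toArray() snapshots a right-sized double[] per key. A minimal self-contained sketch of the same pattern, using made-up input lines instead of the gzipped test resource:

import java.util.HashMap;
import java.util.Map;

import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray;

public class ScratchBufferSketch {
  public static void main(String[] args) {
    // Hypothetical input lines: a key followed by double values.
    String[] lines = { "normal 0.1 0.2 0.3", "uniform 1.0 2.0" };
    Map<String, double[]> data = new HashMap<>();
    DoubleArray buf = new DoubleArray(); // one buffer, reused for every line
    for (String line : lines) {
      String[] tok = line.split(" ");
      buf.clear(); // reset the size, keep the allocated backing array
      for (int i = 1; i < tok.length; i++) {
        buf.add(Double.parseDouble(tok[i]));
      }
      data.put(tok[0], buf.toArray()); // snapshot as a right-sized double[]
    }
    System.out.println(data.get("normal").length); // prints 3
  }
}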
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.
The class XYCurve, method areaUnderCurve.
/**
 * Compute the area under curve for a curve
 * <em>monotonically increasing in X</em>. You might need to relate this to
 * the total area of the chart.
 *
 * @param curve Curve
 * @return Area under curve.
 */
public static double areaUnderCurve(XYCurve curve) {
  DoubleArray data = curve.data;
  double prevx = data.get(0), prevy = data.get(1);
  if (prevx > curve.minx) {
    throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
  }
  double area = 0.0;
  // Points are stored as interleaved (x, y) pairs; advance two at a time.
  for (int pos = 2; pos < data.size(); pos += 2) {
    final double curx = data.get(pos), cury = data.get(pos + 1);
    if (prevx > curx) {
      throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
    }
    // Trapezoid rule: .5 * (prevy + cury) is the mean Y of the segment.
    area += (curx - prevx) * (prevy + cury) * .5;
    prevx = curx;
    prevy = cury;
  }
  if (prevx < curve.maxx) {
    throw new UnsupportedOperationException("Curves must be complete on X for areaUnderCurve to be valid.");
  }
  return area;
}
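Since XYCurve stores its points as interleaved (x, y) pairs in a DoubleArray, the loop above is exactly the trapezoid rule. A standalone sketch of the same accumulation on a hand-built DoubleArray (the points are invented for illustration; for y = x on [0, 1] the exact area is 0.5):

import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray;

public class TrapezoidSketch {
  public static void main(String[] args) {
    // Interleaved (x, y) pairs of y = x, sampled at x = 0, 0.5, 1.
    DoubleArray data = new DoubleArray();
    data.add(0.0); data.add(0.0);
    data.add(0.5); data.add(0.5);
    data.add(1.0); data.add(1.0);
    double area = 0.0;
    double prevx = data.get(0), prevy = data.get(1);
    for (int pos = 2; pos < data.size(); pos += 2) {
      final double curx = data.get(pos), cury = data.get(pos + 1);
      area += (curx - prevx) * (prevy + cury) * .5; // one trapezoid slice
      prevx = curx;
      prevy = cury;
    }
    System.out.println(area); // prints 0.5
  }
}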
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.
The class NearestNeighborAffinityMatrixBuilder, method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output matrix of affinities
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    // Query k+1 neighbors, because the query point itself is included.
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds);
    double beta = computeSigma(ix.getOffset(), dists, perplexity, logPerp, //
        pij[ix.getOffset()] = new double[dists.size()]);
    if (mv != null) {
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.); // Sigma
    }
    indices[ix.getOffset()] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Sum of the sparse affinity matrix:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < pij_i.length; j++) {
      sum += pij_i[j];
    }
  }
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) { // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry:
        if (i < j) {
          // Symmetrize
          final double val = pij_i[offi] + pij[j][offj];
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else { // Not found
        // TODO: the original code produces a symmetric matrix
        // And it will now not sum to EARLY_EXAGGERATION anymore.
        pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
      }
    }
  }
  if (LOG.isStatistics()) { // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
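The symmetrization step calls a containsIndex(int[], int) helper that is not shown in this snippet; it returns the offset of a value within a neighbor-index array, or a negative value if absent. A plausible minimal version, assuming the index arrays are short and unsorted (a sketch, not necessarily ELKI's actual implementation):

/**
 * Find the offset of a value in an index array.
 * Hypothetical stand-in for the helper used by computePij above.
 *
 * @param indices Array to search
 * @param i Value to find
 * @return Offset of i, or -1 if not contained
 */
private static int containsIndex(int[] indices, int i) {
  for (int off = 0; off < indices.length; off++) {
    if (indices[off] == i) {
      return off;
    }
  }
  return -1;
}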
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.
The class AbstractIntrinsicDimensionalityEstimator, method estimate.
@Override
public double estimate(RangeQuery<?> rnq, DBIDRef cur, double range) {
  DoubleArray buf = new DoubleArray();
  int p = 0;
  for (DoubleDBIDListIter it = rnq.getRangeForDBID(cur, range).iter(); it.valid(); it.advance()) {
    // Skip zero distances and the query point itself.
    if (it.doubleValue() == 0. || DBIDUtil.equal(cur, it)) {
      continue;
    }
    buf.add(it.doubleValue());
    p++;
  }
  if (p < 1) {
    throw new ArithmeticException("ID estimation requires non-zero distances.");
  }
  // The buffer is passed both as the data and as the array adapter.
  return estimate(buf, buf, p);
}
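The buffer appears twice in the final call because ELKI's DoubleArray implements the number-array adapter interface over itself, so estimate(buf, buf, p) hands over the data and its accessor in one object. To illustrate what such an estimator computes, here is a hedged sketch of the classic Hill / Levina-Bickel maximum-likelihood estimate of intrinsic dimensionality over a plain double[] (not ELKI's exact implementation):

import java.util.Arrays;

public class HillSketch {
  /**
   * Hill-style ML estimate of intrinsic dimensionality from neighbor
   * distances. Assumes all distances are strictly positive (as guaranteed
   * by the filter above) and that at least two distances are given.
   */
  static double hill(double[] dists) {
    double[] d = dists.clone();
    Arrays.sort(d); // ascending; the last entry is the largest distance
    double max = d[d.length - 1], sum = 0.;
    for (int i = 0; i < d.length - 1; i++) {
      sum += Math.log(max / d[i]);
    }
    return (d.length - 1) / sum;
  }

  public static void main(String[] args) {
    // Hypothetical neighbor distances:
    System.out.println(hill(new double[] { .1, .2, .3, .4, .5 }));
  }
}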
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray in project elki by elki-project.
The class KernelDensityFittingTest, method testFitDoubleArray.
/**
 * The test will load the given data set and perform a Levenberg-Marquardt
 * fitting on a kernelized density estimation. The test evaluates the fitting
 * quality to ensure that the results remain stable and significantly better
 * than traditional estimation.
 */
@Test
public final void testFitDoubleArray() throws IOException {
  DoubleArray data = new DoubleArray();
  try (InputStream in = new GZIPInputStream(getClass().getResourceAsStream(dataset));
      TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
    Tokenizer t = reader.getTokenizer();
    reader.reset(in);
    while (reader.nextLineExceptComments() && t.valid()) {
      // Read first column only
      data.add(t.getDouble());
    }
  }
  // Verify data set size.
  assertEquals("Data set size doesn't match parameters.", realsize, data.size());
  double splitval = 0.5;
  double[] fulldata = data.toArray();
  Arrays.sort(fulldata);
  // Check that the initial parameters match what we were expecting from the data.
  double[] fullparams = estimateInitialParameters(fulldata);
  assertEquals("Full Mean before fitting", 0.4446105, fullparams[0], 0.0001);
  assertEquals("Full Stddev before fitting", 1.4012001, fullparams[1], 0.0001);
  // Do a fit on the full data and check the results are right.
  double[] fullfit = run(fulldata, fullparams);
  assertEquals("Full Mean after fitting", 0.64505, fullfit[0], 0.01);
  assertEquals("Full Stddev after fitting", 1.5227889, fullfit[1], 0.01);
  // The bounds check must come first, to avoid reading past the end of the array.
  int splitpoint = 0;
  while (splitpoint < fulldata.length && fulldata[splitpoint] < splitval) {
    splitpoint++;
  }
  double[] halfdata = Arrays.copyOf(fulldata, splitpoint);
  // Check that the initial parameters match what we were expecting from the data.
  double[] params = estimateInitialParameters(halfdata);
  assertEquals("Mean before fitting", -0.65723044, params[0], 0.0001);
  assertEquals("Stddev before fitting", 1.0112391, params[1], 0.0001);
  // Do a fit using only part of the data and check the results are right.
  double[] ps = run(halfdata, params);
  assertEquals("Mean after fitting", 0.45980, ps[0], 0.01);
  assertEquals("Stddev after fitting", 1.320427, ps[1], 0.01);
}
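Because fulldata is sorted before the scan, the split point (the count of values strictly below splitval) could equivalently be found with Arrays.binarySearch. A small sketch of that alternative (a hypothetical helper, not part of the test):

import java.util.Arrays;

public class SplitPointSketch {
  // Count of sorted values strictly below splitval; equivalent to the
  // guarded linear scan in the test above.
  static int splitPoint(double[] sorted, double splitval) {
    int pos = Arrays.binarySearch(sorted, splitval);
    if (pos < 0) {
      return -pos - 1; // insertion point = number of smaller values
    }
    // Exact match: step back over duplicates to the first occurrence.
    while (pos > 0 && sorted[pos - 1] == splitval) {
      pos--;
    }
    return pos;
  }

  public static void main(String[] args) {
    System.out.println(splitPoint(new double[] { -1., 0., .5, .5, 2. }, .5)); // prints 2
  }
}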