use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class NormalLevenbergMarquardtKDEEstimator method estimate.
/**
 * Estimate a normal distribution by fitting a Gaussian function to a kernel
 * density estimate of the data, using the Levenberg-Marquardt method.
 *
 * The fit is started at the sample median and the sample standard deviation.
 * Only these two parameters are optimized; the third parameter is kept fixed
 * at 1.
 *
 * @param data Data container to read the samples from
 * @param adapter Adapter used to access the values stored in {@code data}
 * @param <A> Data container type
 * @return Normal distribution built from the first two fitted parameters
 */
@Override
public <A> NormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
  // We first need the basic parameters:
  final int len = adapter.size(data);
  MeanVariance mv = new MeanVariance();
  // X positions of samples
  double[] x = new double[len];
  for (int i = 0; i < len; i++) {
    x[i] = adapter.getDouble(data, i);
    mv.put(x[i]);
  }
  // Sort our copy.
  Arrays.sort(x);
  // Median of the sorted samples: the middle element for odd len, the mean of
  // the two middle elements for even len. Both indexes stay within bounds for
  // any len >= 1.
  double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5;
  // Height = density, via KDE.
  // NOTE(review): the meaning of the 1e-6 argument is defined by
  // KernelDensityEstimator and is not visible here - confirm before changing.
  KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6);
  double[] y = de.getDensity();
  // Weights: every sample contributes equally.
  double[] s = new double[len];
  Arrays.fill(s, 1.0);
  // Initial parameter estimate; the third entry is excluded from fitting below.
  double[] params = { median, mv.getSampleStddev(), 1 };
  boolean[] dofit = { true, true, false };
  LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s);
  fit.run();
  double[] ps = fit.getParams();
  return new NormalDistribution(ps[0], ps[1]);
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class MetricalIndexApproximationMaterializeKNNPreprocessor method preprocess.
/**
 * Materialize approximate kNN lists by comparing each object only to the
 * objects stored in the same leaf of the metrical index.
 *
 * For every leaf, the distances between all objects of the leaf are computed
 * and the k nearest of them are stored for each object. Each distance between
 * two different objects is computed once and reused for the reversed pair, so
 * the distance function is assumed to be symmetric.
 */
@Override
protected void preprocess() {
  final Logging log = getLogger();
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  MetricalIndexTree<O, N, E> index = getMetricalIndex(relation);
  createStorage();
  // Statistics on the leaf sizes and on the number of neighbors found.
  MeanVariance pagesize = new MeanVariance();
  MeanVariance ksize = new MeanVariance();
  if (log.isVerbose()) {
    log.verbose("Approximating nearest neighbor lists to database objects");
  }
  List<E> leaves = index.getLeaves();
  FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Processing leaf nodes", leaves.size(), log) : null;
  for (E leaf : leaves) {
    N node = index.getNode(leaf);
    int size = node.getNumEntries();
    pagesize.put(size);
    if (log.isDebuggingFinest()) {
      log.debugFinest("NumEntires = " + size);
    }
    // Collect the ids in this node.
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
    for (int i = 0; i < size; i++) {
      ids.add(((LeafEntry) node.getEntry(i)).getDBID());
    }
    // Distances waiting to be consumed by the reversed pair. NaN marks a miss.
    Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 2);
    cache.defaultReturnValue(Double.NaN);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
      KNNHeap kNN = DBIDUtil.newHeap(k);
      for (DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) {
        if (DBIDUtil.equal(id, id2)) {
          // The self-distance is requested exactly once, so caching it would
          // only leave a stale entry behind.
          kNN.insert(distanceQuery.distance(id, id2), id2);
          continue;
        }
        DBIDPair key = DBIDUtil.newPair(id, id2);
        double d = cache.removeDouble(key);
        if (d == d) {
          // Not NaN
          // consume the previous result.
          kNN.insert(d, id2);
        } else {
          // compute new and store the previous result.
          d = distanceQuery.distance(id, id2);
          kNN.insert(d, id2);
          // put it into the cache, but with the keys reversed
          key = DBIDUtil.newPair(id2, id);
          cache.put(key, d);
        }
      }
      ksize.put(kNN.size());
      storage.put(id, kNN.toKNNList());
    }
    // Every cached distance must have been consumed by its reversed pair.
    if (log.isDebugging() && cache.size() > 0) {
      log.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
    }
    log.incrementProcessed(progress);
  }
  log.ensureCompleted(progress);
  if (log.isVerbose()) {
    log.verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
    log.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
  }
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class RangeQueryBenchmarkAlgorithm method run.
/**
 * Benchmark range queries on a sample of the database objects, reading the
 * radius of each query from a separate relation.
 *
 * Only the number of results and a checksum over the result ids are
 * collected and reported as statistics.
 *
 * @param database Database
 * @param relation Relation
 * @param radrel Radius relation; the first component is used as radius
 * @return Null result
 */
public Result run(Database database, Relation<O> relation, Relation<NumberVector> radrel) {
  if (queries != null) {
    throw new AbortException("This 'run' method will not use the given query set!");
  }
  // Set up the distance query and the range query that builds on it.
  final DistanceQuery<O> distances = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<O> ranges = database.getRangeQuery(distances);
  final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  // NOTE(review): the progress label says "kNN queries" although range queries
  // are executed here; kept unchanged to preserve the log output.
  FiniteProgress prog = null;
  if (LOG.isVeryVerbose()) {
    prog = new FiniteProgress("kNN queries", sample.size(), LOG);
  }
  final MeanVariance resultSizes = new MeanVariance();
  int checksum = 0;
  DBIDIter query = sample.iter();
  while (query.valid()) {
    final double radius = radrel.get(query).doubleValue(0);
    final DoubleDBIDList found = ranges.getRangeForDBID(query, radius);
    // Sum up the integer ids of the results, then mix into the running hash.
    int idsum = 0;
    DBIDIter hit = found.iter();
    while (hit.valid()) {
      idsum += DBIDUtil.asInteger(hit);
      hit.advance();
    }
    checksum = Util.mixHashCodes(checksum, idsum);
    resultSizes.put(found.size());
    LOG.incrementProcessed(prog);
    query.advance();
  }
  LOG.ensureCompleted(prog);
  if (!LOG.isStatistics()) {
    return null;
  }
  LOG.statistics("Result hashcode: " + checksum);
  LOG.statistics("Mean number of results: " + resultSizes.getMean() + " +- " + resultSizes.getNaiveStddev());
  return null;
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class RangeQueryBenchmarkAlgorithm method run.
/**
 * Benchmark range queries using objects from a separate query set.
 *
 * Each query vector is expected to have one more component than the data
 * relation: the leading components form the query object, the last one is
 * the query radius. Only the number of results and a checksum over the
 * result ids are collected and reported as statistics.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
  if (queries == null) {
    throw new AbortException("A query set is required for this 'run' method.");
  }
  // Set up the distance query and the range query that builds on it.
  final DistanceQuery<O> distances = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<O> ranges = database.getRangeQuery(distances);
  final NumberVector.Factory<O> vectorFactory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // The query set must provide vectors with exactly dim + 1 components.
  final TypeInformation wanted = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
  final MultipleObjectsBundle bundle = queries.loadData();
  // Find the first compatible column of the bundle.
  int col = -1;
  for (int c = 0; col < 0 && c < bundle.metaLength(); c++) {
    if (wanted.isAssignableFromType(bundle.meta(c))) {
      col = c;
    }
  }
  if (col < 0) {
    // Report what was expected along with all types that were offered.
    StringBuilder msg = new StringBuilder();
    msg.append("No compatible data type in query input was found. Expected: ");
    msg.append(wanted.toString());
    msg.append(" have: ");
    for (int c = 0; c < bundle.metaLength(); c++) {
      if (c > 0) {
        msg.append(' ');
      }
      msg.append(bundle.meta(c).toString());
    }
    throw new IncompatibleDataException(msg.toString());
  }
  // Sampling works on DBIDs, so a static id range stands in for the row
  // numbers of the bundle (there is no integer sampling function).
  final DBIDRange rows = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
  final DBIDs sample = DBIDUtil.randomSample(rows, sampling, random);
  // NOTE(review): the progress label says "kNN queries" although range queries
  // are executed here; kept unchanged to preserve the log output.
  FiniteProgress prog = null;
  if (LOG.isVeryVerbose()) {
    prog = new FiniteProgress("kNN queries", sample.size(), LOG);
  }
  final MeanVariance resultSizes = new MeanVariance();
  final double[] coords = new double[dim];
  int checksum = 0;
  DBIDIter pick = sample.iter();
  while (pick.valid()) {
    // Map the sampled id back to a row of the bundle.
    final int row = rows.binarySearch(pick);
    assert (row >= 0);
    final NumberVector raw = (NumberVector) bundle.data(row, col);
    // Leading components are the query object, the last one is the radius.
    for (int d = 0; d < dim; d++) {
      coords[d] = raw.doubleValue(d);
    }
    final O queryObject = vectorFactory.newNumberVector(coords);
    final double radius = raw.doubleValue(dim);
    final DoubleDBIDList found = ranges.getRangeForObject(queryObject, radius);
    // Sum up the integer ids of the results, then mix into the running hash.
    int idsum = 0;
    DBIDIter hit = found.iter();
    while (hit.valid()) {
      idsum += DBIDUtil.asInteger(hit);
      hit.advance();
    }
    checksum = Util.mixHashCodes(checksum, idsum);
    resultSizes.put(found.size());
    LOG.incrementProcessed(prog);
    pick.advance();
  }
  LOG.ensureCompleted(prog);
  if (!LOG.isStatistics()) {
    return null;
  }
  LOG.statistics("Result hashcode: " + checksum);
  LOG.statistics("Mean number of results: " + resultSizes.getMean() + " +- " + resultSizes.getNaiveStddev());
  return null;
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class ValidateApproximativeKNNIndex method run.
/**
 * Run the algorithm: compare the results of an index-accelerated
 * (approximate) kNN query to those of a reference kNN query, and report
 * quality statistics.
 *
 * The query objects are either sampled from the relation (optionally filtered
 * by a label pattern) or sampled from a separate query set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException when no accelerated query is available, or when the
 *         query set contains no column compatible with the distance function
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Approximate query:
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
  if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
    throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
  }
  // Exact query:
  KNNQuery<O> truekNNQuery;
  if (forcelinear) {
    truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
  } else {
    truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
  }
  if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
    LOG.warning("Query classes are the same. This experiment may be invalid!");
  }
  // Statistics shared by both query modes.
  MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
  MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
  int misses = 0;
  // No query set - use original database.
  if (queries == null || pattern != null) {
    // Relation to filter on
    Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
        // Query index:
        KNNList knns = knnQuery.getKNNForDBID(iditer, k);
        // Query reference:
        KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
        if (recordQueryQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
          misses++;
        }
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Map the sampled id back to a row of the bundle.
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
      // Query index:
      KNNList knns = knnQuery.getKNNForObject(o, k);
      // Query reference:
      KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
      if (recordQueryQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
        misses++;
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }
  logQualityStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
  return null;
}

/**
 * Record the quality of one approximate kNN result compared to the reference.
 *
 * @param knns Result of the approximate query
 * @param trueknns Result of the reference query
 * @param mv Collects the result size, scaled by k over the reference size
 * @param mvrec Collects the recall
 * @param mvdist Collects the k-distance of the approximate result
 * @param mvdaerr Collects the absolute error of the k-distance
 * @param mvdrerr Collects the relative error of the k-distance
 * @return {@code true} when the approximate query returned less than k objects
 */
private boolean recordQueryQuality(KNNList knns, KNNList trueknns, MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr) {
  // Put adjusted knn size:
  mv.put(knns.size() * k / (double) trueknns.size());
  // Put recall:
  mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
  if (knns.size() < k) {
    // Less than k objects.
    return true;
  }
  double kdist = knns.getKNNDistance();
  final double tdist = trueknns.getKNNDistance();
  // Skip zero reference distances: the relative error would be undefined.
  if (tdist > 0.0) {
    mvdist.put(kdist);
    mvdaerr.put(kdist - tdist);
    mvdrerr.put(kdist / tdist);
  }
  return false;
}

/**
 * Report the collected quality statistics, if statistics logging is enabled.
 *
 * @param mv Adjusted result sizes
 * @param mvrec Recall values
 * @param mvdist k-distances of the approximate results
 * @param mvdaerr Absolute k-distance errors
 * @param mvdrerr Relative k-distance errors
 * @param misses Number of queries that returned less than k objects
 */
private void logQualityStatistics(MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr, int misses) {
  if (!LOG.isStatistics()) {
    return;
  }
  LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
  if (mvdist.getCount() > 0) {
    LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
    LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
    LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
  }
  if (misses > 0) {
    LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
  }
}
Aggregations