use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class MetricalIndexApproximationMaterializeKNNPreprocessor method preprocess.
@Override
protected void preprocess() {
final Logging log = getLogger();
DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
MetricalIndexTree<O, N, E> index = getMetricalIndex(relation);
createStorage();
MeanVariance pagesize = new MeanVariance();
MeanVariance ksize = new MeanVariance();
if (log.isVerbose()) {
log.verbose("Approximating nearest neighbor lists to database objects");
}
List<E> leaves = index.getLeaves();
FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Processing leaf nodes", leaves.size(), getLogger()) : null;
for (E leaf : leaves) {
N node = index.getNode(leaf);
int size = node.getNumEntries();
pagesize.put(size);
if (log.isDebuggingFinest()) {
log.debugFinest("NumEntires = " + size);
}
// Collect the ids in this node.
ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
for (int i = 0; i < size; i++) {
ids.add(((LeafEntry) node.getEntry(i)).getDBID());
}
Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 2);
cache.defaultReturnValue(Double.NaN);
for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
KNNHeap kNN = DBIDUtil.newHeap(k);
for (DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) {
DBIDPair key = DBIDUtil.newPair(id, id2);
double d = cache.removeDouble(key);
if (d == d) {
// Not NaN
// consume the previous result.
kNN.insert(d, id2);
} else {
// compute new and store the previous result.
d = distanceQuery.distance(id, id2);
kNN.insert(d, id2);
// put it into the cache, but with the keys reversed
key = DBIDUtil.newPair(id2, id);
cache.put(key, d);
}
}
ksize.put(kNN.size());
storage.put(id, kNN.toKNNList());
}
if (log.isDebugging() && cache.size() > 0) {
log.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
}
log.incrementProcessed(progress);
}
log.ensureCompleted(progress);
if (log.isVerbose()) {
log.verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
log.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
}
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class RangeQueryBenchmarkAlgorithm method run.
/**
* Run the algorithm, with separate radius relation
*
* @param database Database
* @param relation Relation
* @param radrel Radius relation
* @return Null result
*/
public Result run(Database database, Relation<O> relation, Relation<NumberVector> radrel) {
if (queries != null) {
throw new AbortException("This 'run' method will not use the given query set!");
}
// Get a distance and kNN query instance.
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
double r = radrel.get(iditer).doubleValue(0);
DoubleDBIDList rres = rangeQuery.getRangeForDBID(iditer, r);
int ichecksum = 0;
for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
ichecksum += DBIDUtil.asInteger(it);
}
hash = Util.mixHashCodes(hash, ichecksum);
mv.put(rres.size());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics("Result hashcode: " + hash);
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
}
return null;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class RangeQueryBenchmarkAlgorithm method run.
/**
* Run the algorithm, with a separate query set.
*
* @param database Database
* @param relation Relation
* @return Null result
*/
public Result run(Database database, Relation<O> relation) {
if (queries == null) {
throw new AbortException("A query set is required for this 'run' method.");
}
// Get a distance and kNN query instance.
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
int dim = RelationUtil.dimensionality(relation);
// Separate query set.
TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
MultipleObjectsBundle bundle = queries.loadData();
int col = -1;
for (int i = 0; i < bundle.metaLength(); i++) {
if (res.isAssignableFromType(bundle.meta(i))) {
col = i;
break;
}
}
if (col < 0) {
StringBuilder buf = new StringBuilder();
buf.append("No compatible data type in query input was found. Expected: ");
buf.append(res.toString());
buf.append(" have: ");
for (int i = 0; i < bundle.metaLength(); i++) {
if (i > 0) {
buf.append(' ');
}
buf.append(bundle.meta(i).toString());
}
throw new IncompatibleDataException(buf.toString());
}
// Random sampling is a bit of hack, sorry.
// But currently, we don't (yet) have an "integer random sample" function.
DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance();
double[] buf = new double[dim];
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
int off = sids.binarySearch(iditer);
assert (off >= 0);
NumberVector o = (NumberVector) bundle.data(off, col);
for (int i = 0; i < dim; i++) {
buf[i] = o.doubleValue(i);
}
O v = ofactory.newNumberVector(buf);
double r = o.doubleValue(dim);
DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
int ichecksum = 0;
for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
ichecksum += DBIDUtil.asInteger(it);
}
hash = Util.mixHashCodes(hash, ichecksum);
mv.put(rres.size());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics("Result hashcode: " + hash);
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
}
return null;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class ValidateApproximativeKNNIndex method run.
/**
* Run the algorithm.
*
* @param database Database
* @param relation Relation
* @return Null result
*/
public Result run(Database database, Relation<O> relation) {
// Get a distance and kNN query instance.
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
// Approximate query:
KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
}
// Exact query:
KNNQuery<O> truekNNQuery;
if (forcelinear) {
truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
} else {
truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
}
if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
LOG.warning("Query classes are the same. This experiment may be invalid!");
}
// No query set - use original database.
if (queries == null || pattern != null) {
// Relation to filter on
Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
int misses = 0;
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
// Query index:
KNNList knns = knnQuery.getKNNForDBID(iditer, k);
// Query reference:
KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
// Put adjusted knn size:
mv.put(knns.size() * k / (double) trueknns.size());
// Put recall:
mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
if (knns.size() >= k) {
double kdist = knns.getKNNDistance();
final double tdist = trueknns.getKNNDistance();
if (tdist > 0.0) {
mvdist.put(kdist);
mvdaerr.put(kdist - tdist);
mvdrerr.put(kdist / tdist);
}
} else {
// Less than k objects.
misses++;
}
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
if (mvdist.getCount() > 0) {
LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
}
if (misses > 0) {
LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
}
}
} else {
// Separate query set.
TypeInformation res = getDistanceFunction().getInputTypeRestriction();
MultipleObjectsBundle bundle = queries.loadData();
int col = -1;
for (int i = 0; i < bundle.metaLength(); i++) {
if (res.isAssignableFromType(bundle.meta(i))) {
col = i;
break;
}
}
if (col < 0) {
throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
}
// Random sampling is a bit of hack, sorry.
// But currently, we don't (yet) have an "integer random sample" function.
DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
int misses = 0;
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
int off = sids.binarySearch(iditer);
assert (off >= 0);
@SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
// Query index:
KNNList knns = knnQuery.getKNNForObject(o, k);
// Query reference:
KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
// Put adjusted knn size:
mv.put(knns.size() * k / (double) trueknns.size());
// Put recall:
mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
if (knns.size() >= k) {
double kdist = knns.getKNNDistance();
final double tdist = trueknns.getKNNDistance();
if (tdist > 0.0) {
mvdist.put(kdist);
mvdaerr.put(kdist - tdist);
mvdrerr.put(kdist / tdist);
}
} else {
// Less than k objects.
misses++;
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
if (mvdist.getCount() > 0) {
LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
}
if (misses > 0) {
LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
}
}
}
return null;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class DiSH method extractClusters.
/**
* Extracts the clusters from the cluster order.
*
* @param relation the database storing the objects
* @param clusterOrder the cluster order to extract the clusters from
* @return the extracted clusters
*/
private Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> extractClusters(Relation<V> relation, DiSHClusterOrder clusterOrder) {
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", relation.size(), LOG) : null;
Object2ObjectOpenCustomHashMap<long[], List<ArrayModifiableDBIDs>> clustersMap = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
// Note clusterOrder currently contains DBID objects anyway.
WritableDataStore<Pair<long[], ArrayModifiableDBIDs>> entryToClusterMap = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Pair.class);
for (DBIDIter iter = clusterOrder.iter(); iter.valid(); iter.advance()) {
V object = relation.get(iter);
long[] preferenceVector = clusterOrder.getCommonPreferenceVector(iter);
// get the list of (parallel) clusters for the preference vector
List<ArrayModifiableDBIDs> parallelClusters = clustersMap.get(preferenceVector);
if (parallelClusters == null) {
parallelClusters = new ArrayList<>();
clustersMap.put(preferenceVector, parallelClusters);
}
// look for the proper cluster
ArrayModifiableDBIDs cluster = null;
for (ArrayModifiableDBIDs c : parallelClusters) {
NumberVector c_centroid = ProjectedCentroid.make(preferenceVector, relation, c);
long[] commonPreferenceVector = BitsUtil.andCMin(preferenceVector, preferenceVector);
int subspaceDim = subspaceDimensionality(object, c_centroid, preferenceVector, preferenceVector, commonPreferenceVector);
if (subspaceDim == clusterOrder.getCorrelationValue(iter)) {
double d = weightedDistance(object, c_centroid, commonPreferenceVector);
if (d <= 2 * epsilon) {
cluster = c;
break;
}
}
}
if (cluster == null) {
cluster = DBIDUtil.newArray();
parallelClusters.add(cluster);
}
cluster.add(iter);
entryToClusterMap.put(iter, new Pair<>(preferenceVector, cluster));
LOG.incrementProcessed(progress);
}
LOG.ensureCompleted(progress);
if (LOG.isDebuggingFiner()) {
int dim = RelationUtil.dimensionality(relation);
StringBuilder msg = new StringBuilder("Step 0");
for (Map.Entry<long[], List<ArrayModifiableDBIDs>> clusterList : clustersMap.entrySet()) {
for (ArrayModifiableDBIDs c : clusterList.getValue()) {
msg.append('\n').append(BitsUtil.toStringLow(clusterList.getKey(), dim)).append(" ids ").append(c.size());
}
}
LOG.debugFiner(msg.toString());
}
// add the predecessor to the cluster
DBIDVar cur = DBIDUtil.newVar(), pre = DBIDUtil.newVar();
for (long[] pv : clustersMap.keySet()) {
List<ArrayModifiableDBIDs> parallelClusters = clustersMap.get(pv);
for (ArrayModifiableDBIDs cluster : parallelClusters) {
if (cluster.isEmpty()) {
continue;
}
cluster.assignVar(0, cur);
clusterOrder.getPredecessor(cur, pre);
if (!pre.isSet() || DBIDUtil.equal(pre, cur)) {
continue;
}
// parallel cluster
if (BitsUtil.equal(clusterOrder.getCommonPreferenceVector(pre), clusterOrder.getCommonPreferenceVector(cur))) {
continue;
}
if (//
clusterOrder.getCorrelationValue(pre) < clusterOrder.getCorrelationValue(cur) || clusterOrder.getReachability(pre) < clusterOrder.getReachability(cur)) {
continue;
}
Pair<long[], ArrayModifiableDBIDs> oldCluster = entryToClusterMap.get(pre);
oldCluster.second.remove(pre);
cluster.add(pre);
entryToClusterMap.put(pre, new Pair<>(pv, cluster));
}
}
return clustersMap;
}
Aggregations