Use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.
The class ValidateApproximativeKNNIndex, method run.
/**
 * Run the algorithm.
 *
 * Compares the k nearest neighbors returned by an accelerated (index-based)
 * kNN query against those of a reference query, and logs summary statistics
 * (adjusted result size, recall, k-distance and its absolute / relative
 * error, number of queries that returned fewer than k objects).
 *
 * Query objects are either sampled from the relation itself (optionally
 * filtered by a label pattern), or taken from a separate query set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no accelerated kNN query is available, or if the
 *         separate query set contains no column compatible with the distance
 *         function
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Approximate query:
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
  if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
    throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
  }
  // Exact query:
  KNNQuery<O> truekNNQuery;
  if (forcelinear) {
    truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
  } else {
    truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
  }
  if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
    LOG.warning("Query classes are the same. This experiment may be invalid!");
  }

  // Accumulators shared by both evaluation modes, so that both report the
  // exact same set of statistics.
  MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
  MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
  int misses = 0;

  // No query set - use original database.
  // This branch is also taken when a pattern is set, even if a separate query
  // set was given: the pattern is matched against labels of database objects.
  if (queries == null || pattern != null) {
    // Relation to filter on
    Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
        // Query index:
        KNNList knns = knnQuery.getKNNForDBID(iditer, k);
        // Query reference:
        KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
        if (recordQueryResult(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
          misses++;
        }
      }
      // Progress counts every sampled object, including filtered-out ones.
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    // Use the first column whose type is accepted by the distance function.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Map the sampled id back to its row offset in the bundle.
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
      // Query index:
      KNNList knns = knnQuery.getKNNForObject(o, k);
      // Query reference:
      KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
      if (recordQueryResult(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
        misses++;
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }

  if (LOG.isStatistics()) {
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
    if (mvdist.getCount() > 0) {
      LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
      LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
      LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
    }
    if (misses > 0) {
      LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
    }
  }
  return null;
}

/**
 * Record the outcome of a single query into the statistics accumulators.
 *
 * The k-distance statistics are only updated when the approximate result has
 * at least k entries and the reference k-distance is positive (the relative
 * error divides by the reference k-distance).
 *
 * @param knns Result of the approximate (index) query
 * @param trueknns Result of the reference query
 * @param mv Accumulator for the adjusted result size
 * @param mvrec Accumulator for the recall
 * @param mvdist Accumulator for the approximate k-distance
 * @param mvdaerr Accumulator for the absolute k-distance error
 * @param mvdrerr Accumulator for the relative k-distance error
 * @return {@code true} if the approximate query returned less than k objects
 */
private boolean recordQueryResult(KNNList knns, KNNList trueknns, MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr) {
  // Put adjusted knn size:
  mv.put(knns.size() * k / (double) trueknns.size());
  // Put recall:
  mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
  if (knns.size() < k) {
    // Less than k objects.
    return true;
  }
  double kdist = knns.getKNNDistance();
  final double tdist = trueknns.getKNNDistance();
  if (tdist > 0.0) {
    mvdist.put(kdist);
    mvdaerr.put(kdist - tdist);
    mvdrerr.put(kdist / tdist);
  }
  return false;
}
Aggregations