Search in sources :

Example 1 with LinearScanQuery

use of de.lmu.ifi.dbs.elki.database.query.LinearScanQuery in project elki by elki-project.

From the class ValidateApproximativeKNNIndex, method run.

/**
 * Run the algorithm.
 * <p>
 * Compares the results of an accelerated kNN query against a reference kNN
 * query, and reports result sizes, recall and k-distance errors to the
 * statistics log. Query points are either sampled from the relation itself
 * (optionally filtered by a label pattern) or taken from a separate query
 * data source.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Approximate query: request an optimized implementation only, so that a
    // missing index is detected instead of silently falling back.
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
        throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
    }
    // Exact query, used as the reference result:
    KNNQuery<O> truekNNQuery;
    if (forcelinear) {
        truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
    } else {
        truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
    }
    // Comparing a query implementation against itself would be meaningless.
    if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
        LOG.warning("Query classes are the same. This experiment may be invalid!");
    }
    // Accumulators shared by both query modes.
    MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
    MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
    int misses = 0;
    // No query set - use original database.
    if (queries == null || pattern != null) {
        // Relation to filter on (only needed when a pattern is given)
        Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
                // Query index:
                KNNList knns = knnQuery.getKNNForDBID(iditer, k);
                // Query reference:
                KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
                if (recordKNNComparison(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
                    misses++;
                }
            }
            // Progress counts every sampled object, including filtered ones.
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        // Use the first column whose type is accepted by the distance function.
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            // Map the sampled id back to a row offset in the bundle.
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            // Query index:
            KNNList knns = knnQuery.getKNNForObject(o, k);
            // Query reference:
            KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
            if (recordKNNComparison(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
                misses++;
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    logValidationStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
    return null;
}

/**
 * Record the comparison of one approximate kNN result with its reference
 * result into the given accumulators.
 *
 * @param knns Result of the query under test
 * @param trueknns Result of the reference query
 * @param mv Accumulator for the adjusted result size
 * @param mvrec Accumulator for the recall
 * @param mvdist Accumulator for the k-distance of the query under test
 * @param mvdaerr Accumulator for the absolute k-distance error
 * @param mvdrerr Accumulator for the relative k-distance error
 * @return {@code true} if the query under test returned less than k objects
 */
private boolean recordKNNComparison(KNNList knns, KNNList trueknns, MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr) {
    // Put adjusted knn size:
    mv.put(knns.size() * k / (double) trueknns.size());
    // Put recall:
    mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
    if (knns.size() < k) {
        // Less than k objects.
        return true;
    }
    double kdist = knns.getKNNDistance();
    final double tdist = trueknns.getKNNDistance();
    // Skip zero reference distances: the relative error would divide by zero.
    if (tdist > 0.0) {
        mvdist.put(kdist);
        mvdaerr.put(kdist - tdist);
        mvdrerr.put(kdist / tdist);
    }
    return false;
}

/**
 * Write the collected validation statistics to the statistics log, if
 * statistics logging is enabled.
 *
 * @param mv Accumulator for the adjusted result size
 * @param mvrec Accumulator for the recall
 * @param mvdist Accumulator for the k-distance of the query under test
 * @param mvdaerr Accumulator for the absolute k-distance error
 * @param mvdrerr Accumulator for the relative k-distance error
 * @param misses Number of queries that returned less than k objects
 */
private void logValidationStatistics(MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr, int misses) {
    if (!LOG.isStatistics()) {
        return;
    }
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
    // Distance statistics are only meaningful if at least one sample was recorded.
    if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
        LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
        LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
    }
    if (misses > 0) {
        LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) LinearScanQuery(de.lmu.ifi.dbs.elki.database.query.LinearScanQuery) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 LinearScanQuery (de.lmu.ifi.dbs.elki.database.query.LinearScanQuery)1 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1