Search in sources:

Example 1 with IncompatibleDataException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException in project elki by elki-project.

The method run of the class KNNBenchmarkAlgorithm.

/**
 * Benchmark kNN queries and log summary statistics.
 *
 * If no separate query set is configured, a random sample of the relation's
 * own DBIDs is used as query points. Otherwise the query objects are loaded
 * from the query data source, using the first column whose type is accepted
 * by the distance function's input type restriction.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws IncompatibleDataException if the query set has no column matching
 *         the distance function's input type restriction
 */
public Result run(Database database, Relation<O> relation) {
    // Set up the distance query and the kNN query that is being benchmarked.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
    // Accumulators shared by both query modes.
    int hash = 0;
    MeanVariance resultSizes = new MeanVariance();
    MeanVariance kDistances = new MeanVariance();
    if (queries != null) {
        // Query objects come from a separate data source.
        TypeInformation restriction = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        // Pick the first column that the distance function can work with.
        int col = -1;
        for (int c = 0; col < 0 && c < bundle.metaLength(); c++) {
            if (restriction.isAssignableFromType(bundle.meta(c))) {
                col = c;
            }
        }
        if (col < 0) {
            throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + restriction.toString());
        }
        // There is no "integer random sample" helper, so sample from a
        // static DBID range and map the sampled ids back to row offsets.
        DBIDRange rowIds = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(rowIds, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        DBIDIter cursor = sample.iter();
        while (cursor.valid()) {
            int row = rowIds.binarySearch(cursor);
            assert (row >= 0);
            @SuppressWarnings("unchecked") O queryObject = (O) bundle.data(row, col);
            KNNList neighbors = knnQuery.getKNNForObject(queryObject, k);
            // Sum of the neighbor ids, folded into the overall result hash.
            int idSum = 0;
            for (DBIDIter n = neighbors.iter(); n.valid(); n.advance()) {
                idSum += DBIDUtil.asInteger(n);
            }
            hash = Util.mixHashCodes(hash, idSum);
            resultSizes.put(neighbors.size());
            kDistances.put(neighbors.getKNNDistance());
            LOG.incrementProcessed(prog);
            cursor.advance();
        }
        LOG.ensureCompleted(prog);
    } else {
        // No query set: query with a sample of the database objects themselves.
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        DBIDIter cursor = sample.iter();
        while (cursor.valid()) {
            KNNList neighbors = knnQuery.getKNNForDBID(cursor, k);
            // Sum of the neighbor ids, folded into the overall result hash.
            int idSum = 0;
            for (DBIDIter n = neighbors.iter(); n.valid(); n.advance()) {
                idSum += DBIDUtil.asInteger(n);
            }
            hash = Util.mixHashCodes(hash, idSum);
            resultSizes.put(neighbors.size());
            kDistances.put(neighbors.getKNNDistance());
            LOG.incrementProcessed(prog);
            cursor.advance();
        }
        LOG.ensureCompleted(prog);
    }
    // Report the result hash and the mean result size / k-distance.
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + resultSizes.getMean() + " +- " + resultSizes.getNaiveStddev());
        if (kDistances.getCount() > 0) {
            LOG.statistics("Mean k-distance: " + kDistances.getMean() + " +- " + kDistances.getNaiveStddev());
        }
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange)

Example 2 with IncompatibleDataException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException in project elki by elki-project.

The method run of the class RangeQueryBenchmarkAlgorithm.

/**
 * Run the algorithm, with a separate query set.
 *
 * Each query object is expected to be a number vector with one more
 * component than the relation's dimensionality: the leading components are
 * the query coordinates, and the final component is the query radius.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no query set was configured
 * @throws IncompatibleDataException if the query set contains no column of
 *         the expected vector type
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance and range query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
    int dim = RelationUtil.dimensionality(relation);
    // Separate query set: vectors of dimensionality dim + 1 (coordinates plus radius).
    TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    MultipleObjectsBundle bundle = queries.loadData();
    // Use the first column whose type satisfies the request.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (res.isAssignableFromType(bundle.meta(i))) {
            col = i;
            break;
        }
    }
    if (col < 0) {
        // Report both the expected type and the types actually present.
        StringBuilder msg = new StringBuilder();
        msg.append("No compatible data type in query input was found. Expected: ");
        msg.append(res.toString());
        msg.append(" have: ");
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (i > 0) {
                msg.append(' ');
            }
            msg.append(bundle.meta(i).toString());
        }
        throw new IncompatibleDataException(msg.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    // Label the progress after the operation actually performed (range queries).
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    // Scratch buffer for the coordinate part of each query vector.
    double[] buf = new double[dim];
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        // Map the sampled id back to its row offset in the bundle.
        int off = sids.binarySearch(iditer);
        assert (off >= 0);
        NumberVector o = (NumberVector) bundle.data(off, col);
        // Split the query vector: first dim components are the coordinates ...
        for (int i = 0; i < dim; i++) {
            buf[i] = o.doubleValue(i);
        }
        O v = ofactory.newNumberVector(buf);
        // ... and the last component is the query radius.
        double r = o.doubleValue(dim);
        DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
        // Sum of the result ids, folded into the overall result hash.
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)2 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)2 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)2 IncompatibleDataException (de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1