Search in sources :

Example 66 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class OutlierROCCurve method processNewResult.

@Override
public void processNewResult(ResultHierarchy hier, Result result) {
    Database db = ResultUtil.findDatabase(hier);
    // Prepare
    SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
    if (positiveids.size() == 0) {
        LOG.warning("Computing a ROC curve failed - no objects matched.");
        return;
    }
    boolean nonefound = true;
    List<OutlierResult> oresults = OutlierResult.getOutlierResults(result);
    List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
    // Outlier results are the main use case.
    for (OutlierResult o : oresults) {
        ROCResult rocres = computeROCResult(o.getScores().size(), positiveids, o);
        db.getHierarchy().add(o, rocres);
        EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), o, "Evaluation of ranking", "ranking-evaluation");
        MeasurementGroup g = ev.findOrCreateGroup("Evaluation measures");
        if (!g.hasMeasure(ROCAUC_LABEL)) {
            g.addMeasure(ROCAUC_LABEL, rocres.auc, 0., 1., false);
        }
        // Process each ordering only once.
        orderings.remove(o.getOrdering());
        nonefound = false;
    }
    // otherwise apply an ordering to the database IDs.
    for (OrderingResult or : orderings) {
        DBIDs sorted = or.order(or.getDBIDs());
        ROCResult rocres = computeROCResult(or.getDBIDs().size(), positiveids, sorted);
        db.getHierarchy().add(or, rocres);
        EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), or, "Evaluation of ranking", "ranking-evaluation");
        MeasurementGroup g = ev.findOrCreateGroup("Evaluation measures");
        if (!g.hasMeasure(ROCAUC_LABEL)) {
            g.addMeasure(ROCAUC_LABEL, rocres.auc, 0., 1., false);
        }
        nonefound = false;
    }
    if (nonefound) {
        return;
    // logger.warning("No results found to process with ROC curve analyzer.
    // Got "+iterables.size()+" iterables, "+orderings.size()+" orderings.");
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) Database(de.lmu.ifi.dbs.elki.database.Database) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Example 67 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class RangeQueryBenchmarkAlgorithm method run.

/**
 * Run the algorithm, with separate radius relation
 *
 * @param database Database
 * @param relation Relation
 * @param radrel Radius relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation, Relation<NumberVector> radrel) {
    if (queries != null) {
        throw new AbortException("This 'run' method will not use the given query set!");
    }
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        double r = radrel.get(iditer).doubleValue(0);
        DoubleDBIDList rres = rangeQuery.getRangeForDBID(iditer, r);
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 68 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class RangeQueryBenchmarkAlgorithm method run.

/**
 * Run the algorithm, with a separate query set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
    int dim = RelationUtil.dimensionality(relation);
    // Separate query set.
    TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    MultipleObjectsBundle bundle = queries.loadData();
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (res.isAssignableFromType(bundle.meta(i))) {
            col = i;
            break;
        }
    }
    if (col < 0) {
        StringBuilder buf = new StringBuilder();
        buf.append("No compatible data type in query input was found. Expected: ");
        buf.append(res.toString());
        buf.append(" have: ");
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (i > 0) {
                buf.append(' ');
            }
            buf.append(bundle.meta(i).toString());
        }
        throw new IncompatibleDataException(buf.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    double[] buf = new double[dim];
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        int off = sids.binarySearch(iditer);
        assert (off >= 0);
        NumberVector o = (NumberVector) bundle.data(off, col);
        for (int i = 0; i < dim; i++) {
            buf[i] = o.doubleValue(i);
        }
        O v = ofactory.newNumberVector(buf);
        double r = o.doubleValue(dim);
        DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 69 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class ValidateApproximativeKNNIndex method run.

/**
 * Run the algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Approximate query:
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
        throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
    }
    // Exact query:
    KNNQuery<O> truekNNQuery;
    if (forcelinear) {
        truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
    } else {
        truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
    }
    if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
        LOG.warning("Query classes are the same. This experiment may be invalid!");
    }
    // No query set - use original database.
    if (queries == null || pattern != null) {
        // Relation to filter on
        Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
        MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
        int misses = 0;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
                // Query index:
                KNNList knns = knnQuery.getKNNForDBID(iditer, k);
                // Query reference:
                KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
                // Put adjusted knn size:
                mv.put(knns.size() * k / (double) trueknns.size());
                // Put recall:
                mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
                if (knns.size() >= k) {
                    double kdist = knns.getKNNDistance();
                    final double tdist = trueknns.getKNNDistance();
                    if (tdist > 0.0) {
                        mvdist.put(kdist);
                        mvdaerr.put(kdist - tdist);
                        mvdrerr.put(kdist / tdist);
                    }
                } else {
                    // Less than k objects.
                    misses++;
                }
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
            LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
            if (mvdist.getCount() > 0) {
                LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
                LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
                LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
            }
            if (misses > 0) {
                LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
            }
        }
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
        MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
        int misses = 0;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            // Query index:
            KNNList knns = knnQuery.getKNNForObject(o, k);
            // Query reference:
            KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
            // Put adjusted knn size:
            mv.put(knns.size() * k / (double) trueknns.size());
            // Put recall:
            mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
            if (knns.size() >= k) {
                double kdist = knns.getKNNDistance();
                final double tdist = trueknns.getKNNDistance();
                if (tdist > 0.0) {
                    mvdist.put(kdist);
                    mvdaerr.put(kdist - tdist);
                    mvdrerr.put(kdist / tdist);
                }
            } else {
                // Less than k objects.
                misses++;
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
            LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
            if (mvdist.getCount() > 0) {
                LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
                LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
            }
            if (misses > 0) {
                LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
            }
        }
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) LinearScanQuery(de.lmu.ifi.dbs.elki.database.query.LinearScanQuery) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 70 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class CASH method preprocess.

/**
 * Preprocess the dataset, precomputing the parameterization functions.
 *
 * @param db Database
 * @param vrel Vector relation
 * @return Preprocessed relation
 */
private Relation<ParameterizationFunction> preprocess(Database db, Relation<V> vrel) {
    DBIDs ids = vrel.getDBIDs();
    SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
    WritableDataStore<ParameterizationFunction> prep = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, ParameterizationFunction.class);
    // Project
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        prep.put(iter, new ParameterizationFunction(vrel.get(iter)));
    }
    return new MaterializedRelation<>(type, ids, null, prep);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ParameterizationFunction(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14