Search in sources :

Example 86 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class AveragePrecisionAtK method run.

/**
 * Run the algorithm
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Vectors containing mean and standard deviation.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final int qk = k + (includeSelf ? 0 : 1);
    final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
    MeanVarianceMinMax[] mvs = MeanVarianceMinMax.newArray(k);
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
    // sort neighbors
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        KNNList knn = knnQuery.getKNNForDBID(iter, qk);
        Object label = lrelation.get(iter);
        int positive = 0, i = 0;
        for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
            if (!includeSelf && DBIDUtil.equal(iter, ri)) {
                // Do not increment i.
                continue;
            }
            positive += match(label, lrelation.get(ri)) ? 1 : 0;
            final double precision = positive / (double) (i + 1);
            mvs[i].put(precision);
            i++;
        }
        LOG.incrementProcessed(objloop);
    }
    LOG.ensureCompleted(objloop);
    // Transform Histogram into a Double Vector array.
    Collection<double[]> res = new ArrayList<>(k);
    for (int i = 0; i < k; i++) {
        final MeanVarianceMinMax mv = mvs[i];
        final double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
        res.add(new double[] { i + 1, mv.getMean(), std, mv.getMin(), mv.getMax(), mv.getCount() });
    }
    return new CollectionResult<>("Average Precision", "average-precision", res);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 87 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class DummyAlgorithm method run.

/**
 * Run the algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and knn query for the Euclidean distance
    // Hardcoded, only use this if you only allow the eucliden distance
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, 10);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Get the actual object from the database (but discard the result)
        relation.get(iditer);
        // run a 10NN query for each point (but discard the result)
        knnQuery.getKNNForDBID(iditer, 10);
    }
    return null;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 88 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class KNNBenchmarkAlgorithm method run.

/**
 * Run the algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
    // No query set - use original database.
    if (queries == null) {
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        int hash = 0;
        MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            KNNList knns = knnQuery.getKNNForDBID(iditer, k);
            int ichecksum = 0;
            for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
                ichecksum += DBIDUtil.asInteger(it);
            }
            hash = Util.mixHashCodes(hash, ichecksum);
            mv.put(knns.size());
            mvdist.put(knns.getKNNDistance());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics("Result hashcode: " + hash);
            LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
            if (mvdist.getCount() > 0) {
                LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
            }
        }
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        int hash = 0;
        MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            KNNList knns = knnQuery.getKNNForObject(o, k);
            int ichecksum = 0;
            for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
                ichecksum += DBIDUtil.asInteger(it);
            }
            hash = Util.mixHashCodes(hash, ichecksum);
            mv.put(knns.size());
            mvdist.put(knns.getKNNDistance());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics("Result hashcode: " + hash);
            LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
            if (mvdist.getCount() > 0) {
                LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
            }
        }
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange)

Example 89 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class CASH method buildDB.

/**
 * Builds a dim-1 dimensional database where the objects are projected into
 * the specified subspace.
 *
 * @param dim the dimensionality of the database
 * @param basis the basis defining the subspace
 * @param ids the ids for the new database
 * @param relation the database storing the parameterization functions
 * @return a dim-1 dimensional database where the objects are projected into
 *         the specified subspace
 */
private MaterializedRelation<ParameterizationFunction> buildDB(int dim, double[][] basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
    ProxyDatabase proxy = new ProxyDatabase(ids);
    SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
    WritableDataStore<ParameterizationFunction> prep = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, ParameterizationFunction.class);
    // Project
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        prep.put(iter, project(basis, relation.get(iter)));
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("db fuer dim " + (dim - 1) + ": " + ids.size());
    }
    MaterializedRelation<ParameterizationFunction> prel = new MaterializedRelation<>(type, ids, null, prep);
    proxy.addRelation(prel);
    return prel;
}
Also used : ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) ParameterizationFunction(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 90 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class KMeansOutlierDetection method run.

/**
 * Run the outlier detection algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceFunction<? super O> df = clusterer.getDistanceFunction();
    DistanceQuery<O> dq = database.getDistanceQuery(relation, df);
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    Clustering<?> c = clusterer.run(database, relation);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    DoubleMinMax mm = new DoubleMinMax();
    @SuppressWarnings("unchecked") NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    for (Cluster<?> cluster : clusters) {
        // FIXME: use a primitive distance function on number vectors instead.
        O mean = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
            double dist = dq.distance(mean, iter);
            scores.put(iter, dist);
            mm.put(dist);
        }
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DataStoreFactory(de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21