Search in sources :

Example 1 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class LOCI method run.

/**
 * Compute the LOCI outlier score for every object of the relation.
 *
 * For each object, the precomputed list of interesting radii is scanned and
 * the largest normalized MDEF value (together with the radius where it was
 * observed) is kept. Objects whose largest neighborhood holds fewer than
 * {@code nmin} entries receive a score of positive infinity.
 *
 * @param database Database used to obtain the distance and range queries
 * @param relation Relation whose objects are scored
 * @return Outlier result holding the "LOCI normalized MDEF" scores, with the
 *         "LOCI MDEF Radius" relation attached as child result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<O> rangeQuery = database.getRangeQuery(distanceQuery);
    final DBIDs ids = relation.getDBIDs();

    // Preprocessing: fill the per-object lists of (radius, count) pairs.
    final WritableDataStore<DoubleIntArrayList> radiiStore = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, radiiStore);

    // Main step: set up progress logging and the output storages.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("LOCI scores", relation.size(), LOG);
    }
    final WritableDoubleDataStore scoreStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    final WritableDoubleDataStore radiusStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    final DoubleMinMax scoreRange = new DoubleMinMax();
    // One accumulator, reset for every radius, instead of a fresh object each time.
    final MeanVariance neighborCounts = new MeanVariance();

    for (DBIDIter current = ids.iter(); current.valid(); current.advance()) {
        final DoubleIntArrayList radii = radiiStore.get(current);
        // The last list entry supplies the largest radius and its neighbor count.
        final double largestRadius = radii.getDouble(radii.size() - 1);
        final int largestCount = radii.getInt(radii.size() - 1);

        double bestScore;
        double bestRadius;
        if (largestCount >= nmin) {
            bestScore = 0.0;
            bestRadius = 0;
            // A single range query at the largest radius serves all smaller radii.
            final DoubleDBIDList candidates = rangeQuery.getRangeForDBID(current, largestRadius);
            final int numRadii = radii.size();
            for (int pos = 0; pos < numRadii; pos++) {
                // Radii whose neighborhood is still below nmin are not evaluated.
                if (radii.getInt(pos) < nmin) {
                    continue;
                }
                final double radius = radii.getDouble(pos);
                final double scaledRadius = alpha * radius;
                // n(p_i, alpha * r): looked up in the list, as alpha * r itself
                // is not necessarily one of the stored radii.
                final int ownCount = radii.getInt(radii.find(scaledRadius));

                // Collect n(q, alpha * r) for all neighbors q within radius r.
                neighborCounts.reset();
                DoubleDBIDListIter near = candidates.iter();
                while (near.valid()) {
                    // Everything from here on lies beyond radius r.
                    if (near.doubleValue() > radius) {
                        break;
                    }
                    final DoubleIntArrayList neighborRadii = radiiStore.get(near);
                    neighborCounts.put(neighborRadii.getInt(neighborRadii.find(scaledRadius)));
                    near.advance();
                }

                // Only mean and (naive) standard deviation of the counts are needed.
                final double expectedCount = neighborCounts.getMean();
                final double countDeviation = neighborCounts.getNaiveStddev();
                // The division by the expected count cancels out of the quotient,
                // so it is left out of both numerator and denominator.
                // NOTE: a zero deviation is not guarded; a NaN quotient never
                // passes the comparison below.
                final double normalizedMdef = (expectedCount - ownCount) / countDeviation;
                if (normalizedMdef > bestScore) {
                    bestScore = normalizedMdef;
                    bestRadius = radius;
                }
            }
        } else {
            // FIXME: when nmin was not fulfilled - what is the proper value then?
            bestScore = Double.POSITIVE_INFINITY;
            bestRadius = largestRadius;
        }

        scoreStore.putDouble(current, bestScore);
        radiusStore.putDouble(current, bestRadius);
        scoreRange.put(bestScore);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Wrap the scores and attach the critical radii as a child result.
    final DoubleRelation scores = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", scoreStore, relation.getDBIDs());
    final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    final OutlierResult outcome = new OutlierResult(meta, scores);
    outcome.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", radiusStore, relation.getDBIDs()));
    return outcome;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 2 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class DBOutlierDetection method computeOutlierScores.

/**
 * Assign a binary outlier score (1 = outlier, 0 = inlier) to every object.
 *
 * An object is flagged when fewer than {@code m} objects lie within distance
 * {@code d} of it, where {@code m = floor(size * (1 - p))}. Depending on what
 * the database offers, this is decided with an optimized kNN query, an
 * optimized range query, or a linear scan that stops early.
 *
 * @param database Database used to obtain the queries
 * @param relation Relation whose objects are scored
 * @param d Neighborhood distance threshold
 * @return Store with a score of 1 or 0 per object
 */
@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // A kNN query is preferred when available, as it can usually stop earlier;
    // the range query is only requested when no optimized kNN query exists.
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    RangeQuery<O> rangeQuery = null;
    if (knnQuery == null) {
        rangeQuery = database.getRangeQuery(distanceQuery, DatabaseQuery.HINT_OPTIMIZED_ONLY, d);
    }

    // Maximum number of objects in the d-neighborhood of an outlier.
    final int m = (int) Math.floor((distanceQuery.getRelation().size()) * (1 - p));
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distanceQuery.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("DBOutlier detection", distanceQuery.getRelation().size(), LOG);
    }

    if (knnQuery != null) {
        // kNN variant: when the kNN distance for k = m exceeds d, the object
        // is an outlier.
        for (DBIDIter current = relation.iterDBIDs(); current.valid(); current.advance()) {
            final KNNList nearest = knnQuery.getKNNForDBID(current, m);
            final boolean outlier = nearest.getKNNDistance() > d;
            scores.putDouble(current, outlier ? 1. : 0.);
            LOG.incrementProcessed(progress);
        }
    } else if (rangeQuery != null) {
        // Range variant: count the objects returned for radius d.
        for (DBIDIter current = relation.iterDBIDs(); current.valid(); current.advance()) {
            final DoubleDBIDList inRange = rangeQuery.getRangeForDBID(current, d);
            final boolean outlier = inRange.size() < m;
            scores.putDouble(current, outlier ? 1. : 0.);
            LOG.incrementProcessed(progress);
        }
    } else {
        // Fallback: linear scan per object, aborted once m neighbors were seen.
        for (DBIDIter current = relation.iterDBIDs(); current.valid(); current.advance()) {
            int found = 0;
            for (DBIDIter other = relation.iterDBIDs(); other.valid(); other.advance()) {
                // The counter is only incremented for objects within distance d.
                if (distanceQuery.distance(current, other) <= d && ++found >= m) {
                    break;
                }
            }
            final boolean outlier = found < m;
            scores.putDouble(current, outlier ? 1.0 : 0);
            LOG.incrementProcessed(progress);
        }
    }
    LOG.ensureCompleted(progress);
    return scores;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class ReferenceBasedOutlierDetection method run.

/**
 * Compute reference-point based outlier scores for the given relation.
 *
 * Density values are accumulated over all reference points, scaled by the
 * largest observed density, and turned into scores via {@code 1 - density * scale}.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation to process
 * @return Outlier result with the scores; the reference points are attached
 *         as a child result
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
    @SuppressWarnings("unchecked")
    PrimitiveDistanceQuery<? super NumberVector> distanceQuery = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);

    // Validate the preconditions before allocating anything.
    final Collection<? extends NumberVector> referencePoints = refp.getReferencePoints(relation);
    if (referencePoints.isEmpty()) {
        throw new AbortException("Cannot compute ROS without reference points!");
    }
    final DBIDs ids = relation.getDBIDs();
    if (k >= ids.size()) {
        throw new AbortException("k must not be chosen larger than the database size!");
    }

    // Per-object value store, initialized with NaN.
    final WritableDoubleDataStore values = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);

    // Density estimation: fold in the distance vector of each reference point.
    for (NumberVector referencePoint : referencePoints) {
        updateDensities(values, computeDistanceVector(referencePoint, relation, distanceQuery));
    }

    // Find the largest density; it defines the scaling factor.
    final DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        range.put(values.doubleValue(it));
    }
    final double scale;
    if (range.getMax() > 0.) {
        scale = 1. / range.getMax();
    } else {
        scale = 1.;
    }

    // Convert densities to scores in place; the tracker is reused for the
    // score range.
    range.reset();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final double rescaled = 1 - (values.doubleValue(it) * scale);
        range.put(rescaled);
        values.putDouble(it, rescaled);
    }

    final DoubleRelation scores = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", values, relation.getDBIDs());
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., 1., 0.);
    final OutlierResult outcome = new OutlierResult(meta, scores);
    // Attach the reference points, so that a visualizer can find them in the
    // result.
    outcome.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", referencePoints));
    return outcome;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) PrimitiveDistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 4 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class MaterializedKNNAndRKNNPreprocessorTest method testRKNNQueries.

/**
 * Verify that two reverse-kNN queries return the same results for every
 * object of the relation.
 *
 * Both queries are run in bulk on all DBIDs of the relation; for each object
 * the result sizes, the DBIDs and the distances are compared.
 *
 * @param rep Relation whose objects are used as query objects
 * @param lin_rknn_query Reference query (linear scan)
 * @param preproc_rknn_query Query under test (preprocessor based)
 * @param k Number of neighbors for the reverse-kNN queries
 */
private void testRKNNQueries(Relation<DoubleVector> rep, RKNNQuery<DoubleVector> lin_rknn_query, RKNNQuery<DoubleVector> preproc_rknn_query, int k) {
    ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs());
    List<? extends DoubleDBIDList> lin_rknn_ids = lin_rknn_query.getRKNNForBulkDBIDs(sample, k);
    List<? extends DoubleDBIDList> preproc_rknn_ids = preproc_rknn_query.getRKNNForBulkDBIDs(sample, k);
    for (int i = 0; i < rep.size(); i++) {
        DoubleDBIDList lin_rknn = lin_rknn_ids.get(i);
        DoubleDBIDList pre_rknn = preproc_rknn_ids.get(i);
        // Walk both result lists in lock step. This loop must only advance the
        // two iterators: incrementing the outer index i here (as was done
        // before) made the outer loop skip query objects, leaving them
        // unverified.
        DoubleDBIDListIter lin = lin_rknn.iter(), pre = pre_rknn.iter();
        for (; lin.valid() && pre.valid(); lin.advance(), pre.advance()) {
            assertTrue(DBIDUtil.equal(lin, pre) || lin.doubleValue() == pre.doubleValue());
        }
        assertEquals("rkNN sizes do not agree for k=" + k, lin_rknn.size(), pre_rknn.size());
        // Strict positional comparison of DBIDs and distances.
        for (int j = 0; j < lin_rknn.size(); j++) {
            assertTrue("rkNNs of linear scan and preprocessor do not match!", DBIDUtil.equal(lin_rknn.get(j), pre_rknn.get(j)));
            assertEquals("rkNNs of linear scan and preprocessor do not match!", lin_rknn.get(j).doubleValue(), pre_rknn.get(j).doubleValue(), 0.);
        }
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Example 5 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class AbstractIndexStructureTest method testExactCosine.

/**
 * Test routine for exact kNN and range queries using cosine distance.
 *
 * @param inputparams Parameterization used to build the test database; a
 *        fixed-DBID filter is added to it
 * @param expectKNNQuery Class the returned kNN query must be assignable to,
 *        or {@code null} to skip the kNN part
 * @param expectRangeQuery Class the returned range query must be assignable
 *        to, or {@code null} to skip the range part
 */
protected void testExactCosine(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
    // Pin the DBIDs to start at 1 (the historical indexing), so that results
    // do not vary randomly with differing hash codes.
    inputparams.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, new FixedDBIDsFilter(1));
    final Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
    final Relation<DoubleVector> vectors = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    final DistanceQuery<DoubleVector> cosine = db.getDistanceQuery(vectors, CosineDistanceFunction.STATIC);

    if (expectKNNQuery != null) {
        // k nearest neighbors of the query point.
        final DoubleVector query = DoubleVector.wrap(querypoint);
        final KNNQuery<DoubleVector> knnQuery = db.getKNNQuery(cosine, k);
        assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnQuery.getClass(), expectKNNQuery.isAssignableFrom(knnQuery.getClass()));
        final KNNList found = knnQuery.getKNNForObject(query, k);
        assertEquals("Result size does not match expectation!", cosshouldd.length, found.size());
        verifyCosineNeighbors(found.iter(), vectors, cosine);
    }

    if (expectRangeQuery != null) {
        // Range query around the query point.
        final DoubleVector query = DoubleVector.wrap(querypoint);
        final RangeQuery<DoubleVector> rangeQuery = db.getRangeQuery(cosine, coseps);
        assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeQuery.getClass(), expectRangeQuery.isAssignableFrom(rangeQuery.getClass()));
        final DoubleDBIDList found = rangeQuery.getRangeForObject(query, coseps);
        assertEquals("Result size does not match expectation!", cosshouldd.length, found.size());
        verifyCosineNeighbors(found.iter(), vectors, cosine);
    }
}

/**
 * Check each returned neighbor against the expected cosine distances and
 * vectors, position by position.
 *
 * @param res Iterator over the returned neighbors
 * @param vectors Relation used to look up the neighbor vectors
 * @param cosine Distance query used to compare returned and expected vectors
 */
private void verifyCosineNeighbors(DoubleDBIDListIter res, Relation<DoubleVector> vectors, DistanceQuery<DoubleVector> cosine) {
    for (int pos = 0; res.valid(); res.advance(), pos++) {
        // The distance must match the expected value.
        assertEquals("Expected distance doesn't match.", cosshouldd[pos], res.doubleValue(), 1e-15);
        // The vector must be at distance zero from the expected vector.
        final DoubleVector actual = vectors.get(res);
        final DoubleVector expected = DoubleVector.wrap(cosshouldc[pos]);
        assertEquals("Expected vector doesn't match: " + actual.toString(), 0.0, cosine.distance(actual, expected), 1e-15);
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FixedDBIDsFilter(de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Aggregations

DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)19 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)13 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)9 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)4 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)3 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)3 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)3 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2 Database (de.lmu.ifi.dbs.elki.database.Database)2 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)2 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)2 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)2 FixedDBIDsFilter (de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter)2