Search in sources :

Example 16 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class KNNWeightOutlier method run.

/**
 * Runs the algorithm in the timed evaluation part.
 *
 * @param database Database context
 * @param relation Data relation
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // + query point
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute kNN weights", relation.size(), LOG) : null;
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // + query point
        final KNNList knn = knnQuery.getKNNForDBID(iditer, k + 1);
        // sum of the distances to the k nearest neighbors
        double skn = 0;
        // number of neighbors so far
        int i = 0;
        for (DoubleDBIDListIter neighbor = knn.iter(); i < k && neighbor.valid(); neighbor.advance()) {
            if (DBIDUtil.equal(iditer, neighbor)) {
                continue;
            }
            skn += neighbor.doubleValue();
            ++i;
        }
        if (i < k) {
            // Less than k neighbors found
            // Approximative index, or k > data set size!
            skn = Double.POSITIVE_INFINITY;
        }
        knnw_score.putDouble(iditer, skn);
        minmax.put(skn);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    DoubleRelation res = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", knnw_score, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
    return new OutlierResult(meta, res);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 17 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class IDOS method computeIDOS.

/**
 * Computes all IDOS scores.
 *
 * @param ids the DBIDs to process
 * @param knnQ the KNN query
 * @param intDims Precomputed intrinsic dimensionalities
 * @param idosminmax Output of minimum and maximum, for metadata
 * @return ID scores
 */
protected DoubleDataStore computeIDOS(DBIDs ids, KNNQuery<O> knnQ, DoubleDataStore intDims, DoubleMinMax idosminmax) {
    WritableDoubleDataStore ldms = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("ID Outlier Scores for objects", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnQ.getKNNForDBID(iter, k_r);
        double sum = 0.;
        int cnt = 0;
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            final double id = intDims.doubleValue(neighbor);
            sum += id > 0 ? 1.0 / id : 0.;
            if (++cnt == k_r) {
                // Always stop after at most k_r elements.
                break;
            }
        }
        final double id_q = intDims.doubleValue(iter);
        final double idos = id_q > 0 ? id_q * sum / cnt : 0.;
        ldms.putDouble(iter, idos);
        idosminmax.put(idos);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return ldms;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) GreaterEqualConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 18 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class LDOF method run.

/**
 * Run the algorithm
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
    // track the maximum value for normalization
    DoubleMinMax ldofminmax = new DoubleMinMax();
    // compute the ldof values
    WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    // compute LOF_SCORE of each db object
    if (LOG.isVerbose()) {
        LOG.verbose("Computing LDOFs");
    }
    FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;
    Mean dxp = new Mean(), Dxp = new Mean();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
        dxp.reset();
        Dxp.reset();
        DoubleDBIDListIter neighbor1 = neighbors.iter(), neighbor2 = neighbors.iter();
        for (; neighbor1.valid(); neighbor1.advance()) {
            // skip the point itself
            if (DBIDUtil.equal(neighbor1, iditer)) {
                continue;
            }
            dxp.put(neighbor1.doubleValue());
            for (neighbor2.seek(neighbor1.getOffset() + 1); neighbor2.valid(); neighbor2.advance()) {
                // skip the point itself
                if (DBIDUtil.equal(neighbor2, iditer)) {
                    continue;
                }
                Dxp.put(distFunc.distance(neighbor1, neighbor2));
            }
        }
        double ldof = dxp.getMean() / Dxp.getMean();
        if (Double.isNaN(ldof) || Double.isInfinite(ldof)) {
            ldof = 1.0;
        }
        ldofs.putDouble(iditer, ldof);
        // update maximum
        ldofminmax.put(ldof);
        LOG.incrementProcessed(progressLDOFs);
    }
    LOG.ensureCompleted(progressLDOFs);
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 19 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class LOF method computeLRD.

/**
 * Compute a single local reachability distance.
 *
 * @param knnq kNN Query
 * @param curr Current object
 * @return Local Reachability Density
 */
protected double computeLRD(KNNQuery<O> knnq, DBIDIter curr) {
    final KNNList neighbors = knnq.getKNNForDBID(curr, k);
    double sum = 0.0;
    int count = 0;
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        if (DBIDUtil.equal(curr, neighbor)) {
            continue;
        }
        KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
        sum += MathUtil.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance());
        count++;
    }
    // Avoid division by 0
    return (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 20 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method singleIteration.

/**
 * Run a single iteration of the GLS-SOD modeling step
 *
 * @param relationx Geo relation
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    final int dim = RelationUtil.dimensionality(relationx);
    final int dimy = RelationUtil.dimensionality(relationy);
    assert (dim == 2);
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
    // We need stable indexed DBIDs
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
    // Sort, so we can do a binary search below.
    ids.sort();
    // init F,X,Z
    double[][] X = new double[ids.size()][6];
    double[][] F = new double[ids.size()][ids.size()];
    double[][] Y = new double[ids.size()][dimy];
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Fill the data matrix
            {
                V vec = relationx.get(id);
                double la = vec.doubleValue(0);
                double lo = vec.doubleValue(1);
                X[i][0] = 1.0;
                X[i][1] = la;
                X[i][2] = lo;
                X[i][3] = la * lo;
                X[i][4] = la * la;
                X[i][5] = lo * lo;
            }
            {
                final NumberVector vecy = relationy.get(id);
                for (int d = 0; d < dimy; d++) {
                    double idy = vecy.doubleValue(d);
                    Y[i][d] = idy;
                }
            }
            // Fill the neighborhood matrix F:
            {
                KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
                ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
                for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    if (DBIDUtil.equal(id, neighbor)) {
                        continue;
                    }
                    neighborhood.add(neighbor);
                }
                // Weight object itself positively.
                F[i][i] = 1.0;
                final int nweight = -1 / neighborhood.size();
                // unfortunately.
                for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                    int pos = ids.binarySearch(iter);
                    assert (pos >= 0);
                    F[pos][i] = nweight;
                }
            }
        }
    }
    // Estimate the parameter beta
    // Common term that we can save recomputing.
    double[][] common = times(transposeTimesTranspose(X, F), F);
    double[][] b = times(inverse(times(common, X)), times(common, Y));
    // Estimate sigma_0 and sigma:
    // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
    double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
    final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
    final double norm = 1 / FastMath.sqrt(sigma_sum_square);
    // calculate the absolute values of standard residuals
    double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
    DBIDVar worstid = DBIDUtil.newVar();
    double worstscore = Double.NEGATIVE_INFINITY;
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
        double err = squareSum(getRow(E, i));
        // double err = Math.abs(E.get(i, 0));
        if (err > worstscore) {
            worstscore = err;
            worstid.set(id);
        }
    }
    return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)53 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)38 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)32 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)21 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)20 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)18 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)18 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)18 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)18 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)15 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)9 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)9 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)8 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6