Search in sources :

Example 61 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method singleIteration.

/**
 * Run a single iteration of the GLS-SOD modeling step
 *
 * @param relationx Geo relation
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    final int dim = RelationUtil.dimensionality(relationx);
    final int dimy = RelationUtil.dimensionality(relationy);
    assert (dim == 2);
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
    // We need stable indexed DBIDs
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
    // Sort, so we can do a binary search below.
    ids.sort();
    // init F,X,Z
    double[][] X = new double[ids.size()][6];
    double[][] F = new double[ids.size()][ids.size()];
    double[][] Y = new double[ids.size()][dimy];
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Fill the data matrix
            {
                V vec = relationx.get(id);
                double la = vec.doubleValue(0);
                double lo = vec.doubleValue(1);
                X[i][0] = 1.0;
                X[i][1] = la;
                X[i][2] = lo;
                X[i][3] = la * lo;
                X[i][4] = la * la;
                X[i][5] = lo * lo;
            }
            {
                final NumberVector vecy = relationy.get(id);
                for (int d = 0; d < dimy; d++) {
                    double idy = vecy.doubleValue(d);
                    Y[i][d] = idy;
                }
            }
            // Fill the neighborhood matrix F:
            {
                KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
                ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
                for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    if (DBIDUtil.equal(id, neighbor)) {
                        continue;
                    }
                    neighborhood.add(neighbor);
                }
                // Weight object itself positively.
                F[i][i] = 1.0;
                final int nweight = -1 / neighborhood.size();
                // unfortunately.
                for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                    int pos = ids.binarySearch(iter);
                    assert (pos >= 0);
                    F[pos][i] = nweight;
                }
            }
        }
    }
    // Estimate the parameter beta
    // Common term that we can save recomputing.
    double[][] common = times(transposeTimesTranspose(X, F), F);
    double[][] b = times(inverse(times(common, X)), times(common, Y));
    // Estimate sigma_0 and sigma:
    // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
    double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
    final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
    final double norm = 1 / FastMath.sqrt(sigma_sum_square);
    // calculate the absolute values of standard residuals
    double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
    DBIDVar worstid = DBIDUtil.newVar();
    double worstscore = Double.NEGATIVE_INFINITY;
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
        double err = squareSum(getRow(E, i));
        // double err = Math.abs(E.get(i, 0));
        if (err > worstscore) {
            worstscore = err;
            worstid.set(id);
        }
    }
    return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 62 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class CTLuMedianAlgorithm method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance mv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        final double median;
        {
            double[] fi = new double[neighbors.size()];
            // calculate and store Median of neighborhood
            int c = 0;
            for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
                if (DBIDUtil.equal(iditer, iter)) {
                    continue;
                }
                fi[c] = relation.get(iter).doubleValue(0);
                c++;
            }
            if (c > 0) {
                median = QuickSelect.median(fi, 0, c);
            } else {
                median = relation.get(iditer).doubleValue(0);
            }
        }
        double h = relation.get(iditer).doubleValue(0) - median;
        scores.putDouble(iditer, h);
        mv.put(h);
    }
    // Normalize scores
    final double mean = mv.getMean();
    final double stddev = mv.getNaiveStddev();
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
        minmax.put(score);
        scores.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 63 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class DWOF method run.

/**
 * Performs the Generalized DWOF_SCORE algorithm on the given database by
 * calling all the other methods in the proper order.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return new OutlierResult instance
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DBIDs ids = relation.getDBIDs();
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    // Get k nearest neighbor and range query on the relation.
    KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
    RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
    StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
    // DWOF output score storage.
    WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
    if (stepProg != null) {
        stepProg.beginStep(1, "Initializing objects' Radii", LOG);
    }
    WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
    // Find an initial radius for each object:
    initializeRadii(ids, knnq, distFunc, radii);
    WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
    WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
    int countUnmerged = relation.size();
    if (stepProg != null) {
        stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
    }
    IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
    while (countUnmerged > 0) {
        LOG.incrementProcessed(clusEvalProgress);
        // Increase radii
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            radii.putDouble(iter, radii.doubleValue(iter) * delta);
        }
        // stores the clustering label for each object
        WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
        // Cluster objects based on the current radius
        clusterData(ids, rnnQuery, radii, labels);
        // simple reference swap
        WritableIntegerDataStore temp = newSizes;
        newSizes = oldSizes;
        oldSizes = temp;
        // Update the cluster size count for each object.
        countUnmerged = updateSizes(ids, labels, newSizes);
        labels.destroy();
        // Update DWOF scores.
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            double newScore = (newSizes.intValue(iter) > 0) ? ((double) (oldSizes.intValue(iter) - 1) / (double) newSizes.intValue(iter)) : 0.0;
            dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
        }
    }
    LOG.setCompleted(clusEvalProgress);
    LOG.setCompleted(stepProg);
    // Build result representation.
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        minmax.put(dwofs.doubleValue(iter));
    }
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
    return new OutlierResult(meta, rel);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 64 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class GaussianUniformMixture method run.

/**
 * Run the algorithm
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    // Use an array list of object IDs for fast random access by an offset
    ArrayDBIDs objids = DBIDUtil.ensureArray(relation.getDBIDs());
    // A bit set to flag objects as anomalous, none at the beginning
    long[] bits = BitsUtil.zero(objids.size());
    // Positive masked collection
    DBIDs normalObjs = new MaskedDBIDs(objids, bits, true);
    // Positive masked collection
    DBIDs anomalousObjs = new MaskedDBIDs(objids, bits, false);
    // resulting scores
    WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // compute loglikelihood
    double logLike = relation.size() * logml + loglikelihoodNormal(normalObjs, relation);
    // LOG.debugFine("normalsize " + normalObjs.size() + " anormalsize " +
    // anomalousObjs.size() + " all " + (anomalousObjs.size() +
    // normalObjs.size()));
    // LOG.debugFine(logLike + " loglike beginning" +
    // loglikelihoodNormal(normalObjs, database));
    DoubleMinMax minmax = new DoubleMinMax();
    DBIDIter iter = objids.iter();
    for (int i = 0; i < objids.size(); i++, iter.advance()) {
        // LOG.debugFine("i " + i);
        // Change mask to make the current object anomalous
        BitsUtil.setI(bits, i);
        // Compute new likelihoods
        double currentLogLike = normalObjs.size() * logml + loglikelihoodNormal(normalObjs, relation) + anomalousObjs.size() * logl + loglikelihoodAnomalous(anomalousObjs);
        // if the loglike increases more than a threshold, object stays in
        // anomalous set and is flagged as outlier
        final double loglikeGain = currentLogLike - logLike;
        oscores.putDouble(iter, loglikeGain);
        minmax.put(loglikeGain);
        if (loglikeGain > c) {
            // flag as outlier
            // LOG.debugFine("Outlier: " + curid + " " + (currentLogLike -
            // logLike));
            // Update best logLike
            logLike = currentLogLike;
        } else {
            // LOG.debugFine("Inlier: " + curid + " " + (currentLogLike - logLike));
            // undo bit set
            BitsUtil.clearI(bits, i);
        }
    }
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
    DoubleRelation res = new MaterializedDoubleRelation("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", oscores, relation.getDBIDs());
    return new OutlierResult(meta, res);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) MaskedDBIDs(de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaskedDBIDs(de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs)

Example 65 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class GaussianUniformMixture method loglikelihoodNormal.

/**
 * Computes the loglikelihood of all normal objects. Gaussian model
 *
 * @param objids Object IDs for 'normal' objects.
 * @param relation Database
 * @return loglikelihood for normal objects
 */
private double loglikelihoodNormal(DBIDs objids, Relation<V> relation) {
    if (objids.isEmpty()) {
        return 0;
    }
    CovarianceMatrix builder = CovarianceMatrix.make(relation, objids);
    double[] mean = builder.getMeanVector();
    double[][] covarianceMatrix = builder.destroyToSampleMatrix();
    // test singulaere matrix
    double[][] covInv = inverse(covarianceMatrix);
    double covarianceDet = new LUDecomposition(covarianceMatrix).det();
    double fakt = 1.0 / FastMath.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceDet);
    // for each object compute probability and sum
    double prob = 0;
    for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
        double[] x = minusEquals(relation.get(iter).toArray(), mean);
        double mDist = transposeTimesTimes(x, covInv, x);
        prob += FastMath.log(fakt * FastMath.exp(-mDist * .5));
    }
    return prob;
}
Also used : LUDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21