Search in sources :

Example 1 with NeighborSetPredicate

use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.

the class CTLuMedianAlgorithm method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance mv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        final double median;
        {
            double[] fi = new double[neighbors.size()];
            // calculate and store Median of neighborhood
            int c = 0;
            for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
                if (DBIDUtil.equal(iditer, iter)) {
                    continue;
                }
                fi[c] = relation.get(iter).doubleValue(0);
                c++;
            }
            if (c > 0) {
                median = QuickSelect.median(fi, 0, c);
            } else {
                median = relation.get(iditer).doubleValue(0);
            }
        }
        double h = relation.get(iditer).doubleValue(0) - median;
        scores.putDouble(iditer, h);
        mv.put(h);
    }
    // Normalize scores
    final double mean = mv.getMean();
    final double stddev = mv.getNaiveStddev();
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
        minmax.put(score);
        scores.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with NeighborSetPredicate

use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.

the class CTLuScatterplotOutlier method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
    // Calculate average of neighborhood for each object and perform a linear
    // regression using the covariance matrix
    CovarianceMatrix covm = new CovarianceMatrix(2);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final double local = relation.get(iditer).doubleValue(0);
        // Compute mean of neighbors
        Mean mean = new Mean();
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(iditer, iter)) {
                continue;
            }
            mean.put(relation.get(iter).doubleValue(0));
        }
        final double m;
        if (mean.getCount() > 0) {
            m = mean.getMean();
        } else {
            // if object id has no neighbors ==> avg = non-spatial attribute of id
            m = local;
        }
        // Store the mean for the score calculation
        means.putDouble(iditer, m);
        covm.put(new double[] { local, m });
    }
    // Finalize covariance matrix, compute linear regression
    final double slope, inter;
    {
        double[] meanv = covm.getMeanVector();
        double[][] fmat = covm.destroyToSampleMatrix();
        final double covxx = fmat[0][0], covxy = fmat[0][1];
        slope = covxy / covxx;
        inter = meanv[1] - slope * meanv[0];
    }
    // calculate mean and variance for error
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance mv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Compute the error from the linear regression
        double y_i = relation.get(iditer).doubleValue(0);
        double e = means.doubleValue(iditer) - (slope * y_i + inter);
        scores.putDouble(iditer, e);
        mv.put(e);
    }
    // Normalize scores
    DoubleMinMax minmax = new DoubleMinMax();
    {
        final double mean = mv.getMean();
        final double variance = mv.getNaiveStddev();
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            double score = Math.abs((scores.doubleValue(iditer) - mean) / variance);
            minmax.put(score);
            scores.putDouble(iditer, score);
        }
    }
    // build representation
    DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 3 with NeighborSetPredicate

use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.

the class CTLuMedianMultipleAttributes method run.

/**
 * Run the algorithm
 *
 * @param database Database
 * @param spatial Spatial relation
 * @param attributes Attributes relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> attributes) {
    final int dim = RelationUtil.dimensionality(attributes);
    if (LOG.isDebugging()) {
        LOG.debug("Dimensionality: " + dim);
    }
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
    CovarianceMatrix covmaker = new CovarianceMatrix(dim);
    WritableDataStore<double[]> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
    for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final O obj = attributes.get(iditer);
        final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        // Compute the median vector
        final double[] median = new double[dim];
        {
            double[][] data = new double[dim][neighbors.size()];
            int i = 0;
            // Load data
            for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
                // TODO: skip object itself within neighbors?
                O nobj = attributes.get(iter);
                for (int d = 0; d < dim; d++) {
                    data[d][i] = nobj.doubleValue(d);
                }
                i++;
            }
            for (int d = 0; d < dim; d++) {
                median[d] = QuickSelect.median(data[d]);
            }
        }
        // Delta vector "h"
        double[] delta = minusEquals(obj.toArray(), median);
        deltas.put(iditer, delta);
        covmaker.put(delta);
    }
    // Finalize covariance matrix:
    double[] mean = covmaker.getMeanVector();
    double[][] cmati = inverse(covmaker.destroyToSampleMatrix());
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final double score = mahalanobisDistance(cmati, deltas.get(iditer), mean);
        minmax.put(score);
        scores.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Median multiple attributes outlier", "median-outlier", scores, attributes.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 4 with NeighborSetPredicate

use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.

the class CTLuZTestOutlier method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance zmv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(iditer);
        // Compute Mean of neighborhood
        Mean localmean = new Mean();
        for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(iditer, iter)) {
                continue;
            }
            localmean.put(relation.get(iter).doubleValue(0));
        }
        final double localdiff;
        if (localmean.getCount() > 0) {
            localdiff = relation.get(iditer).doubleValue(0) - localmean.getMean();
        } else {
            localdiff = 0.0;
        }
        scores.putDouble(iditer, localdiff);
        zmv.put(localdiff);
    }
    // Normalize scores using mean and variance
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = Math.abs(scores.doubleValue(iditer) - zmv.getMean()) / zmv.getSampleStddev();
        minmax.put(score);
        scores.putDouble(iditer, score);
    }
    // Wrap result
    DoubleRelation scoreResult = new MaterializedDoubleRelation("ZTest", "Z Test score", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 5 with NeighborSetPredicate

use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.

the class CTLuMoranScatterplotOutlier method run.

/**
 * Main method.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    // Compute the global mean and variance
    MeanVariance globalmv = new MeanVariance();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        globalmv.put(relation.get(iditer).doubleValue(0));
    }
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // calculate neighborhood average of normalized attribute values.
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Compute global z score
        final double globalZ = (relation.get(iditer).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev();
        // Compute local average z score
        Mean localm = new Mean();
        for (DBIDIter iter = npred.getNeighborDBIDs(iditer).iter(); iter.valid(); iter.advance()) {
            if (DBIDUtil.equal(iditer, iter)) {
                continue;
            }
            localm.put((relation.get(iter).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev());
        }
        // if neighors.size == 0
        final double localZ;
        if (localm.getCount() > 0) {
            localZ = localm.getMean();
        } else {
            // if s has no neighbors => Wzi = zi
            localZ = globalZ;
        }
        // compute score
        // Note: in the original moran scatterplot, any object with a score < 0 would be an outlier.
        final double score = Math.max(-globalZ * localZ, 0);
        minmax.put(score);
        scores.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("MoranOutlier", "Moran Scatterplot Outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)9 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)9 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)9 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)9 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)8 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)8 Mean (de.lmu.ifi.dbs.elki.math.Mean)4 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)4 CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)1