Search in sources :

Example 6 with Mean

use of de.lmu.ifi.dbs.elki.math.Mean in project elki by elki-project.

Method run of the class CTLuMoranScatterplotOutlier.

/**
 * Computes a Moran-scatterplot style outlier score for every object.
 *
 * Each value is standardized with the global mean and naive standard
 * deviation; the score is the negated product of the object's own z score
 * and the mean z score of its neighbors, clamped at zero.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (only dimension 0 is read)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    // First pass: global statistics of the attribute.
    MeanVariance globalmv = new MeanVariance();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        globalmv.put(relation.get(it).doubleValue(0));
    }
    final double mu = globalmv.getMean();
    final double sigma = globalmv.getNaiveStddev();

    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Second pass: compare each object's z score to the average z score of its neighbors.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final double ownZ = (relation.get(it).doubleValue(0) - mu) / sigma;
        Mean neighborZ = new Mean();
        for (DBIDIter nb = npred.getNeighborDBIDs(it).iter(); nb.valid(); nb.advance()) {
            // The object itself does not contribute to its neighborhood average.
            if (!DBIDUtil.equal(it, nb)) {
                neighborZ.put((relation.get(nb).doubleValue(0) - mu) / sigma);
            }
        }
        // Without any neighbors, fall back to the object's own z score (Wzi = zi).
        final double avgZ = neighborZ.getCount() > 0 ? neighborZ.getMean() : ownZ;
        // Note: in the original moran scatterplot, any object with a score < 0 would be an outlier.
        final double score = Math.max(-ownZ * avgZ, 0);
        minmax.put(score);
        scores.putDouble(it, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("MoranOutlier", "Moran Scatterplot Outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(npred);
    return result;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 7 with Mean

use of de.lmu.ifi.dbs.elki.math.Mean in project elki by elki-project.

Method run of the class TrimmedMeanApproach.

/**
 * Scores every object by its deviation from the trimmed mean of its
 * neighborhood, normalized by the median absolute deviation of all errors.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data Relation (1 dimensional!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);

    // Phase 1: error term = own value minus the trimmed mean of the neighborhood.
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        DBIDs neighbors = npred.getNeighborDBIDs(it);
        double[] values = new double[neighbors.size()];
        int num = 0;
        for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
            values[num++] = relation.get(nb).doubleValue(0);
        }
        final double own = relation.get(it).doubleValue(0);
        // Without neighbors, the trimmed mean defaults to the object's own value.
        double tm = own;
        if (num > 0) {
            Arrays.sort(values, 0, num);
            // Average only the sorted values between the p and (1 - p) positions.
            final int left = (int) Math.floor(p * (num - 1));
            final int right = (int) Math.floor((1 - p) * (num - 1));
            Mean mean = new Mean();
            for (int i = left; i <= right; i++) {
                mean.put(values[i]);
            }
            tm = mean.getMean();
        }
        errors.putDouble(it, own - tm);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Phase 2: median absolute deviation of the errors from their median.
    if (LOG.isVerbose()) {
        LOG.verbose("Computing median error.");
    }
    double[] ei = new double[relation.size()];
    int pos = 0;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        ei[pos++] = errors.doubleValue(it);
    }
    final double medianError = QuickSelect.median(ei);
    for (int i = 0; i < ei.length; i++) {
        ei[i] = Math.abs(ei[i] - medianError);
    }
    final double medianDeviation = QuickSelect.median(ei);

    // Phase 3: scale the absolute errors by 0.6745 / median deviation.
    if (LOG.isVerbose()) {
        LOG.verbose("Normalizing scores.");
    }
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final double score = Math.abs(errors.doubleValue(it)) * 0.6745 / medianDeviation;
        scores.putDouble(it, score);
        minmax.put(score);
    }

    DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(npred);
    return result;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 8 with Mean

use of de.lmu.ifi.dbs.elki.math.Mean in project elki by elki-project.

Method run of the class TrivialAverageCoordinateOutlier.

/**
 * Assigns every object the arithmetic mean of its coordinates as its score.
 *
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Relation<? extends NumberVector> relation) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
    DoubleMinMax minmax = new DoubleMinMax();
    // A single accumulator is reused; it is reset before each vector.
    Mean m = new Mean();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final NumberVector vec = relation.get(it);
        final int dim = vec.getDimensionality();
        m.reset();
        for (int d = 0; d < dim; d++) {
            m.put(vec.doubleValue(d));
        }
        final double avg = m.getMean();
        scores.putDouble(it, avg);
        minmax.put(avg);
    }
    DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial mean score", "mean-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    return new OutlierResult(meta, scoreres);
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 9 with Mean

use of de.lmu.ifi.dbs.elki.math.Mean in project elki by elki-project.

Method evaluateClustering of the class EvaluateDaviesBouldin.

/**
 * Evaluate a single clustering using the Davies-Bouldin index.
 *
 * For every cluster, the largest within-to-between spread ratio against any
 * other cluster is determined; the index is the mean of these maxima. The
 * value is logged as a statistic, attached to the evaluation result of the
 * clustering, and returned.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return DB-index
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    final int k = clusters.size();
    NumberVector[] centroids = new NumberVector[k];
    int noisecount = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
    double[] within = withinGroupDistances(rel, clusters, centroids);
    Mean dbIndex = new Mean();
    for (int i = 0; i < k; i++) {
        final NumberVector ci = centroids[i];
        final double withinI = within[i];
        // Largest within-to-between cluster spread seen for cluster i.
        double worst = 0;
        for (int j = 0; j < k; j++) {
            final NumberVector cj = centroids[j];
            // Identity check: skips the cluster itself, and pairs that both have no centroid.
            if (cj == ci) {
                continue;
            }
            if (ci != null && cj != null) {
                // Both have a centroid: (within_i + within_j) / between-centroid distance.
                final double between = distanceFunction.distance(ci, cj);
                final double ratio = (withinI + within[j]) / between;
                if (ratio > worst) {
                    worst = ratio;
                }
                continue;
            }
            if (noiseOption == NoiseHandling.IGNORE_NOISE) {
                continue;
            }
            // Exactly one side has no centroid here; use the distance from the
            // existing centroid to the closest member of the other cluster.
            double nearest = Double.POSITIVE_INFINITY;
            final double ratio;
            if (ci != null) {
                for (DBIDIter it = clusters.get(j).getIDs().iter(); it.valid(); it.advance()) {
                    final double dist = distanceFunction.distance(ci, rel.get(it));
                    if (dist < nearest) {
                        nearest = dist;
                    }
                }
                ratio = withinI / nearest;
            } else {
                for (DBIDIter it = clusters.get(i).getIDs().iter(); it.valid(); it.advance()) {
                    final double dist = distanceFunction.distance(rel.get(it), cj);
                    if (dist < nearest) {
                        nearest = dist;
                    }
                }
                ratio = within[j] / nearest;
            }
            if (ratio > worst) {
                worst = ratio;
            }
        }
        dbIndex.put(worst);
    }
    // For a single cluster, we return 2 (result for equidistant points)
    final double daviesBouldinMean;
    if (dbIndex.getCount() > 1) {
        daviesBouldinMean = dbIndex.getMean();
    } else {
        daviesBouldinMean = 2.;
    }
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(key + ".db-index.noise-handling", noiseOption.toString()));
        if (noisecount > 0) {
            LOG.statistics(new LongStatistic(key + ".db-index.ignored", noisecount));
        }
        LOG.statistics(new DoubleStatistic(key + ".db-index", daviesBouldinMean));
    }
    // Attach the measure to the evaluation result and notify the hierarchy.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup group = ev.findOrCreateGroup("Distance-based Evaluation");
    group.addMeasure("Davies Bouldin Index", daviesBouldinMean, 0., Double.POSITIVE_INFINITY, 0., true);
    db.getHierarchy().resultChanged(ev);
    return daviesBouldinMean;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Example 10 with Mean

use of de.lmu.ifi.dbs.elki.math.Mean in project elki by elki-project.

Method prepare of the class SqrtStandardDeviationScaling.

/**
 * Fits the scaling parameters (min, mean, factor) to the values of an array.
 *
 * The offset {@code min} is resolved first: the configured {@code pmin} if
 * present, otherwise the smallest raw value in the array. Only then are the
 * values transformed with {@code sqrt(val - min)} (values at or below
 * {@code min} map to 0) to obtain {@code mean} and {@code factor}.
 * Previously the transform loops read {@code min} before it was assigned
 * for the current data, and one branch derived {@code min} from the
 * already-transformed values.
 *
 * @param array Data array
 * @param adapter Adapter used to read numbers from the array
 * @param <A> Array type
 */
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
    final int size = adapter.size(array);
    // Resolve the offset before any value is transformed with it.
    if (pmin != null) {
        min = pmin;
    } else {
        double rawmin = Double.POSITIVE_INFINITY;
        for (int i = 0; i < size; i++) {
            rawmin = Math.min(rawmin, adapter.getDouble(array, i));
        }
        min = rawmin;
    }
    if (pmean == null) {
        // No mean given: estimate mean and sample standard deviation of the transformed values.
        MeanVarianceMinMax mv = new MeanVarianceMinMax();
        for (int i = 0; i < size; i++) {
            final double raw = adapter.getDouble(array, i);
            final double val = (raw <= min) ? 0 : FastMath.sqrt(raw - min);
            mv.put(val);
        }
        mean = mv.getMean();
        factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2;
    } else {
        // Mean given: use the root mean squared deviation from that fixed mean.
        mean = pmean;
        Mean sqsum = new Mean();
        for (int i = 0; i < size; i++) {
            final double raw = adapter.getDouble(array, i);
            final double val = (raw <= min) ? 0 : FastMath.sqrt(raw - min);
            sqsum.put((val - mean) * (val - mean));
        }
        factor = plambda * FastMath.sqrt(sqsum.getMean()) * MathUtil.SQRT2;
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)

Aggregations

Mean (de.lmu.ifi.dbs.elki.math.Mean)15 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)11 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)8 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)7 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)6 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)6 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)6 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)5 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)2 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)1 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)1