Search in sources :

Example 41 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class SqrtStandardDeviationScaling method prepare.

/**
 * Estimates the scaling parameters {@code min}, {@code mean} and
 * {@code factor} from the scores of the given outlier result.
 * <p>
 * The offset {@code min} is resolved <em>before</em> any score is
 * transformed, so that the square-root transform
 * {@code sqrt(score - min)} used for estimating mean and deviation always
 * sees the final offset: either the predefined {@code pmin} or the smallest
 * raw score. Previously the transform read {@code min} before it had been
 * assigned for this result.
 * <p>
 * If {@code pmean} is given it is used as the mean and the deviation is
 * computed around it (dividing by the number of scores); otherwise mean and
 * sample standard deviation are both estimated from the transformed scores.
 *
 * @param or outlier result whose scores are used for the estimation
 */
@Override
public void prepare(OutlierResult or) {
    final DoubleRelation scores = or.getScores();
    // Step 1: fix the offset first; everything below depends on it.
    if (pmin != null) {
        min = pmin;
    } else {
        double lowest = Double.POSITIVE_INFINITY;
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            lowest = Math.min(lowest, scores.doubleValue(id));
        }
        min = lowest;
    }
    // Step 2: estimate mean and scaling factor on the transformed scores.
    if (pmean == null) {
        MeanVarianceMinMax mv = new MeanVarianceMinMax();
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            double val = scores.doubleValue(id);
            // Scores at or below the offset are clamped to 0.
            val = (val <= min) ? 0 : FastMath.sqrt(val - min);
            mv.put(val);
        }
        mean = mv.getMean();
        factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2;
    } else {
        mean = pmean;
        double sqsum = 0;
        int cnt = 0;
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            double val = scores.doubleValue(id);
            // Scores at or below the offset are clamped to 0.
            val = (val <= min) ? 0 : FastMath.sqrt(val - min);
            sqsum += (val - mean) * (val - mean);
            cnt += 1;
        }
        // The mean is given, so the squared deviations are averaged over cnt.
        factor = plambda * FastMath.sqrt(sqsum / cnt) * MathUtil.SQRT2;
    }
}
Also used : DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 42 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OnlineLOFTest method testOnlineLOF.

/**
 * Verifies that {@link OnlineLOF} and the static {@link LOF} computation
 * agree. The static algorithm is run on the database first; afterwards
 * {@code runOnlineLOF} is run on the same database (it is expected to insert
 * new objects and delete them again). Finally the two score relations are
 * compared object by object with a tolerance of {@code 1e-10}.
 */
@SuppressWarnings("unchecked")
@Test
public void testOnlineLOF() {
    UpdatableDatabase db = getDatabase();
    // Reference scores from the static algorithm.
    FlexibleLOF<DoubleVector> reference = new FlexibleLOF<>(k, k, neighborhoodDistanceFunction, reachabilityDistanceFunction);
    DoubleRelation expected = reference.run(db).getScores();
    // Scores from the online variant, after its insertions and removals.
    DoubleRelation actual = runOnlineLOF(db).getScores();
    // Walk over the ids of the reference result and compare pairwise.
    DBIDIter it = expected.getDBIDs().iter();
    while (it.valid()) {
        final double lofStatic = expected.doubleValue(it);
        final double lofOnline = actual.doubleValue(it);
        final String idText = DBIDUtil.toString(it);
        final String message = "lof(" + idText + ") != lof(" + idText + "): " + lofStatic + " != " + lofOnline;
        assertEquals(message, lofStatic, lofOnline, 1e-10);
        it.advance();
    }
}
Also used : UpdatableDatabase(de.lmu.ifi.dbs.elki.database.UpdatableDatabase) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Test(org.junit.Test)

Example 43 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OutlierSmROCCurve method computeSmROCResult.

/**
 * Builds the SmROC curve for an outlier result and stores the normalized
 * area under it in the returned curve.
 * <p>
 * Objects are visited in the order given by the result's ordering. Objects
 * with identical scores form a group whose positive and negative counts are
 * accumulated; the group is added to the curve only once a different score
 * is encountered (and once more after the loop for the final group). How a
 * group contributes to x and y depends on whether its score lies above or
 * below the mean score; a score equal to the mean (or NaN) adds nothing.
 *
 * @param positiveids ids that count as positive matches
 * @param or outlier result providing scores and ordering
 * @return the curve, with {@code rocauc} set
 */
private SmROCResult computeSmROCResult(SetDBIDs positiveids, OutlierResult or) {
    final DoubleRelation scores = or.getScores();
    final int size = scores.size();
    // Mean score, used below to decide how a score group is weighted.
    double mean = 0.0;
    for (DBIDIter it = scores.iterDBIDs(); it.valid(); it.advance()) {
        mean += scores.doubleValue(it) / size;
    }
    SmROCResult curve = new SmROCResult(positiveids.size() + 2);
    // The curve starts in the bottom left corner.
    curve.add(0.0, 0.0);
    int positives = 0, negatives = 0;
    double groupScore = Double.NaN;
    double x = 0, y = 0;
    DBIDIter cur = or.getOrdering().order(or.getOrdering().getDBIDs()).iter();
    while (cur.valid()) {
        final double score = scores.doubleValue(cur);
        // NaN never ties: Double.compare would treat two NaNs as equal.
        final boolean tied = !Double.isNaN(groupScore) && (Double.compare(groupScore, score) == 0);
        if (!tied) {
            // The previous group is complete: emit its point, then start over.
            if (groupScore > mean) {
                y += positives * groupScore + negatives * (1.0 - groupScore);
                x += positives * (1.0 - groupScore) + negatives * groupScore;
            } else if (groupScore < mean) {
                y += positives * (1.0 - groupScore) + negatives * groupScore;
                x += positives * groupScore + negatives * (1.0 - groupScore);
            }
            curve.addAndSimplify(x, y);
            positives = 0;
            negatives = 0;
            groupScore = score;
        }
        // Count the current object into the (possibly fresh) group.
        if (positiveids.contains(cur)) {
            positives += 1;
        } else {
            negatives += 1;
        }
        cur.advance();
    }
    // Emit the point for the final group.
    if (groupScore > mean) {
        y += positives * groupScore + negatives * (1.0 - groupScore);
        x += positives * (1.0 - groupScore) + negatives * groupScore;
    } else if (groupScore < mean) {
        y += positives * (1.0 - groupScore) + negatives * groupScore;
        x += positives * groupScore + negatives * (1.0 - groupScore);
    }
    curve.addAndSimplify(x, y);
    // Normalize the area by the extent reached in x and y.
    final double rocauc = XYCurve.areaUnderCurve(curve) / (x * y);
    if (LOG.isVerbose()) {
        LOG.verbose(SMROCAUC_LABEL + ": " + rocauc);
    }
    curve.rocauc = rocauc;
    return curve;
}
Also used : DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 44 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OutlierMinusLogScaling method prepare.

/**
 * Derives the scaling parameters from the scores of the given result:
 * {@code max} becomes the largest score and {@code mlogmax} the negated
 * logarithm of the ratio between the smallest score and {@code max}.
 * NaN and infinite scores are left out of the minimum/maximum.
 *
 * @param or outlier result whose scores are inspected
 */
@Override
public void prepare(OutlierResult or) {
    DoubleMinMax range = new DoubleMinMax();
    DoubleRelation scores = or.getScores();
    DBIDIter it = scores.iterDBIDs();
    while (it.valid()) {
        final double score = scores.doubleValue(it);
        final boolean unusable = Double.isNaN(score) || Double.isInfinite(score);
        if (!unusable) {
            range.put(score);
        }
        it.advance();
    }
    max = range.getMax();
    mlogmax = -FastMath.log(range.getMin() / max);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 45 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class SimpleCOP method run.

/**
 * Computes a correlation outlier probability for every object of the
 * relation.
 * <p>
 * For each object the {@code k + 1} nearest neighbors are queried, the
 * object itself is removed from that set, and a correlation model is
 * derived from the remaining neighbors via {@code dependencyDerivator}. The
 * score is {@code erf(distance / (stddev * sqrt(2)))}, where distance and
 * standard deviation both come from that model. The negated error vector,
 * the data projections, the correlation dimensionality and the model
 * itself are attached to the result as child relations.
 *
 * @param database database argument; not used by this method body
 * @param data relation holding the objects to score
 * @return outlier result with the probabilities and the extra relations
 * @throws IllegalStateException declared by the signature; not thrown
 *         directly in this method body
 */
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
    DBIDs ids = data.getDBIDs();
    // One store per per-object output.
    WritableDoubleDataStore probabilities = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDataStore<double[]> errorVectors = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
    WritableDataStore<double[][]> projections = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[][].class);
    WritableIntegerDataStore dimensionalities = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
    WritableDataStore<CorrelationAnalysisSolution<?>> solutions = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);

    // Progress is only tracked when verbose logging is enabled.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG);
    }
    for (DBIDIter it = data.iterDBIDs(); it.valid(); it.advance()) {
        // Neighborhood of the current object, without the object itself.
        KNNList knn = knnQuery.getKNNForDBID(it, k + 1);
        ModifiableDBIDs neighborIds = DBIDUtil.newArray(knn);
        neighborIds.remove(it);
        // TODO: do we want to use the query point as centroid?
        CorrelationAnalysisSolution<V> model = dependencyDerivator.generateModel(data, neighborIds);
        final V object = data.get(it);
        final double stddev = model.getStandardDeviation();
        final double distance = model.distance(object);
        probabilities.putDouble(it, NormalDistribution.erf(distance / (stddev * MathUtil.SQRT2)));
        // The error vector is stored negated.
        errorVectors.put(it, times(model.errorVector(object), -1));
        projections.put(it, model.dataProjections(object));
        dimensionalities.putInt(it, model.getCorrelationDimensionality());
        solutions.put(it, model);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Main result: the probabilities as an outlier score relation.
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", probabilities, ids);
    OutlierScoreMeta meta = new ProbabilisticOutlierScore();
    OutlierResult outlierResult = new OutlierResult(meta, scoreRelation);
    // Additional per-object information as child results.
    outlierResult.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, dimensionalities, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, errorVectors, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, projections, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), solutions, ids));
    return outlierResult;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CorrelationAnalysisSolution(de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 89, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 72, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 70, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 70, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 70, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 69, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 65, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 38, BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta): 34, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 21, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 18, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 17, InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta): 14, ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore): 13, QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta): 13, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 11, StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress): 11, NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate): 9, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 8, Mean (de.lmu.ifi.dbs.elki.math.Mean): 8