Search in sources :

Example 1 with PCAResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult in project elki by elki-project.

the class COP method run.

/**
 * Process a single relation.
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
    final int dim = RelationUtil.dimensionality(relation);
    if (k <= dim + 1) {
        LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
    }
    WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDataStore<double[]> cop_err_v = null;
    WritableIntegerDataStore cop_dim = null;
    if (models) {
        cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
        cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
    }
    // compute neighbors of each db object
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
        // Do not use query object
        nids.remove(id);
        double[] centroid = Centroid.make(relation, nids).getArrayRef();
        double[] relative = minusEquals(relation.get(id).toArray(), centroid);
        PCAResult pcares = pca.processIds(nids, relation);
        double[][] evecs = pcares.getEigenvectors();
        double[] projected = transposeTimes(evecs, relative);
        double[] evs = pcares.getEigenvalues();
        double min = Double.POSITIVE_INFINITY;
        int vdim = dim;
        switch(dist) {
            case CHISQUARED:
                {
                    double sqdevs = 0;
                    for (int d = 0; d < dim; d++) {
                        // Scale with Stddev
                        double dev = projected[d];
                        // Accumulate
                        sqdevs += dev * dev / evs[d];
                        // Evaluate
                        double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
                        if (score < min) {
                            min = score;
                            vdim = d + 1;
                        }
                    }
                    break;
                }
            case GAMMA:
                {
                    double[][] dists = new double[dim][nids.size()];
                    int j = 0;
                    double[] srel = new double[dim];
                    for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
                        V vec = relation.get(s);
                        for (int d = 0; d < dim; d++) {
                            srel[d] = vec.doubleValue(d) - centroid[d];
                        }
                        double[] serr = transposeTimes(evecs, srel);
                        double sqdist = 0.0;
                        for (int d = 0; d < dim; d++) {
                            double serrd = serr[d];
                            sqdist += serrd * serrd / evs[d];
                            dists[d][j] = sqdist;
                        }
                        j++;
                    }
                    double sqdevs = 0;
                    for (int d = 0; d < dim; d++) {
                        // Scale with Stddev
                        final double dev = projected[d];
                        // Accumulate
                        sqdevs += dev * dev / evs[d];
                        // Sort, so we can trim the top 15% below.
                        Arrays.sort(dists[d]);
                        // Evaluate
                        double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
                        if (score < min) {
                            min = score;
                            vdim = d + 1;
                        }
                    }
                    break;
                }
        }
        // Normalize the value
        final double prob = expect * (1 - min) / (expect + min);
        // Construct the error vector:
        for (int d = vdim; d < dim; d++) {
            projected[d] = 0.;
        }
        double[] ev = timesEquals(times(evecs, projected), -1 * prob);
        cop_score.putDouble(id, prob);
        if (models) {
            cop_err_v.put(id, ev);
            cop_dim.putInt(id, dim + 1 - vdim);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // combine results.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    if (models) {
        result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
        result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) GreaterConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 2 with PCAResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult in project elki by elki-project.

the class ERiCNeighborPredicate method instantiate.

/**
 * Full instantiation interface.
 *
 * @param database Database
 * @param relation Relation
 * @return Instance
 */
public Instance instantiate(Database database, Relation<V> relation) {
    DistanceQuery<V> dq = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<V> knnq = database.getKNNQuery(dq, settings.k);
    WritableDataStore<PCAFilteredResult> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
    PCARunner pca = settings.pca;
    EigenPairFilter filter = settings.filter;
    Duration time = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), LOG) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DoubleDBIDList ref = knnq.getKNNForDBID(iditer, settings.k);
        PCAResult pcares = pca.processQueryResult(ref, relation);
        storage.put(iditer, new PCAFilteredResult(pcares.getEigenPairs(), filter.filter(pcares.getEigenvalues()), 1., 0.));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    LOG.statistics(time.end());
    return new Instance(relation.getDBIDs(), storage, relation);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 3 with PCAResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult in project elki by elki-project.

the class GlobalPrincipalComponentAnalysisTransform method prepareComplete.

@Override
protected void prepareComplete() {
    mean = covmat.getMeanVector();
    PCAResult pcares = (new PCARunner(null)).processCovarMatrix(covmat.destroyToPopulationMatrix());
    SortedEigenPairs eps = pcares.getEigenPairs();
    covmat = null;
    if (filter == null) {
        proj = new double[dim][dim];
        for (int d = 0; d < dim; d++) {
            EigenPair ep = eps.getEigenPair(d);
            double[] ev = ep.getEigenvector();
            double mult = 1. / FastMath.sqrt(ep.getEigenvalue());
            // Fill weighted and transposed:
            for (int i = 0; i < dim; i++) {
                proj[d][i] = ev[i] * mult;
            }
        }
    } else {
        final int pdim = filter.filter(eps.eigenValues());
        if (LOG.isVerbose()) {
            LOG.verbose("Reducing dimensionality from " + dim + " to " + pdim + " via PCA.");
        }
        proj = new double[pdim][dim];
        for (int d = 0; d < pdim; d++) {
            EigenPair ep = eps.getEigenPair(d);
            double[] ev = ep.getEigenvector();
            double mult = 1. / FastMath.sqrt(ep.getEigenvalue());
            // Fill weighted and transposed:
            for (int i = 0; i < dim; i++) {
                proj[d][i] = ev[i] * mult;
            }
        }
    }
    buf = new double[dim];
}
Also used : PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) EigenPair(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.EigenPair)

Aggregations

PCAResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult)3 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 PCARunner (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner)2 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)1 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)1 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)1 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)1 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)1 EigenPair (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.EigenPair)1 PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)1 SortedEigenPairs (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs)1 EigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter)1 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)1 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)1 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)1