Search in sources :

Example 76 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class COP method run.

/**
 * Process a single relation.
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Relation<V> relation) {
    final DBIDs ids = relation.getDBIDs();
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
    final int dim = RelationUtil.dimensionality(relation);
    if (k <= dim + 1) {
        LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
    }
    WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDataStore<double[]> cop_err_v = null;
    WritableIntegerDataStore cop_dim = null;
    if (models) {
        cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
        cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
    }
    // compute neighbors of each db object
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
        // Do not use query object
        nids.remove(id);
        double[] centroid = Centroid.make(relation, nids).getArrayRef();
        double[] relative = minusEquals(relation.get(id).toArray(), centroid);
        PCAResult pcares = pca.processIds(nids, relation);
        double[][] evecs = pcares.getEigenvectors();
        double[] projected = transposeTimes(evecs, relative);
        double[] evs = pcares.getEigenvalues();
        double min = Double.POSITIVE_INFINITY;
        int vdim = dim;
        switch(dist) {
            case CHISQUARED:
                {
                    double sqdevs = 0;
                    for (int d = 0; d < dim; d++) {
                        // Scale with Stddev
                        double dev = projected[d];
                        // Accumulate
                        sqdevs += dev * dev / evs[d];
                        // Evaluate
                        double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
                        if (score < min) {
                            min = score;
                            vdim = d + 1;
                        }
                    }
                    break;
                }
            case GAMMA:
                {
                    double[][] dists = new double[dim][nids.size()];
                    int j = 0;
                    double[] srel = new double[dim];
                    for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
                        V vec = relation.get(s);
                        for (int d = 0; d < dim; d++) {
                            srel[d] = vec.doubleValue(d) - centroid[d];
                        }
                        double[] serr = transposeTimes(evecs, srel);
                        double sqdist = 0.0;
                        for (int d = 0; d < dim; d++) {
                            double serrd = serr[d];
                            sqdist += serrd * serrd / evs[d];
                            dists[d][j] = sqdist;
                        }
                        j++;
                    }
                    double sqdevs = 0;
                    for (int d = 0; d < dim; d++) {
                        // Scale with Stddev
                        final double dev = projected[d];
                        // Accumulate
                        sqdevs += dev * dev / evs[d];
                        // Sort, so we can trim the top 15% below.
                        Arrays.sort(dists[d]);
                        // Evaluate
                        double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
                        if (score < min) {
                            min = score;
                            vdim = d + 1;
                        }
                    }
                    break;
                }
        }
        // Normalize the value
        final double prob = expect * (1 - min) / (expect + min);
        // Construct the error vector:
        for (int d = vdim; d < dim; d++) {
            projected[d] = 0.;
        }
        double[] ev = timesEquals(times(evecs, projected), -1 * prob);
        cop_score.putDouble(id, prob);
        if (models) {
            cop_err_v.put(id, ev);
            cop_dim.putInt(id, dim + 1 - vdim);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // combine results.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    if (models) {
        result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
        result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) GreaterConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 77 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class AbstractLayout3DPC method computeSimilarityMatrix.

/**
 * Compute a column-wise dependency matrix for the given relation.
 *
 * @param sim Dependence measure
 * @param rel Vector relation
 * @return Similarity matrix (lower triangular form)
 */
public static double[] computeSimilarityMatrix(DependenceMeasure sim, Relation<? extends NumberVector> rel) {
    final int dim = RelationUtil.dimensionality(rel);
    final int size = rel.size();
    // TODO: we could use less memory (no copy), but this would likely be
    // slower. Maybe as a fallback option?
    double[][] data = new double[dim][size];
    int r = 0;
    for (DBIDIter it = rel.iterDBIDs(); it.valid(); it.advance(), r++) {
        NumberVector v = rel.get(it);
        for (int d = 0; d < dim; d++) {
            data[d][r] = v.doubleValue(d);
        }
    }
    return sim.dependence(DoubleArrayAdapter.STATIC, Arrays.asList(data));
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 78 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SharedNearestNeighborPreprocessor method initialize.

@Override
public void initialize() {
    if (getLogger().isVerbose()) {
        getLogger().verbose("Assigning nearest neighbor lists to database objects");
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, ArrayDBIDs.class);
    KNNQuery<O> knnquery = QueryUtil.getKNNQuery(relation, distanceFunction, numberOfNeighbors);
    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("assigning nearest neighbor lists", relation.size(), getLogger()) : null;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(numberOfNeighbors);
        DBIDs kNN = knnquery.getKNNForDBID(iditer, numberOfNeighbors);
        for (DBIDIter iter = kNN.iter(); iter.valid(); iter.advance()) {
            // if(!id.equals(nid)) {
            neighbors.add(iter);
            // Size limitation to exactly numberOfNeighbors
            if (neighbors.size() >= numberOfNeighbors) {
                break;
            }
        }
        neighbors.sort();
        storage.put(iditer, neighbors);
        getLogger().incrementProcessed(progress);
    }
    getLogger().ensureCompleted(progress);
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 79 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class RdKNNTree method reverseKNNQuery.

public DoubleDBIDList reverseKNNQuery(DBID oid, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
    checkDistanceFunction(distanceFunction);
    if (k > settings.k_max) {
        throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
    }
    // get candidates
    ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
    doReverseKNN(getRoot(), oid, candidates);
    if (k == settings.k_max) {
        candidates.sort();
        return candidates;
    }
    // refinement of candidates, if k < k_max
    ArrayModifiableDBIDs candidateIDs = DBIDUtil.newArray(candidates);
    candidateIDs.sort();
    List<? extends KNNList> knnLists = knnQuery.getKNNForBulkDBIDs(candidateIDs, k);
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    int i = 0;
    for (DBIDIter iter = candidateIDs.iter(); iter.valid(); iter.advance(), i++) {
        for (DoubleDBIDListIter qr = knnLists.get(i).iter(); qr.valid(); qr.advance()) {
            if (DBIDUtil.equal(oid, qr)) {
                result.add(qr.doubleValue(), iter);
                break;
            }
        }
    }
    result.sort();
    return result;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 80 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class RStarTreeIndex method insertAll.

/**
 * Inserts the specified objects into this index. If a bulk load mode is
 * implemented, the objects are inserted in one bulk.
 *
 * @param ids the objects to be inserted
 */
@Override
public void insertAll(DBIDs ids) {
    if (ids.isEmpty() || (ids.size() == 1)) {
        return;
    }
    // Make an example leaf
    if (canBulkLoad()) {
        List<SpatialEntry> leafs = new ArrayList<>(ids.size());
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            leafs.add(createNewLeafEntry(iter));
        }
        bulkLoad(leafs);
    } else {
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            insert(DBIDUtil.deref(iter));
        }
    }
    doExtraIntegrityChecks();
}
Also used : ArrayList(java.util.ArrayList) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21