Search in sources :

Example 1 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class OnlineLOF method run.

/**
 * Runs the LOF computation by delegating to {@code doRunInTime} of the
 * superclass, stores the reverse kNN queries in the intermediate result and
 * registers a {@link LOFKNNListener} on the preprocessors behind both kNN
 * queries.
 *
 * @param database Database used to obtain the kNN and RkNN queries
 * @param relation Relation to process
 * @return the outlier result taken from the intermediate LOF result
 */
@Override
public OutlierResult run(Database database, Relation<O> relation) {
    // Progress is only tracked when verbose logging is enabled.
    final StepProgress progress = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;

    // First: (kNN reference, kNN reachability); second: (RkNN reference, RkNN reachability).
    final Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> allQueries = getKNNAndRkNNQueries(database, relation, progress);
    final Pair<KNNQuery<O>, KNNQuery<O>> knnQueries = allQueries.getFirst();
    final Pair<RKNNQuery<O>, RKNNQuery<O>> rknnQueries = allQueries.getSecond();

    LOFResult<O> result = super.doRunInTime(relation.getDBIDs(), knnQueries.getFirst(), knnQueries.getSecond(), progress);
    result.setRkNNRefer(rknnQueries.getFirst());
    result.setRkNNReach(rknnQueries.getSecond());

    // Register one shared listener on both preprocessors. The casts require
    // each kNN query to be a PreprocessorKNNQuery backed by a
    // MaterializeKNNPreprocessor; anything else fails with a ClassCastException.
    KNNListener listener = new LOFKNNListener(result);
    PreprocessorKNNQuery<O> referQuery = (PreprocessorKNNQuery<O>) result.getKNNRefer();
    MaterializeKNNPreprocessor<O> referPreprocessor = (MaterializeKNNPreprocessor<O>) referQuery.getPreprocessor();
    referPreprocessor.addKNNListener(listener);
    PreprocessorKNNQuery<O> reachQuery = (PreprocessorKNNQuery<O>) result.getKNNReach();
    MaterializeKNNPreprocessor<O> reachPreprocessor = (MaterializeKNNPreprocessor<O>) reachQuery.getPreprocessor();
    reachPreprocessor.addKNNListener(listener);

    return result.getResult();
}
Also used : StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) AbstractMaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) KNNListener(de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNListener) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 2 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class OnlineLOF method getKNNAndRkNNQueries.

/**
 * Get the kNN and rkNN queries for the algorithm.
 *
 * For each of the two distance functions (reference and reachability), the
 * database is first asked for optimized kNN and RkNN queries. If either one
 * is unavailable, a {@link MaterializeKNNAndRKNNPreprocessor} is created,
 * used to provide both queries, and added to the database hierarchy.
 *
 * @param database Database to request the queries from
 * @param relation Data
 * @param stepprog Progress logger, may be {@code null}
 * @return the kNN and rkNN queries: the first pair holds the kNN queries
 *         (reference, reachability), the second pair the RkNN queries
 *         (reference, reachability)
 */
private Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
    DistanceQuery<O> drefQ = database.getDistanceQuery(relation, referenceDistanceFunction);
    // Use "HEAVY" flag, since this is an online algorithm
    KNNQuery<O> kNNRefer = database.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    RKNNQuery<O> rkNNRefer = database.getRKNNQuery(drefQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    // No optimized kNN query or RkNN query - use a preprocessor!
    if (kNNRefer == null || rkNNRefer == null) {
        if (stepprog != null) {
            stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
        }
        MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
        kNNRefer = preproc.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
        rkNNRefer = preproc.getRKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
        // add as index
        database.getHierarchy().add(relation, preproc);
    } else {
        if (stepprog != null) {
            stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
        }
    }
    DistanceQuery<O> dreachQ = database.getDistanceQuery(relation, reachabilityDistanceFunction);
    KNNQuery<O> kNNReach = database.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    RKNNQuery<O> rkNNReach = database.getRKNNQuery(dreachQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    // Same fallback as above, now for the reachability distance function.
    if (kNNReach == null || rkNNReach == null) {
        if (stepprog != null) {
            stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
        }
        // The preprocessor is constructed directly from its arguments; no
        // parameterization object is needed here.
        MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
        kNNReach = preproc.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
        rkNNReach = preproc.getRKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
        // add as index
        database.getHierarchy().add(relation, preproc);
    }
    Pair<KNNQuery<O>, KNNQuery<O>> kNNPair = new Pair<>(kNNRefer, kNNReach);
    Pair<RKNNQuery<O>, RKNNQuery<O>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
    return new Pair<>(kNNPair, rkNNPair);
}
Also used : RKNNQuery(de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery) MaterializeKNNAndRKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNAndRKNNPreprocessor) PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 3 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method singleIteration.

/**
 * Run a single iteration of the GLS-SOD modeling step.
 *
 * Builds the design matrix X (second-order polynomial in the two spatial
 * coordinates), the attribute matrix Y and the neighborhood matrix F,
 * estimates the regression parameters, and returns the object with the
 * largest normalized residual.
 *
 * @param relationx Geo relation (asserted to be 2-dimensional)
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    final int dim = RelationUtil.dimensionality(relationx);
    final int dimy = RelationUtil.dimensionality(relationy);
    assert (dim == 2);
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
    // We need stable indexed DBIDs
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
    // Sort, so we can do a binary search below.
    ids.sort();
    // init F,X,Y
    double[][] X = new double[ids.size()][6];
    double[][] F = new double[ids.size()][ids.size()];
    double[][] Y = new double[ids.size()][dimy];
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Fill the data matrix
            {
                V vec = relationx.get(id);
                double la = vec.doubleValue(0);
                double lo = vec.doubleValue(1);
                X[i][0] = 1.0;
                X[i][1] = la;
                X[i][2] = lo;
                X[i][3] = la * lo;
                X[i][4] = la * la;
                X[i][5] = lo * lo;
            }
            {
                final NumberVector vecy = relationy.get(id);
                for (int d = 0; d < dimy; d++) {
                    double idy = vecy.doubleValue(d);
                    Y[i][d] = idy;
                }
            }
            // Fill the neighborhood matrix F:
            {
                // k + 1 neighbors are requested because the query object may
                // be part of its own result; it is filtered out below.
                KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
                ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
                for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    if (DBIDUtil.equal(id, neighbor)) {
                        continue;
                    }
                    neighborhood.add(neighbor);
                }
                // Weight object itself positively.
                F[i][i] = 1.0;
                // Must be a floating point division: the integer expression
                // -1 / size evaluates to 0 for every size > 1 (and throws
                // for size 0), which would drop all neighbor weights.
                final double nweight = -1. / neighborhood.size();
                // We need to look up the matrix position of each neighbor,
                // unfortunately.
                for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                    int pos = ids.binarySearch(iter);
                    assert (pos >= 0);
                    F[pos][i] = nweight;
                }
            }
        }
    }
    // Estimate the parameter beta
    // Common term that we can save recomputing.
    double[][] common = times(transposeTimesTranspose(X, F), F);
    double[][] b = times(inverse(times(common, X)), times(common, Y));
    // Estimate sigma_0 and sigma:
    // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
    double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
    final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
    final double norm = 1 / FastMath.sqrt(sigma_sum_square);
    // calculate the absolute values of standard residuals
    double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
    // Find the object whose residual row has the largest squared sum.
    DBIDVar worstid = DBIDUtil.newVar();
    double worstscore = Double.NEGATIVE_INFINITY;
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
        double err = squareSum(getRow(E, i));
        // double err = Math.abs(E.get(i, 0));
        if (err > worstscore) {
            worstscore = err;
            worstid.set(id);
        }
    }
    return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 4 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class DiSH method checkClusters.

/**
 * Moves every non-noise cluster with fewer than {@code mu} objects out of the
 * cluster map and merges its objects into the parent returned by
 * {@code findParent}, or into the noise cluster when no parent is found.
 * The cluster map is rebuilt in place.
 *
 * @param relation the relation storing the objects
 * @param clustersMap the map containing the clusters, keyed by preference
 *        vector; modified by this method
 */
private void checkClusters(Relation<V> relation, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
    final int dim = RelationUtil.dimensionality(relation);
    // Clusters below the size threshold, to be merged into a parent later.
    List<Pair<long[], ArrayModifiableDBIDs>> undersized = new ArrayList<>();
    Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> retained = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);
    // Noise is collected under the all-zero preference vector.
    Pair<long[], ArrayModifiableDBIDs> noise = new Pair<>(BitsUtil.zero(dim), DBIDUtil.newArray());

    for (long[] pv : clustersMap.keySet()) {
        final List<ArrayModifiableDBIDs> members = clustersMap.get(pv);
        if (BitsUtil.cardinality(pv) == 0) {
            // Empty preference vector: everything here is noise.
            for (ArrayModifiableDBIDs c : members) {
                noise.second.addDBIDs(c);
            }
            continue;
        }
        // Regular clusters: separate large enough ones from undersized ones.
        List<ArrayModifiableDBIDs> kept = new ArrayList<>(members.size());
        for (ArrayModifiableDBIDs c : members) {
            final boolean tooSmall = !BitsUtil.isZero(pv) && c.size() < mu;
            if (tooSmall) {
                undersized.add(new Pair<>(pv, c));
            } else {
                kept.add(c);
            }
        }
        retained.put(pv, kept);
    }

    // Replace the map contents with the retained clusters only.
    clustersMap.clear();
    clustersMap.putAll(retained);

    for (Pair<long[], ArrayModifiableDBIDs> small : undersized) {
        if (small.second.isEmpty()) {
            continue;
        }
        Pair<long[], ArrayModifiableDBIDs> parent = findParent(relation, small, clustersMap);
        // Without a parent, the objects are treated as noise.
        ArrayModifiableDBIDs target = (parent != null) ? parent.second : noise.second;
        target.addDBIDs(small.second);
    }

    // Finally register the (possibly empty) noise cluster.
    List<ArrayModifiableDBIDs> noiseClusters = new ArrayList<>(1);
    noiseClusters.add(noise.second);
    clustersMap.put(noise.first, noiseClusters);
}
Also used : Object2ObjectOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayList(java.util.ArrayList) List(java.util.List) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 5 with Pair

use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.

the class IndexStatistics method processNewResult.

/**
 * Collects all {@link IndexTree} results found below the new result and adds
 * an {@link IndexMetaResult} for them to the database hierarchy. The header
 * contains the string representation of every index found. Does nothing
 * when no index is found.
 *
 * @param hier Result hierarchy
 * @param newResult Newly added result to inspect
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
    Database db = ResultUtil.findDatabase(hier);
    final ArrayList<IndexTree<?, ?>> indexes = ResultUtil.filterResults(hier, newResult, IndexTree.class);
    if (indexes == null || indexes.isEmpty()) {
        return;
    }
    // Allocate the header once, so that every index is listed instead of
    // only the last one.
    Collection<String> header = new ArrayList<>(indexes.size());
    for (IndexTree<?, ?> index : indexes) {
        header.add(index.toString());
    }
    Collection<Pair<String, String>> col = new ArrayList<>();
    IndexMetaResult analysis = new IndexMetaResult(col, header);
    db.getHierarchy().add(db, analysis);
}
Also used : IndexTree(de.lmu.ifi.dbs.elki.index.tree.IndexTree) Database(de.lmu.ifi.dbs.elki.database.Database) ArrayList(java.util.ArrayList) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)16 ArrayList (java.util.ArrayList)10 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)6 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)6 List (java.util.List)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)3 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 IOException (java.io.IOException)3 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)2 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)2 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)2 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)2 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)2 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)2 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)2 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)2