Search in sources :

Example 71 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class CASH method buildDerivatorDB.

/**
 * Creates a proxy database for the derivator, restricted to the objects of
 * the given interval.
 * <p>
 * For every id of the interval, the column vector of its parameterization
 * function is wrapped into a {@link DoubleVector} and stored in a single
 * materialized relation that is attached to the returned database.
 *
 * @param relation the relation storing the parameterization functions
 * @param interval the interval whose ids make up the new database
 * @return a proxy database holding one vector relation over the interval ids
 */
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, CASHInterval interval) {
    final DBIDs members = interval.getIDs();
    final ProxyDatabase derivatorDb = new ProxyDatabase(members);
    final int dimensions = dimensionality(relation);
    final SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dimensions);
    final WritableDataStore<DoubleVector> vectors = DataStoreUtil.makeStorage(members, DataStoreFactory.HINT_HOT, DoubleVector.class);
    // Project each parameterization function onto its column vector.
    DBIDIter it = members.iter();
    while (it.valid()) {
        vectors.put(it, DoubleVector.wrap(relation.get(it).getColumnVector()));
        it.advance();
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("db fuer derivator : " + members.size());
    }
    final MaterializedRelation<DoubleVector> projected = new MaterializedRelation<>(vectorType, members, null, vectors);
    derivatorDb.addRelation(projected);
    return derivatorDb;
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 72 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class SimpleCOP method run.

/**
 * Computes correlation outlier probabilities for all objects of the relation.
 * <p>
 * For each object, its k+1 nearest neighbors are queried, the object itself
 * is removed from that set, and a correlation model is derived from the
 * remaining neighbors. The score is the error function of the object's
 * distance to the model, normalized by the model's standard deviation. The
 * error vector, data projections, correlation dimensionality and the model
 * itself are attached to the result as child relations.
 *
 * @param database the database (not used directly by this method)
 * @param data the relation to process
 * @return the outlier result with the additional per-object relations
 * @throws IllegalStateException declared by the signature; not thrown
 *         explicitly in this method
 */
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
    final DBIDs ids = data.getDBIDs();
    // All per-object stores share the same storage hints.
    final int hints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC;
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, hints);
    WritableDataStore<double[]> errorVectors = DataStoreUtil.makeStorage(ids, hints, double[].class);
    WritableDataStore<double[][]> dataVectors = DataStoreUtil.makeStorage(ids, hints, double[][].class);
    WritableIntegerDataStore corrDims = DataStoreUtil.makeIntegerStorage(ids, hints, -1);
    WritableDataStore<CorrelationAnalysisSolution<?>> solutions = DataStoreUtil.makeStorage(ids, hints, CorrelationAnalysisSolution.class);

    // Derive a local correlation model from the neighbors of each object.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG);
    }
    DBIDIter id = data.iterDBIDs();
    while (id.valid()) {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs neighborIds = DBIDUtil.newArray(neighbors);
        // The query object must not contribute to its own model.
        neighborIds.remove(id);
        // TODO: do we want to use the query point as centroid?
        CorrelationAnalysisSolution<V> model = dependencyDerivator.generateModel(data, neighborIds);
        final V object = data.get(id);
        final double stddev = model.getStandardDeviation();
        final double distance = model.distance(object);
        scores.putDouble(id, NormalDistribution.erf(distance / (stddev * MathUtil.SQRT2)));
        // The error vector is stored with inverted sign.
        errorVectors.put(id, times(model.errorVector(object), -1));
        dataVectors.put(id, model.dataProjections(object));
        corrDims.putInt(id, model.getCorrelationDimensionality());
        solutions.put(id, model);
        LOG.incrementProcessed(progress);
        id.advance();
    }
    LOG.ensureCompleted(progress);

    // Assemble the primary score relation and its metadata.
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", scores, ids);
    OutlierScoreMeta meta = new ProbabilisticOutlierScore();
    OutlierResult outlierResult = new OutlierResult(meta, scoreRelation);
    // Attach the auxiliary per-object relations.
    outlierResult.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, corrDims, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, errorVectors, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, dataVectors, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), solutions, ids));
    return outlierResult;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CorrelationAnalysisSolution(de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 73 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class GreedyEnsembleExperiment method run.

/**
 * Runs the greedy ensemble experiment.
 * <p>
 * The input database is expected to contain one vector per outlier detection
 * method (the <em>result vectors</em>, not the original data), where the first
 * object carries the label "bylabel" and serves as reference. The method
 * <ol>
 * <li>estimates a target vector by merging the top-ranked objects of all
 * members until enough objects received {@code minvote} votes,</li>
 * <li>builds a naive ensemble from all members,</li>
 * <li>scores every single member and initializes the greedy ensemble with the
 * member closest to the estimated target,</li>
 * <li>greedily adds candidates (most diverse first) whenever doing so moves
 * the combined ensemble closer to the estimated target, and</li>
 * <li>compares the greedy ensemble against the naive ensemble and against
 * 1000 random ensembles of the same size.</li>
 * </ol>
 * All results are reported via verbose logging; nothing is returned.
 * <p>
 * Note: this method may lower the {@code minvote} field when fewer members
 * than votes are available.
 *
 * @throws AbortException if the first object is not labeled "bylabel"
 */
@Override
public void run() {
    // Note: the database contains the *result vectors*, not the original data.
    final Database database = inputstep.getDatabase();
    Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
    final String firstlabel = labels.get(firstid);
    if (!firstlabel.matches("bylabel")) {
        throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }
    relation = applyPrescaling(prescaling, relation, firstid);
    // All objects except the reference are ensemble candidates.
    final int numcand = relation.size() - 1;
    // Dimensionality and reference vector
    final int dim = RelationUtil.dimensionality(relation);
    final NumberVector refvec = relation.get(firstid);
    // Build the positive index set for ROC AUC.
    VectorNonZero positive = new VectorNonZero(refvec);
    final int desired_outliers = (int) (rate * dim);
    int union_outliers = 0;
    final int[] outliers_seen = new int[dim];
    // Merge the top-k for each ensemble member, until we have enough
    // candidates.
    {
        int k = 0;
        ArrayList<DecreasingVectorIter> iters = new ArrayList<>(numcand);
        if (minvote >= numcand) {
            minvote = Math.max(1, numcand - 1);
        }
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            // Skip "by label", obviously
            if (DBIDUtil.equal(firstid, iditer)) {
                continue;
            }
            iters.add(new DecreasingVectorIter(relation.get(iditer)));
        }
        loop: while (union_outliers < desired_outliers) {
            for (DecreasingVectorIter iter : iters) {
                if (!iter.valid()) {
                    LOG.warning("Union_outliers=" + union_outliers + " < desired_outliers=" + desired_outliers + " minvote=" + minvote);
                    break loop;
                }
                int cur = iter.dim();
                outliers_seen[cur] += 1;
                // Count an object exactly once: when it reaches the threshold.
                if (outliers_seen[cur] == minvote) {
                    union_outliers += 1;
                }
                iter.advance();
            }
            k++;
        }
        LOG.verbose("Merged top " + k + " outliers to: " + union_outliers + " outliers (desired: at least " + desired_outliers + ")");
    }
    // Build the final weight vector.
    final double[] estimated_weights = new double[dim];
    final double[] estimated_truth = new double[dim];
    updateEstimations(outliers_seen, union_outliers, estimated_weights, estimated_truth);
    DoubleVector estimated_truth_vec = DoubleVector.wrap(estimated_truth);
    PrimitiveDistanceFunction<NumberVector> wdist = getDistanceFunction(estimated_weights);
    // The same distance is used against the estimated and the true target.
    PrimitiveDistanceFunction<NumberVector> tdist = wdist;
    // Build the naive ensemble:
    final double[] naiveensemble = new double[dim];
    {
        double[] buf = new double[numcand];
        for (int d = 0; d < dim; d++) {
            int i = 0;
            for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                if (DBIDUtil.equal(firstid, iditer)) {
                    continue;
                }
                final NumberVector vec = relation.get(iditer);
                buf[i] = vec.doubleValue(d);
                i++;
            }
            naiveensemble[d] = voting.combine(buf, i);
            if (Double.isNaN(naiveensemble[d])) {
                LOG.warning("NaN after combining: " + FormatUtil.format(buf) + " i=" + i + " " + voting.toString());
            }
        }
    }
    DoubleVector naivevec = DoubleVector.wrap(naiveensemble);
    // Compute single AUC scores and estimations.
    // Remember the method most similar to the estimation
    double bestauc = 0.0;
    String bestaucstr = "";
    double bestcost = Double.POSITIVE_INFINITY;
    String bestcoststr = "";
    DBID bestid = null;
    double bestest = Double.POSITIVE_INFINITY;
    {
        final double[] greedyensemble = new double[dim];
        // Compute individual scores
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            if (DBIDUtil.equal(firstid, iditer)) {
                continue;
            }
            final NumberVector vec = relation.get(iditer);
            singleEnsemble(greedyensemble, vec);
            double auc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(DoubleVector.wrap(greedyensemble)));
            double estimated = wdist.distance(DoubleVector.wrap(greedyensemble), estimated_truth_vec);
            double cost = tdist.distance(DoubleVector.wrap(greedyensemble), refvec);
            LOG.verbose("ROC AUC: " + auc + " estimated " + estimated + " cost " + cost + " " + labels.get(iditer));
            if (auc > bestauc) {
                bestauc = auc;
                bestaucstr = labels.get(iditer);
            }
            if (cost < bestcost) {
                bestcost = cost;
                bestcoststr = labels.get(iditer);
            }
            // The null check ensures a member is chosen even if no estimate
            // is below the initial (infinite) best value.
            if (estimated < bestest || bestid == null) {
                bestest = estimated;
                bestid = DBIDUtil.deref(iditer);
            }
        }
    }
    // Initialize ensemble with "best" method
    if (prescaling != null) {
        LOG.verbose("Input prescaling: " + prescaling);
    }
    LOG.verbose("Distance function: " + wdist);
    LOG.verbose("Ensemble voting: " + voting);
    if (scaling != null) {
        LOG.verbose("Ensemble rescaling: " + scaling);
    }
    LOG.verbose("Initial estimation of outliers: " + union_outliers);
    LOG.verbose("Initializing ensemble with: " + labels.get(bestid));
    ModifiableDBIDs ensemble = DBIDUtil.newArray(bestid);
    ModifiableDBIDs enscands = DBIDUtil.newHashSet(relation.getDBIDs());
    ModifiableDBIDs dropped = DBIDUtil.newHashSet(relation.size());
    dropped.add(firstid);
    enscands.remove(bestid);
    enscands.remove(firstid);
    final double[] greedyensemble = new double[dim];
    singleEnsemble(greedyensemble, relation.get(bestid));
    // Greedily grow the ensemble
    final double[] testensemble = new double[dim];
    while (enscands.size() > 0) {
        NumberVector greedyvec = DoubleVector.wrap(greedyensemble);
        final double oldd = wdist.distance(estimated_truth_vec, greedyvec);
        final int heapsize = enscands.size();
        ModifiableDoubleDBIDList heap = DBIDUtil.newDistanceDBIDList(heapsize);
        double[] tmp = new double[dim];
        for (DBIDIter iter = enscands.iter(); iter.valid(); iter.advance()) {
            final NumberVector vec = relation.get(iter);
            singleEnsemble(tmp, vec);
            // Diversity: distance of this candidate (as a single-member
            // ensemble in tmp) to the current greedy ensemble. Comparing the
            // greedy ensemble with itself would yield the same value for
            // every candidate and make the ordering below meaningless.
            double diversity = wdist.distance(DoubleVector.wrap(tmp), greedyvec);
            heap.add(diversity, iter);
        }
        heap.sort();
        // Process candidates from the end of the sorted list; each tested
        // candidate is removed from the list by the loop update.
        for (DoubleDBIDListMIter it = heap.iter(); heap.size() > 0; it.remove()) {
            // Last
            it.seek(heap.size() - 1);
            enscands.remove(it);
            final NumberVector vec = relation.get(it);
            // Build combined ensemble.
            {
                double[] buf = new double[ensemble.size() + 1];
                for (int i = 0; i < dim; i++) {
                    int j = 0;
                    for (DBIDIter iter = ensemble.iter(); iter.valid(); iter.advance()) {
                        buf[j] = relation.get(iter).doubleValue(i);
                        j++;
                    }
                    buf[j] = vec.doubleValue(i);
                    testensemble[i] = voting.combine(buf, j + 1);
                }
            }
            applyScaling(testensemble, scaling);
            NumberVector testvec = DoubleVector.wrap(testensemble);
            double newd = wdist.distance(estimated_truth_vec, testvec);
            if (newd < oldd) {
                // Accept the candidate; the ensemble changed, so the
                // diversity ordering is recomputed by the outer loop.
                System.arraycopy(testensemble, 0, greedyensemble, 0, dim);
                ensemble.add(it);
                // Recompute heap
                break;
            } else {
                dropped.add(it);
                if (refine_truth) {
                    // Update target vectors and weights
                    ArrayList<DecreasingVectorIter> iters = new ArrayList<>(numcand);
                    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                        // Skip "by label", obviously
                        if (DBIDUtil.equal(firstid, iditer) || dropped.contains(iditer)) {
                            continue;
                        }
                        iters.add(new DecreasingVectorIter(relation.get(iditer)));
                    }
                    // Keep the threshold at least 1 (as in the initial
                    // merge); a threshold of 0 can never be hit after an
                    // increment, so no object would ever be counted.
                    if (minvote >= iters.size()) {
                        minvote = Math.max(1, iters.size() - 1);
                    }
                    union_outliers = 0;
                    Arrays.fill(outliers_seen, 0);
                    refine: while (union_outliers < desired_outliers) {
                        for (DecreasingVectorIter iter : iters) {
                            if (!iter.valid()) {
                                // Rankings exhausted: leave the while loop
                                // too, otherwise it would never terminate.
                                break refine;
                            }
                            int cur = iter.dim();
                            outliers_seen[cur] += 1;
                            if (outliers_seen[cur] == minvote) {
                                union_outliers += 1;
                            }
                            iter.advance();
                        }
                    }
                    LOG.warning("New num outliers: " + union_outliers);
                    updateEstimations(outliers_seen, union_outliers, estimated_weights, estimated_truth);
                    estimated_truth_vec = DoubleVector.wrap(estimated_truth);
                }
            }
        }
    }
    // Build the improved ensemble:
    StringBuilder greedylbl = new StringBuilder();
    {
        for (DBIDIter iter = ensemble.iter(); iter.valid(); iter.advance()) {
            if (greedylbl.length() > 0) {
                greedylbl.append(' ');
            }
            greedylbl.append(labels.get(iter));
        }
    }
    DoubleVector greedyvec = DoubleVector.wrap(greedyensemble);
    if (refine_truth) {
        LOG.verbose("Estimated outliers remaining: " + union_outliers);
    }
    LOG.verbose("Greedy ensemble (" + ensemble.size() + "): " + greedylbl.toString());
    LOG.verbose("Best single ROC AUC: " + bestauc + " (" + bestaucstr + ")");
    LOG.verbose("Best single cost:    " + bestcost + " (" + bestcoststr + ")");
    // Evaluate the naive ensemble and the "shrunk" ensemble
    double naiveauc, naivecost;
    {
        naiveauc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(naivevec));
        naivecost = tdist.distance(naivevec, refvec);
        LOG.verbose("Naive ensemble AUC:   " + naiveauc + " cost: " + naivecost);
        LOG.verbose("Naive ensemble Gain:  " + gain(naiveauc, bestauc, 1) + " cost gain: " + gain(naivecost, bestcost, 0));
    }
    double greedyauc, greedycost;
    {
        greedyauc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(greedyvec));
        greedycost = tdist.distance(greedyvec, refvec);
        LOG.verbose("Greedy ensemble AUC:  " + greedyauc + " cost: " + greedycost);
        LOG.verbose("Greedy ensemble Gain to best:  " + gain(greedyauc, bestauc, 1) + " cost gain: " + gain(greedycost, bestcost, 0));
        LOG.verbose("Greedy ensemble Gain to naive: " + gain(greedyauc, naiveauc, 1) + " cost gain: " + gain(greedycost, naivecost, 0));
    }
    // Baseline: random ensembles of the same size as the greedy ensemble.
    {
        MeanVariance meanauc = new MeanVariance();
        MeanVariance meancost = new MeanVariance();
        HashSetModifiableDBIDs candidates = DBIDUtil.newHashSet(relation.getDBIDs());
        candidates.remove(firstid);
        for (int i = 0; i < 1000; i++) {
            // Build the random ensemble; the iteration index is the seed.
            final double[] randomensemble = new double[dim];
            {
                DBIDs random = DBIDUtil.randomSample(candidates, ensemble.size(), (long) i);
                double[] buf = new double[random.size()];
                for (int d = 0; d < dim; d++) {
                    int j = 0;
                    for (DBIDIter iter = random.iter(); iter.valid(); iter.advance()) {
                        assert (!DBIDUtil.equal(firstid, iter));
                        final NumberVector vec = relation.get(iter);
                        buf[j] = vec.doubleValue(d);
                        j++;
                    }
                    randomensemble[d] = voting.combine(buf, j);
                }
            }
            applyScaling(randomensemble, scaling);
            NumberVector randomvec = DoubleVector.wrap(randomensemble);
            double auc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(randomvec));
            meanauc.put(auc);
            double cost = tdist.distance(randomvec, refvec);
            meancost.put(cost);
        }
        LOG.verbose("Random ensemble AUC:  " + meanauc.getMean() + " + stddev: " + meanauc.getSampleStddev() + " = " + (meanauc.getMean() + meanauc.getSampleStddev()));
        LOG.verbose("Random ensemble Gain: " + gain(meanauc.getMean(), bestauc, 1));
        LOG.verbose("Greedy improvement:   " + (greedyauc - meanauc.getMean()) / meanauc.getSampleStddev() + " standard deviations.");
        // The sum must use the cost statistics, matching the printed stddev.
        LOG.verbose("Random ensemble Cost: " + meancost.getMean() + " + stddev: " + meancost.getSampleStddev() + " = " + (meancost.getMean() + meancost.getSampleStddev()));
        LOG.verbose("Random ensemble Gain: " + gain(meancost.getMean(), bestcost, 0));
        LOG.verbose("Greedy improvement:   " + (meancost.getMean() - greedycost) / meancost.getSampleStddev() + " standard deviations.");
        LOG.verbose("Naive ensemble Gain to random: " + gain(naiveauc, meanauc.getMean(), 1) + " cost gain: " + gain(naivecost, meancost.getMean(), 0));
        LOG.verbose("Random ensemble Gain to naive: " + gain(meanauc.getMean(), naiveauc, 1) + " cost gain: " + gain(meancost.getMean(), naivecost, 0));
        LOG.verbose("Greedy ensemble Gain to random: " + gain(greedyauc, meanauc.getMean(), 1) + " cost gain: " + gain(greedycost, meancost.getMean(), 0));
    }
}
Also used : DecreasingVectorIter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) Database(de.lmu.ifi.dbs.elki.database.Database) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleDBIDListMIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) VectorNonZero(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)

Example 74 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class OPTICSOF method run.

/**
 * Perform OPTICS-based outlier detection.
 * <p>
 * Works in three passes over the relation:
 * <ol>
 * <li>query the {@code minpts} nearest neighbors of each object, and store
 * the neighbor list, the kNN distance (used as core distance) and the size of
 * the range neighborhood at that distance;</li>
 * <li>compute the local reachability density of each object as the
 * neighborhood size divided by the sum of reachability distances to its
 * stored neighbors;</li>
 * <li>compute the outlier factor as the sum of density ratios
 * (neighbor / object) divided by the neighborhood size.</li>
 * </ol>
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    DBIDs ids = relation.getDBIDs();
    // FIXME: implicit preprocessor.
    // Pass 1: neighborhoods, core distances and neighborhood sizes.
    WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
    WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
        double d = minptsNeighbours.getKNNDistance();
        nMinPts.put(iditer, minptsNeighbours);
        coreDistance.putDouble(iditer, d);
        // The range query at the kNN distance may return more than minpts
        // objects (e.g. on distance ties), hence the separate size.
        minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
    }
    // Pass 2: local reachability densities. Only the sum of the
    // reachability distances is needed, so they are not stored individually.
    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double lrd = 0;
        // TODO: optimize for double distances
        for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double coreDist = coreDistance.doubleValue(neighbor);
            double dist = distQuery.distance(iditer, neighbor);
            // Reachability distance: never smaller than the neighbor's core distance.
            double rd = MathUtil.max(coreDist, dist);
            lrd = rd + lrd;
        }
        lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
        lrds.putDouble(iditer, lrd);
    }
    // Pass 3: outlier factors.
    DoubleMinMax ofminmax = new DoubleMinMax();
    WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // The density of the current object is the same for all neighbors.
        final double lrd = lrds.doubleValue(iditer);
        double of = 0;
        for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
            double lrdN = lrds.doubleValue(neighbor);
            of = of + lrdN / lrd;
        }
        of = of / minPtsNeighborhoodSize.intValue(iditer);
        ofs.putDouble(iditer, of);
        // update minimum and maximum
        ofminmax.put(of);
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, ids);
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayList(java.util.ArrayList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 75 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class CBLOF method run.

/**
 * Runs the CBLOF algorithm on the given database.
 * <p>
 * The configured clustering algorithm is executed first. Its clusters are
 * then ordered by decreasing size and split into large and small clusters at
 * the boundary reported by {@code getClusterBoundary}; finally the
 * cluster-based local outlier factors are computed from both groups.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return CBLOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    StepProgress stepprog = null;
    if (LOG.isVerbose()) {
        stepprog = new StepProgress("CBLOF", 3);
    }
    final DBIDs ids = relation.getDBIDs();

    LOG.beginStep(stepprog, 1, "Computing clustering.");
    final Clustering<MeanModel> clustering = clusteringAlgorithm.run(database);

    LOG.beginStep(stepprog, 2, "Computing boundary between large and small clusters.");
    final List<? extends Cluster<MeanModel>> clusters = clustering.getAllClusters();
    // Largest clusters first.
    final Comparator<Cluster<MeanModel>> bySizeDescending = new Comparator<Cluster<MeanModel>>() {

        @Override
        public int compare(Cluster<MeanModel> a, Cluster<MeanModel> b) {
            return Integer.compare(b.size(), a.size());
        }
    };
    Collections.sort(clusters, bySizeDescending);
    // The cluster at the boundary index still counts as a large cluster.
    final int firstSmall = getClusterBoundary(relation, clusters) + 1;
    final List<? extends Cluster<MeanModel>> largeClusters = clusters.subList(0, firstSmall);
    final List<? extends Cluster<MeanModel>> smallClusters = clusters.subList(firstSmall, clusters.size());

    LOG.beginStep(stepprog, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
    final WritableDoubleDataStore cblofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    final DoubleMinMax cblofMinMax = new DoubleMinMax();
    computeCBLOFs(relation, distance, cblofs, cblofMinMax, largeClusters, smallClusters);
    LOG.setCompleted(stepprog);

    final DoubleRelation scoreResult = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", "cblof-outlier", cblofs, ids);
    final OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(cblofMinMax.getMin(), cblofMinMax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14