Search in sources :

Example 51 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class DiSHPreferenceVectorIndex method initialize.

/**
 * Computes the preference vector of every object in the relation and stores
 * it in {@code storage}.
 *
 * For each object, one range query per dimension collects the neighbors
 * within {@code epsilon[d]}; these per-dimension neighbor sets are then
 * handed to {@code determinePreferenceVector}. If only a single epsilon was
 * configured, it is replicated for all dimensions (this replaces the
 * {@code epsilon} field).
 *
 * @throws EmptyDataException if no relation is set or the relation is empty
 */
@Override
public void initialize() {
    if (relation == null || relation.size() == 0) {
        throw new EmptyDataException();
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);
    if (LOG.isDebugging()) {
        // Arrays.toString prints the values; Arrays.asList on a double[] wraps
        // the whole array as one element and would only print its identity.
        LOG.debugFine(new StringBuilder().append("eps ").append(Arrays.toString(epsilon)) //
            .append("\n minpts ").append(minpts) //
            .append("\n strategy ").append(strategy).toString());
    }
    long start = System.currentTimeMillis();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null;
    int dim = RelationUtil.dimensionality(relation);
    // Only one epsilon value specified: use it for every dimension.
    if (epsilon.length == 1 && dim != 1) {
        double eps = epsilon[0];
        epsilon = new double[dim];
        Arrays.fill(epsilon, eps);
    }
    RangeQuery<V>[] rangeQueries = initRangeQueries(relation, dim);
    // Reused debug buffer; null when debugging is disabled.
    StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        if (msg != null) {
            msg.setLength(0);
            msg.append("\nid = ").append(DBIDUtil.toString(it));
        }
        // Determine neighbors in each dimension
        ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim];
        for (int d = 0; d < dim; d++) {
            allNeighbors[d] = DBIDUtil.newHashSet(rangeQueries[d].getRangeForDBID(it, epsilon[d]));
        }
        if (msg != null) {
            for (int d = 0; d < dim; d++) {
                msg.append("\n neighbors [").append(d).append(']').append(" (").append(allNeighbors[d].size()).append(") = ").append(allNeighbors[d]);
            }
        }
        storage.put(it, determinePreferenceVector(relation, allNeighbors, msg));
        if (msg != null) {
            LOG.debugFine(msg.toString());
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    // Report the wall-clock time of the preprocessing.
    if (LOG.isVerbose()) {
        long end = System.currentTimeMillis();
        long elapsedTime = end - start;
        LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
    }
}
Also used : EmptyDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.EmptyDataException) RangeQuery(de.lmu.ifi.dbs.elki.database.query.range.RangeQuery) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 52 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class GreedyEnsembleExperiment method run.

/**
 * Runs the greedy ensemble experiment on the result vectors of the input
 * step.
 *
 * The first object must be labeled "bylabel" and serves as reference vector.
 * The method estimates a target vector from the merged top-ranked entries of
 * all members, builds a naive ensemble of all members, greedily grows an
 * ensemble starting from the member closest to the estimate, and finally
 * compares naive, greedy and 1000 random ensembles (of the greedy ensemble's
 * size) by ROC AUC and by distance to the reference vector. All results are
 * reported through the logger.
 *
 * @throws AbortException if the first object is not the "bylabel" reference
 */
@Override
public void run() {
    // Note: the database contains the *result vectors*, not the original data.
    final Database database = inputstep.getDatabase();
    Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
    final String firstlabel = labels.get(firstid);
    if (!firstlabel.matches("bylabel")) {
        throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }
    relation = applyPrescaling(prescaling, relation, firstid);
    final int numcand = relation.size() - 1;
    // Dimensionality and reference vector
    final int dim = RelationUtil.dimensionality(relation);
    final NumberVector refvec = relation.get(firstid);
    // Build the positive index set for ROC AUC.
    VectorNonZero positive = new VectorNonZero(refvec);
    final int desired_outliers = (int) (rate * dim);
    int union_outliers = 0;
    final int[] outliers_seen = new int[dim];
    // Merge the top-k for each ensemble member, until we have enough
    // candidates.
    {
        int k = 0;
        ArrayList<DecreasingVectorIter> iters = new ArrayList<>(numcand);
        if (minvote >= numcand) {
            minvote = Math.max(1, numcand - 1);
        }
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            // Skip "by label", obviously
            if (DBIDUtil.equal(firstid, iditer)) {
                continue;
            }
            iters.add(new DecreasingVectorIter(relation.get(iditer)));
        }
        loop: while (union_outliers < desired_outliers) {
            for (DecreasingVectorIter iter : iters) {
                if (!iter.valid()) {
                    LOG.warning("Union_outliers=" + union_outliers + " < desired_outliers=" + desired_outliers + " minvote=" + minvote);
                    break loop;
                }
                int cur = iter.dim();
                outliers_seen[cur] += 1;
                if (outliers_seen[cur] == minvote) {
                    union_outliers += 1;
                }
                iter.advance();
            }
            k++;
        }
        LOG.verbose("Merged top " + k + " outliers to: " + union_outliers + " outliers (desired: at least " + desired_outliers + ")");
    }
    // Build the final weight vector.
    final double[] estimated_weights = new double[dim];
    final double[] estimated_truth = new double[dim];
    updateEstimations(outliers_seen, union_outliers, estimated_weights, estimated_truth);
    DoubleVector estimated_truth_vec = DoubleVector.wrap(estimated_truth);
    PrimitiveDistanceFunction<NumberVector> wdist = getDistanceFunction(estimated_weights);
    PrimitiveDistanceFunction<NumberVector> tdist = wdist;
    // Build the naive ensemble:
    final double[] naiveensemble = new double[dim];
    {
        double[] buf = new double[numcand];
        for (int d = 0; d < dim; d++) {
            int i = 0;
            for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                if (DBIDUtil.equal(firstid, iditer)) {
                    continue;
                }
                final NumberVector vec = relation.get(iditer);
                buf[i] = vec.doubleValue(d);
                i++;
            }
            naiveensemble[d] = voting.combine(buf, i);
            if (Double.isNaN(naiveensemble[d])) {
                LOG.warning("NaN after combining: " + FormatUtil.format(buf) + " i=" + i + " " + voting.toString());
            }
        }
    }
    DoubleVector naivevec = DoubleVector.wrap(naiveensemble);
    // Compute single AUC scores and estimations.
    // Remember the method most similar to the estimation
    double bestauc = 0.0;
    String bestaucstr = "";
    double bestcost = Double.POSITIVE_INFINITY;
    String bestcoststr = "";
    DBID bestid = null;
    double bestest = Double.POSITIVE_INFINITY;
    {
        final double[] greedyensemble = new double[dim];
        // Compute individual scores
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            if (DBIDUtil.equal(firstid, iditer)) {
                continue;
            }
            final NumberVector vec = relation.get(iditer);
            singleEnsemble(greedyensemble, vec);
            double auc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(DoubleVector.wrap(greedyensemble)));
            double estimated = wdist.distance(DoubleVector.wrap(greedyensemble), estimated_truth_vec);
            double cost = tdist.distance(DoubleVector.wrap(greedyensemble), refvec);
            LOG.verbose("ROC AUC: " + auc + " estimated " + estimated + " cost " + cost + " " + labels.get(iditer));
            if (auc > bestauc) {
                bestauc = auc;
                bestaucstr = labels.get(iditer);
            }
            if (cost < bestcost) {
                bestcost = cost;
                bestcoststr = labels.get(iditer);
            }
            if (estimated < bestest || bestid == null) {
                bestest = estimated;
                bestid = DBIDUtil.deref(iditer);
            }
        }
    }
    // Initialize ensemble with "best" method
    if (prescaling != null) {
        LOG.verbose("Input prescaling: " + prescaling);
    }
    LOG.verbose("Distance function: " + wdist);
    LOG.verbose("Ensemble voting: " + voting);
    if (scaling != null) {
        LOG.verbose("Ensemble rescaling: " + scaling);
    }
    LOG.verbose("Initial estimation of outliers: " + union_outliers);
    LOG.verbose("Initializing ensemble with: " + labels.get(bestid));
    ModifiableDBIDs ensemble = DBIDUtil.newArray(bestid);
    ModifiableDBIDs enscands = DBIDUtil.newHashSet(relation.getDBIDs());
    ModifiableDBIDs dropped = DBIDUtil.newHashSet(relation.size());
    dropped.add(firstid);
    enscands.remove(bestid);
    enscands.remove(firstid);
    final double[] greedyensemble = new double[dim];
    singleEnsemble(greedyensemble, relation.get(bestid));
    // Greedily grow the ensemble
    final double[] testensemble = new double[dim];
    while (enscands.size() > 0) {
        NumberVector greedyvec = DoubleVector.wrap(greedyensemble);
        final double oldd = wdist.distance(estimated_truth_vec, greedyvec);
        final int heapsize = enscands.size();
        ModifiableDoubleDBIDList heap = DBIDUtil.newDistanceDBIDList(heapsize);
        double[] tmp = new double[dim];
        for (DBIDIter iter = enscands.iter(); iter.valid(); iter.advance()) {
            final NumberVector vec = relation.get(iter);
            singleEnsemble(tmp, vec);
            // Diversity of the candidate (in tmp) relative to the current ensemble.
            // Comparing greedyensemble with itself would rank all candidates equally.
            double diversity = wdist.distance(DoubleVector.wrap(tmp), greedyvec);
            heap.add(diversity, iter);
        }
        heap.sort();
        for (DoubleDBIDListMIter it = heap.iter(); heap.size() > 0; it.remove()) {
            // Last
            it.seek(heap.size() - 1);
            enscands.remove(it);
            final NumberVector vec = relation.get(it);
            // Build combined ensemble.
            {
                double[] buf = new double[ensemble.size() + 1];
                for (int i = 0; i < dim; i++) {
                    int j = 0;
                    for (DBIDIter iter = ensemble.iter(); iter.valid(); iter.advance()) {
                        buf[j] = relation.get(iter).doubleValue(i);
                        j++;
                    }
                    buf[j] = vec.doubleValue(i);
                    testensemble[i] = voting.combine(buf, j + 1);
                }
            }
            applyScaling(testensemble, scaling);
            NumberVector testvec = DoubleVector.wrap(testensemble);
            double newd = wdist.distance(estimated_truth_vec, testvec);
            if (newd < oldd) {
                System.arraycopy(testensemble, 0, greedyensemble, 0, dim);
                ensemble.add(it);
                // Recompute heap
                break;
            } else {
                dropped.add(it);
                if (refine_truth) {
                    // Update target vectors and weights
                    ArrayList<DecreasingVectorIter> iters = new ArrayList<>(numcand);
                    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
                        // Skip "by label", obviously
                        if (DBIDUtil.equal(firstid, iditer) || dropped.contains(iditer)) {
                            continue;
                        }
                        iters.add(new DecreasingVectorIter(relation.get(iditer)));
                    }
                    // Keep minvote >= 1, as in the initial merge: a vote count
                    // can never equal 0 after being incremented.
                    if (minvote >= iters.size()) {
                        minvote = Math.max(1, iters.size() - 1);
                    }
                    union_outliers = 0;
                    Arrays.fill(outliers_seen, 0);
                    // Labeled break: once an iterator is exhausted no further
                    // progress is possible, so leave the while loop as well.
                    refine: while (union_outliers < desired_outliers) {
                        for (DecreasingVectorIter iter : iters) {
                            if (!iter.valid()) {
                                break refine;
                            }
                            int cur = iter.dim();
                            outliers_seen[cur] += 1;
                            if (outliers_seen[cur] == minvote) {
                                union_outliers += 1;
                            }
                            iter.advance();
                        }
                    }
                    LOG.warning("New num outliers: " + union_outliers);
                    updateEstimations(outliers_seen, union_outliers, estimated_weights, estimated_truth);
                    estimated_truth_vec = DoubleVector.wrap(estimated_truth);
                }
            }
        }
    }
    // Build the improved ensemble:
    StringBuilder greedylbl = new StringBuilder();
    {
        for (DBIDIter iter = ensemble.iter(); iter.valid(); iter.advance()) {
            if (greedylbl.length() > 0) {
                greedylbl.append(' ');
            }
            greedylbl.append(labels.get(iter));
        }
    }
    DoubleVector greedyvec = DoubleVector.wrap(greedyensemble);
    if (refine_truth) {
        LOG.verbose("Estimated outliers remaining: " + union_outliers);
    }
    LOG.verbose("Greedy ensemble (" + ensemble.size() + "): " + greedylbl.toString());
    LOG.verbose("Best single ROC AUC: " + bestauc + " (" + bestaucstr + ")");
    LOG.verbose("Best single cost:    " + bestcost + " (" + bestcoststr + ")");
    // Evaluate the naive ensemble and the "shrunk" ensemble
    double naiveauc, naivecost;
    {
        naiveauc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(naivevec));
        naivecost = tdist.distance(naivevec, refvec);
        LOG.verbose("Naive ensemble AUC:   " + naiveauc + " cost: " + naivecost);
        LOG.verbose("Naive ensemble Gain:  " + gain(naiveauc, bestauc, 1) + " cost gain: " + gain(naivecost, bestcost, 0));
    }
    double greedyauc, greedycost;
    {
        greedyauc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(greedyvec));
        greedycost = tdist.distance(greedyvec, refvec);
        LOG.verbose("Greedy ensemble AUC:  " + greedyauc + " cost: " + greedycost);
        LOG.verbose("Greedy ensemble Gain to best:  " + gain(greedyauc, bestauc, 1) + " cost gain: " + gain(greedycost, bestcost, 0));
        LOG.verbose("Greedy ensemble Gain to naive: " + gain(greedyauc, naiveauc, 1) + " cost gain: " + gain(greedycost, naivecost, 0));
    }
    {
        MeanVariance meanauc = new MeanVariance();
        MeanVariance meancost = new MeanVariance();
        HashSetModifiableDBIDs candidates = DBIDUtil.newHashSet(relation.getDBIDs());
        candidates.remove(firstid);
        for (int i = 0; i < 1000; i++) {
            // Build a random ensemble of the same size as the greedy one:
            final double[] randomensemble = new double[dim];
            {
                DBIDs random = DBIDUtil.randomSample(candidates, ensemble.size(), (long) i);
                double[] buf = new double[random.size()];
                for (int d = 0; d < dim; d++) {
                    int j = 0;
                    for (DBIDIter iter = random.iter(); iter.valid(); iter.advance()) {
                        assert (!DBIDUtil.equal(firstid, iter));
                        final NumberVector vec = relation.get(iter);
                        buf[j] = vec.doubleValue(d);
                        j++;
                    }
                    randomensemble[d] = voting.combine(buf, j);
                }
            }
            applyScaling(randomensemble, scaling);
            NumberVector randomvec = DoubleVector.wrap(randomensemble);
            double auc = ROCEvaluation.computeROCAUC(positive, new DecreasingVectorIter(randomvec));
            meanauc.put(auc);
            double cost = tdist.distance(randomvec, refvec);
            meancost.put(cost);
        }
        LOG.verbose("Random ensemble AUC:  " + meanauc.getMean() + " + stddev: " + meanauc.getSampleStddev() + " = " + (meanauc.getMean() + meanauc.getSampleStddev()));
        LOG.verbose("Random ensemble Gain: " + gain(meanauc.getMean(), bestauc, 1));
        LOG.verbose("Greedy improvement:   " + (greedyauc - meanauc.getMean()) / meanauc.getSampleStddev() + " standard deviations.");
        // The sum must use the cost statistics, not the AUC standard deviation.
        LOG.verbose("Random ensemble Cost: " + meancost.getMean() + " + stddev: " + meancost.getSampleStddev() + " = " + (meancost.getMean() + meancost.getSampleStddev()));
        LOG.verbose("Random ensemble Gain: " + gain(meancost.getMean(), bestcost, 0));
        LOG.verbose("Greedy improvement:   " + (meancost.getMean() - greedycost) / meancost.getSampleStddev() + " standard deviations.");
        LOG.verbose("Naive ensemble Gain to random: " + gain(naiveauc, meanauc.getMean(), 1) + " cost gain: " + gain(naivecost, meancost.getMean(), 0));
        LOG.verbose("Random ensemble Gain to naive: " + gain(meanauc.getMean(), naiveauc, 1) + " cost gain: " + gain(meancost.getMean(), naivecost, 0));
        LOG.verbose("Greedy ensemble Gain to random: " + gain(greedyauc, meanauc.getMean(), 1) + " cost gain: " + gain(greedycost, meancost.getMean(), 0));
    }
}
Also used : DecreasingVectorIter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) Database(de.lmu.ifi.dbs.elki.database.Database) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleDBIDListMIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) VectorNonZero(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)

Example 53 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class RANSACCovarianceMatrixBuilder method processIds.

/**
 * Builds a covariance matrix from the largest consensus set found by random
 * sampling.
 *
 * In each iteration, {@code dim + 1} objects are sampled and a covariance
 * model is fitted to them; every object of {@code ids} whose squared length
 * under the inverted sample matrix is below a chi-squared quantile counts as
 * support. The best-supported set is kept, and the search stops early once
 * that set covers all of {@code ids}.
 *
 * @param ids objects to process
 * @param relation relation holding the vectors
 * @return sample covariance matrix of the best support set, or of all
 *         {@code ids} if the best set has at most {@code dim} members
 */
@Reference(title = "Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography", //
    authors = "M.A. Fischler, R.C. Bolles", //
    booktitle = "Communications of the ACM, Vol. 24 Issue 6", //
    url = "http://dx.doi.org/10.1145/358669.358692")
@Override
public double[][] processIds(DBIDs ids, Relation<? extends NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    ModifiableDBIDs best = DBIDUtil.newHashSet(), support = DBIDUtil.newHashSet();
    final double threshold = ChiSquaredDistribution.quantile(0.85, dim);
    CovarianceMatrix cv = new CovarianceMatrix(dim);
    Random random = rnd.getSingleThreadedRandom();
    for (int i = 0; i < iterations; i++) {
        DBIDs sample = DBIDUtil.randomSample(ids, dim + 1, random);
        cv.reset();
        for (DBIDIter it = sample.iter(); it.valid(); it.advance()) {
            cv.put(relation.get(it));
        }
        double[] centroid = cv.getMeanVector();
        double[][] p = inverse(cv.destroyToSampleMatrix());
        support.clear();
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            double[] vec = minusEquals(relation.get(id).toArray(), centroid);
            double sqlen = transposeTimesTimes(vec, p, vec);
            if (sqlen < threshold) {
                support.add(id);
            }
        }
        // Keep the larger set in "best"; the other set is recycled as the
        // scratch buffer for the next iteration.
        if (support.size() > best.size()) {
            ModifiableDBIDs swap = best;
            best = support;
            support = swap;
        }
        // Test "best", not "support": after a swap, "support" refers to the
        // previous (smaller) set and would never trigger the early exit here.
        if (best.size() >= ids.size()) {
            // Can't get better than this!
            break;
        }
    }
    // Fall back to regular PCA if too few samples.
    return CovarianceMatrix.make(relation, best.size() > dim ? best : ids).destroyToSampleMatrix();
}
Also used : Random(java.util.Random) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Reference(de.lmu.ifi.dbs.elki.utilities.documentation.Reference)

Example 54 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class SimpleOutlierEnsemble method run.

/**
 * Runs all inner algorithms on the database and combines their outlier
 * scores per object using the configured voting method.
 *
 * Scores that are NaN are skipped with a warning; an object without any
 * usable score is left without a combined score (also with a warning).
 *
 * @param database database to run the inner algorithms on
 * @return outlier result holding the combined scores and their min/max
 */
@Override
public OutlierResult run(Database database) throws IllegalStateException {
    int num = algorithms.size();
    // Run inner outlier algorithms
    ModifiableDBIDs ids = DBIDUtil.newHashSet();
    ArrayList<OutlierResult> results = new ArrayList<>(num);
    {
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
        for (Algorithm alg : algorithms) {
            Result res = alg.run(database);
            List<OutlierResult> ors = OutlierResult.getOutlierResults(res);
            for (OutlierResult or : ors) {
                results.add(or);
                ids.addDBIDs(or.getScores().getDBIDs());
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    // Combine
    WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    {
        FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
        // One slot per collected result. An algorithm may contribute more than
        // one outlier result, so algorithms.size() can be too small here.
        final int numresults = results.size();
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            double[] scores = new double[numresults];
            int i = 0;
            for (OutlierResult r : results) {
                double score = r.getScores().doubleValue(id);
                if (!Double.isNaN(score)) {
                    scores[i] = score;
                    i++;
                } else {
                    LOG.warning("DBID " + id + " was not given a score by result " + r);
                }
            }
            if (i > 0) {
                // Shrink array if necessary.
                if (i < scores.length) {
                    scores = Arrays.copyOf(scores, i);
                }
                double combined = voting.combine(scores);
                sumscore.putDouble(id, combined);
                minmax.put(combined);
            } else {
                LOG.warning("DBID " + id + " was not given any score at all.");
            }
            LOG.incrementProcessed(cprog);
        }
        LOG.ensureCompleted(cprog);
    }
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids);
    return new OutlierResult(meta, scores);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayList(java.util.ArrayList) Algorithm(de.lmu.ifi.dbs.elki.algorithm.Algorithm) OutlierAlgorithm(de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm) AbstractAlgorithm(de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) Result(de.lmu.ifi.dbs.elki.result.Result) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) List(java.util.List) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 55 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method run.

/**
 * Scores objects by backward search: the top candidate of each iteration is
 * recorded and removed from the working set until a candidate's score falls
 * below the normal quantile derived from {@code alpha}. All objects still in
 * the working set at that point receive a score of 0.
 *
 * @param database Database to process (not referenced in this method body)
 * @param relationx Spatial relation
 * @param relationy Attribute relation
 * @return Algorithm result
 */
public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
    final DoubleMinMax minmax = new DoubleMinMax(0.0, 0.0);

    // Working set of objects not yet flagged; the proxy view exposes only these.
    final ModifiableDBIDs remaining = DBIDUtil.newHashSet(relationx.getDBIDs());
    final ProxyView<V> view = new ProxyView<>(remaining, relationx);
    final double cutoff = NormalDistribution.standardNormalQuantile(1.0 - alpha * .5);

    // Keep extracting candidates while they are significant. The negated
    // comparison is deliberate: a NaN score does not stop the search.
    Pair<DBIDVar, Double> top = singleIteration(view, relationy);
    while (!(top.second < cutoff)) {
        final double value = top.second;
        scores.putDouble(top.first, value);
        if (!Double.isNaN(value)) {
            minmax.put(value);
        }
        remaining.remove(top.first);
        top = singleIteration(view, relationy);
    }

    // Whatever was never extracted is an inlier.
    for (DBIDIter inlier = remaining.iter(); inlier.valid(); inlier.advance()) {
        scores.putDouble(inlier, 0.0);
    }

    DoubleRelation scoreResult = new MaterializedDoubleRelation("GLSSODBackward", "GLSSODbackward-outlier", scores, relationx.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY, 0);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : ProxyView(de.lmu.ifi.dbs.elki.database.relation.ProxyView) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)44 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)30 ArrayList (java.util.ArrayList)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)28 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)12 Model (de.lmu.ifi.dbs.elki.data.model.Model)11 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)8 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)7