Search in sources :

Example 56 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class IDOS method computeIDOS.

/**
 * Computes all IDOS scores.
 *
 * @param ids the DBIDs to process
 * @param knnQ the KNN query
 * @param intDims Precomputed intrinsic dimensionalities
 * @param idosminmax Output of minimum and maximum, for metadata
 * @return ID scores
 */
protected DoubleDataStore computeIDOS(DBIDs ids, KNNQuery<O> knnQ, DoubleDataStore intDims, DoubleMinMax idosminmax) {
    WritableDoubleDataStore ldms = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("ID Outlier Scores for objects", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final KNNList neighbors = knnQ.getKNNForDBID(iter, k_r);
        double sum = 0.;
        int cnt = 0;
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            final double id = intDims.doubleValue(neighbor);
            sum += id > 0 ? 1.0 / id : 0.;
            if (++cnt == k_r) {
                // Always stop after at most k_r elements.
                break;
            }
        }
        final double id_q = intDims.doubleValue(iter);
        final double idos = id_q > 0 ? id_q * sum / cnt : 0.;
        ldms.putDouble(iter, idos);
        idosminmax.put(idos);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return ldms;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) GreaterEqualConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 57 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class IntrinsicDimensionalityOutlier method run.

/**
 * Run the algorithm
 *
 * @param database Database
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore id_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double id = 0.;
        try {
            id = estimator.estimate(knnQuery, iditer, k + 1);
        } catch (ArithmeticException e) {
            id = 0.;
        }
        id_score.putDouble(iditer, id);
        minmax.put(id);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    DoubleRelation scoreres = new MaterializedDoubleRelation("Intrinsic dimensionality", "id-score", id_score, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 58 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class ALOCI method run.

public OutlierResult run(Database database, Relation<O> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final Random random = rnd.getSingleThreadedRandom();
    FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
    // Compute extend of dataset.
    double[] min, max;
    {
        double[][] hbbs = RelationUtil.computeMinMax(relation);
        min = hbbs[0];
        max = hbbs[1];
        double maxd = 0;
        for (int i = 0; i < dim; i++) {
            maxd = MathUtil.max(maxd, max[i] - min[i]);
        }
        // Enlarge bounding box to have equal lengths.
        for (int i = 0; i < dim; i++) {
            double diff = (maxd - (max[i] - min[i])) * .5;
            min[i] -= diff;
            max[i] += diff;
        }
    }
    List<ALOCIQuadTree> qts = new ArrayList<>(g);
    double[] nshift = new double[dim];
    ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
    qts.add(qt);
    LOG.incrementProcessed(progressPreproc);
    /*
     * create the remaining g-1 shifted QuadTrees. This not clearly described in
     * the paper and therefore implemented in a way that achieves good results
     * with the test data.
     */
    for (int shift = 1; shift < g; shift++) {
        double[] svec = new double[dim];
        for (int i = 0; i < dim; i++) {
            svec[i] = random.nextDouble() * (max[i] - min[i]);
        }
        qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
        qts.add(qt);
        LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
    // aLOCI main loop: evaluate
    FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
    WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        final O obj = relation.get(iditer);
        double maxmdefnorm = 0;
        // For each level
        for (int l = 0; ; l++) {
            // Find the closest C_i
            Node ci = null;
            for (int i = 0; i < g; i++) {
                Node ci2 = qts.get(i).findClosestNode(obj, l);
                if (ci2.getLevel() != l) {
                    continue;
                }
                // TODO: always use manhattan?
                if (ci == null || distFunc.distance(ci, obj) > distFunc.distance(ci2, obj)) {
                    ci = ci2;
                }
            }
            // LOG.debug("level:" + (ci != null ? ci.getLevel() : -1) +" l:"+l);
            if (ci == null) {
                // no matching tree for this level.
                break;
            }
            // Find the closest C_j
            Node cj = null;
            for (int i = 0; i < g; i++) {
                Node cj2 = qts.get(i).findClosestNode(ci, l - alpha);
                // TODO: allow higher levels or not?
                if (cj != null && cj2.getLevel() < cj.getLevel()) {
                    continue;
                }
                // TODO: always use manhattan?
                if (cj == null || distFunc.distance(cj, ci) > distFunc.distance(cj2, ci)) {
                    cj = cj2;
                }
            }
            // LOG.debug("level:" + (cj != null ? cj.getLevel() : -1) +" l:"+l);
            if (cj == null) {
                // no matching tree for this level.
                continue;
            }
            double mdefnorm = calculate_MDEF_norm(cj, ci);
            // LOG.warning("level:" + ci.getLevel() + "/" + cj.getLevel() +
            // " mdef: " + mdefnorm);
            maxmdefnorm = MathUtil.max(maxmdefnorm, mdefnorm);
        }
        // Store results
        mdef_norm.putDouble(iditer, maxmdefnorm);
        minmax.put(maxmdefnorm);
        LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 59 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class LDOF method run.

/**
 * Run the algorithm
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
    // track the maximum value for normalization
    DoubleMinMax ldofminmax = new DoubleMinMax();
    // compute the ldof values
    WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    // compute LOF_SCORE of each db object
    if (LOG.isVerbose()) {
        LOG.verbose("Computing LDOFs");
    }
    FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;
    Mean dxp = new Mean(), Dxp = new Mean();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
        dxp.reset();
        Dxp.reset();
        DoubleDBIDListIter neighbor1 = neighbors.iter(), neighbor2 = neighbors.iter();
        for (; neighbor1.valid(); neighbor1.advance()) {
            // skip the point itself
            if (DBIDUtil.equal(neighbor1, iditer)) {
                continue;
            }
            dxp.put(neighbor1.doubleValue());
            for (neighbor2.seek(neighbor1.getOffset() + 1); neighbor2.valid(); neighbor2.advance()) {
                // skip the point itself
                if (DBIDUtil.equal(neighbor2, iditer)) {
                    continue;
                }
                Dxp.put(distFunc.distance(neighbor1, neighbor2));
            }
        }
        double ldof = dxp.getMean() / Dxp.getMean();
        if (Double.isNaN(ldof) || Double.isInfinite(ldof)) {
            ldof = 1.0;
        }
        ldofs.putDouble(iditer, ldof);
        // update maximum
        ldofminmax.put(ldof);
        LOG.incrementProcessed(progressLDOFs);
    }
    LOG.ensureCompleted(progressLDOFs);
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 60 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class ExternalDoubleOutlierScore method run.

/**
 * Run the algorithm.
 *
 * @param database Database to use
 * @param relation Relation to use
 * @return Result
 */
public OutlierResult run(Database database, Relation<?> relation) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();
    try (// 
    InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
        TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
        Tokenizer tokenizer = reader.getTokenizer();
        CharSequence buf = reader.getBuffer();
        Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
        reader.reset(in);
        while (reader.nextLineExceptComments()) {
            Integer id = null;
            double score = Double.NaN;
            for (; /* initialized by nextLineExceptComments */
            tokenizer.valid(); tokenizer.advance()) {
                mi.region(tokenizer.getStart(), tokenizer.getEnd());
                ms.region(tokenizer.getStart(), tokenizer.getEnd());
                final boolean mif = mi.find();
                final boolean msf = ms.find();
                if (mif && msf) {
                    throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
                }
                if (mif) {
                    if (id != null) {
                        throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
                    }
                    id = ParseUtil.parseIntBase10(buf, mi.end(), tokenizer.getEnd());
                }
                if (msf) {
                    if (!Double.isNaN(score)) {
                        throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
                    }
                    score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
                }
            }
            if (id != null && !Double.isNaN(score)) {
                scores.putDouble(DBIDUtil.importInteger(id), score);
                minmax.put(score);
            } else if (id == null && Double.isNaN(score)) {
                LOG.warning("Line did not match either ID nor score nor comment: " + reader.getLineNumber());
            } else {
                throw new AbortException("Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
            }
        }
    } catch (IOException e) {
        throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
    }
    OutlierScoreMeta meta;
    if (inverted) {
        meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    } else {
        meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    }
    DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
    OutlierResult or = new OutlierResult(meta, scoresult);
    // Apply scaling
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double val = scoresult.doubleValue(iditer);
        val = scaling.getScaled(val);
        scores.putDouble(iditer, val);
        mm.put(val);
    }
    meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
    or = new OutlierResult(meta, scoresult);
    return or;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Matcher(java.util.regex.Matcher) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) IOException(java.io.IOException) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) FileInputStream(java.io.FileInputStream) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21