Search in sources :

Example 11 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class ALOCI method run.

/**
 * Run the aLOCI outlier detection.
 *
 * Builds {@code g} quadtrees over an equal-sided bounding box of the data (one
 * unshifted, the others randomly shifted), then assigns every object the
 * largest normalized MDEF value found over all grid levels.
 *
 * @param database Database (not accessed by this method)
 * @param relation Relation to process
 * @return Outlier result holding the normalized MDEF score of each object
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final Random random = rnd.getSingleThreadedRandom();
    FiniteProgress treeProgress = null;
    if (LOG.isVerbose()) {
        treeProgress = new FiniteProgress("Build aLOCI quadtress", g, LOG);
    }

    // Bounding box of the data set, padded so that every side has the length
    // of the widest dimension.
    final double[][] bounds = RelationUtil.computeMinMax(relation);
    final double[] min = bounds[0];
    final double[] max = bounds[1];
    double widest = 0;
    for (int d = 0; d < dim; d++) {
        widest = MathUtil.max(widest, max[d] - min[d]);
    }
    for (int d = 0; d < dim; d++) {
        final double pad = (widest - (max[d] - min[d])) * .5;
        min[d] -= pad;
        max[d] += pad;
    }

    // The first tree uses an all-zero shift vector.
    final List<ALOCIQuadTree> trees = new ArrayList<>(g);
    trees.add(new ALOCIQuadTree(min, max, new double[dim], nmin, relation));
    LOG.incrementProcessed(treeProgress);
    // The remaining g-1 trees are shifted randomly. The paper does not describe
    // this step clearly; this variant was chosen because it gives good results
    // on the test data.
    for (int t = 1; t < g; t++) {
        final double[] offset = new double[dim];
        for (int d = 0; d < dim; d++) {
            offset[d] = random.nextDouble() * (max[d] - min[d]);
        }
        trees.add(new ALOCIQuadTree(min, max, offset, nmin, relation));
        LOG.incrementProcessed(treeProgress);
    }
    LOG.ensureCompleted(treeProgress);

    // Main loop: score every object.
    FiniteProgress scoreProgress = null;
    if (LOG.isVerbose()) {
        scoreProgress = new FiniteProgress("Compute aLOCI scores", relation.size(), LOG);
    }
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    final DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final O obj = relation.get(it);
        double best = 0;
        // Walk down the levels until no tree has a node on the current level.
        for (int level = 0; ; level++) {
            // Closest C_i: among the trees whose returned node is exactly on
            // this level, keep the node nearest to the object.
            // TODO: always use manhattan?
            Node closestCi = null;
            for (int t = 0; t < g; t++) {
                final Node cand = trees.get(t).findClosestNode(obj, level);
                if (cand.getLevel() == level && (closestCi == null || distFunc.distance(closestCi, obj) > distFunc.distance(cand, obj))) {
                    closestCi = cand;
                }
            }
            if (closestCi == null) {
                // No tree provides a node on this level: stop descending.
                break;
            }
            // Closest C_j: queried alpha levels above; a candidate on a lower
            // level than the current choice is never accepted.
            // TODO: allow higher levels or not?
            // TODO: always use manhattan?
            Node closestCj = null;
            for (int t = 0; t < g; t++) {
                final Node cand = trees.get(t).findClosestNode(closestCi, level - alpha);
                if (closestCj == null || (!(cand.getLevel() < closestCj.getLevel()) && distFunc.distance(closestCj, closestCi) > distFunc.distance(cand, closestCi))) {
                    closestCj = cand;
                }
            }
            if (closestCj != null) {
                best = MathUtil.max(best, calculate_MDEF_norm(closestCj, closestCi));
            }
        }
        // Record the score and keep track of the observed score range.
        scores.putDouble(it, best);
        range.put(best);
        LOG.incrementProcessed(scoreProgress);
    }
    LOG.ensureCompleted(scoreProgress);

    final DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", scores, relation.getDBIDs());
    final OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Random(java.util.Random) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 12 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class FlexibleLOF method doRunInTime.

/**
 * Runs the generalized LOF computation for the given objects and wraps the
 * outcome in a {@link FlexibleLOF.LOFResult}, which also keeps the kNN queries
 * and the intermediate LRD/LOF stores (information an OnlineLOF algorithm may
 * need).
 *
 * @param ids Object ids
 * @param kNNRefer the kNN query w.r.t. reference neighborhood distance
 *        function
 * @param kNNReach the kNN query w.r.t. reachability distance function
 * @param stepprog Progress logger
 * @return LOF result
 * @throws AbortException if either kNN query is {@code null}
 */
protected LOFResult<O> doRunInTime(DBIDs ids, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, StepProgress stepprog) {
    // Both queries are required; fail early with a descriptive message.
    if (kNNRefer == null) {
        throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
    }
    if (kNNReach == null) {
        throw new AbortException("No kNN queries supported by database for reachability distance function.");
    }

    // Step 2: local reachability densities, using the reachability query.
    LOG.beginStep(stepprog, 2, "Computing LRDs.");
    final WritableDoubleDataStore lrdStore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    computeLRDs(kNNReach, ids, lrdStore);

    // Step 3: LOF scores, using the reference query; the score range is
    // collected on the way for the result metadata.
    LOG.beginStep(stepprog, 3, "Computing LOFs.");
    final WritableDoubleDataStore lofStore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
    final DoubleMinMax lofRange = new DoubleMinMax();
    computeLOFs(kNNRefer, ids, lrdStore, lofStore, lofRange);
    LOG.setCompleted(stepprog);

    // Assemble the outlier result and the enclosing LOF result.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofStore, ids);
    final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(lofRange.getMin(), lofRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    final OutlierResult outlierResult = new OutlierResult(meta, scoreRelation);
    return new LOFResult<>(outlierResult, kNNRefer, kNNReach, lrdStore, lofStore);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 13 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class KDEOS method run.

/**
 * Run the KDEOS outlier detection algorithm: obtain a kNN query, estimate the
 * per-object densities, derive the outlier scores from them and wrap the
 * scores together with their observed range.
 *
 * @param database Database to query
 * @param rel Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> rel) {
    final DBIDs ids = rel.getDBIDs();

    LOG.verbose("Running kNN preprocessor.");
    // NOTE(review): kmax + 1 presumably accounts for the query object being
    // part of its own neighborhood - confirm.
    final KNNQuery<O> knnQuery = DatabaseUtil.precomputedKNNQuery(database, rel, getDistanceFunction(), kmax + 1);

    // One double[] of density estimates per object.
    final WritableDataStore<double[]> densityStore = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, double[].class);
    estimateDensities(rel, knnQuery, ids, densityStore);

    // Scores, plus the min/max needed for the result metadata.
    final WritableDoubleDataStore scoreStore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
    final DoubleMinMax scoreRange = new DoubleMinMax();
    computeOutlierScores(knnQuery, ids, densityStore, scoreStore, scoreRange);

    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Kernel Density Estimation Outlier Scores", "kdeos-outlier", scoreStore, ids);
    final OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(scoreRange.getMin(), scoreRange.getMax());
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)

Example 14 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class LDOF method run.

/**
 * Run the LDOF algorithm.
 *
 * For every object, the mean of the values stored with its kNN entries is
 * divided by the mean pairwise distance among those neighbors (the object
 * itself is left out of both means). A NaN or infinite quotient is replaced
 * by 1.0.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
    // Observed score range, needed for the result metadata.
    final DoubleMinMax scoreRange = new DoubleMinMax();
    final WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);

    if (LOG.isVerbose()) {
        LOG.verbose("Computing LDOFs");
    }
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("LDOF for objects", relation.size(), LOG);
    }

    // Both accumulators are reused (reset) for every object.
    final Mean knnMean = new Mean();
    final Mean pairMean = new Mean();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final KNNList knn = knnQuery.getKNNForDBID(it, k);
        knnMean.reset();
        pairMean.reset();
        final DoubleDBIDListIter outer = knn.iter();
        final DoubleDBIDListIter inner = knn.iter();
        while (outer.valid()) {
            // The query object itself does not contribute.
            if (!DBIDUtil.equal(outer, it)) {
                knnMean.put(outer.doubleValue());
                // Pair up with every later neighbor, so each pair is counted once.
                for (inner.seek(outer.getOffset() + 1); inner.valid(); inner.advance()) {
                    if (!DBIDUtil.equal(inner, it)) {
                        pairMean.put(distQuery.distance(outer, inner));
                    }
                }
            }
            outer.advance();
        }
        final double quotient = knnMean.getMean() / pairMean.getMean();
        // Degenerate quotients (NaN or infinite) fall back to 1.0.
        final double ldof = (Double.isNaN(quotient) || Double.isInfinite(quotient)) ? 1.0 : quotient;
        ldofs.putDouble(it, ldof);
        scoreRange.put(ldof);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Wrap scores and metadata into the result.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
    final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
    return new OutlierResult(meta, scoreRelation);
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 15 with DoubleMinMax

use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.

the class ExternalDoubleOutlierScore method run.

/**
 * Run the algorithm: load externally computed outlier scores from
 * {@code file}, attach them to the objects of the relation, and rescale them
 * with {@code scaling}.
 *
 * Each non-comment line is split into tokens. A token matching
 * {@code idpattern} supplies the object id (the text after the match, parsed
 * as a base-10 integer); a token matching {@code scorepattern} supplies the
 * score (the text after the match, parsed as a double). A line must provide
 * both or neither.
 *
 * @param database Database to use
 * @param relation Relation to use
 * @return Result
 * @throws AbortException if a token matches both patterns, a pattern matches
 *         twice on one line, a line yields only an id or only a score, or the
 *         file cannot be read
 */
public OutlierResult run(Database database, Relation<?> relation) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Range of the raw (unscaled) scores as read from the file.
    DoubleMinMax minmax = new DoubleMinMax();
    try (// resources below are closed automatically when the block exits
    InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
        TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
        Tokenizer tokenizer = reader.getTokenizer();
        CharSequence buf = reader.getBuffer();
        // Both matchers work on the reader's buffer; per token only their
        // region is moved, no new matcher is created.
        Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
        reader.reset(in);
        while (reader.nextLineExceptComments()) {
            // null / NaN mean "not seen on this line yet".
            // NOTE(review): a score that itself parses to NaN is therefore
            // indistinguishable from a missing score.
            Integer id = null;
            double score = Double.NaN;
            for (; /* initialized by nextLineExceptComments */
            tokenizer.valid(); tokenizer.advance()) {
                // Restrict both matchers to the current token.
                mi.region(tokenizer.getStart(), tokenizer.getEnd());
                ms.region(tokenizer.getStart(), tokenizer.getEnd());
                // Both finds are evaluated before any check, so that mi.end()
                // and ms.end() below refer to this token.
                final boolean mif = mi.find();
                final boolean msf = ms.find();
                if (mif && msf) {
                    throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
                }
                if (mif) {
                    if (id != null) {
                        throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
                    }
                    // The id is the text from the end of the match to the end of the token.
                    id = ParseUtil.parseIntBase10(buf, mi.end(), tokenizer.getEnd());
                }
                if (msf) {
                    if (!Double.isNaN(score)) {
                        throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
                    }
                    // The score is the text from the end of the match to the end of the token.
                    score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
                }
            }
            if (id != null && !Double.isNaN(score)) {
                // Complete line: store the score and extend the raw range.
                scores.putDouble(DBIDUtil.importInteger(id), score);
                minmax.put(score);
            } else if (id == null && Double.isNaN(score)) {
                // Neither pattern matched: only warn, then continue with the next line.
                LOG.warning("Line did not match either ID nor score nor comment: " + reader.getLineNumber());
            } else {
                // Exactly one of id / score was found: treated as an error.
                throw new AbortException("Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
            }
        }
    } catch (IOException e) {
        throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
    }
    // Metadata for the raw scores; it is only used to build the intermediate
    // result handed to the scaling function below.
    OutlierScoreMeta meta;
    if (inverted) {
        meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    } else {
        meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    }
    DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
    OutlierResult or = new OutlierResult(meta, scoresult);
    // Apply scaling
    // Outlier scaling functions get to see the raw result first.
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }
    // Rescale in place: scoresult is backed by the same store that is written
    // to here. Every object of the relation is visited.
    // NOTE(review): objects without an entry in the file contribute whatever
    // the store returns for them - confirm this is intended.
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double val = scoresult.doubleValue(iditer);
        val = scaling.getScaled(val);
        scores.putDouble(iditer, val);
        mm.put(val);
    }
    // The returned metadata is always a BasicOutlierScoreMeta over the scaled
    // range, whether or not 'inverted' is set.
    meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
    or = new OutlierResult(meta, scoresult);
    return or;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Matcher(java.util.regex.Matcher) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) IOException(java.io.IOException) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) FileInputStream(java.io.FileInputStream) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)89 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)65 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)62 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)62 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)62 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)62 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)54 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)15 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)15 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)13 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)11 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)11 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)7 Mean (de.lmu.ifi.dbs.elki.math.Mean)6 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)6