Search in sources :

Example 1 with OutlierScalingFunction

use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.

the class BubbleVisualization method makeVisualization.

/**
 * Build the visualization instance for the given task.
 * <p>
 * If the configured scaling is an {@link OutlierScalingFunction}, it is
 * prepared with the outlier result obtained from the task before the
 * instance is created.
 *
 * @param context Visualizer context, passed through to the instance
 * @param task Visualization task; its result is used to prepare the scaling
 * @param plot Plot, passed through to the instance
 * @param width Width, passed through to the instance
 * @param height Height, passed through to the instance
 * @param proj Projection, passed through to the instance
 * @return the newly created instance
 */
@Override
public Visualization makeVisualization(VisualizerContext context, VisualizationTask task, VisualizationPlot plot, double width, double height, Projection proj) {
    // instanceof evaluates to false for null, so no separate null test is needed.
    if (settings.scaling instanceof OutlierScalingFunction) {
        final OutlierResult preparationInput = task.getResult();
        final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) settings.scaling;
        outlierScaling.prepare(preparationInput);
    }
    return new Instance(context, task, plot, width, height, proj);
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)

Example 2 with OutlierScalingFunction

use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.

the class ExternalDoubleOutlierScore method run.

/**
 * Run the algorithm: read outlier scores from the external {@code file},
 * store them for the objects of the relation, and apply the configured
 * scaling to every stored score.
 * <p>
 * Each non-comment line is split into tokens. A token matched by
 * {@code idpattern} supplies the object id and a token matched by
 * {@code scorepattern} supplies the score; in both cases the value is parsed
 * from the end of the match up to the end of the token. A line has to yield
 * both values or neither: a line yielding neither only produces a warning,
 * while a line yielding exactly one of them aborts the run.
 *
 * @param database Database to use (not referenced in this method body)
 * @param relation Relation to use
 * @return Result
 * @throws AbortException if a token matches both patterns, a pattern matches
 *         twice on one line, a line yields only an id or only a score, or
 *         reading the file fails with an {@link IOException}
 */
public OutlierResult run(Database database, Relation<?> relation) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Tracks the smallest and largest raw score read from the file.
    DoubleMinMax minmax = new DoubleMinMax();
    try (// input stream over the score file
    InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
        TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
        Tokenizer tokenizer = reader.getTokenizer();
        CharSequence buf = reader.getBuffer();
        // Both matchers are created once over the reader's buffer and are
        // re-targeted per token via region() below.
        Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
        reader.reset(in);
        while (reader.nextLineExceptComments()) {
            // null / NaN mean "not seen on this line yet".
            Integer id = null;
            double score = Double.NaN;
            for (; /* initialized by nextLineExceptComments */
            tokenizer.valid(); tokenizer.advance()) {
                // Restrict both matchers to the current token only.
                mi.region(tokenizer.getStart(), tokenizer.getEnd());
                ms.region(tokenizer.getStart(), tokenizer.getEnd());
                final boolean mif = mi.find();
                final boolean msf = ms.find();
                // A token must not be claimed by both patterns.
                if (mif && msf) {
                    throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
                }
                if (mif) {
                    if (id != null) {
                        throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
                    }
                    // The id is the text between the end of the match and the end of the token.
                    id = ParseUtil.parseIntBase10(buf, mi.end(), tokenizer.getEnd());
                }
                if (msf) {
                    // NOTE(review): a score that itself parses to NaN is
                    // indistinguishable from "no score seen" here.
                    if (!Double.isNaN(score)) {
                        throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
                    }
                    // The score is the text between the end of the match and the end of the token.
                    score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
                }
            }
            if (id != null && !Double.isNaN(score)) {
                // Complete line: store the score for this id and update the raw range.
                scores.putDouble(DBIDUtil.importInteger(id), score);
                minmax.put(score);
            } else if (id == null && Double.isNaN(score)) {
                // Neither pattern matched: warn and continue with the next line.
                LOG.warning("Line did not match either ID nor score nor comment: " + reader.getLineNumber());
            } else {
                // Exactly one of id / score was found: treated as a fatal input error.
                throw new AbortException("Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
            }
        }
    } catch (IOException e) {
        throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
    }
    // Meta data for the raw (unscaled) scores; the class depends on the inverted flag.
    OutlierScoreMeta meta;
    if (inverted) {
        meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    } else {
        meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    }
    DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
    OutlierResult or = new OutlierResult(meta, scoresult);
    // Apply scaling
    // Outlier scaling functions are prepared with the unscaled result first.
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }
    // Second pass: replace each stored score by its scaled value and track the scaled range.
    // NOTE(review): scaling is dereferenced below without a null check, unlike
    // the null-tolerant instanceof above - presumably it is never null; confirm.
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Read through the relation, write to the store it was constructed with.
        double val = scoresult.doubleValue(iditer);
        val = scaling.getScaled(val);
        scores.putDouble(iditer, val);
        mm.put(val);
    }
    // The returned result always uses BasicOutlierScoreMeta over the scaled
    // range, independent of the inverted flag.
    // NOTE(review): presumably intentional because scaling normalizes the
    // score direction - confirm.
    meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
    or = new OutlierResult(meta, scoresult);
    return or;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Matcher(java.util.regex.Matcher) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) IOException(java.io.IOException) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) FileInputStream(java.io.FileInputStream) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) TokenizedReader(de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader) Tokenizer(de.lmu.ifi.dbs.elki.utilities.io.Tokenizer) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with OutlierScalingFunction

use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.

the class ComputeKNNOutlierScores method writeResult.

/**
 * Write a single output line: the label, then one space-separated score per
 * id, then a newline. Scores are passed through the scaling function when
 * one is given.
 *
 * @param out Output stream
 * @param ids DBIDs
 * @param result Outlier result
 * @param scaling Scaling function; {@code null} leaves the scores unscaled
 * @param label Identification label
 */
void writeResult(PrintStream out, DBIDs ids, OutlierResult result, ScalingFunction scaling, String label) {
    // Outlier scaling functions are prepared with the result before use.
    if (scaling instanceof OutlierScalingFunction) {
        final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) scaling;
        outlierScaling.prepare(result);
    }
    out.append(label);
    final DoubleRelation relation = result.getScores();
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double raw = relation.doubleValue(it);
        final double printed;
        if (scaling == null) {
            printed = raw;
        } else {
            printed = scaling.getScaled(raw);
        }
        out.append(' ');
        out.append(Double.toString(printed));
    }
    out.append(FormatUtil.NEWLINE);
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with OutlierScalingFunction

use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.

the class OutlierThresholdClustering method split.

/**
 * Partition the scored objects into {@code threshold.length + 1} clusters.
 * <p>
 * Each score (scaled first when a scaling function is set) is placed into
 * the bucket of the first threshold it is strictly smaller than; scores not
 * smaller than any threshold go into the last bucket. Bucket 0 is named
 * "Inlier", bucket i &gt; 0 is named "Outlier_" followed by
 * {@code threshold[i - 1]}.
 *
 * @param or Outlier result providing the scores
 * @return Clustering with one top-level cluster per bucket
 */
private Clustering<Model> split(OutlierResult or) {
    DoubleRelation scores = or.getScores();
    // Outlier scaling functions are prepared with the result before use.
    if (scaling instanceof OutlierScalingFunction) {
        final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) scaling;
        outlierScaling.prepare(or);
    }
    final int numBuckets = threshold.length + 1;
    ArrayList<ModifiableDBIDs> buckets = new ArrayList<>(numBuckets);
    for (int b = 0; b < numBuckets; b++) {
        buckets.add(DBIDUtil.newHashSet());
    }
    for (DBIDIter it = scores.getDBIDs().iter(); it.valid(); it.advance()) {
        final double raw = scores.doubleValue(it);
        final double score = (scaling != null) ? scaling.getScaled(raw) : raw;
        // Advance past every threshold the score is not strictly below.
        int bucket = 0;
        while (bucket < threshold.length && !(score < threshold[bucket])) {
            bucket++;
        }
        buckets.get(bucket).add(it);
    }
    Clustering<Model> clustering = new Clustering<>("Outlier threshold clustering", "threshold-clustering");
    for (int b = 0; b < numBuckets; b++) {
        // Every bucket except the first is flagged via the third constructor argument.
        final boolean beyondFirst = b > 0;
        final String name = beyondFirst ? "Outlier_" + threshold[b - 1] : "Inlier";
        clustering.addToplevelCluster(new Cluster<>(name, buckets.get(b), beyondFirst));
    }
    return clustering;
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 5 with OutlierScalingFunction

use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.

the class ComputeOutlierHistogram method evaluateOutlierResult.

/**
 * Evaluate a single outlier result as histogram.
 * <p>
 * Objects found by the label match on {@code positiveClassName} contribute to
 * the second component of each bin, all remaining objects to the first.
 * Scores are scaled before binning, and scaled values that are infinite or
 * NaN are left out. Each output row is
 * {@code { bin center, first component, second component }}.
 *
 * @param database Database to process
 * @param or Outlier result
 * @return Result
 */
public HistogramResult evaluateOutlierResult(Database database, OutlierResult or) {
    // Outlier scaling functions are prepared with the result before use.
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }
    ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
    DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
    // Use the scaling's bounds for binning only when both are finite numbers.
    final double min = scaling.getMin();
    final double max = scaling.getMax();
    final boolean unusableBounds = Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max);
    final ObjHistogram<DoubleDoublePair> hist;
    if (!unusableBounds) {
        // Fixed range: bins span [min, max] of the scaling function.
        hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, min, max) {

            @Override
            protected DoubleDoublePair makeObject() {
                return new DoubleDoublePair(0., 0.);
            }

            @Override
            public void putData(double coord, DoubleDoublePair data) {
                // Accumulate component-wise into the bin at this coordinate.
                DoubleDoublePair bin = get(coord);
                bin.first += data.first;
                bin.second += data.second;
            }
        };
    } else {
        // No usable range: fall back to the dynamic histogram.
        hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {

            @Override
            public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
                // Component-wise sum, accumulated into the first argument.
                first.first += second.first;
                first.second += second.second;
                return first;
            }

            @Override
            protected DoubleDoublePair makeObject() {
                return new DoubleDoublePair(0., 0.);
            }

            @Override
            protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
                return new DoubleDoublePair(data.first, data.second);
            }

            @Override
            protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
                // Merge the bins in [start, end) into one, ignoring empty slots.
                DoubleDoublePair merged = new DoubleDoublePair(0, 0);
                for (int pos = start; pos < end; pos++) {
                    DoubleDoublePair bin = (DoubleDoublePair) data[pos];
                    if (bin == null) {
                        continue;
                    }
                    merged.first += bin.first;
                    merged.second += bin.second;
                }
                return merged;
            }
        };
    }
    // Per-object increments; sizes are read before the labeled objects are
    // removed from ids below.
    DoubleDoublePair negative, positive;
    if (splitfreq) {
        // Each group is normalized by its own size.
        negative = new DoubleDoublePair(1. / (ids.size() - outlierIds.size()), 0);
        positive = new DoubleDoublePair(0, 1. / outlierIds.size());
    } else {
        // Both groups are normalized by the total number of objects.
        negative = new DoubleDoublePair(1. / ids.size(), 0);
        positive = new DoubleDoublePair(0, 1. / ids.size());
    }
    ids.removeDBIDs(outlierIds);
    // Remaining (unlabeled) objects go into the first component.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        final double scaled = scaling.getScaled(or.getScores().doubleValue(iter));
        if (!(scaled > Double.NEGATIVE_INFINITY) || !(scaled < Double.POSITIVE_INFINITY)) {
            continue;
        }
        hist.putData(scaled, negative);
    }
    // Labeled objects go into the second component.
    for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
        final double scaled = scaling.getScaled(or.getScores().doubleValue(iter));
        if (!(scaled > Double.NEGATIVE_INFINITY) || !(scaled < Double.POSITIVE_INFINITY)) {
            continue;
        }
        hist.putData(scaled, positive);
    }
    // Flatten the histogram into rows of { center, first, second }.
    Collection<double[]> rows = new ArrayList<>(hist.getNumBins());
    for (ObjHistogram.Iter<DoubleDoublePair> bin = hist.iter(); bin.valid(); bin.advance()) {
        DoubleDoublePair counts = bin.getValue();
        rows.add(new double[] { bin.getCenter(), counts.first, counts.second });
    }
    return new HistogramResult("Outlier Score Histogram", "outlier-histogram", rows);
}
Also used : ObjHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleDoublePair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

OutlierScalingFunction (de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction)7 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)6 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)3 ArrayList (java.util.ArrayList)3 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)2 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)2 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)2 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)2 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 HistogramResult (de.lmu.ifi.dbs.elki.result.HistogramResult)1 Result (de.lmu.ifi.dbs.elki.result.Result)1 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)1 ObjHistogram (de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 TokenizedReader (de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader)1