Search in sources :

Example 1 with LinearScaling

use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.

The class VisualizePairwiseGainMatrix, method run.

/**
 * Compute a pairwise "ensemble gain" matrix over the vectors of the relation
 * and display it as an image.
 *
 * The first object of the label relation must carry a label matching
 * {@code .*by.?label.*}; its vector is used as the reference ("truth") for
 * all ROC AUC computations. Every other vector is scored on its own
 * (diagonal) and in combination with every other vector via
 * {@code voting.combine} (off-diagonal). The combined AUC is then turned into
 * a relative gain over the better of the two individual AUCs, rendered into a
 * {@link BufferedImage} and handed to the similarity matrix visualizer.
 *
 * @throws AbortException if the first label does not look like a "by label"
 *         reference
 */
@Override
public void run() {
    final Database database = inputstep.getDatabase();
    ResultHierarchy hier = database.getHierarchy();
    Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    // The first object is expected to be the reference ("by label") vector.
    final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
    final String firstlabel = labels.get(firstid);
    if (!firstlabel.matches(".*by.?label.*")) {
        throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }
    relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
    // Dimensionality and reference vector
    final int dim = RelationUtil.dimensionality(relation);
    final NumberVector refvec = relation.get(firstid);
    // Build the truth vector
    VectorNonZero pos = new VectorNonZero(refvec);
    // All objects except the reference one, in sorted order; the position in
    // this array is the row/column index into the matrix below.
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    ids.remove(firstid);
    ids.sort();
    final int size = ids.size();
    // data[a][a]: AUC of vector a alone; data[a][b]: AUC of a and b combined.
    double[][] data = new double[size][size];
    // minmax tracks the normalized gain values, commax the raw combined AUCs.
    DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
    {
        // Progress total is (size * (size + 1)) >> 1: one step per diagonal
        // cell plus one per upper-triangle cell (the shift binds last).
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
        // Vote combination buffer.
        double[] buf = new double[2];
        int a = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
            final NumberVector veca = relation.get(id);
            // Direct AUC score:
            {
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
                data[a][a] = auc;
                // minmax.put(auc);
                LOG.incrementProcessed(prog);
            }
            // Compare to others, exploiting symmetry
            DBIDArrayIter id2 = ids.iter();
            id2.seek(a + 1);
            for (int b = a + 1; b < size; b++, id2.advance()) {
                final NumberVector vecb = relation.get(id2);
                // Combine both vectors dimension by dimension using the
                // configured voting function.
                double[] combined = new double[dim];
                for (int d = 0; d < dim; d++) {
                    buf[0] = veca.doubleValue(d);
                    buf[1] = vecb.doubleValue(d);
                    combined[d] = voting.combine(buf);
                }
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
                // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
                // labels.get(ids.get(b)));
                data[a][b] = auc;
                data[b][a] = auc;
                commax.put(data[a][b]);
                // minmax.put(auc);
                LOG.incrementProcessed(prog);
            }
        }
        LOG.ensureCompleted(prog);
    }
    // Convert combined AUCs into a relative gain over the better individual
    // AUC: (combined - ref) / (1 - ref). The diagonal still holds the
    // individual AUCs at this point and is only cleared afterwards.
    // NOTE(review): if ref is exactly 1 the denominator is 0 and the result
    // is NaN or infinite - confirm whether this can occur in practice.
    for (int a = 0; a < size; a++) {
        for (int b = a + 1; b < size; b++) {
            double ref = Math.max(data[a][a], data[b][b]);
            data[a][b] = (data[a][b] - ref) / (1 - ref);
            data[b][a] = (data[b][a] - ref) / (1 - ref);
            // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
            // labels.get(ids.get(b)));
            minmax.put(data[a][b]);
        }
    }
    // The diagonal carries no gain information; blank it out.
    for (int a = 0; a < size; a++) {
        data[a][a] = 0;
    }
    LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
    boolean hasneg = (minmax.getMin() < -1E-3);
    LinearScaling scale;
    if (!hasneg) {
        scale = LinearScaling.fromMinMax(0., minmax.getMax());
    } else {
        scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
    }
    // NOTE(review): the data-dependent scale chosen above is unconditionally
    // replaced by a fixed [0, 0.5] scale here, which makes the branch above
    // dead code - confirm whether this override is intentional.
    scale = LinearScaling.fromMinMax(0., .5);
    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    // Only the upper triangle is visited; each pixel is mirrored.
    for (int x = 0; x < size; x++) {
        for (int y = x; y < size; y++) {
            double val = data[x][y];
            // Clamp the scaled gain to [-1, 1].
            val = Math.max(-1, Math.min(1., scale.getScaled(val)));
            // Compute color:
            final int col;
            {
                if (val >= 0) {
                    // Non-negative gain: intensity goes into bits 8-15 (green).
                    int ival = 0xFF & (int) (255 * val);
                    col = 0xff000000 | (ival << 8);
                } else {
                    // Negative gain: magnitude goes into bits 16-23 (red).
                    int ival = 0xFF & (int) (255 * -val);
                    col = 0xff000000 | (ival << 16);
                }
            }
            img.setRGB(x, y, col);
            img.setRGB(y, x, col);
        }
    }
    // Wrap the image as a result and register it in the result hierarchy.
    SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
    hier.add(database, smat);
    VisualizerContext context = vispar.newContext(hier, smat);
    // Attach visualizers to results
    SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
    factory.processNewResult(context, database);
    // Show only the visualization tasks that were created by our factory.
    VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
        if (task.getFactory() == factory) {
            showVisualization(context, factory, task);
        }
    });
}
Also used : DecreasingVectorIter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter) SimilarityMatrix(de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix) BufferedImage(java.awt.image.BufferedImage) SimilarityMatrixVisualizer(de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) VisualizationTask(de.lmu.ifi.dbs.elki.visualization.VisualizationTask) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Database(de.lmu.ifi.dbs.elki.database.Database) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VisualizerContext(de.lmu.ifi.dbs.elki.visualization.VisualizerContext) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) VectorNonZero(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)

Example 2 with LinearScaling

use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.

The class JudgeOutlierScores, method computeScore.

/**
 * Evaluate a single outlier score result.
 *
 * Every score is first passed through the configured {@code scaling}
 * function. If that function reports a finite value range other than
 * [0, 1], the result is additionally mapped linearly from [min, max] onto
 * [0, 1]. The inlier score is the mean of {@code 1 - score} over
 * {@code ids}, the outlier score is the mean of {@code score} over
 * {@code outlierIds}.
 *
 * Note: if either ID set is empty, the corresponding mean is {@code NaN}.
 *
 * @param ids Inlier IDs
 * @param outlierIds Outlier IDs
 * @param or Outlier Result to evaluate
 * @return Outlier score result, containing a single row
 *         {@code { (inlier + outlier) / 2, inlier, outlier }}
 * @throws IllegalStateException declared for callers; not thrown directly by
 *         the code in this method
 */
protected ScoreResult computeScore(DBIDs ids, DBIDs outlierIds, OutlierResult or) throws IllegalStateException {
    // Outlier-specific scalings need to see the result before they can be used.
    if (scaling instanceof OutlierScalingFunction) {
        OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
        oscaling.prepare(or);
    }
    final ScalingFunction innerScaling;
    // If we have useful (finite) min/max, use these for normalization.
    double min = scaling.getMin();
    double max = scaling.getMax();
    if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
        innerScaling = new IdentityScaling();
        // TODO: does the outlier score give us this guarantee?
        LOG.warning("JudgeOutlierScores expects values between 0.0 and 1.0, but we don't have such a guarantee by the scaling function: min:" + min + " max:" + max);
    } else {
        if (min == 0.0 && max == 1.0) {
            innerScaling = new IdentityScaling();
        } else {
            // Map [min, max] onto [0, 1]: x -> factor * x - min * factor.
            // The shift must be scaled by the factor as well, otherwise min
            // is not mapped to 0.
            final double factor = 1.0 / (max - min);
            innerScaling = new LinearScaling(factor, -min * factor);
        }
    }
    double posscore = 0.0;
    double negscore = 0.0;
    // Inliers: reward low (normalized) scores.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        double result = or.getScores().doubleValue(iter);
        result = innerScaling.getScaled(scaling.getScaled(result));
        posscore += (1.0 - result);
    }
    // Outliers: reward high (normalized) scores.
    for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
        double result = or.getScores().doubleValue(iter);
        result = innerScaling.getScaled(scaling.getScaled(result));
        negscore += result;
    }
    // Turn the sums into means.
    posscore /= ids.size();
    negscore /= outlierIds.size();
    LOG.verbose("Scores: " + posscore + " " + negscore);
    ArrayList<double[]> s = new ArrayList<>(1);
    s.add(new double[] { (posscore + negscore) * .5, posscore, negscore });
    return new ScoreResult(s);
}
Also used : ScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with LinearScaling

use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.

The class ComputeSimilarityMatrixImage, method computeSimilarityMatrixImage.

/**
 * Render the pairwise distances of a relation as a grayscale image.
 *
 * The objects are laid out along both axes in the order given by the
 * iterator. A first pass over the upper triangle collects the range of the
 * finite distances, a second pass rescales each distance with that range
 * and writes it symmetrically into the image.
 *
 * @param relation Relation
 * @param iter DBID iterator defining the object order
 * @return result object
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
    // Materialize the iteration order.
    final ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
    while (iter.valid()) {
        order.add(iter);
        iter.advance();
    }
    final int size = order.size();
    if (size != relation.size()) {
        throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    final DistanceQuery<O> dq = distanceFunction.instantiate(relation);
    // Progress is reported once per row in each of the two passes, hence
    // 2 * size steps in total.
    final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Similarity Matrix Image", 2 * size, LOG) : null;

    // Pass 1: find the value range of the usable distances.
    // Note: we assume that we have an efficient distance cache available,
    // since we are using 2*O(n*n) distance computations.
    final DoubleMinMax range = new DoubleMinMax();
    {
        final DBIDArrayIter row = order.iter();
        final DBIDArrayIter col = order.iter();
        while (row.valid()) {
            for (col.seek(row.getOffset()); col.valid(); col.advance()) {
                final double d = dq.distance(row, col);
                if (Double.isNaN(d) || Double.isInfinite(d)) {
                    continue;
                }
                if (skipzero && !(d > 0.0)) {
                    continue;
                }
                range.put(d);
            }
            LOG.incrementProcessed(prog);
            row.advance();
        }
    }

    // Linear map from the observed range; a non-positive span is used as
    // the factor unchanged.
    final double lo = range.getMin();
    final double span = range.getMax() - lo;
    final double zoom = (span > 0.0) ? 1. / span : span;
    final LinearScaling scale = new LinearScaling(zoom, -lo * zoom);

    // Pass 2: paint the upper triangle and mirror every pixel.
    final BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    {
        final DBIDArrayIter row = order.iter();
        final DBIDArrayIter col = order.iter();
        for (int x = 0; x < size && row.valid(); x++, row.advance()) {
            col.seek(row.getOffset());
            for (int y = x; y < size && col.valid(); y++, col.advance()) {
                final double raw = dq.distance(row, col);
                // Only positive distances are rescaled.
                double shade = (raw > 0.0) ? scale.getScaled(raw) : raw;
                // Apply extra scaling
                if (scaling != null) {
                    shade = scaling.getScaled(shade);
                }
                final int gray = 0xFF & (int) (255 * shade);
                final int argb = 0xff000000 | (gray << 16) | (gray << 8) | gray;
                img.setRGB(x, y, argb);
                img.setRGB(y, x, argb);
            }
            LOG.incrementProcessed(prog);
        }
    }
    LOG.ensureCompleted(prog);
    return new SimilarityMatrix(img, relation, order);
}
Also used : LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) BufferedImage(java.awt.image.BufferedImage) ImageIO(javax.imageio.ImageIO)

Example 4 with LinearScaling

use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.

The class AutomaticEvaluation, method autoEvaluateOutliers.

/**
 * Automatically evaluate new outlier results against a minority class.
 *
 * Does nothing if {@code newResult} contains no outlier results. Otherwise
 * the first clustering found for the database is inspected, the name of its
 * smallest cluster is taken as the outlier label, and the ranking, ROC,
 * precision-at-k, precision-recall and histogram evaluations are run with a
 * pattern matching exactly that label.
 *
 * @param hier Result hierarchy
 * @param newResult Newly added result to evaluate
 */
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
    final Collection<OutlierResult> found = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
    if (LOG.isDebugging()) {
        LOG.debug("Number of new outlier results: " + found.size());
    }
    if (found.isEmpty()) {
        return;
    }

    final Database db = ResultUtil.findDatabase(hier);
    ensureClusteringResult(db, db);
    final Collection<Clustering<?>> candidates = ResultUtil.filterResults(hier, db, Clustering.class);
    if (candidates.isEmpty()) {
        LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
        return;
    }
    final Clustering<?> reference = candidates.iterator().next();

    // Find minority class label; only meaningful with at least two clusters.
    int minority = Integer.MAX_VALUE;
    int total = 0;
    String label = null;
    if (reference.getAllClusters().size() > 1) {
        for (Cluster<?> cluster : reference.getAllClusters()) {
            final int members = cluster.getIDs().size();
            total += members;
            if (members < minority) {
                minority = members;
                label = cluster.getName();
            }
        }
    }
    if (label == null) {
        LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
        return;
    }

    // Sanity warnings; evaluation proceeds regardless.
    if (minority == 1) {
        LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
    }
    if (minority > 0.05 * total) {
        LOG.warning("The minority class I discovered (labeled '" + label + "') has " + (minority * 100. / total) + "% of objects. Outlier classes should be more rare!");
    }
    LOG.verbose("Evaluating using minority class: " + label);

    // Match the label exactly.
    final Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
    // Evaluate rankings.
    new OutlierRankingEvaluation(pat).processNewResult(hier, newResult);
    // Compute ROC curve
    new OutlierROCCurve(pat).processNewResult(hier, newResult);
    // Compute Precision at k, up to twice the minority class size
    new OutlierPrecisionAtKCurve(pat, minority << 1).processNewResult(hier, newResult);
    // Compute precision-recall curve
    new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult);
    // Compute outlier histogram
    new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false).processNewResult(hier, newResult);
}
Also used : Pattern(java.util.regex.Pattern) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) EvaluateClustering(de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ByLabelClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) Database(de.lmu.ifi.dbs.elki.database.Database)

Aggregations

LinearScaling (de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling)4 Database (de.lmu.ifi.dbs.elki.database.Database)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)2 BufferedImage (java.awt.image.BufferedImage)2 ByLabelClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering)1 ByLabelOrAllInOneClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)1 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 EvaluateClustering (de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering)1 DecreasingVectorIter (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter)1 VectorNonZero (de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)1 SimilarityMatrix (de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix)1 ResultHierarchy (de.lmu.ifi.dbs.elki.result.ResultHierarchy)1 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)1 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)1 IdentityScaling (de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling)1