Search in sources :

Example 36 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class VisualizePairwiseGainMatrix method run.

/**
 * Computes a pairwise "gain" matrix over the vectors of the input relation
 * and displays it as a similarity matrix image.
 *
 * The first object of the label relation is used as reference; its label must
 * match the pattern {@code .*by.?label.*}. For every other vector the ROC AUC
 * against the reference is computed, and for every pair of vectors the ROC AUC
 * of their combination (via {@code voting.combine}, dimension by dimension).
 * The pairwise AUC is then turned into a relative gain over the better of the
 * two individual AUCs, and rendered as a symmetric image (green channel for
 * non-negative values, red channel for negative values).
 *
 * @throws AbortException if the first label does not match the expected
 *         "by label" pattern
 */
@Override
public void run() {
    final Database database = inputstep.getDatabase();
    ResultHierarchy hier = database.getHierarchy();
    Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    // The first object (in iteration order of the label relation) is taken
    // as the reference; its label is checked below.
    final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
    final String firstlabel = labels.get(firstid);
    if (!firstlabel.matches(".*by.?label.*")) {
        throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }
    // Replace the relation by its prescaled version (helper shared with
    // GreedyEnsembleExperiment).
    relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
    // Dimensionality and reference vector
    final int dim = RelationUtil.dimensionality(relation);
    final NumberVector refvec = relation.get(firstid);
    // Build the truth vector
    // (presumably the non-zero dimensions of the reference vector are the
    // positives for the ROC computation - confirm against VectorNonZero)
    VectorNonZero pos = new VectorNonZero(refvec);
    // All objects except the reference, in sorted order; the position in
    // this array is the row/column index of the matrix.
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    ids.remove(firstid);
    ids.sort();
    final int size = ids.size();
    // data[a][a] first holds the individual AUC of vector a, data[a][b] the
    // AUC of the combination of a and b (symmetric).
    double[][] data = new double[size][size];
    // minmax tracks the gain values (filled further below), commax the raw
    // AUC values of the pairwise combinations.
    DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
    {
        // Progress total: diagonal plus upper triangle = size * (size + 1) / 2.
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
        // Vote combination buffer.
        // Reused for every dimension of every pair.
        double[] buf = new double[2];
        int a = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
            final NumberVector veca = relation.get(id);
            // Direct AUC score:
            {
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
                data[a][a] = auc;
                // minmax.put(auc);
                LOG.incrementProcessed(prog);
            }
            // Compare to others, exploiting symmetry
            // id2 starts at position a + 1 and advances in lockstep with b.
            DBIDArrayIter id2 = ids.iter();
            id2.seek(a + 1);
            for (int b = a + 1; b < size; b++, id2.advance()) {
                final NumberVector vecb = relation.get(id2);
                // Combine both vectors dimension by dimension.
                double[] combined = new double[dim];
                for (int d = 0; d < dim; d++) {
                    buf[0] = veca.doubleValue(d);
                    buf[1] = vecb.doubleValue(d);
                    combined[d] = voting.combine(buf);
                }
                double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
                // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
                // labels.get(ids.get(b)));
                data[a][b] = auc;
                data[b][a] = auc;
                commax.put(data[a][b]);
                // minmax.put(auc);
                LOG.incrementProcessed(prog);
            }
        }
        LOG.ensureCompleted(prog);
    }
    // Convert the pairwise AUC values to a relative gain over the better of
    // the two individual AUCs (still stored on the diagonal at this point):
    // gain = (auc_ab - ref) / (1 - ref).
    // NOTE(review): if ref equals 1 the divisor is zero and the result is
    // not finite; that case is not handled here.
    for (int a = 0; a < size; a++) {
        for (int b = a + 1; b < size; b++) {
            double ref = Math.max(data[a][a], data[b][b]);
            data[a][b] = (data[a][b] - ref) / (1 - ref);
            data[b][a] = (data[b][a] - ref) / (1 - ref);
            // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
            // labels.get(ids.get(b)));
            minmax.put(data[a][b]);
        }
    }
    // The diagonal is only cleared now, because the loop above still needed
    // the individual AUC values stored there.
    for (int a = 0; a < size; a++) {
        data[a][a] = 0;
    }
    LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
    boolean hasneg = (minmax.getMin() < -1E-3);
    LinearScaling scale;
    if (!hasneg) {
        scale = LinearScaling.fromMinMax(0., minmax.getMax());
    } else {
        scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
    }
    // NOTE(review): the data-dependent scale chosen above is overwritten
    // unconditionally here, so only this fixed scaling is ever used for the
    // image. Confirm whether this override is intentional.
    scale = LinearScaling.fromMinMax(0., .5);
    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    // Only the upper triangle (including the diagonal) is visited; each
    // pixel is mirrored to keep the image symmetric.
    for (int x = 0; x < size; x++) {
        for (int y = x; y < size; y++) {
            double val = data[x][y];
            // Clamp the scaled value to [-1, 1].
            val = Math.max(-1, Math.min(1., scale.getScaled(val)));
            // Compute color:
            // non-negative values go to the green channel (bits 8-15),
            // negative values (by magnitude) to the red channel (bits 16-23).
            final int col;
            {
                if (val >= 0) {
                    int ival = 0xFF & (int) (255 * val);
                    col = 0xff000000 | (ival << 8);
                } else {
                    int ival = 0xFF & (int) (255 * -val);
                    col = 0xff000000 | (ival << 16);
                }
            }
            img.setRGB(x, y, col);
            img.setRGB(y, x, col);
        }
    }
    // Wrap the image as a result and register it below the database.
    SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
    hier.add(database, smat);
    VisualizerContext context = vispar.newContext(hier, smat);
    // Attach visualizers to results
    SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
    factory.processNewResult(context, database);
    // Show only the visualization tasks that were created by our own factory.
    VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
        if (task.getFactory() == factory) {
            showVisualization(context, factory, task);
        }
    });
}
Also used : DecreasingVectorIter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DecreasingVectorIter) SimilarityMatrix(de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix) BufferedImage(java.awt.image.BufferedImage) SimilarityMatrixVisualizer(de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) VisualizationTask(de.lmu.ifi.dbs.elki.visualization.VisualizationTask) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) Database(de.lmu.ifi.dbs.elki.database.Database) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VisualizerContext(de.lmu.ifi.dbs.elki.visualization.VisualizerContext) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) VectorNonZero(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.VectorNonZero)

Example 37 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class SharedNearestNeighborPreprocessor method initialize.

/**
 * Fills {@code storage} with a sorted neighbor list for every object of the
 * relation.
 *
 * For each object, the kNN query is asked for {@code numberOfNeighbors}
 * neighbors; the returned ids are copied (stopping once
 * {@code numberOfNeighbors} ids were collected), sorted, and stored under
 * the object's id. Progress is reported only when the logger is verbose.
 */
@Override
public void initialize() {
    if (getLogger().isVerbose()) {
        getLogger().verbose("Assigning nearest neighbor lists to database objects");
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, ArrayDBIDs.class);
    final KNNQuery<O> query = QueryUtil.getKNNQuery(relation, distanceFunction, numberOfNeighbors);

    FiniteProgress prog = null;
    if (getLogger().isVerbose()) {
        prog = new FiniteProgress("assigning nearest neighbor lists", relation.size(), getLogger());
    }

    DBIDIter current = relation.iterDBIDs();
    while (current.valid()) {
        final ArrayModifiableDBIDs collected = DBIDUtil.newArray(numberOfNeighbors);
        final DBIDs result = query.getKNNForDBID(current, numberOfNeighbors);
        // Copy the query result, but never keep more than numberOfNeighbors
        // entries: the size check happens after each insertion.
        DBIDIter candidate = result.iter();
        while (candidate.valid()) {
            collected.add(candidate);
            if (collected.size() >= numberOfNeighbors) {
                break;
            }
            candidate.advance();
        }
        collected.sort();
        storage.put(current, collected);
        getLogger().incrementProcessed(prog);
        current.advance();
    }
    getLogger().ensureCompleted(prog);
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 38 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class SpatialApproximationMaterializeKNNPreprocessor method preprocess.

/**
 * Approximates the k nearest neighbors of every object by only considering
 * the objects stored in the same leaf page of the spatial index.
 *
 * For each leaf, all pairwise distances between its entries are evaluated.
 * Each distance between two different objects is computed once and handed
 * over to the reverse direction through a small cache. The resulting kNN
 * list of every object is written to {@code storage}. When the logger is
 * verbose, progress and summary statistics (page size, neighbors returned)
 * are reported.
 */
@Override
protected void preprocess() {
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    SpatialIndexTree<N, E> index = getSpatialIndex(relation);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    MeanVariance pagesize = new MeanVariance();
    MeanVariance ksize = new MeanVariance();
    final Logging log = getLogger();
    if (log.isVerbose()) {
        log.verbose("Approximating nearest neighbor lists to database objects");
    }
    List<E> leaves = index.getLeaves();
    FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Processing leaf nodes", leaves.size(), log) : null;
    for (E leaf : leaves) {
        N node = index.getNode(leaf);
        int size = node.getNumEntries();
        pagesize.put(size);
        if (log.isDebuggingFinest()) {
            log.debugFinest("NumEntires = " + size);
        }
        // Collect the ids in this node.
        ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
        for (int i = 0; i < size; i++) {
            ids.add(((LeafEntry) node.getEntry(i)).getDBID());
        }
        Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 3);
        // A cache miss must be distinguishable from a cached distance. The
        // map's built-in default return value is 0, which is a valid
        // distance, so without this line every lookup would look like a hit
        // and no distance would ever be computed.
        cache.defaultReturnValue(Double.NaN);
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            KNNHeap kNN = DBIDUtil.newHeap(k);
            for (DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) {
                // removeDouble both looks up and consumes the entry.
                double d = cache.removeDouble(DBIDUtil.newPair(id, id2));
                if (!Double.isNaN(d)) {
                    // Distance was already computed for the reverse pair.
                    kNN.insert(d, id2);
                } else {
                    // First encounter of this pair: compute the distance.
                    d = distanceQuery.distance(id, id2);
                    kNN.insert(d, id2);
                    // Store it with the keys reversed, so that it is found
                    // when id2 becomes the query object. The pair of an
                    // object with itself is looked up only once, so caching
                    // it would leave a stale entry behind.
                    if (!DBIDUtil.equal(id, id2)) {
                        cache.put(DBIDUtil.newPair(id2, id), d);
                    }
                }
            }
            ksize.put(kNN.size());
            storage.put(id, kNN.toKNNList());
        }
        // Every cached distance should have been consumed by the reverse lookup.
        if (log.isDebugging() && cache.size() > 0) {
            log.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
        log.incrementProcessed(progress);
    }
    log.ensureCompleted(progress);
    if (log.isVerbose()) {
        log.verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
        log.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) Object2DoubleOpenHashMap(it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance)

Example 39 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class DeLiClu method run.

/**
 * Runs DeLiClu on the given relation and returns the resulting cluster order.
 *
 * Exactly one DeLiCluTree index must be registered for the relation, and the
 * configured distance function must be a spatial primitive distance
 * function. After a kNN join, pairs are polled from the heap: expandable
 * pairs are expanded, object pairs extend the cluster order, until every
 * object of the relation has been handled.
 *
 * @param database Database to look up the index in
 * @param relation Relation to process
 * @return the cluster order
 * @throws MissingPrerequisitesException if not exactly one DeLiCluTree index
 *         is found
 * @throws IllegalArgumentException if the distance function is not a
 *         spatial primitive distance function
 * @throws AbortException if the heap runs empty before all objects were
 *         handled
 */
public ClusterOrder run(Database database, Relation<NV> relation) {
    Collection<DeLiCluTreeIndex<NV>> candidates = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
    final int numIndexes = candidates.size();
    if (numIndexes != 1) {
        throw new MissingPrerequisitesException("DeLiClu found " + numIndexes + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
    }
    final DeLiCluTreeIndex<NV> tree = candidates.iterator().next();

    final boolean spatial = getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>;
    if (!spatial) {
        throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
    }
    @SuppressWarnings("unchecked") SpatialPrimitiveDistanceFunction<NV> df = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();

    // The kNN join is computed up front.
    if (LOG.isVerbose()) {
        LOG.verbose("knnJoin...");
    }
    final Relation<KNNList> joined = knnJoin.run(relation);

    final DBIDs allIds = relation.getDBIDs();
    final int size = allIds.size();
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("DeLiClu", size, LOG);
    }
    final ClusterOrder order = new ClusterOrder(allIds, "DeLiClu Clustering", "deliclu-clustering");
    heap = new UpdatableHeap<>();

    // Seed: the first object opens the cluster order, and the pair
    // (root, root) is the first entry of the priority queue.
    final DBID seed = DBIDUtil.deref(allIds.iter());
    order.add(seed, Double.POSITIVE_INFINITY, null);
    int handled = 1;
    tree.setHandled(seed, relation.get(seed));
    final SpatialDirectoryEntry root = (SpatialDirectoryEntry) tree.getRootEntry();
    heap.add(new SpatialObjectPair(0., root, root, true));

    while (handled < size) {
        if (heap.isEmpty()) {
            throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
        }
        final SpatialObjectPair next = heap.poll();
        if (next.isExpandable) {
            // Pair of nodes: expand and continue polling.
            expandNodes(tree, df, next, joined);
            continue;
        }
        // Pair of objects: mark the first one as handled.
        final LeafEntry first = (LeafEntry) next.entry1;
        final LeafEntry second = (LeafEntry) next.entry2;
        final DBID firstId = first.getDBID();
        final IndexTreePath<DeLiCluEntry> path = tree.setHandled(firstId, relation.get(firstId));
        if (path == null) {
            throw new RuntimeException("snh: parent(" + firstId + ") = null!!!");
        }
        // Append to the cluster order.
        order.add(firstId, next.distance, second.getDBID());
        handled++;
        // Reinsert the expanded leafs.
        reinsertExpanded(df, tree, path, joined);
        if (progress != null) {
            progress.setProcessed(handled, LOG);
        }
    }
    LOG.ensureCompleted(progress);
    return order;
}
Also used : DeLiCluEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluEntry) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MissingPrerequisitesException(de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException) SpatialDirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DeLiCluTreeIndex(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeIndex) SpatialPrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 40 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class PerplexityAffinityMatrixBuilder method computePij.

/**
 * Compute the affinity matrix pij from the distance matrix.
 *
 * Each row is first filled by {@code computePi}. The matrix is then made
 * symmetric by adding the transposed entries, and scaled such that the
 * off-diagonal entries sum up to {@code initialScale} (before the lower
 * bound {@code MIN_PIJ} is applied to each entry).
 *
 * @param dist Distance matrix.
 * @param perplexity Desired perplexity
 * @param initialScale Initial scale
 * @return Affinity matrix pij
 */
protected static double[][] computePij(double[][] dist, double perplexity, double initialScale) {
    final int n = dist.length;
    final double logPerplexity = FastMath.log(perplexity);
    final double[][] pij = new double[n][n];

    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Optimizing perplexities", n, LOG);
    }
    Duration timer = null;
    if (LOG.isStatistics()) {
        timer = LOG.newDuration(PerplexityAffinityMatrixBuilder.class.getName() + ".runtime.pijmatrix").begin();
    }
    MeanVariance sigmas = null;
    if (LOG.isStatistics()) {
        sigmas = new MeanVariance();
    }

    for (int row = 0; row < n; row++) {
        final double beta = computePi(row, dist[row], pij[row], perplexity, logPerp(logPerplexity));
        if (sigmas != null) {
            // Track sigma, derived from beta (0 when beta is not positive).
            if (beta > 0) {
                sigmas.put(FastMath.sqrt(.5 / beta));
            } else {
                sigmas.put(0.);
            }
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);

    if (LOG.isStatistics()) {
        // timer and sigmas were created above when statistics are enabled.
        final String prefix = PerplexityAffinityMatrixBuilder.class.getName();
        LOG.statistics(timer.end());
        LOG.statistics(new DoubleStatistic(prefix + ".sigma.average", sigmas.getMean()));
        LOG.statistics(new DoubleStatistic(prefix + ".sigma.stddev", sigmas.getSampleStddev()));
    }

    // Symmetrize into the lower triangle and accumulate its sum. Only half
    // of the matrix is traversed.
    double total = 0.;
    for (int row = 1; row < n; row++) {
        final double[] lower = pij[row];
        for (int col = 0; col < row; col++) {
            lower[col] += pij[col][row];
            total += lower[col];
        }
    }

    // Scaling taken from original tSNE code:
    final double factor = initialScale / (2. * total);
    for (int row = 1; row < n; row++) {
        final double[] lower = pij[row];
        for (int col = 0; col < row; col++) {
            // Scale, enforce the lower bound, and mirror to the upper triangle.
            final double scaled = MathUtil.max(lower[col] * factor, MIN_PIJ);
            pij[col][row] = scaled;
            lower[col] = scaled;
        }
    }
    return pij;
}

/**
 * Identity helper that passes the precomputed log-perplexity through unchanged.
 *
 * @param value precomputed logarithm of the perplexity
 * @return the same value
 */
private static double logPerp(double value) {
    return value;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12