Search in sources :

Example 71 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class SimpleCOP method run.

/**
 * Score every object of the relation: a correlation model is derived from
 * the object's nearest neighbors (the object itself is removed from the
 * neighbor set), and the object's distance to that model, relative to the
 * model's standard deviation, is passed through the error function.
 * <p>
 * Besides the score, the error vector (negated), the data projections, the
 * correlation dimensionality and the model itself are stored per object and
 * attached to the result as child relations.
 *
 * @param database Database (not referenced in this method body)
 * @param data Relation with the objects to score
 * @return Outlier result carrying the scores and the auxiliary relations
 * @throws IllegalStateException declared by the signature; not thrown
 *         directly by this method body
 */
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
    final KNNQuery<V> neighborQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
    final DBIDs ids = data.getDBIDs();
    // Per-object output stores.
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    final WritableDataStore<double[]> errorVectors = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
    final WritableDataStore<double[][]> projections = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[][].class);
    final WritableIntegerDataStore dimensions = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
    final WritableDataStore<CorrelationAnalysisSolution<?>> solutions = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);

    // Progress is only tracked when verbose logging is enabled.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG);
    }
    for (DBIDIter it = data.iterDBIDs(); it.valid(); it.advance()) {
        // Query k + 1 neighbors, then drop the query object from the set.
        final KNNList knn = neighborQuery.getKNNForDBID(it, k + 1);
        final ModifiableDBIDs neighborIds = DBIDUtil.newArray(knn);
        neighborIds.remove(it);
        // TODO: do we want to use the query point as centroid?
        final CorrelationAnalysisSolution<V> model = dependencyDerivator.generateModel(data, neighborIds);
        final double stddev = model.getStandardDeviation();
        final double distance = model.distance(data.get(it));
        scores.putDouble(it, NormalDistribution.erf(distance / (stddev * MathUtil.SQRT2)));
        // The error vector is stored with its sign flipped.
        errorVectors.put(it, times(model.errorVector(data.get(it)), -1));
        projections.put(it, model.dataProjections(data.get(it)));
        dimensions.putInt(it, model.getCorrelationDimensionality());
        solutions.put(it, model);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Primary result: the score relation with its metadata.
    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", scores, ids);
    final OutlierScoreMeta meta = new ProbabilisticOutlierScore();
    final OutlierResult outlierResult = new OutlierResult(meta, scoreRelation);
    // Auxiliary per-object relations, attached as children.
    outlierResult.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, dimensions, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, errorVectors, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, projections, ids));
    outlierResult.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), solutions, ids));
    return outlierResult;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CorrelationAnalysisSolution(de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 72 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class ComputeSimilarityMatrixImage method computeSimilarityMatrixImage.

/**
 * Render the pairwise distances of an object ordering as a gray-scale image.
 * <p>
 * The upper triangle (diagonal included) of the distance matrix is visited
 * twice: the first pass collects the minimum and maximum of the usable
 * distances, the second pass scales each distance and paints the pixel and
 * its mirrored counterpart.
 *
 * @param relation Relation
 * @param iter DBID iterator supplying the object order
 * @return result object built from the image, the relation and the order
 * @throws IllegalStateException if the iterator does not yield exactly as
 *         many objects as the relation contains
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
    final ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
    while (iter.valid()) {
        order.add(iter);
        iter.advance();
    }
    if (order.size() != relation.size()) {
        throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    final DistanceQuery<O> distances = distanceFunction.instantiate(relation);
    final int size = order.size();
    // Progress is advanced once per row in each of the two passes, so the
    // total is 2 * size rather than size * (size + 1).
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Similarity Matrix Image", 2 * size, LOG);
    }
    // Note: we assume that we have an efficient distance cache available,
    // since we are using 2*O(n*n) distance computations.

    // Pass 1: value range of the distances.
    final DoubleMinMax minmax = new DoubleMinMax();
    final DBIDArrayIter scanRow = order.iter();
    final DBIDArrayIter scanCol = order.iter();
    while (scanRow.valid()) {
        for (scanCol.seek(scanRow.getOffset()); scanCol.valid(); scanCol.advance()) {
            final double d = distances.distance(scanRow, scanCol);
            if (Double.isNaN(d) || Double.isInfinite(d)) {
                continue;
            }
            // With skipzero set, only strictly positive distances count.
            if (skipzero && !(d > 0.0)) {
                continue;
            }
            minmax.put(d);
        }
        LOG.incrementProcessed(prog);
        scanRow.advance();
    }

    // Linear map derived from the observed range; the range is only
    // inverted when it is strictly positive.
    final double range = minmax.getMax() - minmax.getMin();
    final double zoom = range > 0.0 ? 1. / range : range;
    final LinearScaling scale = new LinearScaling(zoom, -minmax.getMin() * zoom);

    // Pass 2: paint the image, mirroring each pixel across the diagonal.
    final BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    final DBIDArrayIter paintRow = order.iter();
    final DBIDArrayIter paintCol = order.iter();
    for (int x = 0; x < size && paintRow.valid(); x++, paintRow.advance()) {
        paintCol.seek(paintRow.getOffset());
        for (int y = x; y < size && paintCol.valid(); y++, paintCol.advance()) {
            double value = distances.distance(paintRow, paintCol);
            if (value > 0.0) {
                value = scale.getScaled(value);
            }
            // Apply extra scaling
            if (scaling != null) {
                value = scaling.getScaled(value);
            }
            // Low 8 bits of the scaled value become the gray level.
            final int gray = 0xFF & (int) (255 * value);
            final int argb = 0xff000000 | (gray << 16) | (gray << 8) | gray;
            img.setRGB(x, y, argb);
            img.setRGB(y, x, argb);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    return new SimilarityMatrix(img, relation, order);
}
Also used : LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) BufferedImage(java.awt.image.BufferedImage) ImageIO(javax.imageio.ImageIO)

Example 73 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class DiSHPreferenceVectorIndex method initialize.

/**
 * Compute a preference vector for every object of the relation and keep it
 * in {@code storage}.
 * <p>
 * For each object, one range query per dimension (radius
 * {@code epsilon[d]}) collects the neighbors in that dimension; the
 * per-dimension neighbor sets are then handed to
 * {@code determinePreferenceVector}. A single configured epsilon value is
 * replicated to all dimensions first.
 *
 * @throws EmptyDataException if the relation is {@code null} or empty
 */
@Override
public void initialize() {
    if (relation == null || relation.size() == 0) {
        throw new EmptyDataException();
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, long[].class);
    if (LOG.isDebugging()) {
        // Arrays.toString prints the values; Arrays.asList on a double[]
        // would wrap the array itself and print only its identity hash.
        LOG.debugFine(new StringBuilder() //
            .append("eps ").append(Arrays.toString(epsilon)) //
            .append("\n minpts ").append(minpts) //
            .append("\n strategy ").append(strategy).toString());
    }
    long start = System.currentTimeMillis();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null;
    // If only one epsilon value was specified, use it for every dimension.
    int dim = RelationUtil.dimensionality(relation);
    if (epsilon.length == 1 && dim != 1) {
        double eps = epsilon[0];
        epsilon = new double[dim];
        Arrays.fill(epsilon, eps);
    }
    RangeQuery<V>[] rangeQueries = initRangeQueries(relation, dim);
    // Debug message buffer, reused for every object; null when not debugging.
    StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        if (msg != null) {
            msg.setLength(0);
            msg.append("\nid = ").append(DBIDUtil.toString(it));
        }
        // determine neighbors in each dimension
        ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim];
        for (int d = 0; d < dim; d++) {
            allNeighbors[d] = DBIDUtil.newHashSet(rangeQueries[d].getRangeForDBID(it, epsilon[d]));
        }
        if (msg != null) {
            for (int d = 0; d < dim; d++) {
                msg.append("\n neighbors [").append(d).append(']').append(" (").append(allNeighbors[d].size()).append(") = ").append(allNeighbors[d]);
            }
        }
        storage.put(it, determinePreferenceVector(relation, allNeighbors, msg));
        if (msg != null) {
            LOG.debugFine(msg.toString());
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    // Report the wall-clock time of the preprocessing when verbose.
    if (LOG.isVerbose()) {
        long end = System.currentTimeMillis();
        long elapsedTime = end - start;
        LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
    }
}
Also used : EmptyDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.EmptyDataException) RangeQuery(de.lmu.ifi.dbs.elki.database.query.range.RangeQuery) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 74 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class GaussianAffinityMatrixBuilder method buildDistanceMatrix.

/**
 * Compute the symmetric matrix of pairwise distances between all ids.
 * <p>
 * Each distance is multiplied by itself before being stored, unless the
 * distance function reports {@code isSquared()}. Only pairs with distinct
 * offsets are computed, so the diagonal keeps the array default of 0.
 *
 * @param ids DBIDs
 * @param dq Distance query
 * @return Distance matrix, indexed by the offsets of the ids
 */
protected double[][] buildDistanceMatrix(ArrayDBIDs ids, DistanceQuery<?> dq) {
    final int n = ids.size();
    final double[][] matrix = new double[n][n];
    final boolean needsSquaring = !dq.getDistanceFunction().isSquared();
    // Progress counts the n * (n - 1) / 2 distinct pairs.
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Computing distance matrix", (n * (n - 1)) >>> 1, LOG);
    }
    Duration timer = null;
    if (LOG.isStatistics()) {
        timer = LOG.newDuration(this.getClass().getName() + ".runtime.distancematrix").begin();
    }
    final DBIDArrayIter ix = ids.iter();
    final DBIDArrayIter iy = ids.iter();
    for (ix.seek(0); ix.valid(); ix.advance()) {
        final int x = ix.getOffset();
        final double[] rowX = matrix[x];
        // Only the pairs after x are computed; each value is mirrored.
        for (iy.seek(x + 1); iy.valid(); iy.advance()) {
            final double dist = dq.distance(ix, iy);
            final double value = needsSquaring ? (dist * dist) : dist;
            final int y = iy.getOffset();
            rowX[y] = value;
            matrix[y][x] = value;
        }
        if (prog != null) {
            // Number of pairs finished once the first `done` rows are complete.
            final int done = x + 1;
            prog.setProcessed(done * n - ((done * (done + 1)) >>> 1), LOG);
        }
    }
    LOG.ensureCompleted(prog);
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    return matrix;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 75 with FiniteProgress

use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.

the class IntrinsicNearestNeighborAffinityMatrixBuilder method computePij.

/**
 * Compute the sparse pij using the nearest neighbors only.
 * <p>
 * Phase 1 queries the neighbors of every object, converts them with
 * {@code convertNeighbors} and fills one row of {@code pij} and
 * {@code indices} per object via {@code computeSigma}. Phase 2 sums the
 * geometric means of all mutual entries, and phase 3 replaces every mutual
 * entry by its scaled geometric mean (bounded below by {@code MIN_PIJ}) and
 * zeroes every entry whose reverse entry does not exist.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output of distances
 * @param indices Output of indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
    Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
    final double logPerp = FastMath.log(perplexity);
    // Scratch arrays, resizable
    DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
    IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
    // Compute nearest-neighbor sparse affinity matrix
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
    MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
    Mean mid = LOG.isStatistics() ? new Mean() : null;
    for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
        dists.clear();
        inds.clear();
        // One extra neighbor is requested on top of numberOfNeighbours.
        KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
        convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
        // The output row is allocated here and filled by computeSigma.
        final double[] row = new double[dists.size()];
        pij[ix.getOffset()] = row;
        double beta = computeSigma(ix.getOffset(), dists, perplexity, logPerp, row);
        if (mv != null) {
            // Sigma
            mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
        }
        indices[ix.getOffset()] = inds.toArray();
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (mid != null) {
        // getName() keeps the key free of the "class " prefix that
        // Class.toString() would add, matching the other statistic keys.
        LOG.statistics(new DoubleStatistic(getClass().getName() + ".average-original-id", mid.getMean()));
    }
    // Sum of the sparse affinity matrix:
    double sum = 0.;
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            if (j > i) {
                // Exploit symmetry.
                continue;
            }
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
            }
        }
    }
    // Each mutual pair was counted once above, hence the factor 2.
    final double scale = initialScale / (2 * sum);
    for (int i = 0; i < pij.length; i++) {
        final double[] pij_i = pij[i];
        for (int offi = 0; offi < pij_i.length; offi++) {
            int j = indices[i][offi];
            assert (i != j);
            int offj = containsIndex(indices[j], i);
            if (offj >= 0) {
                // Found
                assert (indices[j][offj] == i);
                // Exploit symmetry:
                if (i < j) {
                    // Symmetrize
                    final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
                    pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
                }
            } else {
                // Not found, so zero.
                pij_i[offi] = 0;
            }
        }
    }
    // Guard on the objects themselves rather than re-querying the log
    // level, which may have changed since they were (not) created.
    if (timer != null) {
        LOG.statistics(timer.end());
    }
    if (mv != null) {
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
        LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray)

Aggregations

FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)145 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)78 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)34 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)33 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)29 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)25 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)25 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)23 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)23 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)23 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)21 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)20 ArrayList (java.util.ArrayList)18 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)17 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)17 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)16 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)16 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12