Search in sources :

Example 86 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class KNNKernelDensityMinimaClustering method run.

/**
 * Run the clustering algorithm on a data relation.
 *
 * The method works in two logged steps. First, a density value is computed
 * for every object from the values of up to {@code k} predecessors and up to
 * {@code k} successors in the order sorted by dimension {@code dim}. Second,
 * the sorted sequence is scanned with a sliding window of
 * {@code 2 * minwindow + 1} density values, and a new cluster is cut off
 * whenever the tested window slot is strictly smaller than all other slots.
 *
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    final int size = ids.size();
    // Sort by the sole dimension
    ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
    // Density storage, initialized to 0 for every object.
    WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // iter walks the current object, iter2 is repositioned freely to visit neighbors.
    DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
    StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
    LOG.beginStep(sprog, 1, "Kernel density estimation.");
    {
        // Holds the absolute differences to the (at most 2k) window neighbors.
        double[] scratch = new double[2 * k];
        iter.seek(0);
        for (int i = 0; i < size; i++, iter.advance()) {
            // Current value.
            final double curv = relation.get(iter).doubleValue(dim);
            // Window [pre, pos] clipped to the array; prek/posk are the neighbor
            // counts before and after position i.
            final int pre = Math.max(i - k, 0), prek = i - pre;
            final int pos = Math.min(i + k, size - 1), posk = pos - i;
            // Differences to the predecessors.
            iter2.seek(pre);
            for (int j = 0; j < prek; j++, iter2.advance()) {
                scratch[j] = curv - relation.get(iter2).doubleValue(dim);
            }
            assert (iter2.getOffset() == i);
            // Skip the current object itself.
            iter2.advance();
            // Differences to the successors.
            for (int j = 0; j < posk; j++, iter2.advance()) {
                scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
            }
            assert (prek + posk >= k);
            // Bandwidth taken from the collected differences.
            // NOTE(review): presumably the k-th smallest difference; confirm
            // whether QuickSelect expects a 0-based or 1-based rank here.
            double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
            switch(mode) {
                case BALLOON:
                    {
                        // The current object's density is the sum of the kernel
                        // values of all window differences scaled by its kdist.
                        double dens = 0.;
                        if (kdist > 0.) {
                            for (int j = 0; j < prek + posk; j++) {
                                dens += kernel.density(scratch[j] / kdist);
                            }
                        } else {
                            // Zero bandwidth: avoid dividing by zero.
                            dens = Double.POSITIVE_INFINITY;
                        }
                        assert (iter.getOffset() == i);
                        density.putDouble(iter, dens);
                        break;
                    }
                case SAMPLE:
                    {
                        // The current object adds a kernel contribution (scaled by
                        // its own kdist) to the density of each window neighbor.
                        if (kdist > 0.) {
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
                            }
                        } else {
                            // Zero bandwidth: only neighbors whose difference is not
                            // positive are set to infinite density.
                            iter2.seek(pre);
                            for (int j = 0; j < prek; j++, iter2.advance()) {
                                double delta = curv - relation.get(iter2).doubleValue(dim);
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                            assert (iter2.getOffset() == i);
                            // Skip the current object itself.
                            iter2.advance();
                            for (int j = 0; j < posk; j++, iter2.advance()) {
                                double delta = relation.get(iter2).doubleValue(dim) - curv;
                                if (!(delta > 0.)) {
                                    density.putDouble(iter2, Double.POSITIVE_INFINITY);
                                }
                            }
                        }
                        break;
                    }
                default:
                    throw new UnsupportedOperationException("Unknown mode specified.");
            }
        }
    }
    LOG.beginStep(sprog, 2, "Local minima detection.");
    Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
    {
        // Ring buffer of the most recent density values.
        double[] scratch = new double[2 * minwindow + 1];
        // Offset where the cluster currently being collected starts.
        int begin = 0;
        int halfw = (minwindow + 1) >> 1;
        iter.seek(0);
        // Fill initial buffer.
        for (int i = 0; i < size; i++, iter.advance()) {
            // m: slot written in this round; t: slot tested for being a minimum.
            final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
            scratch[m] = density.doubleValue(iter);
            // Only test once more than scratch.length values have been seen.
            if (i > scratch.length) {
                // Smallest density among all slots except the tested one.
                double min = Double.POSITIVE_INFINITY;
                for (int j = 0; j < scratch.length; j++) {
                    if (j != t && scratch[j] < min) {
                        min = scratch[j];
                    }
                }
                // Local minimum:
                if (scratch[t] < min) {
                    // NOTE(review): slot t corresponds to offset i - minwindow - 1,
                    // while the cut starts at i - minwindow + 1 - confirm the
                    // intended offset.
                    int end = i - minwindow + 1;
                    {
                        // Test on which side the kNN is
                        // NOTE(review): for minwindow <= 1, end + halfw equals i + 1,
                        // which may lie past the last element - confirm the
                        // constraints on minwindow. Also, if the sort is ascending,
                        // both differences below are non-positive as written.
                        iter2.seek(end);
                        double curv = relation.get(iter2).doubleValue(dim);
                        iter2.seek(end - halfw);
                        double left = relation.get(iter2).doubleValue(dim) - curv;
                        iter2.seek(end + halfw);
                        double right = curv - relation.get(iter2).doubleValue(dim);
                        if (left < right) {
                            end++;
                        }
                    }
                    // Emit the objects at offsets [begin, end) as one cluster.
                    iter2.seek(begin);
                    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
                    for (int j = 0; j < end - begin; j++, iter2.advance()) {
                        cids.add(iter2);
                    }
                    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
                    begin = end;
                }
            }
        }
        // Extract last cluster
        int end = size;
        iter2.seek(begin);
        ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
        for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
        }
        clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
    }
    LOG.ensureCompleted(sprog);
    return clustering;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) VectorUtil(de.lmu.ifi.dbs.elki.data.VectorUtil) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)

Example 87 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class FarthestSumPointsInitialMeans method chooseInitialMedoids.

/**
 * Choose initial medoids by repeatedly picking the candidate whose summed
 * distance to the previously chosen medoids is largest.
 *
 * A random object from {@code ids} serves as the starting point. If
 * {@code dropfirst} is set, this starting object is only used to find the
 * first real medoid and is then discarded; otherwise it is kept as the
 * first medoid.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects; only these are considered
 * @param distQ Distance query
 * @return the chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    // Running sum of distances to the chosen medoids; NaN marks chosen objects.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    means.add(first);
    if (!dropfirst) {
        // The starting object stays in the result, so it must not be chosen again.
        store.putDouble(first, Double.NaN);
    }
    DBIDVar prevmean = DBIDUtil.newVar(first);
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object:
        double maxdist = Double.NEGATIVE_INFINITY;
        // Iterate the candidates only: the store is allocated for ids, not for
        // the whole relation behind the distance query.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double dsum = prev + distQ.distance(prevmean, it);
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, dsum);
            }
            if (dsum > maxdist) {
                maxdist = dsum;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    store.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 88 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class DBOutlierScore method computeOutlierScores.

/**
 * Assigns each object the score {@code 1 - (neighbors within d) / (relation size)},
 * where the neighbors are the result of a range query with radius {@code d}.
 *
 * @param database Database used to obtain the distance and range queries
 * @param relation Relation to process
 * @param d Query radius
 * @return the score store
 */
@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
    final DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<O> rq = database.getRangeQuery(dq);
    // Held as a double so that the division in the loop is floating point.
    final double total = dq.getRelation().size();
    final WritableDoubleDataStore result = DataStoreUtil.makeDoubleStorage(dq.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("DBOutlier scores", dq.getRelation().size(), LOG);
    }
    // TODO: use bulk when implemented.
    final DBIDIter cursor = dq.getRelation().iterDBIDs();
    while (cursor.valid()) {
        // Number of objects the range query returns for this object.
        final int inRange = rq.getRangeForDBID(cursor, d).size();
        result.putDouble(cursor, 1.0 - inRange / total);
        LOG.incrementProcessed(progress);
        cursor.advance();
    }
    LOG.ensureCompleted(progress);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 89 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class KNNDD method run.

/**
 * Runs the algorithm in the timed evaluation part.
 *
 * In a first pass, the kNN distance of every object and the last entry of
 * its kNN result are stored. In a second pass, the score of an object is its
 * own kNN distance divided by the kNN distance of that stored neighbor.
 *
 * @param relation Data relation
 * @return Outlier result holding the scores and their observed minimum and maximum
 */
public OutlierResult run(Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = relation.getDistanceQuery(getDistanceFunction());
    final KNNQuery<O> knnQuery = relation.getKNNQuery(distanceQuery, k);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
    // Temporary per-object storage, released once the scores are computed.
    WritableDoubleDataStore knnDist = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    WritableDBIDDataStore neighbor = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    DBIDVar var = DBIDUtil.newVar();
    // Find nearest neighbors, and store the distances.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final KNNList knn = knnQuery.getKNNForDBID(it, k);
        knnDist.putDouble(it, knn.getKNNDistance());
        // Remember the last entry of the kNN result.
        neighbor.put(it, knn.assignVar(knn.size() - 1, var));
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    prog = LOG.isVerbose() ? new FiniteProgress("kNN distance descriptor", relation.size(), LOG) : null;
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        // Distance
        double d = knnDist.doubleValue(it);
        // Distance of neighbor
        double nd = knnDist.doubleValue(neighbor.assignVar(it, var));
        // Guard the division: 0/0 counts as 1, a positive distance over 0 as infinity.
        double knndd = nd > 0 ? d / nd : d > 0 ? Double.POSITIVE_INFINITY : 1.;
        // Primitive setter, avoids boxing the score.
        scores.putDouble(it, knndd);
        minmax.put(knndd);
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // The temporary stores are not referenced by the result.
    knnDist.destroy();
    neighbor.destroy();
    DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Data Descriptor", "knndd-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
    return new OutlierResult(meta, scoreres);
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) WritableDBIDDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 90 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class GaussianModel method run.

/**
 * Run the algorithm: score every object by a Gaussian density computed from
 * the mean and population covariance matrix of the relation.
 *
 * If {@code invert} is set, the densities are rescaled to
 * {@code (max - p) / max}; otherwise the raw densities are returned with
 * inverted score metadata.
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    final DoubleMinMax extrema = new DoubleMinMax();
    // Storage for the resulting scores.
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // Mean vector and covariance matrix of the data.
    final CovarianceMatrix builder = CovarianceMatrix.make(relation);
    final double[] center = builder.getMeanVector(relation).toArray();
    final double[][] cov = builder.destroyToPopulationMatrix();
    final double[][] covInverse = inverse(cov);
    // Normalization factor of the Gaussian PDF.
    final double determinant = new LUDecomposition(cov).det();
    final int dims = RelationUtil.dimensionality(relation);
    final double norm = 1.0 / FastMath.sqrt(MathUtil.powi(MathUtil.TWOPI, dims) * determinant);
    // Evaluate the density at every object.
    final DBIDIter cursor = relation.iterDBIDs();
    while (cursor.valid()) {
        final double[] centered = minusEquals(relation.get(cursor).toArray(), center);
        // Quadratic form of the centered vector with the inverse covariance.
        final double quad = transposeTimesTimes(centered, covInverse, centered);
        final double pdf = norm * FastMath.exp(-quad * .5);
        extrema.put(pdf);
        scores.putDouble(cursor, pdf);
        cursor.advance();
    }
    final OutlierScoreMeta meta;
    if (!invert) {
        meta = new InvertedOutlierScoreMeta(extrema.getMin(), extrema.getMax(), 0.0, Double.POSITIVE_INFINITY);
    } else {
        // Avoid dividing by zero when the largest density is 0.
        double top = extrema.getMax();
        if (top == 0) {
            top = 1.;
        }
        final DBIDIter rescale = relation.iterDBIDs();
        while (rescale.valid()) {
            scores.putDouble(rescale, (top - scores.doubleValue(rescale)) / top);
            rescale.advance();
        }
        meta = new BasicOutlierScoreMeta(0.0, 1.0);
    }
    final DoubleRelation res = new MaterializedDoubleRelation("Gaussian Model Outlier Score", "gaussian-model-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, res);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) LUDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)90 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)70 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)70 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)70 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)70 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)68 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)61 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)43 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)33 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)20 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)12 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)12 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)12 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)12 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9