Search in sources :

Example 76 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class SNE method run.

/**
 * Runs SNE on the given relation and materializes the optimized solution as
 * a relation of {@code dim}-dimensional {@link DoubleVector}s.
 *
 * @param relation Input relation
 * @return Relation containing one projected vector per object
 */
public Relation<DoubleVector> run(Relation<O> relation) {
    final AffinityMatrix affinities = affinity.computeAffinityMatrix(relation, 1.);
    // Start from a random layout with one row per entry of the affinity matrix.
    final int numObjects = affinities.size();
    final double[][] embedding = randomInitialSolution(numObjects, dim, random.getSingleThreadedRandom());
    // Zero the projectedDistances statistic before optimizing, log it afterwards.
    projectedDistances.setLong(0L);
    optimizeSNE(affinities, embedding);
    LOG.statistics(projectedDistances);
    // Remove the original (unprojected) data unless configured otherwise.
    removePreviousRelation(relation);
    // Copy the solution rows into a data store keyed by object id.
    final DBIDs ids = relation.getDBIDs();
    final WritableDataStore<DoubleVector> store = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
    final VectorFieldTypeInformation<DoubleVector> otype = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    DBIDArrayIter iter = affinities.iterDBIDs();
    while (iter.valid()) {
        // The iterator offset selects the matching row of the solution.
        store.put(iter, DoubleVector.wrap(embedding[iter.getOffset()]));
        iter.advance();
    }
    return new MaterializedRelation<>("SNE", "SNE", otype, store, ids);
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 77 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class EstimateIntrinsicDimensionality method run.

/**
 * Estimates the intrinsic dimensionality on a random sample of the relation
 * and logs the median of the per-object estimates as a statistic.
 *
 * @param database Database
 * @param relation Relation
 * @return always {@code null}; the estimate is only reported via the log
 */
public Result run(Database database, Relation<O> relation) {
    final DBIDs allids = relation.getDBIDs();
    // Number of samples to draw: values above 1 are taken as a count,
    // otherwise as a fraction of the data set size.
    final int ssize;
    if (samples > 1.) {
        ssize = (int) samples;
    } else {
        ssize = (int) Math.ceil(samples * allids.size());
    }
    // Number of neighbors to fetch (+ query point), same count/fraction rule.
    final int kk;
    if (krate > 1.) {
        kk = 1 + (int) krate;
    } else {
        kk = 1 + (int) Math.ceil(krate * allids.size());
    }
    final DBIDs sampleids = DBIDUtil.randomSample(allids, ssize, RandomFactory.DEFAULT);
    final DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnq = database.getKNNQuery(dq, kk);
    final double[] idim = new double[ssize];
    // Count the estimates actually produced; only that prefix of idim is used.
    int count = 0;
    for (DBIDIter iter = sampleids.iter(); iter.valid(); iter.advance()) {
        idim[count] = estimator.estimate(knnq, iter, kk);
        ++count;
    }
    // -1 is reported unless more than one estimate is available.
    double id = -1;
    if (count > 1) {
        id = QuickSelect.median(idim, 0, count);
    }
    LOG.statistics(new DoubleStatistic(EstimateIntrinsicDimensionality.class.getName() + ".intrinsic-dimensionality", id));
    return null;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 78 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class RangeQuerySelectivity method run.

/**
 * Runs a range query with the configured radius for every object of a random
 * sample and logs statistics on the result set sizes: mean and sample
 * standard deviation, both raw and divided by the relation size, plus the
 * sample size.
 *
 * @param database Database
 * @param relation Relation
 * @return always {@code null}; results are only reported via the log
 */
public Result run(Database database, Relation<V> relation) {
    final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final RangeQuery<V> rangeQuery = database.getRangeQuery(distQuery, radius);
    final MeanVariance resultSizes = new MeanVariance();
    final DBIDs sampled = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    // Progress is only tracked when verbose logging is enabled.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Performing range queries", sampled.size(), LOG);
    }
    DBIDIter iter = sampled.iter();
    while (iter.valid()) {
        resultSizes.put(rangeQuery.getRangeForDBID(iter, radius).size());
        LOG.incrementProcessed(progress);
        iter.advance();
    }
    LOG.ensureCompleted(progress);
    final String prefix = this.getClass().getName();
    final double mean = resultSizes.getMean();
    LOG.statistics(new DoubleStatistic(prefix + ".mean", mean));
    final double stddev = resultSizes.getSampleStddev();
    LOG.statistics(new DoubleStatistic(prefix + ".std", stddev));
    // Normalized variants: divide by the size of the full relation.
    LOG.statistics(new DoubleStatistic(prefix + ".norm.mean", mean / relation.size()));
    LOG.statistics(new DoubleStatistic(prefix + ".norm.std", stddev / relation.size()));
    LOG.statistics(new LongStatistic(prefix + ".samplesize", sampled.size()));
    return null;
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 79 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class KNNDistancesSampler method run.

/**
 * Computes the kNN distance for a random sample of the objects in the given
 * relation and returns the collected distances.
 *
 * @param database Database
 * @param relation Relation
 * @return Result wrapping the sampled kNN distances together with {@code k}
 */
public KNNDistanceOrderResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Queries ask for k + 1 neighbors (presumably to account for the query
    // object itself - confirm against the KNNQuery contract).
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
    // Sample parameter: up to 1 is a fraction of the relation, above 1 a count.
    final int size;
    if (sample <= 1.) {
        size = (int) Math.ceil(relation.size() * sample);
    } else {
        size = (int) sample;
    }
    final DBIDs sampledIds = DBIDUtil.randomSample(relation.getDBIDs(), size, rnd);
    final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling kNN distances", size, LOG) : null;
    final double[] knnDistances = new double[size];
    int pos = 0;
    for (DBIDIter iditer = sampledIds.iter(); iditer.valid(); iditer.advance()) {
        final KNNList neighbors = knnQuery.getKNNForDBID(iditer, k + 1);
        knnDistances[pos] = neighbors.getKNNDistance();
        LOG.incrementProcessed(prog);
        pos++;
    }
    LOG.ensureCompleted(prog);
    return new KNNDistanceOrderResult(knnDistances, k);
}
Also used : KNNDistanceOrderResult(de.lmu.ifi.dbs.elki.algorithm.KNNDistancesSampler.KNNDistanceOrderResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 80 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class RANSACCovarianceMatrixBuilder method processIds.

/**
 * Builds a covariance matrix from the largest consensus set found by random
 * sampling.
 *
 * In each of up to {@code iterations} rounds, {@code dim + 1} objects are
 * drawn at random and a covariance matrix is fitted to them. Every object in
 * {@code ids} whose quadratic form with the inverted sample matrix (taken on
 * the difference to the sample centroid) is below the 0.85 quantile of the
 * chi-squared distribution with {@code dim} degrees of freedom is added to
 * the support set. The largest support set seen is kept.
 *
 * @param ids Objects to process
 * @param relation Relation holding the vectors
 * @return Sample covariance matrix of the best support set, or of all
 *         {@code ids} when the best support set has at most {@code dim}
 *         members
 */
@Reference(//
    title = "Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography", //
    authors = "M.A. Fischler, R.C. Bolles", //
    booktitle = "Communications of the ACM, Vol. 24 Issue 6", //
    url = "http://dx.doi.org/10.1145/358669.358692")
@Override
public double[][] processIds(DBIDs ids, Relation<? extends NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    ModifiableDBIDs best = DBIDUtil.newHashSet(), support = DBIDUtil.newHashSet();
    final double threshold = ChiSquaredDistribution.quantile(0.85, dim);
    CovarianceMatrix cv = new CovarianceMatrix(dim);
    Random random = rnd.getSingleThreadedRandom();
    for (int i = 0; i < iterations; i++) {
        // Fit a candidate model to dim + 1 randomly chosen objects.
        DBIDs sample = DBIDUtil.randomSample(ids, dim + 1, random);
        cv.reset();
        for (DBIDIter it = sample.iter(); it.valid(); it.advance()) {
            cv.put(relation.get(it));
        }
        double[] centroid = cv.getMeanVector();
        double[][] p = inverse(cv.destroyToSampleMatrix());
        // Collect all objects that fall below the threshold for this model.
        support.clear();
        for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
            double[] vec = minusEquals(relation.get(id).toArray(), centroid);
            double sqlen = transposeTimesTimes(vec, p, vec);
            if (sqlen < threshold) {
                support.add(id);
            }
        }
        if (support.size() > best.size()) {
            // Keep the larger set; the old best set is recycled as scratch space.
            ModifiableDBIDs swap = best;
            best = support;
            support = swap;
        }
        // Test "best" here: after the swap above, "support" holds the previous
        // (smaller) set, so checking it would delay the exit by a full round.
        if (best.size() >= ids.size()) {
            // Can't get better than this!
            break;
        }
    }
    // Fall back to regular PCA if too few samples.
    return CovarianceMatrix.make(relation, best.size() > dim ? best : ids).destroyToSampleMatrix();
}
Also used : Random(java.util.Random) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Reference(de.lmu.ifi.dbs.elki.utilities.documentation.Reference)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14