Search in sources :

Example 31 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class DeLiClu method run.

public ClusterOrder run(Database database, Relation<NV> relation) {
    Collection<DeLiCluTreeIndex<NV>> indexes = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
    if (indexes.size() != 1) {
        throw new MissingPrerequisitesException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
    }
    DeLiCluTreeIndex<NV> index = indexes.iterator().next();
    if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
        throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
    }
    @SuppressWarnings("unchecked") SpatialPrimitiveDistanceFunction<NV> distFunction = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();
    // first do the knn-Join
    if (LOG.isVerbose()) {
        LOG.verbose("knnJoin...");
    }
    Relation<KNNList> knns = knnJoin.run(relation);
    DBIDs ids = relation.getDBIDs();
    final int size = ids.size();
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", size, LOG) : null;
    ClusterOrder clusterOrder = new ClusterOrder(ids, "DeLiClu Clustering", "deliclu-clustering");
    heap = new UpdatableHeap<>();
    // add start object to cluster order and (root, root) to priority queue
    DBID startID = DBIDUtil.deref(ids.iter());
    clusterOrder.add(startID, Double.POSITIVE_INFINITY, null);
    int numHandled = 1;
    index.setHandled(startID, relation.get(startID));
    SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) index.getRootEntry();
    SpatialObjectPair spatialObjectPair = new SpatialObjectPair(0., rootEntry, rootEntry, true);
    heap.add(spatialObjectPair);
    while (numHandled < size) {
        if (heap.isEmpty()) {
            throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
        }
        SpatialObjectPair dataPair = heap.poll();
        // pair of nodes
        if (dataPair.isExpandable) {
            expandNodes(index, distFunction, dataPair, knns);
        } else // pair of objects
        {
            // set handled
            LeafEntry e1 = (LeafEntry) dataPair.entry1;
            LeafEntry e2 = (LeafEntry) dataPair.entry2;
            final DBID e1id = e1.getDBID();
            IndexTreePath<DeLiCluEntry> path = index.setHandled(e1id, relation.get(e1id));
            if (path == null) {
                throw new RuntimeException("snh: parent(" + e1id + ") = null!!!");
            }
            // add to cluster order
            clusterOrder.add(e1id, dataPair.distance, e2.getDBID());
            numHandled++;
            // reinsert expanded leafs
            reinsertExpanded(distFunction, index, path, knns);
            if (progress != null) {
                progress.setProcessed(numHandled, LOG);
            }
        }
    }
    LOG.ensureCompleted(progress);
    return clusterOrder;
}
Also used : DeLiCluEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluEntry) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) LeafEntry(de.lmu.ifi.dbs.elki.index.tree.LeafEntry) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MissingPrerequisitesException(de.lmu.ifi.dbs.elki.utilities.exceptions.MissingPrerequisitesException) SpatialDirectoryEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DeLiCluTreeIndex(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeIndex) SpatialPrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 32 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class TSNE method run.

public Relation<DoubleVector> run(Relation<O> relation) {
    AffinityMatrix pij = affinity.computeAffinityMatrix(relation, EARLY_EXAGGERATION);
    // Create initial solution.
    final int size = pij.size();
    double[][] sol = randomInitialSolution(size, dim, random.getSingleThreadedRandom());
    projectedDistances.setLong(0L);
    optimizetSNE(pij, sol);
    LOG.statistics(projectedDistances);
    // Remove the original (unprojected) data unless configured otherwise.
    removePreviousRelation(relation);
    // Transform into output data format.
    DBIDs ids = relation.getDBIDs();
    WritableDataStore<DoubleVector> proj = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
    VectorFieldTypeInformation<DoubleVector> otype = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    for (DBIDArrayIter it = pij.iterDBIDs(); it.valid(); it.advance()) {
        proj.put(it, DoubleVector.wrap(sol[it.getOffset()]));
    }
    return new MaterializedRelation<>("tSNE", "t-SNE", otype, proj, ids);
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 33 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class AveragePrecisionAtK method run.

/**
 * Run the algorithm
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Vectors containing mean and standard deviation.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final int qk = k + (includeSelf ? 0 : 1);
    final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
    MeanVarianceMinMax[] mvs = MeanVarianceMinMax.newArray(k);
    final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
    // sort neighbors
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        KNNList knn = knnQuery.getKNNForDBID(iter, qk);
        Object label = lrelation.get(iter);
        int positive = 0, i = 0;
        for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
            if (!includeSelf && DBIDUtil.equal(iter, ri)) {
                // Do not increment i.
                continue;
            }
            positive += match(label, lrelation.get(ri)) ? 1 : 0;
            final double precision = positive / (double) (i + 1);
            mvs[i].put(precision);
            i++;
        }
        LOG.incrementProcessed(objloop);
    }
    LOG.ensureCompleted(objloop);
    // Transform Histogram into a Double Vector array.
    Collection<double[]> res = new ArrayList<>(k);
    for (int i = 0; i < k; i++) {
        final MeanVarianceMinMax mv = mvs[i];
        final double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
        res.add(new double[] { i + 1, mv.getMean(), std, mv.getMin(), mv.getMax(), mv.getCount() });
    }
    return new CollectionResult<>("Average Precision", "average-precision", res);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 34 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class KNNJoin method run.

/**
 * Joins in the given spatial database to each object its k-nearest neighbors.
 *
 * @param relation Relation to process
 * @return result
 */
public Relation<KNNList> run(Relation<V> relation) {
    DBIDs ids = relation.getDBIDs();
    WritableDataStore<KNNList> knnLists = run(relation, ids);
    // Wrap as relation:
    return new MaterializedRelation<>("k nearest neighbors", "kNNs", TypeUtil.KNNLIST, knnLists, ids);
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 35 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class KNNBenchmarkAlgorithm method run.

/**
 * Run the algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
    // No query set - use original database.
    if (queries == null) {
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        int hash = 0;
        MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            KNNList knns = knnQuery.getKNNForDBID(iditer, k);
            int ichecksum = 0;
            for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
                ichecksum += DBIDUtil.asInteger(it);
            }
            hash = Util.mixHashCodes(hash, ichecksum);
            mv.put(knns.size());
            mvdist.put(knns.getKNNDistance());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics("Result hashcode: " + hash);
            LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
            if (mvdist.getCount() > 0) {
                LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
            }
        }
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        int hash = 0;
        MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            KNNList knns = knnQuery.getKNNForObject(o, k);
            int ichecksum = 0;
            for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
                ichecksum += DBIDUtil.asInteger(it);
            }
            hash = Util.mixHashCodes(hash, ichecksum);
            mv.put(knns.size());
            mvdist.put(knns.getKNNDistance());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics("Result hashcode: " + hash);
            LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
            if (mvdist.getCount() > 0) {
                LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
            }
        }
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14