Search in sources :

Example 21 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class PAMInitialMeans method chooseInitialMeans.

/**
 * Choose k initial means by delegating to the medoid initialization and
 * converting each chosen medoid into a plain coordinate array.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation holding the vectors
 * @param k Number of means to choose
 * @param distanceFunction Distance function; cast to a primitive distance
 *        function for the medoid selection
 * @return k coordinate arrays, one per chosen medoid
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    // Refuse to pick more centers than there are objects.
    if (k > relation.size()) {
        throw new AbortException("Database has less than k objects.");
    }
    // The medoid chooser is typed on O; reuse it via unchecked casts instead
    // of duplicating its logic for T.
    @SuppressWarnings("unchecked") Relation<O> typedRelation = (Relation<O>) relation;
    @SuppressWarnings("unchecked") final PrimitiveDistanceFunction<? super O> primitiveDist = (PrimitiveDistanceFunction<? super O>) distanceFunction;
    final DistanceQuery<O> query = database.getDistanceQuery(typedRelation, primitiveDist);
    final DBIDs chosen = chooseInitialMedoids(k, typedRelation.getDBIDs(), query);
    // Copy the vectors of the chosen medoids, in iteration order.
    final double[][] means = new double[k][];
    final DBIDIter cursor = chosen.iter();
    int row = 0;
    while (row < k) {
        means[row] = relation.get(cursor).toArray();
        row++;
        cursor.advance();
    }
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) PrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 22 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SLINKHDBSCANLinearMemory method run.

/**
 * Run the algorithm: process the objects one by one, updating the pointer
 * representation (pi, lambda) with steps 1-4 for each object.
 *
 * @param db Database used to obtain the distance and kNN queries
 * @param relation Relation to process
 * @return Clustering hierarchy
 */
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
    // Array-backed ids: array addressing is needed later on.
    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    // Core distances of all objects.
    // NOTE(review): the previous comment here said "minPts + 1: ignore query
    // point", but minPts is passed unchanged - confirm which one is intended.
    final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
    // Storage for the result representation.
    final int resultHints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC;
    WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, resultHints);
    WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, resultHints, Double.POSITIVE_INFINITY);
    // Scratch storage for the m values written in step 2.
    final int scratchHints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP;
    WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, scratchHints);
    // Only track progress when verbose logging is enabled.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Running HDBSCAN*-SLINK", ids.size(), LOG);
    }
    // Must be an array (ordered) collection, for monotonicity reasons.
    ModifiableDBIDs seen = DBIDUtil.newArray(ids.size());
    DBIDIter current = ids.iter();
    while (current.valid()) {
        // Steps 1, 3 and 4 are exactly as in SLINK.
        step1(current, pi, lambda);
        // Step 2 is modified to use a different distance.
        step2(current, seen, distQ, coredists, m);
        step3(current, pi, lambda, seen, m);
        step4(current, pi, lambda, seen);
        // The object only counts as processed after all four steps.
        seen.add(current);
        LOG.incrementProcessed(progress);
        current.advance();
    }
    LOG.ensureCompleted(progress);
    final boolean squared = distQ.getDistanceFunction().isSquared();
    return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, squared, coredists);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) WritableDBIDDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 23 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SLINKHDBSCANLinearMemory method step2.

/**
 * Second step: for every already processed object, store in m the distance
 * to the new object, computed as the maximum of both core distances and the
 * queried distance.
 *
 * @param id the id of the object to be inserted into the pointer
 *        representation
 * @param processedIDs the already processed ids
 * @param distQuery Distance query
 * @param coredists Core distances, read for the new and each processed object
 * @param m Data store receiving the computed distances
 */
private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<? super O> distQuery, DoubleDataStore coredists, WritableDoubleDataStore m) {
    // Core distance of the new object is the same for every pair.
    final double coreOfNew = coredists.doubleValue(id);
    final DBIDIter other = processedIDs.iter();
    while (other.valid()) {
        final double coreOfOther = coredists.doubleValue(other);
        final double plainDist = distQuery.distance(id, other);
        // M(i) = max(core(n+1), core(i), dist(i, n+1))
        m.putDouble(other, MathUtil.max(coreOfNew, coreOfOther, plainDist));
        other.advance();
    }
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 24 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class PreDeConNeighborPredicate method computeLocalModel.

/**
 * Compute the local PreDeCon model of an object from its neighborhood.
 *
 * Per dimension, the mean squared deviation of the neighbors from the query
 * object is computed; dimensions at or below {@code settings.delta} get the
 * weight {@code settings.kappa} and are counted, all others get weight 1.
 * Neighbors whose weighted squared deviation from the query object is at most
 * {@code epsilon} are kept in the model.
 *
 * The neighborhood size is recorded in {@code mvSize} and each per-dimension
 * variance in {@code mvVar}.
 *
 * @param id Query object
 * @param neighbors Neighbors of the query object (the reference set)
 * @param relation Relation to fetch the vectors from
 * @return Model holding the number of low-variance dimensions and the
 *         surviving neighbors; for an empty reference set, a model with
 *         {@code Integer.MAX_VALUE} dimensions and no neighbors
 */
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
    final int referenceSetSize = neighbors.size();
    mvSize.put(referenceSetSize);
    // Shouldn't happen. A size can never be negative, so the empty case must
    // be caught with <= 0; otherwise the division below is 0/0 and produces
    // NaN variances that end up in mvVar and in the model.
    if (referenceSetSize <= 0) {
        LOG.warning("Empty reference set - should at least include the query point!");
        return new PreDeConModel(Integer.MAX_VALUE, DBIDUtil.EMPTYDBIDS);
    }
    V obj = relation.get(id);
    final int dim = obj.getDimensionality();
    // Per-dimension variances (relative to the query object, not the mean):
    double[] s = new double[dim];
    for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        V o = relation.get(neighbor);
        for (int d = 0; d < dim; d++) {
            final double diff = obj.doubleValue(d) - o.doubleValue(d);
            s[d] += diff * diff;
        }
    }
    // Adjust for sample size
    for (int d = 0; d < dim; d++) {
        s[d] /= referenceSetSize;
        mvVar.put(s[d]);
    }
    // Preference weight vector; pdim counts the low-variance dimensions.
    double[] weights = new double[dim];
    int pdim = 0;
    for (int d = 0; d < dim; d++) {
        if (s[d] <= settings.delta) {
            weights[d] = settings.kappa;
            pdim++;
        } else {
            weights[d] = 1.;
        }
    }
    // Check which neighbors survive
    HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet(referenceSetSize);
    for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        V o = relation.get(neighbor);
        // Weighted (squared) Euclidean distance:
        double dev = 0.;
        for (int d = 0; d < dim; d++) {
            final double diff = obj.doubleValue(d) - o.doubleValue(d);
            dev += weights[d] * diff * diff;
        }
        // Note: epsilon was squared - this saves us the sqrt here:
        if (dev <= epsilon) {
            survivors.add(neighbor);
        }
    }
    return new PreDeConModel(pdim, survivors);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 25 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class BIRCHLeafClustering method run.

/**
 * Run the clustering algorithm: build a CF tree, assign every object to the
 * leaf found for it, and report one cluster per leaf that received objects.
 *
 * @param relation Input data
 * @return Clustering
 */
public Clustering<MeanModel> run(Relation<NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    CFTree tree = cffactory.newTree(relation.getDBIDs(), relation);
    // The CFTree does not store points, so we have to reassign them. The
    // quality is also better than if we used the initial assignment, because
    // centers move (in particular in the beginning), so we always had many
    // outliers.
    Map<ClusteringFeature, ModifiableDBIDs> members = new HashMap<>(tree.leaves);
    DBIDIter point = relation.iterDBIDs();
    while (point.valid()) {
        final ClusteringFeature leaf = tree.findLeaf(relation.get(point));
        ModifiableDBIDs bucket = members.get(leaf);
        if (bucket == null) {
            // First object for this leaf: create its id list, sized by leaf.n.
            bucket = DBIDUtil.newArray(leaf.n);
            members.put(leaf, bucket);
        }
        bucket.add(point);
        point.advance();
    }
    // One top-level cluster per leaf, with the leaf centroid as mean.
    Clustering<MeanModel> clustering = new Clustering<>("BIRCH-leaves", "BIRCH leaves");
    for (Map.Entry<ClusteringFeature, ModifiableDBIDs> entry : members.entrySet()) {
        final ClusteringFeature leaf = entry.getKey();
        final double[] mean = new double[dim];
        for (int d = 0; d < mean.length; d++) {
            mean[d] = leaf.centroid(d);
        }
        clustering.addToplevelCluster(new Cluster<>(entry.getValue(), new MeanModel(mean)));
    }
    return clustering;
}
Also used : HashMap(java.util.HashMap) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21