Search in sources :

Example 41 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

The class SNNClustering, method run.

/**
 * Runs shared-nearest-neighbor (SNN) clustering on the given relation.
 * <p>
 * When the relation contains fewer than {@code minpts} objects, every object
 * is put into the noise set. Otherwise each object that has not been
 * processed yet is passed to {@code expandCluster}; iteration stops early
 * once all objects are processed and the noise set is empty. The returned
 * clustering holds one top-level cluster per entry of the result list,
 * followed by a single cluster flagged as noise.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public Clustering<Model> run(Database database, Relation<O> relation) {
    SimilarityQuery<O> snnQuery = similarityFunction.instantiate(relation);
    FiniteProgress objectProgress = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null;
    IndefiniteProgress clusterProgress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
    // Progress is only reported when both progress objects were created.
    final boolean reportProgress = objectProgress != null && clusterProgress != null;

    // Reset the per-run state kept in fields.
    resultList = new ArrayList<>();
    noise = DBIDUtil.newHashSet();
    processedIDs = DBIDUtil.newHashSet(relation.size());

    if (relation.size() < minpts) {
        // Too few objects: everything is noise.
        for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
            noise.add(iter);
            if (reportProgress) {
                objectProgress.setProcessed(noise.size(), LOG);
                clusterProgress.setProcessed(resultList.size(), LOG);
            }
        }
    } else {
        for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
            if (!processedIDs.contains(iter)) {
                expandCluster(snnQuery, iter, objectProgress, clusterProgress);
                // Nothing left to do: all objects processed and none is noise.
                if (processedIDs.size() == relation.size() && noise.size() == 0) {
                    break;
                }
            }
            if (reportProgress) {
                objectProgress.setProcessed(processedIDs.size(), LOG);
                clusterProgress.setProcessed(resultList.size(), LOG);
            }
        }
    }

    // Finish progress logging
    LOG.ensureCompleted(objectProgress);
    LOG.setCompleted(clusterProgress);

    // Wrap the collected id sets into the clustering result; noise goes last.
    Clustering<Model> clustering = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
    for (ModifiableDBIDs members : resultList) {
        clustering.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
    }
    clustering.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
    return clustering;
}
Also used : IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 42 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

The class LSDBC, method run.

/**
 * Run the LSDBC algorithm.
 * <p>
 * The run has three logged steps: obtaining a precomputed kNN query,
 * filling a density store and sorting the ids with
 * {@code DataStoreUtil.AscendingByDoubleDataStore} over it, and computing
 * the clusters. In the last step, each unprocessed object for which
 * {@code isLocalMaximum} holds starts a new cluster that is grown by
 * {@code expandCluster}; every other unprocessed object is labeled noise.
 * Finally the per-object cluster ids are inverted into id lists, and one
 * top-level cluster is emitted per list, starting at index {@code NOISE}
 * (which is flagged as the noise cluster).
 *
 * @param database Database to process
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LSDBC", 3) : null;
    final int dim = RelationUtil.dimensionality(relation);
    final double factor = FastMath.pow(2., alpha / dim);
    final DBIDs ids = relation.getDBIDs();
    LOG.beginStep(stepprog, 1, "Materializing kNN neighborhoods");
    KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
    LOG.beginStep(stepprog, 2, "Sorting by density");
    WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    fillDensities(knnq, ids, dens);
    ArrayModifiableDBIDs sids = DBIDUtil.newArray(ids);
    sids.sort(new DataStoreUtil.AscendingByDoubleDataStore(dens));
    LOG.beginStep(stepprog, 3, "Computing clusters");
    // Setup progress logging
    final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("LSDBC Clustering", ids.size(), LOG) : null;
    final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters found", LOG) : null;
    // (Temporary) store the cluster ID assigned.
    final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
    // Note: these are not exact, as objects may be stolen from noise.
    final IntArrayList clustersizes = new IntArrayList();
    // Unprocessed dummy value.
    clustersizes.add(0);
    // Noise counter.
    clustersizes.add(0);
    // First id handed out to a real cluster; ids up to NOISE are reserved.
    int clusterid = NOISE + 1;
    // Iterate over all objects in the database.
    for (DBIDIter id = sids.iter(); id.valid(); id.advance()) {
        // Skip already processed ids.
        if (clusterids.intValue(id) != UNPROCESSED) {
            continue;
        }
        // Evaluate Neighborhood predicate
        final KNNList neighbors = knnq.getKNNForDBID(id, k);
        // Evaluate Core-Point predicate:
        if (isLocalMaximum(neighbors.getKNNDistance(), neighbors, dens)) {
            double mindens = factor * neighbors.getKNNDistance();
            clusterids.putInt(id, clusterid);
            clustersizes.add(expandCluster(clusterid, clusterids, knnq, neighbors, mindens, progress));
            // start next cluster on next iteration.
            ++clusterid;
            if (clusprogress != null) {
                // clusterid is the NEXT id to assign and started at NOISE + 1,
                // so subtract that offset to report the clusters actually found.
                clusprogress.setProcessed(clusterid - (NOISE + 1), LOG);
            }
        } else {
            // otherwise, it's a noise point
            clusterids.putInt(id, NOISE);
            clustersizes.set(NOISE, clustersizes.getInt(NOISE) + 1);
        }
        // We've completed this element
        LOG.incrementProcessed(progress);
    }
    // Finish progress logging.
    LOG.ensureCompleted(progress);
    LOG.setCompleted(clusprogress);
    LOG.setCompleted(stepprog);
    // Transform cluster ID mapping into a clustering result:
    ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<>(clusterid);
    // add storage containers for clusters, presized with the (inexact) sizes
    for (int i = 0; i < clustersizes.size(); i++) {
        clusterlists.add(DBIDUtil.newArray(clustersizes.getInt(i)));
    }
    // do the actual inversion
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        // Negative values are non-core points:
        int cid = clusterids.intValue(id);
        int cluster = Math.abs(cid);
        clusterlists.get(cluster).add(id);
    }
    clusterids.destroy();
    Clustering<Model> result = new Clustering<>("LSDBC", "lsdbc-clustering");
    // Start at NOISE: lists below that index are not emitted.
    for (int cid = NOISE; cid < clusterlists.size(); cid++) {
        boolean isNoise = (cid == NOISE);
        Cluster<Model> c = new Cluster<Model>(clusterlists.get(cid), isNoise, ClusterModel.CLUSTER);
        result.addToplevelCluster(c);
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DataStoreUtil(de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList)

Example 43 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

The class ORCLUS, method run.

/**
 * Performs the ORCLUS algorithm on the given database.
 * <p>
 * Starting from an initial set of seeds, the method repeatedly assigns the
 * objects to the seeds, recomputes the basis of every non-empty cluster and
 * merges clusters, shrinking both the number of seeds and the associated
 * dimensionality until the number of seeds has reached {@code k}.
 *
 * @param database Database
 * @param relation Relation
 * @return clustering with one top-level cluster per remaining ORCLUS cluster
 * @throws IllegalStateException if the data dimensionality is smaller than
 *         parameter {@code l}
 */
public Clustering<Model> run(Database database, Relation<V> relation) {
    // current dimensionality associated with each seed
    int curDim = RelationUtil.dimensionality(relation);
    if (curDim < l) {
        throw new IllegalStateException("Dimensionality of data < parameter l! (" + curDim + " < " + l + ")");
    }

    // current number of seeds
    int curSeeds = Math.min(relation.size(), k_i * k);

    // pick k0 > k points from the database
    List<ORCLUSCluster> seedClusters = initialSeeds(relation, curSeeds);

    // Reduction factor for the dimensionality, derived from the three ratios.
    final double logDimRatio = FastMath.log(curDim / (double) l);
    final double logInvAlpha = FastMath.log(1 / alpha);
    final double logSeedRatio = FastMath.log(curSeeds / (double) k);
    double beta = FastMath.exp(-logDimRatio * logInvAlpha / logSeedRatio);

    IndefiniteProgress clusterProgress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;

    while (curSeeds > k) {
        // find partitioning induced by the seeds of the clusters
        assign(relation, seedClusters);

        // determine current subspace associated with each cluster
        for (ORCLUSCluster seed : seedClusters) {
            if (seed.objectIDs.size() > 0) {
                seed.basis = findBasis(relation, seed, curDim);
            }
        }

        // reduce number of seeds and dimensionality associated with
        // each seed
        curSeeds = (int) Math.max(k, curSeeds * alpha);
        curDim = (int) Math.max(l, curDim * beta);
        merge(relation, seedClusters, curSeeds, curDim, clusterProgress);

        if (clusterProgress != null) {
            clusterProgress.setProcessed(seedClusters.size(), LOG);
        }
    }

    // final assignment to the remaining clusters
    assign(relation, seedClusters);
    LOG.setCompleted(clusterProgress);

    // get the result
    Clustering<Model> result = new Clustering<>("ORCLUS clustering", "orclus-clustering");
    for (ORCLUSCluster seed : seedClusters) {
        result.addToplevelCluster(new Cluster<Model>(seed.objectIDs, ClusterModel.CLUSTER));
    }
    return result;
}
Also used : IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) AbstractProjectedClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering)

Example 44 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

The class COPAC, method run.

/**
 * Run the COPAC algorithm.
 * <p>
 * Runs a {@code GeneralizedDBSCAN.Instance} with a COPAC neighbor predicate
 * and a min-pts core predicate, then re-wraps every non-empty top-level
 * cluster with a {@link DimensionModel} whose dimensionality is obtained
 * from the neighbor predicate.
 *
 * @param database Database
 * @param relation Vector field relation
 * @return COPAC clustering
 */
public Clustering<DimensionModel> run(Database database, Relation<V> relation) {
    COPACNeighborPredicate.Instance neighborPred = new COPACNeighborPredicate<V>(settings).instantiate(database, relation);
    CorePredicate.Instance<DBIDs> corePred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
    Clustering<Model> flat = new GeneralizedDBSCAN.Instance<>(neighborPred, corePred, false).run();

    // Re-wrap the detected clusters for COPAC:
    Clustering<DimensionModel> wrapped = new Clustering<>("COPAC clustering", "copac-clustering");
    // Generalized DBSCAN clusterings will be flat.
    for (It<Cluster<Model>> it = flat.iterToplevelClusters(); it.valid(); it.advance()) {
        final Cluster<Model> found = it.get();
        // Empty clusters are not carried over.
        if (!(found.size() > 0)) {
            continue;
        }
        final int dimensionality = neighborPred.dimensionality(found.getIDs().iter());
        final DimensionModel dimModel = new DimensionModel(dimensionality);
        result_add: {
            wrapped.addToplevelCluster(new Cluster<>(found.getIDs(), dimModel));
        }
    }
    return wrapped;
}
Also used : MinPtsCorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate) CorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate) DimensionModel(de.lmu.ifi.dbs.elki.data.model.DimensionModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) MinPtsCorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate) COPACNeighborPredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.COPACNeighborPredicate) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DimensionModel(de.lmu.ifi.dbs.elki.data.model.DimensionModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) GeneralizedDBSCAN(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN)

Example 45 with Model

use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.

The class SUBCLU, method runDBSCAN.

/**
 * Runs DBSCAN restricted to one subspace and, optionally, to a subset of
 * the objects, and returns only the non-noise clusters.
 * <p>
 * The selected dimensions of the distance function are set to those of
 * {@code subspace} before DBSCAN is run on a {@link ProxyDatabase} built
 * from the chosen ids.
 *
 * @param relation the database holding the objects to run DBSCAN on
 * @param ids the IDs of the database defining the partition to run DBSCAN on
 *        - if this parameter is null DBSCAN will be applied to the whole
 *        database
 * @param subspace the subspace to run DBSCAN on
 * @return the clusters found by DBSCAN, with noise clusters removed
 */
private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
    // distance function
    distanceFunction.setSelectedDimensions(subspace.getDimensions());

    // TODO: when no ids are given, we might want to use an index - the proxy
    // below will prevent this!
    final DBIDs partition = (ids != null) ? ids : relation.getDBIDs();
    final ProxyDatabase proxyDb = new ProxyDatabase(partition, relation);
    final DBSCAN<V> algorithm = new DBSCAN<>(distanceFunction, epsilon, minpts);

    // run DBSCAN
    if (LOG.isVerbose()) {
        LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
    }
    final Clustering<Model> dbscanResult = algorithm.run(proxyDb);

    // separate cluster and noise
    final List<Cluster<Model>> nonNoise = new ArrayList<>();
    for (Cluster<Model> candidate : dbscanResult.getAllClusters()) {
        if (candidate.isNoise()) {
            continue;
        }
        nonNoise.add(candidate);
    }
    return nonNoise;
}
Also used : SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DBSCAN(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Aggregations

Model (de.lmu.ifi.dbs.elki.data.model.Model): 60; Database (de.lmu.ifi.dbs.elki.database.Database): 29; Test (org.junit.Test): 24; Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 21; Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 18; AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest): 17; ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel): 13; ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 12; ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder): 11; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 10; ArrayList (java.util.ArrayList): 9; DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 8; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 8; AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 8; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 7; HashMap (java.util.HashMap): 5; ByLabelClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering): 3; SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel): 3; IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress): 3; CorePredicate (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate): 2