Search in sources :

Example 31 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

the class KMediansLloyd method run.

@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    }
    // Choose initial medians
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] medians = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] distsum = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        boolean changed = assignToNearestCluster(relation, medians, clusters, assignment, distsum);
        // Stop if no cluster assignment changed.
        if (!changed) {
            break;
        }
        // Recompute medians.
        medians = medians(clusters, medians, relation);
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result
    Clustering<MeanModel> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        MeanModel model = new MeanModel(medians[i]);
        result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 32 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

the class KMedoidsPAM method run.

/**
 * Run k-medoids
 *
 * @param database Database
 * @param relation relation to use
 * @return result
 */
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("PAM Clustering", "pam-clustering");
    }
    if (k > 0x7FFF) {
        throw new NotImplementedException("PAM supports at most " + 0x7FFF + " clusters.");
    }
    DistanceQuery<V> distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG);
    DBIDs ids = relation.getDBIDs();
    // Choose initial medoids
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, ids, distQ));
    if (medoids.size() != k) {
        throw new AbortException("Initializer " + initializer.toString() + " did not return " + k + " means, but " + medoids.size());
    }
    // Setup cluster assignment store
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
    run(distQ, ids, medoids, assignment);
    ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
    // Wrap result
    Clustering<MedoidModel> result = new Clustering<>("PAM Clustering", "pam-clustering");
    for (DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) {
        result.addToplevelCluster(new Cluster<>(clusters[it.getOffset()], new MedoidModel(DBIDUtil.deref(it))));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) NotImplementedException(de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) MedoidModel(de.lmu.ifi.dbs.elki.data.model.MedoidModel) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 33 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

the class SingleAssignmentKMeans method run.

@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Assignment", "kmeans-assignment");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    assignToNearestCluster(relation, means, clusters, assignment, varsum);
    // Wrap result
    Clustering<KMeansModel> result = new Clustering<>("Nearest Centroid Clustering", "nearest-center-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        KMeansModel model = new KMeansModel(means[i], varsum[i]);
        result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
    }
    return result;
}
Also used : StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) ArrayList(java.util.ArrayList) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 34 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

the class ByLabelOrAllInOneClustering method run.

@Override
public Clustering<Model> run(Database database) {
    // Prefer a true class label
    try {
        Relation<ClassLabel> relation = database.getRelation(TypeUtil.CLASSLABEL);
        return run(relation);
    } catch (NoSupportedDataTypeException e) {
    // Ignore.
    }
    try {
        Relation<ClassLabel> relation = database.getRelation(TypeUtil.GUESSED_LABEL);
        return run(relation);
    } catch (NoSupportedDataTypeException e) {
    // Ignore.
    }
    final DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
    Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
    Cluster<Model> c = new Cluster<Model>(ids, ClusterModel.CLUSTER);
    result.addToplevelCluster(c);
    return result;
}
Also used : ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) NoSupportedDataTypeException(de.lmu.ifi.dbs.elki.data.type.NoSupportedDataTypeException) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Model(de.lmu.ifi.dbs.elki.data.model.Model) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 35 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

the class TrivialAllNoise method run.

public Clustering<Model> run(Relation<?> relation) {
    final DBIDs ids = relation.getDBIDs();
    Clustering<Model> result = new Clustering<>("All-in-noise trivial Clustering", "allinnoise-clustering");
    Cluster<Model> c = new Cluster<Model>(ids, true, ClusterModel.CLUSTER);
    result.addToplevelCluster(c);
    return result;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Aggregations

Clustering (de.lmu.ifi.dbs.elki.data.Clustering)68 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)32 ArrayList (java.util.ArrayList)27 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)23 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)21 Model (de.lmu.ifi.dbs.elki.data.model.Model)21 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)20 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)16 Database (de.lmu.ifi.dbs.elki.database.Database)14 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)14 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)14 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)13 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)12 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)12 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)9 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)5