Search in sources :

Example 91 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class ClusteringAlgorithmUtil method partitionsFromIntegerLabels.

/**
 * Group object ids into one partition per integer label.
 *
 * @param ids Objects
 * @param assignment Cluster assignment (label of each object)
 * @param k Number of labels (labels must be in the range 0 to k-1)
 * @return Partitions, indexed by label
 */
public static ArrayModifiableDBIDs[] partitionsFromIntegerLabels(DBIDs ids, IntegerDataStore assignment, int k) {
    // First pass: count how many objects carry each label.
    final int[] counts = new int[k];
    DBIDIter counter = ids.iter();
    while (counter.valid()) {
        counts[assignment.intValue(counter)]++;
        counter.advance();
    }
    // Create one array per label, passing the counted number to newArray.
    final ArrayModifiableDBIDs[] partitions = new ArrayModifiableDBIDs[k];
    for (int label = 0; label < k; label++) {
        partitions[label] = DBIDUtil.newArray(counts[label]);
    }
    // Second pass: append every object to the partition of its label.
    DBIDIter filler = ids.iter();
    while (filler.valid()) {
        final int label = assignment.intValue(filler);
        partitions[label].add(filler);
        filler.advance();
    }
    return partitions;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 92 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SameSizeKMeansAlgorithm method updateDistances.

/**
 * Recompute, for every object, the distance to each mean, and set
 * {@link Meta#secondary} to the index of the closest mean other than the
 * object's current primary assignment.
 *
 * @param relation Data relation
 * @param means Means
 * @param metas Metadata storage
 * @param df Distance function
 */
protected void updateDistances(Relation<V> relation, double[][] means, final WritableDataStore<Meta> metas, NumberVectorDistanceFunction<? super V> df) {
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final Meta meta = metas.get(it);
        final V vec = relation.get(it);
        // Reset: a negative value marks "no secondary candidate chosen yet".
        meta.secondary = -1;
        for (int j = 0; j < k; j++) {
            meta.dists[j] = df.distance(vec, DoubleVector.wrap(means[j]));
            // The primary cluster is never a candidate for secondary.
            if (meta.primary == j) {
                continue;
            }
            final boolean unset = meta.secondary < 0;
            if (unset || meta.dists[j] < meta.dists[meta.secondary]) {
                meta.secondary = j;
            }
        }
        // Store the modified record back.
        metas.put(it, meta);
        it.advance();
    }
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 93 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SameSizeKMeansAlgorithm method initializeMeta.

/**
 * Create the metadata storage and fill in one record per object, holding
 * the distances of the object to every mean.
 *
 * @param relation Relation to process
 * @param means Mean vectors
 * @return Initialized storage
 */
protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, double[][] means) {
    final NumberVectorDistanceFunction<? super V> distance = getDistanceFunction();
    // Backing storage for the per-object records.
    final int hints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP;
    final WritableDataStore<Meta> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), hints, Meta.class);
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final Meta meta = new Meta(k);
        final V vec = relation.get(it);
        for (int j = 0; j < k; j++) {
            final double dist = distance.distance(vec, DoubleVector.wrap(means[j]));
            meta.dists[j] = dist;
            // Index 0 only records its distance; comparisons start at 1.
            if (j == 0) {
                continue;
            }
            // primary follows the smallest distance seen so far; otherwise
            // secondary moves when the distance exceeds the one stored at the
            // current secondary index.
            // NOTE(review): the earlier comment here spoke of the "two nearest"
            // centers, but the secondary test is '>' - confirm the intent
            // against Meta and its users.
            if (dist < meta.dists[meta.primary]) {
                meta.primary = j;
            } else if (dist > meta.dists[meta.secondary]) {
                meta.secondary = j;
            }
        }
        storage.put(it, meta);
        it.advance();
    }
    return storage;
}
Also used : DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 94 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class PassingDataToELKI method main.

/**
 * Example entry point: cluster a randomly generated 2-dimensional data set
 * with k-means and print each cluster with the offsets of its members.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
    // Enable logging at the statistics level.
    LoggingConfiguration.setStatistics();

    // Fill a 1000 x 2 array with random values.
    // Note: ELKI has a nice data generator class, use that instead.
    final double[][] data = new double[1000][2];
    for (double[] row : data) {
        for (int j = 0; j < row.length; j++) {
            row[j] = Math.random();
        }
    }

    // Wrap the existing array, and build a database on top of it
    // (a database may contain multiple relations!).
    final DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    final Database db = new StaticArrayDatabase(dbc, null);
    // The database must be initialized before use; this loads the data.
    db.initialize();

    // The relation holding the number vectors, and its ids, which we know
    // to be a continuous range.
    final Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final DBIDRange ids = (DBIDRange) rel.getDBIDs();

    // K-means should be used with squared Euclidean (least squares).
    final SquaredEuclideanDistanceFunction dist = SquaredEuclideanDistanceFunction.STATIC;
    // Default initialization with the global random generator.
    // To fix the random seed, use: new RandomFactory(seed);
    final RandomlyGeneratedInitialMeans init = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);

    // Textbook k-means clustering.
    final int k = 3; // number of partitions
    final int maxiter = 0; // maximum number of iterations: no limit
    final KMeansLloyd<NumberVector> km = new KMeansLloyd<>(dist, k, maxiter, init);

    // K-means picks a numerical relation from the database automatically.
    // With more than one numeric relation, it could be made explicit:
    // km.run(db, rel);
    final Clustering<KMeansModel> c = km.run(db);

    // Print every cluster.
    int index = 0;
    for (Cluster<KMeansModel> clu : c.getAllClusters()) {
        // K-means will name all clusters "Cluster" in lack of noise support.
        System.out.println("#" + index + ": " + clu.getNameAutomatic());
        System.out.println("Size: " + clu.size());
        System.out.println("Center: " + clu.getModel().getPrototype().toString());
        System.out.print("Objects: ");
        DBIDIter it = clu.getIDs().iter();
        while (it.valid()) {
            // To get the vector itself, use: NumberVector v = rel.get(it);
            // The offset within our DBID range is the "line number".
            // Do NOT rely on using "internalGetIndex()" directly!
            System.out.print(" " + ids.getOffset(it));
            it.advance();
        }
        System.out.println();
        index++;
    }
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) RandomlyGeneratedInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 95 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class UKMeans method run.

/**
 * Run the clustering: pick initial means from a random sample, then
 * alternate between assigning objects to clusters and recomputing the
 * means, until no assignment changes or the iteration limit is reached.
 *
 * @param database the Database
 * @param relation the Relation
 * @return Clustering result
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
    // Nothing to cluster: return an empty result.
    if (relation.size() <= 0) {
        return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    }
    // Initial means: centers of mass of randomly sampled objects.
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
    List<double[]> centers = new ArrayList<>(k);
    DBIDIter it = sample.iter();
    while (it.valid()) {
        centers.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(it).getCenterOfMass()));
        it.advance();
    }
    // One id set per cluster, plus the per-object assignment (initially -1).
    final List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int c = 0; c < k; c++) {
        final int capacity = (int) (relation.size() * 2. / k);
        clusters.add(DBIDUtil.newHashSet(capacity));
    }
    final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    final double[] varsum = new double[k];
    // Progress and statistics objects are only created when they will be logged.
    IndefiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new IndefiniteProgress("UK-Means iteration", LOG);
    }
    DoubleStatistic varstat = null;
    if (LOG.isStatistics()) {
        varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    }
    // Main loop; a non-positive maxiter means there is no iteration limit.
    int iteration = 0;
    while (maxiter <= 0 || iteration < maxiter) {
        LOG.incrementProcessed(prog);
        final boolean changed = assignToNearestCluster(relation, centers, clusters, assignment, varsum);
        logVarstat(varstat, varsum);
        // Stop as soon as no cluster assignment changed.
        if (!changed) {
            break;
        }
        // Recompute the means from the new assignment.
        centers = means(clusters, centers, relation);
        iteration++;
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap the non-empty clusters into the result.
    final Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
    for (int c = 0; c < clusters.size(); c++) {
        final DBIDs ids = clusters.get(c);
        if (!ids.isEmpty()) {
            result.addToplevelCluster(new Cluster<>(ids, new KMeansModel(centers.get(c), varsum[c])));
        }
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 329, FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) 78, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 76, DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) 72, WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) 70, ArrayList (java.util.ArrayList) 61, KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList) 56, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) 56, MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) 55, OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) 55, DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax) 54, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) 53, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector) 42, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) 40, DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) 34, MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance) 31, BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) 30, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) 25, ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) 24, AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) 21