use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class ClusteringAlgorithmUtil method partitionsFromIntegerLabels.
/**
 * Split a set of objects into k partitions according to their integer
 * cluster labels.
 *
 * Two passes are made over {@code ids}: the first counts how many objects
 * carry each label, so that every partition can be created with the counted
 * size; the second adds each object to the partition of its label.
 *
 * @param ids Objects to partition
 * @param assignment Label of each object; it is used directly as an array
 *        index, so every label must lie in [0;k-1]
 * @param k Number of labels, and number of partitions returned
 * @return Array of k partitions; entry i holds the objects labeled i
 */
public static ArrayModifiableDBIDs[] partitionsFromIntegerLabels(DBIDs ids, IntegerDataStore assignment, int k) {
  // First pass: count the members of each label.
  final int[] counts = new int[k];
  DBIDIter counter = ids.iter();
  while (counter.valid()) {
    counts[assignment.intValue(counter)]++;
    counter.advance();
  }
  // Create one id array per label, using the counted size.
  final ArrayModifiableDBIDs[] partitions = new ArrayModifiableDBIDs[k];
  for (int label = 0; label < k; label++) {
    partitions[label] = DBIDUtil.newArray(counts[label]);
  }
  // Second pass: put every object into the partition of its label.
  DBIDIter filler = ids.iter();
  while (filler.valid()) {
    partitions[assignment.intValue(filler)].add(filler);
    filler.advance();
  }
  return partitions;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class SameSizeKMeansAlgorithm method updateDistances.
/**
 * Recompute the distance of every object to each of the k means, and
 * re-select {@link Meta#secondary} as the index of the closest mean other
 * than the object's current primary assignment.
 *
 * On ties, the mean with the lowest index wins, because a later mean only
 * replaces the current choice when it is strictly closer. If no mean other
 * than the primary one exists, {@code secondary} is left at -1.
 *
 * @param relation Data relation
 * @param means Means
 * @param metas Metadata storage, updated for every object of the relation
 * @param df Distance function
 */
protected void updateDistances(Relation<V> relation, double[][] means, final WritableDataStore<Meta> metas, NumberVectorDistanceFunction<? super V> df) {
  DBIDIter it = relation.iterDBIDs();
  while (it.valid()) {
    final Meta meta = metas.get(it);
    final V vec = relation.get(it);
    // Forget the previous choice; -1 means "no candidate seen yet".
    meta.secondary = -1;
    for (int m = 0; m < k; m++) {
      meta.dists[m] = df.distance(vec, DoubleVector.wrap(means[m]));
      // The current assignment is never a candidate for the alternative.
      if (m == meta.primary) {
        continue;
      }
      if (meta.secondary < 0 || meta.dists[m] < meta.dists[meta.secondary]) {
        meta.secondary = m;
      }
    }
    // Write the modified record back to the store.
    metas.put(it, meta);
    it.advance();
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class SameSizeKMeansAlgorithm method initializeMeta.
/**
 * Create the metadata storage and fill in one {@code Meta} record per object
 * of the relation.
 *
 * For each object, the distance to every mean is stored in {@code dists}.
 * While scanning the means, {@code primary} is moved to any mean that is
 * strictly closer than the current primary one; otherwise {@code secondary}
 * is moved to any mean that is strictly farther than the current secondary
 * one. The first mean (index 0) is never compared, so both indexes keep
 * whatever value the {@code Meta} constructor gave them until a later mean
 * replaces it.
 *
 * NOTE(review): an earlier comment here described this as tracking "the two
 * nearest cluster centers", but the comparison for {@code secondary} selects
 * larger distances. Confirm the intended meaning against the users of
 * {@code Meta}.
 *
 * @param relation Relation to process
 * @param means Mean vectors
 * @return Initialized storage
 */
protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, double[][] means) {
  final NumberVectorDistanceFunction<? super V> distFunc = getDistanceFunction();
  // Backing storage for the per-object records.
  final WritableDataStore<Meta> store = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Meta.class);
  DBIDIter it = relation.iterDBIDs();
  while (it.valid()) {
    final Meta meta = new Meta(k);
    final V vec = relation.get(it);
    for (int m = 0; m < k; m++) {
      final double dist = distFunc.distance(vec, DoubleVector.wrap(means[m]));
      meta.dists[m] = dist;
      // Nothing to compare the first mean against.
      if (m == 0) {
        continue;
      }
      if (dist < meta.dists[meta.primary]) {
        meta.primary = m;
      } else if (dist > meta.dists[meta.secondary]) {
        meta.secondary = m;
      }
    }
    store.put(it, meta);
    it.advance();
  }
  return store;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class PassingDataToELKI method main.
/**
 * Example program: builds a random 2-dimensional data set in a plain Java
 * array, loads it into a database, runs k-means with k=3 on it, and prints
 * every resulting cluster together with the offsets of its members.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
  // Enable statistics-level logging.
  LoggingConfiguration.setStatistics();

  // Fill a 1000 x 2 array with uniform random values.
  // Note: ELKI has a nice data generator class, use that instead.
  final double[][] data = new double[1000][2];
  for (double[] row : data) {
    for (int col = 0; col < row.length; col++) {
      row[col] = Math.random();
    }
  }

  // Wrap the array so that it can be loaded as a data source.
  DatabaseConnection connection = new ArrayAdapterDatabaseConnection(data);
  // Create a database (which may contain multiple relations!)
  Database database = new StaticArrayDatabase(connection, null);
  // Actually load the data; do NOT forget this step.
  database.initialize();

  // Relation containing the number vectors:
  Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  // The ids are expected to be a continuous range here, hence the cast.
  DBIDRange range = (DBIDRange) vectors.getDBIDs();

  // K-means should be used with squared Euclidean (least squares):
  SquaredEuclideanDistanceFunction distance = SquaredEuclideanDistanceFunction.STATIC;
  // Default initialization, using global random.
  // To fix the random seed, use: new RandomFactory(seed);
  RandomlyGeneratedInitialMeans initializer = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);

  // Textbook k-means clustering:
  KMeansLloyd<NumberVector> kmeans = new KMeansLloyd<>(
      distance, // distance function
      3, // k - number of partitions
      0, // maximum number of iterations: no limit
      initializer);

  // K-means will automatically choose a numerical relation from the data set.
  // It could be made explicit (if there were more than one numeric
  // relation!) with: kmeans.run(database, vectors);
  Clustering<KMeansModel> clustering = kmeans.run(database);

  // Print all clusters, numbering them in iteration order.
  int number = 0;
  for (Cluster<KMeansModel> cluster : clustering.getAllClusters()) {
    // K-means will name all clusters "Cluster" in lack of noise support:
    System.out.println("#" + number + ": " + cluster.getNameAutomatic());
    System.out.println("Size: " + cluster.size());
    System.out.println("Center: " + cluster.getModel().getPrototype().toString());
    System.out.print("Objects: ");
    DBIDIter member = cluster.getIDs().iter();
    while (member.valid()) {
      // To get the vector use: NumberVector v = vectors.get(member);
      // Print the offset within our DBID range ("line number").
      // Do NOT rely on using "internalGetIndex()" directly!
      System.out.print(" " + range.getOffset(member));
      member.advance();
    }
    System.out.println();
    number++;
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class UKMeans method run.
/**
 * Run the clustering.
 *
 * The initial means are the centers of mass of k randomly sampled objects.
 * The method then alternates between assigning every object to its nearest
 * cluster and recomputing the means. It stops as soon as an assignment pass
 * reports no change, or after {@code maxiter} passes when {@code maxiter} is
 * positive (a value of 0 or less means no limit). Clusters that end up empty
 * are left out of the result.
 *
 * @param database the Database
 * @param relation the Relation
 * @return Clustering result; an empty clustering if the relation is empty
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
  // Nothing to cluster.
  if (relation.size() <= 0) {
    return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  }

  // Initial means: centers of mass of randomly sampled objects.
  final DBIDs seeds = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
  List<double[]> means = new ArrayList<>(k);
  DBIDIter seed = seeds.iter();
  while (seed.valid()) {
    means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(seed).getCenterOfMass()));
    seed.advance();
  }

  // One id set per cluster, plus the per-object assignment (-1 by default).
  final List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  final double[] varsum = new double[k];

  // Progress and statistics objects are only created when they will be used.
  final IndefiniteProgress progress = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
  final DoubleStatistic varianceStat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;

  // The counter is only advanced after a pass that changed some assignment.
  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(progress);
    final boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
    logVarstat(varianceStat, varsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
    // Recompute means.
    means = means(clusters, means, relation);
    iteration++;
  }
  LOG.setCompleted(progress);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Wrap the non-empty clusters into the result.
  final Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    final DBIDs members = clusters.get(c);
    if (!members.isEmpty()) {
      result.addToplevelCluster(new Cluster<>(members, new KMeansModel(means.get(c), varsum[c])));
    }
  }
  return result;
}
Aggregations