use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class FastOPTICS method run.
/**
 * Run the algorithm.
 *
 * @param db Database, used to obtain the Euclidean distance query
 * @param rel Relation whose ids are ordered
 * @return the cluster order built over the ids of {@code rel}
 */
public ClusterOrder run(Database db, Relation<V> rel) {
  final DBIDs ids = rel.getDBIDs();
  final DistanceQuery<V> distQuery = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);

  // Reachability distances: every id starts at UNDEFINED_DISTANCE.
  reachDist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, UNDEFINED_DISTANCE);

  // Projections, density estimates and neighborhoods all come from the index.
  // The order of these three calls is kept as-is; the later getters are
  // presumably only meaningful after computeSetsBounds -- TODO confirm.
  index.computeSetsBounds(rel, minPts, ids);
  inverseDensities = index.computeAverageDistInSet();
  neighs = index.getNeighs();

  // Build the ordering as for OPTICS.
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("FastOPTICS clustering", ids.size(), LOG);
  }
  processed = DBIDUtil.newHashSet(ids.size());
  order = new ClusterOrder(ids, "FastOPTICS Cluster Order", "fast-optics");

  DBIDIter cursor = ids.iter();
  while (cursor.valid()) {
    // Only start an expansion from ids not yet contained in the processed set.
    if (!processed.contains(cursor)) {
      expandClusterOrder(DBIDUtil.deref(cursor), order, distQuery, progress);
    }
    cursor.advance();
  }

  index.logStatistics();
  LOG.ensureCompleted(progress);
  return order;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class KMeansBatchedLloyd method run.
/**
 * Runs the batched k-means iteration on the given relation.
 * <p>
 * The ids are randomly split into {@code blocks} parts. In each iteration
 * every part is passed to {@code assignToNearestCluster}, immediately followed
 * by {@code updateMeans}. The loop ends when a full pass over all parts
 * reports no change, or after {@code maxiter} iterations if {@code maxiter} is
 * positive.
 *
 * @param database Database, handed to the initializer
 * @param relation Relation to cluster
 * @return the clustering; clusters without members are omitted
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);

  // Choose initial means
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
  }
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // Setup cluster assignment store; each id set is presized to 2 * size / k.
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);

  // Scratch space, reset before every batch.
  double[][] meanshift = new double[k][dim];
  int[] changesize = new int[k];
  double[] varsum = new double[k];

  IndefiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new IndefiniteProgress("K-Means iteration", LOG);
  }
  DoubleStatistic varstat = null;
  if (LOG.isStatistics()) {
    varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
  }

  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(prog);
    boolean changed = false;
    FiniteProgress pprog = null;
    if (LOG.isVerbose()) {
      pprog = new FiniteProgress("Batch", parts.length, LOG);
    }
    for (ArrayDBIDs part : parts) {
      // Initialize new means scratch space.
      for (double[] shift : meanshift) {
        Arrays.fill(shift, 0.);
      }
      Arrays.fill(changesize, 0);
      // NOTE(review): varsum is zeroed per batch, not per iteration, so the
      // values logged and stored in the models below appear to stem from the
      // last batch only -- confirm this is intended.
      Arrays.fill(varsum, 0.);
      // The assignment must run for every part, whether or not an earlier
      // part already reported a change.
      if (assignToNearestCluster(relation, part, means, meanshift, changesize, clusters, assignment, varsum)) {
        changed = true;
      }
      // Recompute means.
      updateMeans(means, meanshift, clusters, changesize);
      LOG.incrementProcessed(pprog);
    }
    LOG.ensureCompleted(pprog);
    logVarstat(varstat, varsum);
    // Stop if no cluster assignment changed; the counter is deliberately not
    // incremented for this final pass.
    if (!changed) {
      break;
    }
    iteration++;
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Wrap result, leaving out empty clusters.
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    DBIDs members = clusters.get(c);
    if (members.size() != 0) {
      KMeansModel model = new KMeansModel(means[c], varsum[c]);
      result.addToplevelCluster(new Cluster<>(members, model));
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class FarthestSumPointsInitialMeans method chooseInitialMeans.
/**
 * Chooses {@code k} initial means: a random first object, then repeatedly the
 * object with the largest sum of distances to the means chosen so far.
 * <p>
 * When {@code dropfirst} is set, the random first object only seeds the search
 * for the first real mean and is then discarded.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation to choose the means from
 * @param k number of means to choose
 * @param distanceFunction distance function to use
 * @return the chosen means, converted by {@code unboxVectors}
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  // Get a distance query
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  DBIDs ids = relation.getDBIDs();
  // Running distance sums per object; NaN marks objects that are already chosen.
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Chose first mean
  List<T> means = new ArrayList<>(k);
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  T prevmean = relation.get(first);
  means.add(prevmean);
  if (!dropfirst) {
    // The first mean is kept, so it must be excluded from later rounds.
    // Otherwise its accumulated distance sum can become the maximum (on ties,
    // or with distances violating the triangle inequality such as squared
    // Euclidean) and the same object would be returned twice.
    store.putDouble(first, Double.NaN);
  }
  // Find farthest object each.
  DBIDVar best = DBIDUtil.newVar(first);
  for (int i = (dropfirst ? 0 : 1); i < k; i++) {
    double maxdist = Double.NEGATIVE_INFINITY;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final double prev = store.doubleValue(it);
      if (prev != prev) {
        // NaN: already chosen!
        continue;
      }
      double dsum = prev + distQ.distance(prevmean, it);
      // Don't store distance to first mean, when it will be dropped below.
      if (i > 0) {
        store.putDouble(it, dsum);
      }
      if (dsum > maxdist) {
        maxdist = dsum;
        best.set(it);
      }
    }
    // Add new mean (and drop the initial mean when desired)
    if (i == 0) {
      // Remove temporary first element.
      means.clear();
    }
    // So it won't be chosen twice.
    store.putDouble(best, Double.NaN);
    prevmean = relation.get(best);
    means.add(prevmean);
  }
  // Explicitly destroy temporary data.
  store.destroy();
  return unboxVectors(means);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class ByLabelClustering method singleAssignment.
/**
 * Assigns every object of the relation to a single cluster according to its
 * label. The label is the string form of the object's value in the relation,
 * or {@code null} when that value is {@code null}.
 *
 * @param data the relation storing the objects
 * @return a mapping of labels to ids
 */
private HashMap<String, DBIDs> singleAssignment(Relation<?> data) {
  final HashMap<String, DBIDs> byLabel = new HashMap<>();
  DBIDIter cursor = data.iterDBIDs();
  while (cursor.valid()) {
    final Object value = data.get(cursor);
    final String label;
    if (value == null) {
      label = null;
    } else {
      label = value.toString();
    }
    assign(byLabel, label, cursor);
    cursor.advance();
  }
  return byLabel;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class ByLabelOrAllInOneClustering method run.
/**
 * Clusters by label when a label relation is available, and otherwise puts
 * all objects into one trivial cluster.
 * <p>
 * Relations are tried in this order: {@code TypeUtil.CLASSLABEL}, then
 * {@code TypeUtil.GUESSED_LABEL}. A {@code NoSupportedDataTypeException} from
 * either attempt moves on to the next option.
 *
 * @param database Database to cluster
 * @return the label-based clustering, or a single all-in-one cluster
 */
@Override
public Clustering<Model> run(Database database) {
  // Prefer a true class label
  try {
    final Relation<ClassLabel> classLabels = database.getRelation(TypeUtil.CLASSLABEL);
    return run(classLabels);
  } catch (NoSupportedDataTypeException ignored) {
    // Deliberately ignored: try the guessed label next.
  }
  try {
    final Relation<ClassLabel> guessedLabels = database.getRelation(TypeUtil.GUESSED_LABEL);
    return run(guessedLabels);
  } catch (NoSupportedDataTypeException ignored) {
    // Deliberately ignored: fall back to the trivial clustering below.
  }
  // Fallback: one top-level cluster holding the ids of an arbitrary relation.
  final DBIDs allIds = database.getRelation(TypeUtil.ANY).getDBIDs();
  final Clustering<Model> trivial = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
  // The explicit type argument is kept so the element type does not depend on
  // what the compiler would infer from ClusterModel.CLUSTER.
  final Cluster<Model> everything = new Cluster<Model>(allIds, ClusterModel.CLUSTER);
  trivial.addToplevelCluster(everything);
  return trivial;
}
Aggregations