Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
Class DOC, method makeCluster.
/**
 * Builds a subspace cluster from a set of member objects and the attributes
 * that are relevant for them.
 *
 * @param relation the relation used to compute the centroid of the members.
 * @param C the cluster points.
 * @param D the relevant dimensions.
 * @return a cluster whose model holds the subspace and the member centroid.
 */
protected Cluster<SubspaceModel> makeCluster(Relation<V> relation, DBIDs C, long[] D) {
  // Work on a private hash-set copy of the members; copying also discards
  // any distance values that may be attached to the incoming ids.
  final DBIDs members = DBIDUtil.newHashSet(C);
  final SubspaceModel model = new SubspaceModel(new Subspace(D), Centroid.make(relation, members).getArrayRef());
  final Cluster<SubspaceModel> result = new Cluster<>(members);
  result.setModel(model);
  return result;
}
Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
Class PROCLUS, method run.
/**
 * Runs the PROCLUS algorithm on the given relation.
 * <p>
 * The run consists of three phases: an initialization phase (random sampling
 * followed by a greedy selection of medoid candidates), an iterative phase
 * that keeps replacing bad medoids until the round counter - which is reset
 * whenever the objective improves - reaches 10, and a refinement phase that
 * recomputes the dimensions and performs the final assignment.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return the clustering, containing one top-level cluster (named
 *         {@code cluster_1}, {@code cluster_2}, ...) per final PROCLUS cluster
 * @throws IllegalStateException if the dimensionality of the data is smaller
 *         than the parameter {@code l}
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  final int dataDim = RelationUtil.dimensionality(relation);
  if (dataDim < l) {
    throw new IllegalStateException("Dimensionality of data < parameter l! (" + dataDim + " < " + l + ")");
  }
  final DistanceQuery<V> distFunc = database.getDistanceQuery(relation, SquaredEuclideanDistanceFunction.STATIC);
  final RangeQuery<V> rangeQuery = database.getRangeQuery(distFunc);
  final Random random = rnd.getSingleThreadedRandom();

  // ---- Phase 1: initialization ----
  if (LOG.isVerbose()) {
    LOG.verbose("1. Initialization phase...");
  }
  final int relationSize = relation.size();
  // Neither the sample nor the candidate set can exceed the data set size.
  final int sampleSize = Math.min(relationSize, k_i * k);
  final DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
  final int medoidSize = Math.min(relationSize, m_i * k);
  final ArrayDBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("sampleSize " + sampleSize + '\n' + "sampleSet " + sampleSet + '\n' + "medoidSize " + medoidSize + '\n' + "m " + medoids);
  }

  // ---- Phase 2: iterative improvement of the medoid set ----
  if (LOG.isVerbose()) {
    LOG.verbose("2. Iterative phase...");
  }
  double bestObjective = Double.POSITIVE_INFINITY;
  ArrayDBIDs bestMedoids = null;
  DBIDs badMedoids = null;
  ArrayDBIDs currentMedoids = initialSet(medoids, k, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("m_c " + currentMedoids);
  }
  IndefiniteProgress cprogress = null;
  if (LOG.isVerbose()) {
    cprogress = new IndefiniteProgress("Current number of clusters:", LOG);
  }
  ArrayList<PROCLUSCluster> clusters = null;
  int roundsSinceImprovement = 0;
  while (roundsSinceImprovement < 10) {
    final long[][] medoidDims = findDimensions(currentMedoids, relation, distFunc, rangeQuery);
    clusters = assignPoints(currentMedoids, medoidDims, relation);
    final double objective = evaluateClusters(clusters, medoidDims, relation);
    if (objective < bestObjective) {
      // Improvement found: remember it and start counting rounds afresh.
      roundsSinceImprovement = 0;
      bestObjective = objective;
      bestMedoids = currentMedoids;
      badMedoids = computeBadMedoids(currentMedoids, clusters, (int) (relation.size() * 0.1 / k));
    }
    // The next candidate set is always derived from the best set seen so far.
    currentMedoids = computeM_current(medoids, bestMedoids, badMedoids, random);
    roundsSinceImprovement++;
    if (cprogress != null) {
      cprogress.setProcessed(clusters.size(), LOG);
    }
  }
  LOG.setCompleted(cprogress);

  // ---- Phase 3: refinement ----
  if (LOG.isVerbose()) {
    LOG.verbose("3. Refinement phase...");
  }
  final List<Pair<double[], long[]>> refinedDims = findDimensions(clusters, relation);
  final List<PROCLUSCluster> finalClusters = finalAssignment(refinedDims, relation);

  // ---- Assemble the result; clusters are numbered starting at 1 ----
  final Clustering<SubspaceModel> result = new Clustering<>("ProClus clustering", "proclus-clustering");
  int clusterNo = 0;
  for (PROCLUSCluster pc : finalClusters) {
    clusterNo++;
    final Cluster<SubspaceModel> cluster = new Cluster<>(pc.objectIDs);
    cluster.setModel(new SubspaceModel(new Subspace(pc.getDimensions()), pc.centroid));
    cluster.setName("cluster_" + clusterNo);
    result.addToplevelCluster(cluster);
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
Class SUBCLU, method bestSubspace.
/**
 * Determines the {@code d}-dimensional subspace of the {@code (d+1)}
 * -dimensional candidate with minimal number of objects in the cluster.
 * <p>
 * Only subspaces for which {@code subspace.isSubspace(candidate)} holds are
 * considered. Among these, the subspace owning the smallest single cluster
 * is returned; on ties the first one encountered wins.
 *
 * @param subspaces the list of {@code d}-dimensional subspaces containing
 *        clusters
 * @param candidate the {@code (d+1)}-dimensional candidate subspace
 * @param clusterMap the mapping of subspaces to clusters
 * @return the {@code d}-dimensional subspace of the {@code (d+1)}
 *         -dimensional candidate with minimal number of objects in the
 *         cluster, or {@code null} if no considered subspace has any cluster
 */
private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
  Subspace bestSubspace = null;
  // The minimum must be tracked across ALL subspaces. Resetting it for each
  // subspace would make every non-empty matching subspace overwrite the
  // previous winner, so the last match would be returned instead of the
  // smallest one.
  int min = Integer.MAX_VALUE;
  for (Subspace subspace : subspaces) {
    if (!subspace.isSubspace(candidate)) {
      continue;
    }
    for (Cluster<Model> cluster : clusterMap.get(subspace)) {
      final int clusterSize = cluster.size();
      if (clusterSize < min) {
        min = clusterSize;
        bestSubspace = subspace;
      }
    }
  }
  return bestSubspace;
}
Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
Class CLIQUE, method run.
/**
 * Runs the CLIQUE algorithm on the given relation.
 * <p>
 * Step 1 collects the dense subspaces level by level, starting with the
 * one-dimensional ones and stopping as soon as a level yields no dense
 * subspace or the data dimensionality is reached. Step 2 determines the
 * clusters within the dense subspaces of every level and adds each of them
 * as a top-level cluster to the result.
 *
 * @param relation Data relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  final StepProgress step = new StepProgress(2);

  // Step 1: find the subspaces that contain clusters.
  step.beginStep(1, "Identification of subspaces that contain clusters", LOG);
  // Entry i holds the dense subspaces of dimensionality i + 1.
  final ArrayList<List<CLIQUESubspace<V>>> denseByLevel = new ArrayList<>(dimensionality);
  List<CLIQUESubspace<V>> currentLevel = findOneDimensionalDenseSubspaces(relation);
  denseByLevel.add(currentLevel);
  if (LOG.isVerbose()) {
    LOG.verbose("1-dimensional dense subspaces: " + currentLevel.size());
  }
  if (LOG.isDebugging()) {
    for (CLIQUESubspace<V> dense : currentLevel) {
      LOG.debug(dense.toString(" "));
    }
  }
  // Grow from the previous level until nothing dense is left.
  int subspaceDim = 2;
  while (subspaceDim <= dimensionality && !currentLevel.isEmpty()) {
    currentLevel = findDenseSubspaces(relation, currentLevel);
    assert (denseByLevel.size() == subspaceDim - 1);
    denseByLevel.add(currentLevel);
    if (LOG.isVerbose()) {
      LOG.verbose(subspaceDim + "-dimensional dense subspaces: " + currentLevel.size());
    }
    if (LOG.isDebugging()) {
      for (CLIQUESubspace<V> dense : currentLevel) {
        LOG.debug(dense.toString(" "));
      }
    }
    subspaceDim++;
  }

  // Step 2: find the clusters inside the dense subspaces and build the result.
  step.beginStep(2, "Identification of clusters", LOG);
  final Clustering<SubspaceModel> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
  int clusterDim = 0;
  for (List<CLIQUESubspace<V>> levelSubspaces : denseByLevel) {
    clusterDim++;
    final List<Pair<Subspace, ModifiableDBIDs>> found = determineClusters(levelSubspaces);
    if (LOG.isVerbose()) {
      LOG.verbose(clusterDim + "-dimensional clusters: " + found.size());
    }
    for (Pair<Subspace, ModifiableDBIDs> entry : found) {
      final Cluster<SubspaceModel> cluster = new Cluster<>(entry.second);
      cluster.setModel(new SubspaceModel(entry.first, Centroid.make(relation, entry.second).getArrayRef()));
      result.addToplevelCluster(cluster);
    }
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
Class ByLabelOrAllInOneClustering, method run.
/**
 * Clusters the database by label if a label relation is available, and
 * otherwise puts all objects into one single cluster.
 * <p>
 * The relations are tried in this order: class labels, guessed labels. Only
 * if neither is available is the trivial all-in-one clustering built.
 *
 * @param database Database to process
 * @return the label-based clustering, or the trivial all-in-one clustering
 */
@Override
public Clustering<Model> run(Database database) {
  // First choice: a true class label relation.
  try {
    Relation<ClassLabel> classLabels = database.getRelation(TypeUtil.CLASSLABEL);
    return run(classLabels);
  } catch (NoSupportedDataTypeException ignored) {
    // Not available (or not usable) - fall through to the next option.
  }
  // Second choice: a relation whose labels were only guessed.
  try {
    Relation<ClassLabel> guessedLabels = database.getRelation(TypeUtil.GUESSED_LABEL);
    return run(guessedLabels);
  } catch (NoSupportedDataTypeException ignored) {
    // Not available either - fall through to the trivial clustering.
  }
  // Fallback: a single cluster containing every object of the database.
  final DBIDs allIds = database.getRelation(TypeUtil.ANY).getDBIDs();
  final Clustering<Model> trivial = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
  trivial.addToplevelCluster(new Cluster<Model>(allIds, ClusterModel.CLUSTER));
  return trivial;
}
Aggregations