use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.
the class PROCLUS method run.
/**
 * Performs the PROCLUS algorithm on the given database.
 *
 * The run consists of three logged phases: an initialization phase that
 * draws a random sample and selects candidate medoids from it via
 * {@code greedy}, an iterative phase that repeatedly evaluates medoid sets
 * and stops after 10 consecutive iterations without improving the objective,
 * and a refinement phase that computes the final assignment.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return the clustering, with one top-level cluster (named
 *         {@code cluster_1}, {@code cluster_2}, ...) per final cluster
 * @throws IllegalStateException if the dimensionality of the relation is
 *         smaller than the parameter {@code l}
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  final int dataDim = RelationUtil.dimensionality(relation);
  if (dataDim < l) {
    throw new IllegalStateException("Dimensionality of data < parameter l! (" + dataDim + " < " + l + ")");
  }

  final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, SquaredEuclideanDistanceFunction.STATIC);
  final RangeQuery<V> rangeQuery = database.getRangeQuery(distQuery);
  final Random random = rnd.getSingleThreadedRandom();

  // ---- Phase 1: initialization ----
  if (LOG.isVerbose()) {
    LOG.verbose("1. Initialization phase...");
  }
  final int numObjects = relation.size();
  // Neither the sample nor the medoid candidate set may exceed the data size.
  final int sampleSize = Math.min(numObjects, k_i * k);
  final DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
  final int medoidSize = Math.min(numObjects, m_i * k);
  final ArrayDBIDs medoids = greedy(distQuery, sampleSet, medoidSize, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("sampleSize " + sampleSize + '\n'
        + "sampleSet " + sampleSet + '\n'
        + "medoidSize " + medoidSize + '\n'
        + "m " + medoids);
  }

  // ---- Phase 2: iterative search for the best medoid set ----
  if (LOG.isVerbose()) {
    LOG.verbose("2. Iterative phase...");
  }
  double bestObjective = Double.POSITIVE_INFINITY;
  ArrayDBIDs bestMedoids = null;
  DBIDs badMedoids = null;
  ArrayDBIDs currentMedoids = initialSet(medoids, k, random);
  if (LOG.isDebugging()) {
    LOG.debugFine("m_c " + currentMedoids);
  }
  IndefiniteProgress cprogress = null;
  if (LOG.isVerbose()) {
    cprogress = new IndefiniteProgress("Current number of clusters:", LOG);
  }
  ArrayList<PROCLUSCluster> clusters = null;
  // "stalled" counts consecutive iterations without improvement; it is reset
  // to 0 on every improvement (and then incremented by the loop header).
  for (int stalled = 0; stalled < 10; stalled++) {
    final long[][] medoidDims = findDimensions(currentMedoids, relation, distQuery, rangeQuery);
    clusters = assignPoints(currentMedoids, medoidDims, relation);
    final double objective = evaluateClusters(clusters, medoidDims, relation);
    if (objective < bestObjective) {
      stalled = 0;
      bestObjective = objective;
      bestMedoids = currentMedoids;
      badMedoids = computeBadMedoids(currentMedoids, clusters, (int) (numObjects * 0.1 / k));
    }
    currentMedoids = computeM_current(medoids, bestMedoids, badMedoids, random);
    if (cprogress != null) {
      cprogress.setProcessed(clusters.size(), LOG);
    }
  }
  LOG.setCompleted(cprogress);

  // ---- Phase 3: refinement ----
  if (LOG.isVerbose()) {
    LOG.verbose("3. Refinement phase...");
  }
  final List<Pair<double[], long[]>> refinedDims = findDimensions(clusters, relation);
  final List<PROCLUSCluster> finalClusters = finalAssignment(refinedDims, relation);

  // ---- Build the result; cluster names are numbered starting at 1 ----
  final Clustering<SubspaceModel> result = new Clustering<>("ProClus clustering", "proclus-clustering");
  int clusterNo = 0;
  for (PROCLUSCluster pc : finalClusters) {
    clusterNo++;
    final Cluster<SubspaceModel> cluster = new Cluster<>(pc.objectIDs);
    cluster.setModel(new SubspaceModel(new Subspace(pc.getDimensions()), pc.centroid));
    cluster.setName("cluster_" + clusterNo);
    result.addToplevelCluster(cluster);
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.
the class SUBCLU method lowerSubspaces.
/**
 * Enumerates all {@code (d-1)}-dimensional subspaces of the specified
 * {@code d}-dimensional subspace, each obtained by clearing exactly one of
 * the dimensions set in the input.
 *
 * @param subspace the {@code d}-dimensional subspace
 * @return a list of all {@code (d-1)}-dimensional subspaces, in the order in
 *         which {@code BitsUtil.nextSetBit} visits the dimensions of the
 *         input; {@code null} if the input has at most one dimension
 */
private List<Subspace> lowerSubspaces(Subspace subspace) {
  if (subspace.dimensionality() <= 1) {
    // Nothing below a 0- or 1-dimensional subspace; callers receive null.
    return null;
  }
  final long[] bits = subspace.getDimensions();
  final List<Subspace> lower = new ArrayList<>();
  int drop = BitsUtil.nextSetBit(bits, 0);
  while (drop >= 0) {
    // Work on a copy so the dimensions of the input subspace stay untouched.
    final long[] reduced = bits.clone();
    BitsUtil.clearI(reduced, drop);
    lower.add(new Subspace(reduced));
    drop = BitsUtil.nextSetBit(bits, drop + 1);
  }
  return lower;
}
use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.
the class SUBCLU method bestSubspace.
/**
 * Determines the {@code d}-dimensional subspace of the {@code (d+1)}
 * -dimensional candidate with minimal number of objects in the cluster.
 *
 * Only entries of {@code subspaces} for which
 * {@code subspace.isSubspace(candidate)} holds are considered. Among those,
 * the subspace owning the smallest cluster (by {@code Cluster.size()}) is
 * returned; on ties the first one encountered wins.
 *
 * NOTE(review): this compares individual cluster sizes, as the original code
 * did. The SUBCLU paper may intend the total number of objects over all
 * clusters of a subspace instead; confirm before changing the criterion.
 *
 * @param subspaces the list of {@code d}-dimensional subspaces containing
 *        clusters
 * @param candidate the {@code (d+1)}-dimensional candidate subspace
 * @param clusterMap the mapping of subspaces to clusters
 * @return the {@code d}-dimensional subspace of the {@code (d+1)}
 *         -dimensional candidate with minimal number of objects in the
 *         cluster, or {@code null} if no subspace qualifies
 */
private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
  Subspace bestSubspace = null;
  // The running minimum must survive across subspaces. Declaring it inside
  // the loop (as before) reset it for every subspace, so the last qualifying
  // subspace always won regardless of its cluster sizes.
  int min = Integer.MAX_VALUE;
  for (Subspace subspace : subspaces) {
    if (!subspace.isSubspace(candidate)) {
      continue;
    }
    for (Cluster<Model> cluster : clusterMap.get(subspace)) {
      final int clusterSize = cluster.size();
      if (clusterSize < min) {
        min = clusterSize;
        bestSubspace = subspace;
      }
    }
  }
  return bestSubspace;
}
use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.
the class CLIQUE method run.
/**
 * Performs the CLIQUE algorithm on the given database.
 *
 * Step 1 collects the dense subspaces level by level: the 1-dimensional
 * ones first, then each next level is derived from the previous one via
 * {@code findDenseSubspaces} until the data dimensionality is reached or a
 * level is empty. Step 2 calls {@code determineClusters} for every level and
 * adds each resulting cluster as a top-level cluster, with a model built
 * from its subspace and the centroid of its members.
 *
 * @param relation Data relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress step = new StepProgress(2);

  // 1. Identification of subspaces that contain clusters
  step.beginStep(1, "Identification of subspaces that contain clusters", LOG);
  // Index (k - 1) holds the dense subspaces of dimensionality k.
  ArrayList<List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new ArrayList<>(dimensionality);
  List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
  dimensionToDenseSubspaces.add(denseSubspaces);
  if (LOG.isVerbose()) {
    LOG.verbose("1-dimensional dense subspaces: " + denseSubspaces.size());
  }
  if (LOG.isDebugging()) {
    for (CLIQUESubspace<V> s : denseSubspaces) {
      LOG.debug(s.toString(" "));
    }
  }

  // Stop early once a level has no dense subspaces: nothing to extend.
  for (int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
    denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
    assert (dimensionToDenseSubspaces.size() == k - 1);
    dimensionToDenseSubspaces.add(denseSubspaces);
    if (LOG.isVerbose()) {
      LOG.verbose(k + "-dimensional dense subspaces: " + denseSubspaces.size());
    }
    if (LOG.isDebugging()) {
      for (CLIQUESubspace<V> s : denseSubspaces) {
        LOG.debug(s.toString(" "));
      }
    }
  }

  // 2. Identification of clusters
  step.beginStep(2, "Identification of clusters", LOG);
  // build result
  Clustering<SubspaceModel> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
  for (int dim = 0; dim < dimensionToDenseSubspaces.size(); dim++) {
    List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
    List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
    if (LOG.isVerbose()) {
      LOG.verbose((dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
    }
    for (Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
      Cluster<SubspaceModel> newCluster = new Cluster<>(modelAndCluster.second);
      newCluster.setModel(new SubspaceModel(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).getArrayRef()));
      result.addToplevelCluster(newCluster);
    }
  }
  // Close the step progress; previously it was left unfinished after step 2
  // had begun, so the progress never reported completion.
  step.setCompleted(LOG);
  return result;
}
use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.
the class DiSH method buildHierarchy.
/**
 * Builds the cluster hierarchy.
 *
 * Every cluster {@code c_i} is compared against all clusters {@code c_j}
 * that follow it in the list. {@code c_j} is registered as parent of
 * {@code c_i} (via {@code clustering.addChildCluster(c_j, c_i)}) when its
 * value {@code dimensionality - subspace.dimensionality()} is strictly
 * larger than that of {@code c_i} and either
 * <ul>
 * <li>the subspace of {@code c_j} has dimensionality 0 and {@code c_i} has no
 * parent yet, or</li>
 * <li>the joint subspace dimensionality of both centroids equals the value
 * of {@code c_j}, their weighted distance is at most {@code 2 * epsilon}, and
 * {@code c_i} has no parent yet or {@code isParent} returns {@code false} for
 * {@code c_j} and the existing parents.</li>
 * </ul>
 * When debugging is enabled, the decisions are collected and logged once at
 * the end.
 *
 * @param database the database containing the data objects
 * @param clustering Clustering we process
 * @param clusters the sorted list of clusters
 * @param dimensionality the dimensionality of the data
 * @throws RuntimeException if the joint subspace dimensionality matches but
 *         the weighted distance is not {@code <= 2 * epsilon}
 */
private void buildHierarchy(Relation<V> database, Clustering<SubspaceModel> clustering, List<Cluster<SubspaceModel>> clusters, int dimensionality) {
  final StringBuilder msg;
  if (LOG.isDebugging()) {
    msg = new StringBuilder();
  } else {
    msg = null;
  }
  final int db_dim = RelationUtil.dimensionality(database);
  final Hierarchy<Cluster<SubspaceModel>> hier = clustering.getClusterHierarchy();

  for (int i = 0; i < clusters.size() - 1; i++) {
    final Cluster<SubspaceModel> child = clusters.get(i);
    final Subspace childSub = child.getModel().getSubspace();
    final int childLambda = dimensionality - childSub.dimensionality();
    final NumberVector childCentroid = ProjectedCentroid.make(childSub.getDimensions(), database, child.getIDs());
    final long[] childPV = childSub.getDimensions();

    for (int j = i + 1; j < clusters.size(); j++) {
      final Cluster<SubspaceModel> parent = clusters.get(j);
      final Subspace parentSub = parent.getModel().getSubspace();
      final int parentLambda = dimensionality - parentSub.dimensionality();
      // Only clusters with a strictly larger value can become parents.
      if (childLambda >= parentLambda) {
        continue;
      }
      if (msg != null) {
        msg.append("\n l_i=").append(childLambda);
        msg.append(" pv_i=[").append(BitsUtil.toStringLow(childSub.getDimensions(), db_dim)).append(']');
        msg.append("\n l_j=").append(parentLambda);
        msg.append(" pv_j=[").append(BitsUtil.toStringLow(parentSub.getDimensions(), db_dim)).append(']');
      }

      // noise level reached
      if (parentSub.dimensionality() == 0) {
        // no parents exists -> parent is noise
        if (hier.numParents(child) == 0) {
          clustering.addChildCluster(parent, child);
          if (msg != null) {
            msg.append("\n [").append(BitsUtil.toStringLow(parentSub.getDimensions(), db_dim)).append("] is parent of [");
            msg.append(BitsUtil.toStringLow(childSub.getDimensions(), db_dim)).append(']');
          }
        }
        continue;
      }

      final NumberVector parentCentroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs());
      final long[] parentPV = parentSub.getDimensions();
      final long[] commonPV = BitsUtil.andCMin(childPV, parentPV);
      final int subspaceDim = subspaceDimensionality(childCentroid, parentCentroid, childPV, parentPV, commonPV);
      final double d = weightedDistance(childCentroid, parentCentroid, commonPV);
      if (msg != null) {
        msg.append("\n dist = ").append(subspaceDim);
      }
      if (subspaceDim != parentLambda) {
        continue;
      }
      if (msg != null) {
        msg.append("\n d = ").append(d);
      }
      // Written as a negated "<=" so that a NaN distance also ends up here.
      if (!(d <= 2 * epsilon)) {
        throw new RuntimeException("Should never happen: d = " + d);
      }
      // Link only if there is no parent yet, or isParent reports false for
      // this cluster and the existing parents.
      if (hier.numParents(child) == 0 || !isParent(database, parent, hier.iterParents(child), db_dim)) {
        clustering.addChildCluster(parent, child);
        if (msg != null) {
          msg.append("\n [").append(BitsUtil.toStringLow(parentSub.getDimensions(), db_dim)).append("] is parent of [");
          msg.append(BitsUtil.toStringLow(childSub.getDimensions(), db_dim)).append(']');
        }
      }
    }
  }

  if (msg != null) {
    LOG.debug(msg.toString());
  }
}
Aggregations