use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.
the class DiSH method buildHierarchy.
/**
 * Builds the cluster hierarchy by registering, for every cluster, the clusters
 * later in the list that qualify as its parents.
 * <p>
 * The "level" of a cluster is computed here as
 * {@code dimensionality - subspace.dimensionality()}. A later cluster is only
 * considered as parent of an earlier one when its level is strictly larger.
 * A candidate whose subspace has dimensionality 0 (the noise level) becomes a
 * parent only if the child has no parent yet. Any other candidate must agree
 * with the child on the combined subspace dimensionality, must lie within
 * {@code 2 * epsilon} (weighted distance of the projected centroids), and must
 * not be rejected by {@code isParent} against the already registered parents.
 *
 * @param database the database containing the data objects
 * @param clustering Clustering we process; receives the parent/child links
 * @param clusters the sorted list of clusters
 * @param dimensionality the dimensionality of the data
 * @throws RuntimeException if a candidate matches on subspace dimensionality
 *         but its weighted distance is not within {@code 2 * epsilon}
 */
private void buildHierarchy(Relation<V> database, Clustering<SubspaceModel> clustering, List<Cluster<SubspaceModel>> clusters, int dimensionality) {
  final StringBuilder trace = LOG.isDebugging() ? new StringBuilder() : null;
  final int dbDim = RelationUtil.dimensionality(database);
  final Hierarchy<Cluster<SubspaceModel>> hierarchy = clustering.getClusterHierarchy();

  for (int i = 0; i < clusters.size() - 1; i++) {
    final Cluster<SubspaceModel> child = clusters.get(i);
    final Subspace childSubspace = child.getModel().getSubspace();
    final int childLevel = dimensionality - childSubspace.dimensionality();
    final NumberVector childCentroid = ProjectedCentroid.make(childSubspace.getDimensions(), database, child.getIDs());
    final long[] childPv = childSubspace.getDimensions();

    for (int j = i + 1; j < clusters.size(); j++) {
      final Cluster<SubspaceModel> candidate = clusters.get(j);
      final Subspace candidateSubspace = candidate.getModel().getSubspace();
      final int candidateLevel = dimensionality - candidateSubspace.dimensionality();

      // Only a cluster on a strictly higher level can act as parent.
      if (childLevel >= candidateLevel) {
        continue;
      }
      if (trace != null) {
        trace.append("\n l_i=").append(childLevel);
        trace.append(" pv_i=[").append(BitsUtil.toStringLow(childSubspace.getDimensions(), dbDim)).append(']');
        trace.append("\n l_j=").append(candidateLevel);
        trace.append(" pv_j=[").append(BitsUtil.toStringLow(candidateSubspace.getDimensions(), dbDim)).append(']');
      }

      final boolean attach;
      if (candidateSubspace.dimensionality() == 0) {
        // Noise level reached: noise is the parent only if nothing else is.
        attach = hierarchy.numParents(child) == 0;
      }
      else {
        final NumberVector candidateCentroid = ProjectedCentroid.make(candidate.getModel().getDimensions(), database, candidate.getIDs());
        final long[] candidatePv = candidateSubspace.getDimensions();
        final long[] sharedPv = BitsUtil.andCMin(childPv, candidatePv);
        final int jointDim = subspaceDimensionality(childCentroid, candidateCentroid, childPv, candidatePv, sharedPv);
        final double dist = weightedDistance(childCentroid, candidateCentroid, sharedPv);
        if (trace != null) {
          trace.append("\n dist = ").append(jointDim);
        }
        if (jointDim != candidateLevel) {
          continue;
        }
        if (trace != null) {
          trace.append("\n d = ").append(dist);
        }
        // Negated "<=" (rather than ">") so that NaN also takes this path.
        if (!(dist <= 2 * epsilon)) {
          throw new RuntimeException("Should never happen: d = " + dist);
        }
        // Attach when there is no parent yet, or isParent() does not report
        // the candidate against the already existing parents.
        attach = hierarchy.numParents(child) == 0 || !isParent(database, candidate, hierarchy.iterParents(child), dbDim);
      }

      if (!attach) {
        continue;
      }
      clustering.addChildCluster(candidate, child);
      if (trace != null) {
        trace.append("\n [").append(BitsUtil.toStringLow(candidateSubspace.getDimensions(), dbDim));
        trace.append("] is parent of [").append(BitsUtil.toStringLow(childSubspace.getDimensions(), dbDim));
        trace.append(']');
      }
    }
  }

  if (trace != null) {
    LOG.debug(trace.toString());
  }
}
use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.
the class SUBCLU method run.
/**
 * Performs the SUBCLU algorithm on the given database.
 * <p>
 * First, clusters are searched in every 1-dimensional subspace via
 * {@code runDBSCAN}. Then, bottom-up, candidate subspaces of the next higher
 * dimensionality are generated from the subspaces that contained clusters, and
 * {@code runDBSCAN} is re-run on the candidate restricted to the objects of
 * each cluster of the candidate's best lower-dimensional subspace. Finally all
 * clusters found are added as top-level clusters, iterating the subspaces in
 * descending order of the {@code Subspace.DimensionComparator}.
 * <p>
 * The clustering is also stored in the {@code result} member, which is
 * declared outside of this method.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;

  // Generate all 1-dimensional clusters
  LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");

  // mapping of dimensionality to set of subspaces; note that key d holds the
  // (d+1)-dimensional subspaces that contain clusters
  HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();

  // list of 1-dimensional subspaces containing clusters
  List<Subspace> s_1 = new ArrayList<>();
  subspaceMap.put(0, s_1);

  // mapping of subspaces to list of clusters
  TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());

  for (int d = 0; d < dimensionality; d++) {
    Subspace currentSubspace = new Subspace(d);
    List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);

    if (LOG.isDebuggingFiner()) {
      StringBuilder msg = new StringBuilder();
      msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
      for (Cluster<Model> cluster : clusters) {
        msg.append(" ").append(cluster.getIDs()).append("\n");
      }
      LOG.debugFiner(msg.toString());
    }

    // only subspaces that actually contain clusters are refined further
    if (!clusters.isEmpty()) {
      s_1.add(currentSubspace);
      clusterMap.put(currentSubspace, clusters);
    }
  }

  // Generate (d+1)-dimensional clusters from d-dimensional clusters
  for (int d = 0; d < dimensionality - 1; d++) {
    if (stepprog != null) {
      stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
    }

    List<Subspace> subspaces = subspaceMap.get(d);
    if (subspaces == null || subspaces.isEmpty()) {
      // Nothing to build on: flag all remaining steps as skipped and stop.
      // The missing subspaces are the (d+1)-dimensional ones (key d).
      if (stepprog != null) {
        for (int dim = d + 1; dim < dimensionality - 1; dim++) {
          stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 1) + "-dimensional subspaces found.", LOG);
        }
      }
      break;
    }

    List<Subspace> candidates = generateSubspaceCandidates(subspaces);
    List<Subspace> s_d = new ArrayList<>();

    for (Subspace candidate : candidates) {
      Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
      if (LOG.isDebuggingFine()) {
        LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
      }

      // re-cluster every cluster of the best subspace within the candidate
      List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
      List<Cluster<Model>> clusters = new ArrayList<>();
      for (Cluster<Model> cluster : bestSubspaceClusters) {
        List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate);
        if (!candidateClusters.isEmpty()) {
          clusters.addAll(candidateClusters);
        }
      }

      if (LOG.isDebuggingFine()) {
        StringBuilder msg = new StringBuilder();
        msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
        for (Cluster<Model> c : clusters) {
          msg.append(" ").append(c.getIDs()).append("\n");
        }
        LOG.debugFine(msg.toString());
      }

      if (!clusters.isEmpty()) {
        s_d.add(candidate);
        clusterMap.put(candidate, clusters);
      }
    }

    if (!s_d.isEmpty()) {
      subspaceMap.put(d + 1, s_d);
    }
  }

  // build result
  int numClusters = 1;
  result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
  for (Subspace subspace : clusterMap.descendingKeySet()) {
    List<Cluster<Model>> clusters = clusterMap.get(subspace);
    for (Cluster<Model> cluster : clusters) {
      Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
      newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
      newCluster.setName("cluster_" + numClusters++);
      result.addToplevelCluster(newCluster);
    }
  }

  LOG.setCompleted(stepprog);
  return result;
}
use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.
the class CLIQUETest method testCLIQUESubspaceOverlappingPrune.
/**
 * Regression test: runs CLIQUE with fixed parameters (tau 0.2, xsi 6) and the
 * prune flag set, then compares the resulting cluster sizes to a golden
 * standard.
 */
@Test
public void testCLIQUESubspaceOverlappingPrune() {
  final Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850);
  final CLIQUE<DoubleVector> clique = new ELKIBuilder<CLIQUE<DoubleVector>>(CLIQUE.class) //
      .with(CLIQUE.Parameterizer.TAU_ID, 0.2) //
      .with(CLIQUE.Parameterizer.XSI_ID, 6) //
      .with(CLIQUE.Parameterizer.PRUNE_ID) //
      .build();
  final Clustering<SubspaceModel> result = clique.run(db);

  // The clusters overlap, so pair counting is not appropriate here and the
  // F-measure check stays disabled:
  // testFMeasure(db, result, 0.433661);
  final int[] expectedSizes = { 255, 409, 458, 458, 480 };
  testClusterSizes(result, expectedSizes);
}
use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.
the class CLIQUETest method testCLIQUESubspaceOverlapping.
/**
 * Regression test: runs CLIQUE with fixed parameters (tau 0.2, xsi 6) and
 * compares the resulting cluster sizes to a golden standard.
 */
@Test
public void testCLIQUESubspaceOverlapping() {
  final Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850);
  final CLIQUE<DoubleVector> clique = new ELKIBuilder<CLIQUE<DoubleVector>>(CLIQUE.class) //
      .with(CLIQUE.Parameterizer.TAU_ID, 0.2) //
      .with(CLIQUE.Parameterizer.XSI_ID, 6) //
      .build();
  final Clustering<SubspaceModel> result = clique.run(db);

  // The clusters overlap, so pair counting is not appropriate here and the
  // F-measure check stays disabled:
  // testFMeasure(db, result, 0.433661);
  final int[] expectedSizes = { 255, 409, 458, 458, 480 };
  testClusterSizes(result, expectedSizes);
}
use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.
the class DiSHTest method testDiSHResults.
/**
 * Regression test: runs DiSH with fixed parameters (epsilon 0.005, mu 50) and
 * compares both the F-measure and the cluster sizes to a golden standard.
 */
@Test
public void testDiSHResults() {
  final Database db = makeSimpleDatabase(UNITTEST + "subspace-hierarchy.csv", 450);
  final DiSH<DoubleVector> dish = new ELKIBuilder<DiSH<DoubleVector>>(DiSH.class) //
      .with(DiSH.Parameterizer.EPSILON_ID, 0.005) //
      .with(DiSH.Parameterizer.MU_ID, 50) //
      .build();
  final Clustering<SubspaceModel> result = dish.run(db);

  testFMeasure(db, result, .99516369);
  final int[] expectedSizes = { 50, 199, 201 };
  testClusterSizes(result, expectedSizes);
}
Aggregations