Search in sources :

Example 11 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class SUBCLU method generateSubspaceCandidates.

/**
 * Builds the candidate subspaces of the next higher dimensionality by joining
 * every unordered pair of the given {@code d}-dimensional subspaces.
 * <p>
 * A successfully joined subspace is kept only if every subspace returned by
 * {@code lowerSubspaces} for it is contained in the given list; otherwise it
 * is pruned.
 *
 * @param subspaces the {@code d}-dimensional subspaces
 * @return the {@code d+1}-dimensional subspace candidates; an empty list if
 *         {@code subspaces} is empty
 */
private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
    final List<Subspace> accepted = new ArrayList<>();
    if (subspaces.isEmpty()) {
        return accepted;
    }

    // Dimensionality of the input is read from its first element.
    final int d = subspaces.get(0).dimensionality();
    final boolean finer = LOG.isDebuggingFiner();
    final StringBuilder trace = new StringBuilder("\n");
    if (finer) {
        trace.append("subspaces ").append(subspaces).append('\n');
    }

    final int count = subspaces.size();
    for (int first = 0; first < count; first++) {
        final Subspace left = subspaces.get(first);
        for (int second = first + 1; second < count; second++) {
            final Subspace joined = left.join(subspaces.get(second));
            if (joined == null) {
                // this pair could not be joined
                continue;
            }
            if (finer) {
                trace.append("candidate: ").append(joined.dimensonsToString()).append('\n');
            }

            // prune irrelevant candidate subspaces
            final List<Subspace> lower = lowerSubspaces(joined);
            if (finer) {
                trace.append("lowerSubspaces: ").append(lower).append('\n');
            }
            boolean keep = true;
            for (Subspace sub : lower) {
                if (!subspaces.contains(sub)) {
                    keep = false;
                    break;
                }
            }
            if (keep) {
                accepted.add(joined);
            }
        }
    }

    if (finer) {
        LOG.debugFiner(trace.toString());
    }
    if (LOG.isDebugging()) {
        final StringBuilder summary = new StringBuilder().append(d + 1).append("-dimensional candidate subspaces: ");
        for (Subspace s : accepted) {
            summary.append(s.dimensonsToString()).append(' ');
        }
        LOG.debug(summary.toString());
    }
    return accepted;
}
Also used : Subspace(de.lmu.ifi.dbs.elki.data.Subspace) ArrayList(java.util.ArrayList)

Example 12 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class SUBCLU method run.

/**
 * Performs the SUBCLU algorithm on the given database.
 * <p>
 * First, {@code runDBSCAN} is invoked once per single dimension of the
 * relation; every 1-dimensional subspace that yields at least one cluster is
 * recorded. Then, level by level, candidate subspaces of the next higher
 * dimensionality are produced by {@code generateSubspaceCandidates}, and for
 * each candidate {@code runDBSCAN} is invoked on the ids of every cluster of
 * the subspace chosen by {@code bestSubspace}. The loop stops early as soon as
 * a level has no subspaces containing clusters.
 * <p>
 * The resulting clustering is also stored in {@code result}, which is not
 * declared inside this method.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    // Progress is only tracked when verbose logging is enabled.
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
    // Generate all 1-dimensional clusters
    LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
    // mapping of dimensionality to set of subspaces; the key is zero-based,
    // i.e. key d holds the (d+1)-dimensional subspaces
    HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
    // list of 1-dimensional subspaces containing clusters
    List<Subspace> s_1 = new ArrayList<>();
    subspaceMap.put(0, s_1);
    // mapping of subspaces to list of clusters
    TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
    for (int d = 0; d < dimensionality; d++) {
        Subspace currentSubspace = new Subspace(d);
        // null ids: presumably means "all objects of the relation" -- confirm in runDBSCAN
        List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
        if (LOG.isDebuggingFiner()) {
            StringBuilder msg = new StringBuilder();
            msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
            for (Cluster<Model> cluster : clusters) {
                msg.append("      ").append(cluster.getIDs()).append("\n");
            }
            LOG.debugFiner(msg.toString());
        }
        if (!clusters.isEmpty()) {
            s_1.add(currentSubspace);
            clusterMap.put(currentSubspace, clusters);
        }
    }
    // Generate (d+1)-dimensional clusters from d-dimensional clusters
    for (int d = 0; d < dimensionality - 1; d++) {
        if (stepprog != null) {
            stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
        }
        List<Subspace> subspaces = subspaceMap.get(d);
        if (subspaces == null || subspaces.isEmpty()) {
            // Nothing to extend: mark all remaining steps as not applicable
            // so the step progress still advances through every step.
            if (stepprog != null) {
                for (int dim = d + 1; dim < dimensionality - 1; dim++) {
                    stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
                }
            }
            break;
        }
        List<Subspace> candidates = generateSubspaceCandidates(subspaces);
        // candidate subspaces of this level that actually contain clusters
        List<Subspace> s_d = new ArrayList<>();
        for (Subspace candidate : candidates) {
            Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
            if (LOG.isDebuggingFine()) {
                LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
            }
            // Only the members of the clusters of the best subspace are
            // re-clustered in the candidate subspace.
            List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
            List<Cluster<Model>> clusters = new ArrayList<>();
            for (Cluster<Model> cluster : bestSubspaceClusters) {
                clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
            }
            if (LOG.isDebuggingFine()) {
                StringBuilder msg = new StringBuilder();
                msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
                for (Cluster<Model> c : clusters) {
                    msg.append("      ").append(c.getIDs()).append("\n");
                }
                LOG.debugFine(msg.toString());
            }
            if (!clusters.isEmpty()) {
                s_d.add(candidate);
                clusterMap.put(candidate, clusters);
            }
        }
        // An absent entry makes the next iteration terminate the loop.
        if (!s_d.isEmpty()) {
            subspaceMap.put(d + 1, s_d);
        }
    }
    // build result; subspaces are visited in descending order of the
    // comparator the cluster map was created with
    int numClusters = 1;
    result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
    for (Subspace subspace : clusterMap.descendingKeySet()) {
        List<Cluster<Model>> clusters = clusterMap.get(subspace);
        for (Cluster<Model> cluster : clusters) {
            Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
            newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
            newCluster.setName("cluster_" + numClusters++);
            result.addToplevelCluster(newCluster);
        }
    }
    LOG.setCompleted(stepprog);
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) TreeMap(java.util.TreeMap) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) List(java.util.List)

Example 13 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class CLIQUESubspace method determineClusters.

/**
 * Determines all clusters in this subspace. Every dense unit that is not yet
 * assigned starts a new cluster, which is then filled by a depth-first search
 * ({@code dfs}) for connected dense units.
 *
 * @return the clusters in this subspace and the corresponding cluster models
 */
public List<Pair<Subspace, ModifiableDBIDs>> determineClusters() {
    final List<Pair<Subspace, ModifiableDBIDs>> found = new ArrayList<>();
    for (CLIQUEUnit<V> seed : getDenseUnits()) {
        if (seed.isAssigned()) {
            // nothing to start from this unit
            continue;
        }
        final ModifiableDBIDs members = DBIDUtil.newHashSet();
        final CLIQUESubspace<V> clusterModel = new CLIQUESubspace<>(getDimensions());
        final Pair<Subspace, ModifiableDBIDs> entry = new Pair<Subspace, ModifiableDBIDs>(clusterModel, members);
        // The pair is registered first; dfs receives the same ids and model objects.
        found.add(entry);
        dfs(seed, members, clusterModel);
    }
    return found;
}
Also used : ArrayList(java.util.ArrayList) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Aggregations

Subspace (de.lmu.ifi.dbs.elki.data.Subspace)13 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)7 ArrayList (java.util.ArrayList)7 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)3 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)3 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 Model (de.lmu.ifi.dbs.elki.data.model.Model)2 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)2 List (java.util.List)2 AbstractProjectedClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering)1 MultivariateGaussianModel (de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModel)1 CLIQUESubspace (de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace)1 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)1 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)1