Search in sources :

Example 11 with SubspaceModel

use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.

the class DiSH method buildHierarchy.

/**
 * Builds the cluster hierarchy by registering parent/child links between the
 * given clusters in the clustering.
 *
 * Every cluster is compared with all clusters that follow it in the list. A
 * later cluster is only considered as a parent if its value of
 * {@code dimensionality - subspace dimensionality} is strictly larger than
 * that of the earlier cluster.
 *
 * @param database the database containing the data objects
 * @param clustering Clustering we process
 * @param clusters the sorted list of clusters
 * @param dimensionality the dimensionality of the data
 */
private void buildHierarchy(Relation<V> database, Clustering<SubspaceModel> clustering, List<Cluster<SubspaceModel>> clusters, int dimensionality) {
    final StringBuilder trace = LOG.isDebugging() ? new StringBuilder() : null;
    final int dbDim = RelationUtil.dimensionality(database);
    final Hierarchy<Cluster<SubspaceModel>> hierarchy = clustering.getClusterHierarchy();

    for (int childIdx = 0; childIdx < clusters.size() - 1; childIdx++) {
        final Cluster<SubspaceModel> child = clusters.get(childIdx);
        final Subspace childSubspace = child.getModel().getSubspace();
        final int childLambda = dimensionality - childSubspace.dimensionality();
        final NumberVector childCentroid = ProjectedCentroid.make(childSubspace.getDimensions(), database, child.getIDs());
        final long[] childPV = childSubspace.getDimensions();

        for (int candIdx = childIdx + 1; candIdx < clusters.size(); candIdx++) {
            final Cluster<SubspaceModel> candidate = clusters.get(candIdx);
            final Subspace candSubspace = candidate.getModel().getSubspace();
            final int candLambda = dimensionality - candSubspace.dimensionality();

            // Only a cluster with a strictly larger value can become a parent.
            if (childLambda >= candLambda) {
                continue;
            }
            if (trace != null) {
                trace.append("\n l_i=").append(childLambda);
                trace.append(" pv_i=[").append(BitsUtil.toStringLow(childSubspace.getDimensions(), dbDim)).append(']');
                trace.append("\n l_j=").append(candLambda);
                trace.append(" pv_j=[").append(BitsUtil.toStringLow(candSubspace.getDimensions(), dbDim)).append(']');
            }

            // Noise level reached: the candidate has a 0-dimensional subspace.
            if (candSubspace.dimensionality() == 0) {
                // Noise only becomes the parent if no other parent exists yet.
                if (hierarchy.numParents(child) == 0) {
                    clustering.addChildCluster(candidate, child);
                    if (trace != null) {
                        appendParentRelation(trace, candSubspace, childSubspace, dbDim);
                    }
                }
                continue;
            }

            final NumberVector candCentroid = ProjectedCentroid.make(candidate.getModel().getDimensions(), database, candidate.getIDs());
            final long[] candPV = candSubspace.getDimensions();
            final long[] sharedPV = BitsUtil.andCMin(childPV, candPV);
            final int jointLambda = subspaceDimensionality(childCentroid, candCentroid, childPV, candPV, sharedPV);
            final double d = weightedDistance(childCentroid, candCentroid, sharedPV);
            if (trace != null) {
                trace.append("\n dist = ").append(jointLambda);
            }
            if (jointLambda != candLambda) {
                continue;
            }
            if (trace != null) {
                trace.append("\n d = ").append(d);
            }
            // Written as a negated "<=" so that a NaN distance is rejected as well.
            if (!(d <= 2 * epsilon)) {
                throw new RuntimeException("Should never happen: d = " + d);
            }
            // Link unless the existing parents already account for the candidate.
            final boolean orphan = hierarchy.numParents(child) == 0;
            if (orphan || !isParent(database, candidate, hierarchy.iterParents(child), dbDim)) {
                clustering.addChildCluster(candidate, child);
                if (trace != null) {
                    appendParentRelation(trace, candSubspace, childSubspace, dbDim);
                }
            }
        }
    }

    if (trace != null) {
        LOG.debug(trace.toString());
    }
}

/**
 * Appends a "[parent] is parent of [child]" line to the debug buffer.
 *
 * @param buf debug buffer to append to
 * @param parent subspace of the parent cluster
 * @param child subspace of the child cluster
 * @param dbDim dimensionality used for formatting the bit sets
 */
private static void appendParentRelation(StringBuilder buf, Subspace parent, Subspace child, int dbDim) {
    buf.append("\n [").append(BitsUtil.toStringLow(parent.getDimensions(), dbDim));
    buf.append("] is parent of [").append(BitsUtil.toStringLow(child.getDimensions(), dbDim));
    buf.append(']');
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Example 12 with SubspaceModel

use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.

the class SUBCLU method run.

/**
 * Performs the SUBCLU algorithm on the given database.
 *
 * First, clusters are searched in every 1-dimensional subspace. Then, level by
 * level, candidate subspaces of the next higher dimensionality are generated
 * from the subspaces that contained clusters, and each candidate is clustered
 * on the members of the clusters of its best lower-dimensional subspace. The
 * loop stops as soon as a level yields no subspaces. Finally, all clusters
 * found are added as top-level clusters, visiting the subspaces in descending
 * order of the {@code Subspace.DimensionComparator}.
 *
 * The clustering is also stored in {@code result}, which is declared outside
 * of this method.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
    // Generate all 1-dimensional clusters
    LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
    // mapping of dimensionality to set of subspaces
    // (key d holds the (d+1)-dimensional subspaces that contain clusters)
    HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
    // list of 1-dimensional subspaces containing clusters
    List<Subspace> s_1 = new ArrayList<>();
    subspaceMap.put(0, s_1);
    // mapping of subspaces to list of clusters
    TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
    for (int d = 0; d < dimensionality; d++) {
        Subspace currentSubspace = new Subspace(d);
        List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
        if (LOG.isDebuggingFiner()) {
            StringBuilder msg = new StringBuilder();
            msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
            for (Cluster<Model> cluster : clusters) {
                msg.append("      ").append(cluster.getIDs()).append('\n');
            }
            LOG.debugFiner(msg.toString());
        }
        if (!clusters.isEmpty()) {
            s_1.add(currentSubspace);
            clusterMap.put(currentSubspace, clusters);
        }
    }
    // Generate (d+1)-dimensional clusters from d-dimensional clusters
    for (int d = 0; d < dimensionality - 1; d++) {
        if (stepprog != null) {
            stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
        }
        List<Subspace> subspaces = subspaceMap.get(d);
        if (subspaces == null || subspaces.isEmpty()) {
            // Nothing to extend: report the remaining steps as skipped and stop.
            if (stepprog != null) {
                for (int dim = d + 1; dim < dimensionality - 1; dim++) {
                    // Fixed: the original message lacked the space after "Generation of".
                    stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
                }
            }
            break;
        }
        List<Subspace> candidates = generateSubspaceCandidates(subspaces);
        List<Subspace> s_d = new ArrayList<>();
        for (Subspace candidate : candidates) {
            Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
            if (LOG.isDebuggingFine()) {
                LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
            }
            // Re-cluster only the members of the clusters found in the best subspace.
            List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
            List<Cluster<Model>> clusters = new ArrayList<>();
            for (Cluster<Model> cluster : bestSubspaceClusters) {
                clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
            }
            if (LOG.isDebuggingFine()) {
                StringBuilder msg = new StringBuilder();
                msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
                for (Cluster<Model> c : clusters) {
                    msg.append("      ").append(c.getIDs()).append('\n');
                }
                LOG.debugFine(msg.toString());
            }
            if (!clusters.isEmpty()) {
                s_d.add(candidate);
                clusterMap.put(candidate, clusters);
            }
        }
        if (!s_d.isEmpty()) {
            subspaceMap.put(d + 1, s_d);
        }
    }
    // build result
    int numClusters = 1;
    result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
    for (Subspace subspace : clusterMap.descendingKeySet()) {
        List<Cluster<Model>> clusters = clusterMap.get(subspace);
        for (Cluster<Model> cluster : clusters) {
            Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
            newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
            newCluster.setName("cluster_" + numClusters++);
            result.addToplevelCluster(newCluster);
        }
    }
    LOG.setCompleted(stepprog);
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) TreeMap(java.util.TreeMap) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ArrayList(java.util.ArrayList) List(java.util.List)

Example 13 with SubspaceModel

use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.

the class CLIQUETest method testCLIQUESubspaceOverlappingPrune.

/**
 * Runs CLIQUE with fixed parameters and the PRUNE_ID flag set, then compares
 * the resulting cluster sizes to the stored reference values.
 */
@Test
public void testCLIQUESubspaceOverlappingPrune() {
    final String dataFile = UNITTEST + "subspace-overlapping-3-4d.ascii";
    final int[] expectedSizes = { 255, 409, 458, 458, 480 };
    Database db = makeSimpleDatabase(dataFile, 850);
    Clustering<SubspaceModel> result = new ELKIBuilder<CLIQUE<DoubleVector>>(CLIQUE.class) //
        .with(CLIQUE.Parameterizer.TAU_ID, 0.2) //
        .with(CLIQUE.Parameterizer.XSI_ID, 6) //
        .with(CLIQUE.Parameterizer.PRUNE_ID) //
        .build().run(db);
    // PairCounting is not appropriate here: overlapping clusterings!
    // testFMeasure(db, result, 0.433661);
    testClusterSizes(result, expectedSizes);
}
Also used : Database(de.lmu.ifi.dbs.elki.database.Database) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) Test(org.junit.Test)

Example 14 with SubspaceModel

use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.

the class CLIQUETest method testCLIQUESubspaceOverlapping.

/**
 * Runs CLIQUE with fixed parameters and compares the resulting cluster sizes
 * to the stored reference values.
 */
@Test
public void testCLIQUESubspaceOverlapping() {
    final String dataFile = UNITTEST + "subspace-overlapping-3-4d.ascii";
    final int[] expectedSizes = { 255, 409, 458, 458, 480 };
    Database db = makeSimpleDatabase(dataFile, 850);
    Clustering<SubspaceModel> result = new ELKIBuilder<CLIQUE<DoubleVector>>(CLIQUE.class) //
        .with(CLIQUE.Parameterizer.TAU_ID, 0.2) //
        .with(CLIQUE.Parameterizer.XSI_ID, 6) //
        .build().run(db);
    // PairCounting is not appropriate here: overlapping clusterings!
    // testFMeasure(db, result, 0.433661);
    testClusterSizes(result, expectedSizes);
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) Test(org.junit.Test)

Example 15 with SubspaceModel

use of de.lmu.ifi.dbs.elki.data.model.SubspaceModel in project elki by elki-project.

the class DiSHTest method testDiSHResults.

/**
 * Runs DiSH with fixed parameters and compares both the F-measure and the
 * cluster sizes of the result to the stored reference values.
 */
@Test
public void testDiSHResults() {
    final String dataFile = UNITTEST + "subspace-hierarchy.csv";
    final int[] expectedSizes = { 50, 199, 201 };
    Database db = makeSimpleDatabase(dataFile, 450);
    Clustering<SubspaceModel> result = new ELKIBuilder<DiSH<DoubleVector>>(DiSH.class) //
        .with(DiSH.Parameterizer.EPSILON_ID, 0.005) //
        .with(DiSH.Parameterizer.MU_ID, 50) //
        .build().run(db);
    testFMeasure(db, result, .99516369);
    testClusterSizes(result, expectedSizes);
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) Test(org.junit.Test)

Aggregations

SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)16 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)9 AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)7 Database (de.lmu.ifi.dbs.elki.database.Database)7 Test (org.junit.Test)7 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)6 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 ArrayList (java.util.ArrayList)4 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)3 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)3 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)2 Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair)2 List (java.util.List)2 AbstractProjectedClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering)1 MultivariateGaussianModel (de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModel)1 CLIQUESubspace (de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace)1