Search in sources:

Example 6 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class PROCLUS method run.

/**
 * Runs the PROCLUS algorithm on the given relation in three phases:
 * initialization (random sampling followed by greedy selection of medoid
 * candidates), iteration (repeated medoid replacement while tracking the best
 * objective value seen so far) and refinement (final dimension selection and
 * point assignment).
 *
 * @param database Database used to obtain the distance and range queries
 * @param relation Relation to process
 * @return the resulting clustering; every final cluster is added as a
 *         top-level cluster named {@code cluster_1}, {@code cluster_2}, ...
 * @throws IllegalStateException if the dimensionality of the data is smaller
 *         than the parameter {@code l}
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
    final int dataDim = RelationUtil.dimensionality(relation);
    if (dataDim < l) {
        throw new IllegalStateException("Dimensionality of data < parameter l! (" + dataDim + " < " + l + ")");
    }
    DistanceQuery<V> distFunc = database.getDistanceQuery(relation, SquaredEuclideanDistanceFunction.STATIC);
    RangeQuery<V> rangeQuery = database.getRangeQuery(distFunc);
    final Random random = rnd.getSingleThreadedRandom();

    // ---- Phase 1: initialization ----
    if (LOG.isVerbose()) {
        LOG.verbose("1. Initialization phase...");
    }
    // Neither the sample nor the candidate set may exceed the relation size.
    final int sampleSize = Math.min(relation.size(), k_i * k);
    final DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
    final int medoidSize = Math.min(relation.size(), m_i * k);
    final ArrayDBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
    if (LOG.isDebugging()) {
        StringBuilder dbg = new StringBuilder();
        dbg.append("sampleSize ").append(sampleSize).append('\n');
        dbg.append("sampleSet ").append(sampleSet).append('\n');
        dbg.append("medoidSize ").append(medoidSize).append('\n');
        dbg.append("m ").append(medoids);
        LOG.debugFine(dbg.toString());
    }

    // ---- Phase 2: iteration ----
    if (LOG.isVerbose()) {
        LOG.verbose("2. Iterative phase...");
    }
    double bestObjective = Double.POSITIVE_INFINITY;
    ArrayDBIDs bestMedoids = null;
    DBIDs badMedoids = null;
    ArrayDBIDs currentMedoids = initialSet(medoids, k, random);
    if (LOG.isDebugging()) {
        LOG.debugFine("m_c " + currentMedoids);
    }
    IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
    ArrayList<PROCLUSCluster> clusters = null;
    // The counter restarts whenever the objective improves; the loop ends
    // once it reaches 10.
    for (int stale = 0; stale < 10; stale++) {
        long[][] medoidDims = findDimensions(currentMedoids, relation, distFunc, rangeQuery);
        clusters = assignPoints(currentMedoids, medoidDims, relation);
        final double objective = evaluateClusters(clusters, medoidDims, relation);
        if (objective < bestObjective) {
            // improvement found: restart counting
            stale = 0;
            bestObjective = objective;
            bestMedoids = currentMedoids;
            badMedoids = computeBadMedoids(currentMedoids, clusters, (int) (relation.size() * 0.1 / k));
        }
        currentMedoids = computeM_current(medoids, bestMedoids, badMedoids, random);
        if (cprogress != null) {
            cprogress.setProcessed(clusters.size(), LOG);
        }
    }
    LOG.setCompleted(cprogress);

    // ---- Phase 3: refinement ----
    if (LOG.isVerbose()) {
        LOG.verbose("3. Refinement phase...");
    }
    List<Pair<double[], long[]>> refinedDims = findDimensions(clusters, relation);
    List<PROCLUSCluster> finalClusters = finalAssignment(refinedDims, relation);

    // ---- Assemble the result ----
    Clustering<SubspaceModel> result = new Clustering<>("ProClus clustering", "proclus-clustering");
    int clusterNo = 0;
    for (PROCLUSCluster c : finalClusters) {
        clusterNo++;
        Cluster<SubspaceModel> cluster = new Cluster<>(c.objectIDs);
        cluster.setModel(new SubspaceModel(new Subspace(c.getDimensions()), c.centroid));
        cluster.setName("cluster_" + clusterNo);
        result.addToplevelCluster(cluster);
    }
    return result;
}
Also used : ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) AbstractProjectedClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Random(java.util.Random) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 7 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class SUBCLU method lowerSubspaces.

/**
 * Enumerates all {@code (d-1)}-dimensional subspaces of the given
 * {@code d}-dimensional subspace, each obtained by clearing exactly one of
 * its set dimension bits.
 *
 * @param subspace the {@code d}-dimensional subspace
 * @return a list of all {@code (d-1)}-dimensional subspaces, in the order in
 *         which the removed dimension is encountered, or {@code null} if
 *         {@code d <= 1}
 */
private List<Subspace> lowerSubspaces(Subspace subspace) {
    // Nothing can be removed from a subspace with at most one dimension.
    if (subspace.dimensionality() <= 1) {
        return null;
    }
    final long[] bits = subspace.getDimensions();
    final List<Subspace> lower = new ArrayList<>();
    int bit = BitsUtil.nextSetBit(bits, 0);
    while (bit >= 0) {
        // Work on a copy so the original bit set stays untouched.
        long[] reduced = bits.clone();
        BitsUtil.clearI(reduced, bit);
        lower.add(new Subspace(reduced));
        bit = BitsUtil.nextSetBit(bits, bit + 1);
    }
    return lower;
}
Also used : Subspace(de.lmu.ifi.dbs.elki.data.Subspace) ArrayList(java.util.ArrayList)

Example 8 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class SUBCLU method bestSubspace.

/**
 * Determines the {@code d}-dimensional subspace of the {@code (d+1)}
 * -dimensional candidate with minimal number of objects in the cluster.
 * <p>
 * Only entries of {@code subspaces} for which
 * {@code subspace.isSubspace(candidate)} holds are considered. Among those,
 * the subspace owning the smallest cluster wins; on ties the subspace
 * encountered first is kept.
 *
 * @param subspaces the list of {@code d}-dimensional subspaces containing
 *        clusters
 * @param candidate the {@code (d+1)}-dimensional candidate subspace
 * @param clusterMap the mapping of subspaces to clusters; must contain an
 *        entry for every considered subspace
 * @return the {@code d}-dimensional subspace of the {@code (d+1)}
 *         -dimensional candidate with minimal number of objects in the
 *         cluster, or {@code null} if no considered subspace has a cluster
 */
private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
    Subspace bestSubspace = null;
    // The running minimum has to span ALL subspaces. Resetting it for each
    // subspace would make the last matching subspace win regardless of its
    // cluster sizes.
    int min = Integer.MAX_VALUE;
    for (Subspace subspace : subspaces) {
        if (!subspace.isSubspace(candidate)) {
            continue;
        }
        for (Cluster<Model> cluster : clusterMap.get(subspace)) {
            final int clusterSize = cluster.size();
            if (clusterSize < min) {
                min = clusterSize;
                bestSubspace = subspace;
            }
        }
    }
    return bestSubspace;
}
Also used : Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Example 9 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class CLIQUE method run.

/**
 * Runs the CLIQUE algorithm on the given relation. First the dense subspaces
 * are collected level by level, starting with the one-dimensional ones and
 * stopping once a level yields no dense subspace or the data dimensionality
 * is reached. Then clusters are determined for every collected level and
 * added to the result as top-level clusters.
 *
 * @param relation Data relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    // NOTE(review): this step progress is never marked as completed within
    // this method; confirm whether that is intentional.
    StepProgress step = new StepProgress(2);

    // Step 1: find the subspaces that contain clusters.
    step.beginStep(1, "Identification of subspaces that contain clusters", LOG);
    // Index i holds the dense subspaces of dimensionality i + 1.
    ArrayList<List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new ArrayList<>(dimensionality);
    List<CLIQUESubspace<V>> dense = findOneDimensionalDenseSubspaces(relation);
    dimensionToDenseSubspaces.add(dense);
    if (LOG.isVerbose()) {
        LOG.verbose("1-dimensional dense subspaces: " + dense.size());
    }
    if (LOG.isDebugging()) {
        for (CLIQUESubspace<V> s : dense) {
            LOG.debug(s.toString(" "));
        }
    }
    int level = 2;
    while (level <= dimensionality && !dense.isEmpty()) {
        dense = findDenseSubspaces(relation, dense);
        assert (dimensionToDenseSubspaces.size() == level - 1);
        dimensionToDenseSubspaces.add(dense);
        if (LOG.isVerbose()) {
            LOG.verbose(level + "-dimensional dense subspaces: " + dense.size());
        }
        if (LOG.isDebugging()) {
            for (CLIQUESubspace<V> s : dense) {
                LOG.debug(s.toString(" "));
            }
        }
        level++;
    }

    // Step 2: derive the clusters from the dense subspaces.
    step.beginStep(2, "Identification of clusters", LOG);
    Clustering<SubspaceModel> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
    int clusterDim = 0;
    for (List<CLIQUESubspace<V>> subspaces : dimensionToDenseSubspaces) {
        clusterDim++;
        List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
        if (LOG.isVerbose()) {
            LOG.verbose(clusterDim + "-dimensional clusters: " + modelsAndClusters.size());
        }
        for (Pair<Subspace, ModifiableDBIDs> entry : modelsAndClusters) {
            final Subspace subspace = entry.first;
            final ModifiableDBIDs members = entry.second;
            Cluster<SubspaceModel> newCluster = new Cluster<>(members);
            newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, members).getArrayRef()));
            result.addToplevelCluster(newCluster);
        }
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) CLIQUESubspace(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace) CLIQUESubspace(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 10 with Subspace

use of de.lmu.ifi.dbs.elki.data.Subspace in project elki by elki-project.

the class DiSH method buildHierarchy.

/**
 * Builds the cluster hierarchy by examining every pair of clusters
 * {@code (c_i, c_j)} with {@code i < j} and registering {@code c_j} as parent
 * of {@code c_i} where the conditions below hold. When debugging is enabled,
 * a trace of the decisions is collected and logged once at the end.
 *
 * @param database the relation containing the data objects
 * @param clustering Clustering we process
 * @param clusters the sorted list of clusters
 * @param dimensionality the dimensionality of the data
 * @throws RuntimeException if two clusters agree in subspace dimensionality
 *         but their weighted distance is not within {@code 2 * epsilon}
 */
private void buildHierarchy(Relation<V> database, Clustering<SubspaceModel> clustering, List<Cluster<SubspaceModel>> clusters, int dimensionality) {
    final StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
    final int db_dim = RelationUtil.dimensionality(database);
    final Hierarchy<Cluster<SubspaceModel>> hier = clustering.getClusterHierarchy();

    for (int i = 0; i < clusters.size() - 1; i++) {
        final Cluster<SubspaceModel> child = clusters.get(i);
        final Subspace childSubspace = child.getModel().getSubspace();
        final int childLambda = dimensionality - childSubspace.dimensionality();
        final NumberVector childCentroid = ProjectedCentroid.make(childSubspace.getDimensions(), database, child.getIDs());
        final long[] childPV = childSubspace.getDimensions();

        for (int j = i + 1; j < clusters.size(); j++) {
            final Cluster<SubspaceModel> candidate = clusters.get(j);
            final Subspace candidateSubspace = candidate.getModel().getSubspace();
            final int candidateLambda = dimensionality - candidateSubspace.dimensionality();
            // Only clusters with a strictly larger value can become parents.
            if (childLambda >= candidateLambda) {
                continue;
            }
            if (msg != null) {
                msg.append("\n l_i=").append(childLambda);
                msg.append(" pv_i=[").append(BitsUtil.toStringLow(childSubspace.getDimensions(), db_dim)).append(']');
                msg.append("\n l_j=").append(candidateLambda);
                msg.append(" pv_j=[").append(BitsUtil.toStringLow(candidateSubspace.getDimensions(), db_dim)).append(']');
            }

            // Noise level reached: the candidate has no dimensions at all.
            if (candidateSubspace.dimensionality() == 0) {
                // Noise becomes the parent only if there is no parent yet.
                if (hier.numParents(child) == 0) {
                    clustering.addChildCluster(candidate, child);
                    if (msg != null) {
                        msg.append("\n [").append(BitsUtil.toStringLow(candidateSubspace.getDimensions(), db_dim));
                        msg.append("] is parent of [").append(BitsUtil.toStringLow(childSubspace.getDimensions(), db_dim));
                        msg.append(']');
                    }
                }
                continue;
            }

            final NumberVector candidateCentroid = ProjectedCentroid.make(candidate.getModel().getDimensions(), database, candidate.getIDs());
            final long[] candidatePV = candidateSubspace.getDimensions();
            final long[] commonPreferenceVector = BitsUtil.andCMin(childPV, candidatePV);
            final int subspaceDim = subspaceDimensionality(childCentroid, candidateCentroid, childPV, candidatePV, commonPreferenceVector);
            final double d = weightedDistance(childCentroid, candidateCentroid, commonPreferenceVector);
            if (msg != null) {
                msg.append("\n dist = ").append(subspaceDim);
            }
            if (subspaceDim != candidateLambda) {
                continue;
            }
            if (msg != null) {
                msg.append("\n d = ").append(d);
            }
            // Written as a negated "<=" so that a NaN distance is rejected too.
            if (!(d <= 2 * epsilon)) {
                throw new RuntimeException("Should never happen: d = " + d);
            }
            // Link unless one of the existing parents already covers it.
            if (hier.numParents(child) == 0 || !isParent(database, candidate, hier.iterParents(child), db_dim)) {
                clustering.addChildCluster(candidate, child);
                if (msg != null) {
                    msg.append("\n [").append(BitsUtil.toStringLow(candidateSubspace.getDimensions(), db_dim));
                    msg.append("] is parent of [");
                    msg.append(BitsUtil.toStringLow(childSubspace.getDimensions(), db_dim));
                    msg.append(']');
                }
            }
        }
    }
    if (msg != null) {
        LOG.debug(msg.toString());
    }
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Aggregations

Subspace (de.lmu.ifi.dbs.elki.data.Subspace): 13, SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel): 10, Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 7, ArrayList (java.util.ArrayList): 7, Clustering (de.lmu.ifi.dbs.elki.data.Clustering): 4, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 4, ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs): 3, StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress): 3, Pair (de.lmu.ifi.dbs.elki.utilities.pairs.Pair): 3, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 2, Model (de.lmu.ifi.dbs.elki.data.model.Model): 2, IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress): 2, List (java.util.List): 2, AbstractProjectedClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering): 1, MultivariateGaussianModel (de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModel): 1, CLIQUESubspace (de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace): 1, ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 1, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 1, HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs): 1, SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs): 1