Search in sources :

Example 11 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

Method run of the class ERiC.

/**
 * Runs the ERiC algorithm in three logged steps: partition the data with
 * Generalized DBSCAN, extract correlation clusters from that partitioning,
 * and arrange the extracted clusters into a hierarchy.
 *
 * @param database Database the neighbor and core predicates are instantiated on
 * @param relation Relation to process
 * @return Clustering result; the clusters in the last list of the extracted
 *         cluster map are registered as its top-level clusters
 */
public Clustering<CorrelationModel> run(Database database, Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    // The progress object only exists in verbose mode; the LOG helpers accept null.
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;

    // Step 1: run Generalized DBSCAN with the ERiC neighborhood predicate.
    LOG.beginStep(stepprog, 1, "Preprocessing local correlation dimensionalities and partitioning data");
    // FIXME: how to ensure we are running on the same relation?
    ERiCNeighborPredicate<V>.Instance neighborPred = new ERiCNeighborPredicate<V>(settings).instantiate(database, relation);
    CorePredicate.Instance<DBIDs> corePred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
    Clustering<Model> partitioning = new GeneralizedDBSCAN.Instance<>(neighborPred, corePred, false).run();

    // Step 2: extract the correlation clusters (one list per list index).
    LOG.beginStep(stepprog, 2, "Extract correlation clusters");
    List<List<Cluster<CorrelationModel>>> clusterMap = extractCorrelationClusters(partitioning, relation, dimensionality, neighborPred);
    if (LOG.isDebugging()) {
        StringBuilder buf = new StringBuilder("Step 2: Extract correlation clusters...");
        int corrDim = 0;
        for (List<Cluster<CorrelationModel>> level : clusterMap) {
            buf.append("\n\ncorrDim ").append(corrDim);
            for (Cluster<CorrelationModel> c : level) {
                buf.append("\n  cluster ").append(c);
                buf.append(", ids: ").append(c.getIDs().size());
            }
            corrDim++;
        }
        LOG.debugFine(buf.toString());
    }
    if (LOG.isVerbose()) {
        int total = 0;
        for (List<Cluster<CorrelationModel>> level : clusterMap) {
            total += level.size();
        }
        LOG.verbose(total + " clusters extracted.");
    }

    // Step 3: build the cluster hierarchy.
    LOG.beginStep(stepprog, 3, "Building hierarchy");
    Clustering<CorrelationModel> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
    buildHierarchy(clustering, clusterMap, neighborPred);
    if (LOG.isDebugging()) {
        StringBuilder buf = new StringBuilder("Step 3: Build hierarchy");
        for (List<Cluster<CorrelationModel>> level : clusterMap) {
            for (Cluster<CorrelationModel> c : level) {
                buf.append("\n  cluster ").append(c);
                buf.append(", ids: ").append(c.getIDs().size());
                // List the parents first, then the children, of every cluster.
                It<Cluster<CorrelationModel>> parents = clustering.getClusterHierarchy().iterParents(c);
                while (parents.valid()) {
                    buf.append("\n   parent ").append(parents.get());
                    parents.advance();
                }
                It<Cluster<CorrelationModel>> children = clustering.getClusterHierarchy().iterChildren(c);
                while (children.valid()) {
                    buf.append("\n   child ").append(children.get());
                    children.advance();
                }
            }
        }
        LOG.debugFine(buf.toString());
    }
    LOG.setCompleted(stepprog);

    // The clusters of the last list become the roots of the result.
    List<Cluster<CorrelationModel>> roots = clusterMap.get(clusterMap.size() - 1);
    for (Cluster<CorrelationModel> root : roots) {
        clustering.addToplevelCluster(root);
    }
    return clustering;
}
Also used : ERiCNeighborPredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.ERiCNeighborPredicate) MinPtsCorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate) CorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) MinPtsCorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) GeneralizedDBSCAN(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN) ArrayList(java.util.ArrayList) List(java.util.List)

Example 12 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

Method filter of the class ClassicMultidimensionalScalingTransform.

/**
 * Applies classic multidimensional scaling to every column whose type is
 * accepted by the distance function's input type restriction; all other
 * columns are copied to the output bundle unchanged.
 *
 * Note that the projected vectors are written back into the very list that
 * was taken from the input bundle; the output bundle shares that list.
 *
 * @param objects Input bundle; returned as-is when it contains no data rows
 * @return New bundle with the matching columns replaced by {@code tdim}-dimensional vectors
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int size = objects.dataLength();
    if (size == 0) {
        return objects;
    }
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        @SuppressWarnings("unchecked") SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
        @SuppressWarnings("unchecked") final List<Object> data = (List<Object>) objects.getColumn(col);
        if (dist.getInputTypeRestriction().isAssignableFromType(meta)) {
            // Register the column under its replacement type information; the
            // list contents are overwritten with the projected vectors below.
            @SuppressWarnings("unchecked") final List<I> input = (List<I>) data;
            bundle.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), input);
            StepProgress progress = LOG.isVerbose() ? new StepProgress("Classic MDS", 2) : null;

            // Step 1: squared distance matrix, double-centered.
            LOG.beginStep(progress, 1, "Computing distance matrix");
            double[][] matrix = computeSquaredDistanceMatrix(input, dist);
            doubleCenterSymmetric(matrix);

            // Step 2: decomposition of the centered matrix.
            LOG.beginStep(progress, 2, "Computing singular value decomposition");
            SingularValueDecomposition decomposition = new SingularValueDecomposition(matrix);
            double[][] leftVectors = decomposition.getU();
            double[] scale = decomposition.getSingularValues();
            // Undo squared, unless we were given a squared distance function:
            if (!dist.isSquared()) {
                for (int d = 0; d < tdim; d++) {
                    scale[d] = FastMath.sqrt(Math.abs(scale[d]));
                }
            }
            // Scale the leading components of each row and store the new vector.
            double[] coords = new double[tdim];
            for (int row = 0; row < size; row++) {
                double[] source = leftVectors[row];
                for (int d = 0; d < coords.length; d++) {
                    coords[d] = scale[d] * source[d];
                }
                data.set(row, factory.newNumberVector(coords));
            }
            LOG.setCompleted(progress);
        } else {
            // Not a column the distance function can handle: pass it through.
            bundle.appendColumn(meta, data);
        }
    }
    return bundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) List(java.util.List) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition)

Example 13 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

Method run of the class P3C.

/**
 * Performs the P3C algorithm on the given Database.
 *
 * The run is reported as ten consecutive progress steps: grid partitioning,
 * search for non-uniform bins, construction of 1-signatures, merging to
 * cluster cores, pruning of redundant cores, EM refinement, hard clustering,
 * outlier detection, removal of too-small clusters and result generation.
 *
 * @param database Database (not referenced by this method's body)
 * @param relation Relation to cluster
 * @return Clustering result. When no cluster cores are found, a single noise
 *         cluster containing all objects of the relation is returned.
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    // Overall progress. The total must match the number of beginStep calls
    // below (1..10); LOG.beginStep tolerates a null progress object.
    StepProgress stepProgress = LOG.isVerbose() ? new StepProgress(10) : null;
    LOG.beginStep(stepProgress, 1, "Grid-partitioning data.");
    // Desired number of bins, as per Sturge:
    final int binCount = (int) Math.ceil(1 + MathUtil.log2(relation.size()));
    // Perform 1-dimensional projections, and split into bins.
    SetDBIDs[][] partitions = partitionData(relation, binCount);
    LOG.beginStep(stepProgress, 2, "Searching for non-uniform bins in support histograms.");
    // Set markers for each attribute until they're all deemed uniform.
    final long[][] markers = new long[dim][];
    for (int d = 0; d < dim; d++) {
        final SetDBIDs[] parts = partitions[d];
        if (parts == null) {
            // Never mark any on constant dimensions.
            continue;
        }
        final long[] marked = markers[d] = BitsUtil.zero(binCount);
        int card = 0;
        // NOTE(review): the bound is derived from the data dimensionality,
        // while `marked` holds binCount bits - confirm this is intended.
        while (card < dim - 1) {
            // Find bin with largest support, test only the dimensions that were not
            // previously marked.
            int bestBin = chiSquaredUniformTest(parts, marked, card);
            if (bestBin < 0) {
                // Uniform
                break;
            }
            BitsUtil.setI(marked, bestBin);
            card++;
        }
        if (LOG.isDebugging()) {
            LOG.debug("Marked bins in dim " + d + ": " + BitsUtil.toString(marked, binCount));
        }
    }
    LOG.beginStep(stepProgress, 3, "Merging marked bins to 1-signatures.");
    ArrayList<Signature> signatures = constructOneSignatures(partitions, markers);
    LOG.beginStep(stepProgress, 4, "Computing cluster cores from merged p-signatures.");
    ArrayList<Signature> clusterCores = mergeClusterCores(binCount, signatures);
    LOG.beginStep(stepProgress, 5, "Pruning redundant cluster cores.");
    clusterCores = pruneRedundantClusterCores(clusterCores);
    if (LOG.isVerbose()) {
        LOG.verbose("Number of cluster cores found: " + clusterCores.size());
    }
    if (clusterCores.isEmpty()) {
        // Nothing to refine: finish the progress and report everything as noise.
        LOG.setCompleted(stepProgress);
        Clustering<SubspaceModel> c = new Clustering<>("P3C", "P3C");
        c.addToplevelCluster(new Cluster<SubspaceModel>(relation.getDBIDs(), true));
        return c;
    }
    LOG.beginStep(stepProgress, 6, "Refining cluster cores to clusters via EM.");
    // Track objects not assigned to any cluster:
    ModifiableDBIDs noise = DBIDUtil.newHashSet();
    WritableDataStore<double[]> probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
    int k = clusterCores.size();
    List<MultivariateGaussianModel> models = new ArrayList<>(k);
    computeFuzzyMembership(relation, clusterCores, noise, probClusterIGivenX, models, dim);
    // Initial estimate of covariances, to assign noise objects
    EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, models, 0.);
    assignUnassigned(relation, probClusterIGivenX, models, noise);
    double emNew = EM.assignProbabilitiesToInstances(relation, models, probClusterIGivenX);
    // A negative maxEmIterations disables the iteration limit.
    for (int it = 1; it <= maxEmIterations || maxEmIterations < 0; it++) {
        final double emOld = emNew;
        EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, models, 0.);
        // reassign probabilities
        emNew = EM.assignProbabilitiesToInstances(relation, models, probClusterIGivenX);
        if (LOG.isVerbose()) {
            LOG.verbose("iteration " + it + " - expectation value: " + emNew);
        }
        // Stop once the improvement no longer exceeds emDelta.
        if ((emNew - emOld) <= emDelta) {
            break;
        }
    }
    LOG.beginStep(stepProgress, 7, "Generating hard clustering.");
    // Create a hard clustering, making sure each data point only is part of one
    // cluster, based on the best match from the membership matrix.
    ArrayList<ClusterCandidate> clusterCandidates = hardClustering(probClusterIGivenX, clusterCores, relation.getDBIDs());
    LOG.beginStep(stepProgress, 8, "Looking for outliers and moving them to the noise set.");
    // Outlier detection. Remove points from clusters that have a Mahalanobis
    // distance larger than the critical value of the ChiSquare distribution.
    findOutliers(relation, models, clusterCandidates, noise);
    LOG.beginStep(stepProgress, 9, "Removing empty clusters.");
    // Remove near-empty clusters; their remaining members become noise.
    for (Iterator<ClusterCandidate> it = clusterCandidates.iterator(); it.hasNext(); ) {
        ClusterCandidate cand = it.next();
        final int size = cand.ids.size();
        if (size < minClusterSize) {
            if (size > 0) {
                noise.addDBIDs(cand.ids);
            }
            it.remove();
        }
    }
    if (LOG.isVerbose()) {
        LOG.verbose("Number of clusters remaining: " + clusterCandidates.size());
    }
    LOG.beginStep(stepProgress, 10, "Generating final result.");
    // Generate final output.
    Clustering<SubspaceModel> result = new Clustering<>("P3C", "P3C");
    for (int cluster = 0; cluster < clusterCandidates.size(); ++cluster) {
        ClusterCandidate candidate = clusterCandidates.get(cluster);
        CovarianceMatrix cvm = CovarianceMatrix.make(relation, candidate.ids);
        result.addToplevelCluster(new Cluster<>(candidate.ids, new SubspaceModel(new Subspace(candidate.dimensions), cvm.getMeanVector())));
    }
    if (LOG.isVerbose()) {
        LOG.verbose("Noise size: " + noise.size());
    }
    if (noise.size() > 0) {
        result.addToplevelCluster(new Cluster<SubspaceModel>(noise, true));
    }
    LOG.ensureCompleted(stepProgress);
    return result;
}
Also used : ArrayList(java.util.ArrayList) MultivariateGaussianModel(de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModel) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 14 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

Method run of the class CLIQUE.

/**
 * Performs the CLIQUE algorithm on the given database.
 *
 * Runs in two logged steps: dense subspaces are collected bottom-up (starting
 * with the 1-dimensional ones, until a level yields none or the data
 * dimensionality is reached), then clusters are determined per level and
 * added to the result as top-level clusters.
 *
 * @param relation Data relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    // Only track progress in verbose mode; the LOG helpers accept null.
    StepProgress step = LOG.isVerbose() ? new StepProgress(2) : null;
    // 1. Identification of subspaces that contain clusters
    LOG.beginStep(step, 1, "Identification of subspaces that contain clusters");
    // Index i holds the dense subspaces of dimensionality i + 1.
    ArrayList<List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new ArrayList<>(dimensionality);
    List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
    dimensionToDenseSubspaces.add(denseSubspaces);
    if (LOG.isVerbose()) {
        LOG.verbose("1-dimensional dense subspaces: " + denseSubspaces.size());
    }
    if (LOG.isDebugging()) {
        for (CLIQUESubspace<V> s : denseSubspaces) {
            LOG.debug(s.toString(" "));
        }
    }
    // Derive the k-dimensional dense subspaces from the (k-1)-dimensional ones.
    for (int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
        denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
        assert (dimensionToDenseSubspaces.size() == k - 1);
        dimensionToDenseSubspaces.add(denseSubspaces);
        if (LOG.isVerbose()) {
            LOG.verbose(k + "-dimensional dense subspaces: " + denseSubspaces.size());
        }
        if (LOG.isDebugging()) {
            for (CLIQUESubspace<V> s : denseSubspaces) {
                LOG.debug(s.toString(" "));
            }
        }
    }
    // 2. Identification of clusters
    LOG.beginStep(step, 2, "Identification of clusters");
    // build result
    Clustering<SubspaceModel> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
    for (int dim = 0; dim < dimensionToDenseSubspaces.size(); dim++) {
        List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
        List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
        if (LOG.isVerbose()) {
            LOG.verbose((dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
        }
        for (Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
            Cluster<SubspaceModel> newCluster = new Cluster<>(modelAndCluster.second);
            newCluster.setModel(new SubspaceModel(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).getArrayRef()));
            result.addToplevelCluster(newCluster);
        }
    }
    // Mark the progress as finished so listeners can dispose of it.
    LOG.setCompleted(step);
    return result;
}
Also used : ArrayList(java.util.ArrayList) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) CLIQUESubspace(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace) CLIQUESubspace(de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique.CLIQUESubspace) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 15 with StepProgress

use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.

Method publish of the class LogPanel.

/**
 * Publish a logging record.
 *
 * Plain records are forwarded as text. Progress records update (and, once
 * complete, remove) their progress bar; they are additionally forwarded as
 * text when the progress is complete or is a {@link StepProgress}.
 *
 * @param record Log record to publish
 */
protected void publish(final LogRecord record) {
    // Anything that is not a progress record is plain text.
    if (!(record instanceof ProgressLogRecord)) {
        publishTextRecord(record);
        return;
    }
    final Progress progress = ((ProgressLogRecord) record).getProgress();
    final JProgressBar bar = getOrCreateProgressBar(progress);
    updateProgressBar(progress, bar);
    if (progress.isComplete()) {
        removeProgressBar(progress, bar);
    }
    // Completed progress and every step change also get a text line.
    if (progress.isComplete() || progress instanceof StepProgress) {
        publishTextRecord(record);
    }
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Progress(de.lmu.ifi.dbs.elki.logging.progress.Progress) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) MutableProgress(de.lmu.ifi.dbs.elki.logging.progress.MutableProgress) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) JProgressBar(javax.swing.JProgressBar) ProgressLogRecord(de.lmu.ifi.dbs.elki.logging.progress.ProgressLogRecord) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress)

Aggregations

StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)26 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)13 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)12 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)11 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)11 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)11 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)11 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)11 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)6 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)6 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)6 ArrayList (java.util.ArrayList)6 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)5 List (java.util.List)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)3 Model (de.lmu.ifi.dbs.elki.data.model.Model)3 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)3