Search in sources :

Example 1 with CorrelationModel

use of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in project elki by elki-project.

In the class ERiC, the method extractCorrelationClusters.

/**
 * Extracts the correlation clusters and noise from the given partitioning and
 * groups them by correlation dimensionality: the returned list is indexed by
 * correlation dimensionality, and each entry is the list of clusters found
 * for that dimensionality. Every cluster carries a {@link CorrelationModel}
 * built from a PCA of its members and their centroid.
 * <p>
 * Partitions that are flagged as noise, or whose correlation dimensionality is
 * not below {@code dimensionality}, are merged into a single "[noise]" cluster
 * stored at index {@code dimensionality}. The clusters of {@code dbscanResult}
 * are only read, never modified.
 * <p>
 * Trailing empty entries are removed from the result, so the returned list
 * may be shorter than {@code dimensionality + 1}; the entry at index 0 is
 * always kept.
 *
 * @param dbscanResult the partitioning to extract the clusters from
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate
 * @return a list of clusters for each dimensionality
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
    // result: one (initially empty) cluster list per correlation dimensionality
    List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
    for (int i = 0; i <= dimensionality; i++) {
        clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
    }
    // IDs of all noise objects over all partitions. The first noise partition
    // is referenced as-is; a private copy is only made once a second one has
    // to be merged in, so the input clusters are never written to.
    DBIDs noiseIds = null;
    ModifiableDBIDs mergedNoise = null;
    for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
        DBIDs group = clus.getIDs();
        int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
        if (dim < dimensionality) {
            // get cluster list for this dimension.
            List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
            // The name uses the list size before insertion as running index.
            String name = "[" + dim + "_" + correlationClusters.size() + "]";
            correlationClusters.add(newCorrelationCluster(name, group, relation, dim));
        } else if (noiseIds == null) {
            // first partition containing noise
            noiseIds = group;
        } else {
            if (mergedNoise == null) {
                mergedNoise = DBIDUtil.newHashSet(noiseIds);
                noiseIds = mergedNoise;
            }
            mergedNoise.addDBIDs(group);
        }
    }
    if (noiseIds != null && noiseIds.size() > 0) {
        clusterMap.get(dimensionality).add(newCorrelationCluster("[noise]", noiseIds, relation, dimensionality));
    }
    // Delete dimensionalities not found (trailing empty lists only; index 0
    // is never removed).
    for (int i = dimensionality; i > 0; i--) {
        if (!clusterMap.get(i).isEmpty()) {
            break;
        }
        clusterMap.remove(i);
    }
    return clusterMap;
}

/**
 * Builds a cluster with a correlation model for the given objects: runs the
 * configured PCA on them, filters the eigenpairs with a
 * {@link FirstNEigenPairFilter} constructed with {@code dim}, and computes
 * their centroid.
 *
 * @param name name of the new cluster
 * @param ids the members of the cluster
 * @param relation the database containing the objects
 * @param dim value passed to the eigenpair filter
 * @return the new cluster
 */
private Cluster<CorrelationModel> newCorrelationCluster(String name, DBIDs ids, Relation<V> relation, int dim) {
    EigenPairFilter filter = new FirstNEigenPairFilter(dim);
    SortedEigenPairs epairs = settings.pca.processIds(ids, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[] centroid = Centroid.make(relation, ids).getArrayRef();
    return new Cluster<>(name, ids, new CorrelationModel(pcares, centroid));
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PercentageEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 2 with CorrelationModel

use of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in project elki by elki-project.

In the class ERiC, the method run.

/**
 * Performs the ERiC algorithm on the given database, in three steps:
 * partitioning the data with Generalized DBSCAN, extracting the correlation
 * clusters from the partitions, and building the cluster hierarchy.
 *
 * @param database Database used to instantiate the predicates
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<CorrelationModel> run(Database database, Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    StepProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new StepProgress(3);
    }

    // Step 1: Generalized DBSCAN with the ERiC neighbor predicate.
    LOG.beginStep(progress, 1, "Preprocessing local correlation dimensionalities and partitioning data");
    // FIXME: how to ensure we are running on the same relation?
    ERiCNeighborPredicate<V>.Instance neighborPred = new ERiCNeighborPredicate<V>(settings).instantiate(database, relation);
    CorePredicate.Instance<DBIDs> corePred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
    Clustering<Model> partitioning = new GeneralizedDBSCAN.Instance<>(neighborPred, corePred, false).run();

    // Step 2: turn the partitions into correlation clusters.
    LOG.beginStep(progress, 2, "Extract correlation clusters");
    List<List<Cluster<CorrelationModel>>> byDimension = extractCorrelationClusters(partitioning, relation, dim, neighborPred);
    if (LOG.isDebugging()) {
        StringBuilder buf = new StringBuilder("Step 2: Extract correlation clusters...");
        int corrDim = 0;
        for (List<Cluster<CorrelationModel>> level : byDimension) {
            buf.append("\n\ncorrDim ").append(corrDim);
            for (Cluster<CorrelationModel> c : level) {
                buf.append("\n  cluster ").append(c);
                buf.append(", ids: ").append(c.getIDs().size());
            }
            corrDim++;
        }
        LOG.debugFine(buf.toString());
    }
    if (LOG.isVerbose()) {
        int total = 0;
        for (List<Cluster<CorrelationModel>> level : byDimension) {
            total += level.size();
        }
        LOG.verbose(total + " clusters extracted.");
    }

    // Step 3: build the hierarchy on the extracted clusters.
    LOG.beginStep(progress, 3, "Building hierarchy");
    Clustering<CorrelationModel> result = new Clustering<>("ERiC clustering", "eric-clustering");
    buildHierarchy(result, byDimension, neighborPred);
    if (LOG.isDebugging()) {
        StringBuilder buf = new StringBuilder("Step 3: Build hierarchy");
        for (List<Cluster<CorrelationModel>> level : byDimension) {
            for (Cluster<CorrelationModel> c : level) {
                buf.append("\n  cluster ").append(c);
                buf.append(", ids: ").append(c.getIDs().size());
                // list the parents of this cluster, then its children
                It<Cluster<CorrelationModel>> parents = result.getClusterHierarchy().iterParents(c);
                while (parents.valid()) {
                    buf.append("\n   parent ").append(parents.get());
                    parents.advance();
                }
                It<Cluster<CorrelationModel>> children = result.getClusterHierarchy().iterChildren(c);
                while (children.valid()) {
                    buf.append("\n   child ").append(children.get());
                    children.advance();
                }
            }
        }
        LOG.debugFine(buf.toString());
    }
    LOG.setCompleted(progress);

    // The clusters in the last remaining list become the top level clusters.
    List<Cluster<CorrelationModel>> topLevel = byDimension.get(byDimension.size() - 1);
    for (Cluster<CorrelationModel> root : topLevel) {
        result.addToplevelCluster(root);
    }
    return result;
}
Also used : ERiCNeighborPredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.ERiCNeighborPredicate) MinPtsCorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate) CorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) MinPtsCorePredicate(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) GeneralizedDBSCAN(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN) ArrayList(java.util.ArrayList) List(java.util.List)

Example 3 with CorrelationModel

use of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in project elki by elki-project.

In the class ERiCTest, the method testERiCResults.

/**
 * Runs ERiC with fixed parameters on the "hierarchical-3d2d1d.csv" data set
 * and compares the F-measure and the cluster sizes of the result to recorded
 * reference values.
 */
@Test
public void testERiCResults() {
    Database database = makeSimpleDatabase(UNITTEST + "hierarchical-3d2d1d.csv", 600);
    // Configure the algorithm first, then run it in a separate step.
    ERiC<DoubleVector> eric = new ELKIBuilder<ERiC<DoubleVector>>(ERiC.class) //
        .with(DBSCAN.Parameterizer.MINPTS_ID, 30) //
        .with(ERiC.Parameterizer.DELTA_ID, 0.20) //
        .with(ERiC.Parameterizer.TAU_ID, 0.04) //
        .with(ERiC.Parameterizer.K_ID, 50) //
        .with(PCARunner.Parameterizer.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class) //
        .with(WeightedCovarianceMatrixBuilder.Parameterizer.WEIGHT_ID, ErfcWeight.class) //
        .with(EigenPairFilter.PCA_EIGENPAIR_FILTER, RelativeEigenPairFilter.class) //
        .with(RelativeEigenPairFilter.Parameterizer.EIGENPAIR_FILTER_RALPHA, 1.60) //
        .build();
    Clustering<CorrelationModel> clustering = eric.run(database);
    // Hierarchical pairs scored: 0.9204825
    testFMeasure(database, clustering, 0.728074);
    testClusterSizes(clustering, new int[] { 109, 188, 303 });
}
Also used : RelativeEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.RelativeEigenPairFilter) ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) WeightedCovarianceMatrixBuilder(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.WeightedCovarianceMatrixBuilder) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Example 4 with CorrelationModel

use of de.lmu.ifi.dbs.elki.data.model.CorrelationModel in project elki by elki-project.

In the class ERiCTest, the method testERiCOverlap.

/**
 * Runs ERiC with fixed parameters on the "correlation-overlap-3-5d.ascii"
 * data set and compares the F-measure and the cluster sizes of the result to
 * recorded reference values.
 */
@Test
public void testERiCOverlap() {
    Database database = makeSimpleDatabase(UNITTEST + "correlation-overlap-3-5d.ascii", 650);
    // Configure the algorithm first, then run it in a separate step.
    ERiC<DoubleVector> eric = new ELKIBuilder<ERiC<DoubleVector>>(ERiC.class) //
        .with(DBSCAN.Parameterizer.MINPTS_ID, 15) //
        .with(ERiC.Parameterizer.DELTA_ID, 1.0) //
        .with(ERiC.Parameterizer.TAU_ID, 1.0) //
        .with(ERiC.Parameterizer.K_ID, 45) //
        .with(PCARunner.Parameterizer.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class) //
        .with(WeightedCovarianceMatrixBuilder.Parameterizer.WEIGHT_ID, ErfcWeight.class) //
        .with(EigenPairFilter.PCA_EIGENPAIR_FILTER, PercentageEigenPairFilter.class) //
        .with(PercentageEigenPairFilter.Parameterizer.ALPHA_ID, 0.6) //
        .build();
    Clustering<CorrelationModel> clustering = eric.run(database);
    testFMeasure(database, clustering, 0.831136946);
    testClusterSizes(clustering, new int[] { 29, 189, 207, 225 });
}
Also used : PercentageEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter) ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) WeightedCovarianceMatrixBuilder(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.WeightedCovarianceMatrixBuilder) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Aggregations

CorrelationModel (de.lmu.ifi.dbs.elki.data.model.CorrelationModel)4 AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)2 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2 Model (de.lmu.ifi.dbs.elki.data.model.Model)2 Database (de.lmu.ifi.dbs.elki.database.Database)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 WeightedCovarianceMatrixBuilder (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.WeightedCovarianceMatrixBuilder)2 PercentageEigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter)2 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Test (org.junit.Test)2 CorePredicate (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate)1 ERiCNeighborPredicate (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.ERiCNeighborPredicate)1 GeneralizedDBSCAN (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN)1 MinPtsCorePredicate (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)1