Search in sources :

Example 1 with FirstNEigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter in project elki by elki-project.

In the class ERiC, method extractCorrelationClusters:

/**
 * Extracts the correlation clusters and noise from the DBSCAN result and
 * returns, for each correlation dimension, the list of clusters found with
 * that dimension. Each cluster carries a {@link CorrelationModel} built from
 * the filtered PCA result of its members and their centroid.
 * <p>
 * Every partition whose correlation dimension is not below
 * {@code dimensionality} (this includes partitions flagged as noise) is
 * merged into a single cluster named {@code "[noise]"}, which is stored at
 * index {@code dimensionality}. The clusters of {@code dbscanResult} are only
 * read, never modified.
 *
 * @param dbscanResult the clustering whose partitions are converted
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate
 * @return a list of clusters for each dimensionality; empty lists at the
 *         upper end (indices above 0) are removed
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
    // result: one (possibly empty) list per correlation dimension 0..dimensionality
    List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
    for (int i = 0; i <= dimensionality; i++) {
        clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
    }
    // ids of all noise objects over all partitions. The first noise partition
    // is referenced as-is; a private mutable copy is created only once a
    // second partition has to be merged in, so the input clusters stay untouched.
    DBIDs noiseIds = null;
    ModifiableDBIDs mergedNoise = null;
    // iterate over the partitions found by DBSCAN
    for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
        DBIDs group = clus.getIDs();
        int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
        if (dim < dimensionality) {
            // get cluster list for this dimension.
            List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
            String name = "[" + dim + "_" + correlationClusters.size() + "]";
            correlationClusters.add(makeCorrelationCluster(name, group, relation, dim));
        } else if (noiseIds == null) {
            // first partition containing noise
            noiseIds = group;
        } else {
            // further noise partition: merge into the private copy
            if (mergedNoise == null) {
                mergedNoise = DBIDUtil.newHashSet(noiseIds);
                noiseIds = mergedNoise;
            }
            mergedNoise.addDBIDs(group);
        }
    }
    if (noiseIds != null && noiseIds.size() > 0) {
        // the noise cluster is filed under the full dimensionality
        clusterMap.get(dimensionality).add(makeCorrelationCluster("[noise]", noiseIds, relation, dimensionality));
    }
    // Delete dimensionalities not found (index 0 is always kept).
    for (int i = dimensionality; i > 0; i--) {
        if (!clusterMap.get(i).isEmpty()) {
            break;
        }
        clusterMap.remove(i);
    }
    return clusterMap;
}

/**
 * Builds a named cluster with a correlation model for the given objects.
 *
 * @param name the name of the new cluster
 * @param ids the members of the new cluster
 * @param relation the database containing the objects
 * @param numEigenPairs the parameter handed to the {@link FirstNEigenPairFilter}
 * @return the cluster, with a model made of the filtered PCA result and the centroid of {@code ids}
 */
private Cluster<CorrelationModel> makeCorrelationCluster(String name, DBIDs ids, Relation<V> relation, int numEigenPairs) {
    EigenPairFilter filter = new FirstNEigenPairFilter(numEigenPairs);
    SortedEigenPairs epairs = settings.pca.processIds(ids, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[] centroid = Centroid.make(relation, ids).getArrayRef();
    return new Cluster<>(name, ids, new CorrelationModel(pcares, centroid));
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PercentageEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 2 with FirstNEigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter in project elki by elki-project.

In the class CASH, method runDerivator:

/**
 * Runs the derivator on the objects of the specified interval and collects
 * the objects belonging to the resulting model: all ids of the interval,
 * plus every object of the relation whose distance to the model (computed by
 * {@code mahalanobisDistance} from the model's similarity matrix and
 * centroid) is at most 0.25.
 *
 * @param relation the database containing the parameterization functions
 * @param dim the dimensionality of the database
 * @param interval the interval to build the model
 * @param ids the set the ids are added to (expected to be empty on entry)
 * @return a basis of the found subspace, cut out of the model's strong eigenvectors
 */
private double[][] runDerivator(Relation<ParameterizationFunction> relation, int dim, CASHInterval interval, ModifiableDBIDs ids) {
    // Fit a correlation model to the objects of the interval.
    final Database intervalDB = buildDerivatorDB(relation, interval);
    final PCARunner pcaRunner = new PCARunner(new StandardCovarianceMatrixBuilder());
    final EigenPairFilter firstN = new FirstNEigenPairFilter(dim - 1);
    final CorrelationAnalysisSolution<DoubleVector> solution = new DependencyDerivator<DoubleVector>(null, FormatUtil.NF4, pcaRunner, firstN, 0, false).run(intervalDB);
    final double[][] similarity = solution.getSimilarityMatrix();
    final double[] center = solution.getCentroid();
    // Maximum distance for an object to be assigned to the model.
    final double threshold = .25;

    // The interval's own objects always belong to the model.
    ids.addDBIDs(interval.getIDs());
    // Scan the original database for further objects close to the model.
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        double[] column = relation.get(it).getColumnVector();
        if (mahalanobisDistance(similarity, column, center) <= threshold) {
            ids.add(it);
        }
        it.advance();
    }

    final double[][] strong = solution.getStrongEigenvectors();
    return getMatrix(strong, 0, strong.length, 0, dim - 1);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DependencyDerivator(de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Database(de.lmu.ifi.dbs.elki.database.Database) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) StandardCovarianceMatrixBuilder(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder)

Example 3 with FirstNEigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter in project elki by elki-project.

In the class CASH, method runDerivator:

/**
 * Runs the derivator on the objects with the specified ids and returns the
 * normalized linear equation system of the resulting model.
 *
 * @param relation the database containing the parameterization functions
 * @param dimensionality the dimensionality of the subspace; handed to the
 *        {@link FirstNEigenPairFilter} used by the derivator
 * @param ids the ids to build the model
 * @return the normalized linear equation system of the derived model
 * @throws IllegalStateException if normalization fails with a
 *         {@link NonNumericFeaturesException}; the original exception is
 *         attached as the cause
 */
private LinearEquationSystem runDerivator(Relation<ParameterizationFunction> relation, int dimensionality, DBIDs ids) {
    try {
        // build database for derivator
        Database derivatorDB = buildDerivatorDB(relation, ids);
        PCARunner pca = new PCARunner(new StandardCovarianceMatrixBuilder());
        EigenPairFilter filter = new FirstNEigenPairFilter(dimensionality);
        DependencyDerivator<DoubleVector> derivator = new DependencyDerivator<>(null, FormatUtil.NF4, pca, filter, 0, false);
        CorrelationAnalysisSolution<DoubleVector> model = derivator.run(derivatorDB);
        // null is passed as the normalization argument
        return model.getNormalizedLinearEquationSystem(null);
    } catch (NonNumericFeaturesException e) {
        // Keep the original exception as cause so its stack trace is not lost.
        throw new IllegalStateException("Error during normalization: " + e.getMessage(), e);
    }
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DependencyDerivator(de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) NonNumericFeaturesException(de.lmu.ifi.dbs.elki.datasource.filter.normalization.NonNumericFeaturesException) Database(de.lmu.ifi.dbs.elki.database.Database) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) LinearEquationSystem(de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem) StandardCovarianceMatrixBuilder(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder)

Aggregations

EigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter): 3, FirstNEigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter): 3, DependencyDerivator (de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator): 2, Database (de.lmu.ifi.dbs.elki.database.Database): 2, ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase): 2, PCARunner (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner): 2, StandardCovarianceMatrixBuilder (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder): 2, Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 1, CorrelationModel (de.lmu.ifi.dbs.elki.data.model.CorrelationModel): 1, Model (de.lmu.ifi.dbs.elki.data.model.Model): 1, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 1, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 1, ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 1, NonNumericFeaturesException (de.lmu.ifi.dbs.elki.datasource.filter.normalization.NonNumericFeaturesException): 1, LinearEquationSystem (de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem): 1, PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult): 1, SortedEigenPairs (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs): 1, PercentageEigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1