Search in sources :

Example 1 with EigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter in project elki by elki-project.

the class ERiC method extractCorrelationClusters.

/**
 * Converts the clusters of the given clustering into correlation clusters and
 * groups them by correlation dimensionality. Every non-noise cluster whose
 * dimensionality (as reported by the ERiC predicate) is below the feature
 * space dimensionality becomes one correlation cluster; all remaining
 * clusters are merged into a single "[noise]" cluster stored at index
 * {@code dimensionality}.
 * <p>
 * Note: the first noise partition encountered is reused as the accumulator,
 * so its id set is replaced when further noise partitions are merged in.
 *
 * @param dbscanResult the clustering whose clusters are converted
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate
 * @return a list of clusters for each dimensionality; trailing empty lists
 *         (down to, but excluding, index 0) are removed
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
    // One bucket per correlation dimensionality 0..dimensionality.
    List<List<Cluster<CorrelationModel>>> byDimension = new ArrayList<>();
    for (int d = 0; d <= dimensionality; d++) {
        byDimension.add(new ArrayList<Cluster<CorrelationModel>>());
    }

    // Collects all noise objects over all partitions.
    Cluster<Model> noiseAccumulator = null;
    for (Cluster<Model> candidate : dbscanResult.getAllClusters()) {
        DBIDs members = candidate.getIDs();
        int corrDim;
        if (candidate.isNoise()) {
            corrDim = dimensionality;
        } else {
            corrDim = npred.dimensionality(members.iter());
        }

        if (corrDim >= dimensionality) {
            // Partition containing noise: fold it into the accumulator.
            if (noiseAccumulator == null) {
                noiseAccumulator = candidate;
            } else {
                ModifiableDBIDs union = DBIDUtil.newHashSet(noiseAccumulator.getIDs());
                union.addDBIDs(candidate.getIDs());
                noiseAccumulator.setIDs(union);
            }
            continue;
        }

        // Regular correlation cluster; the label carries its position in the bucket.
        List<Cluster<CorrelationModel>> bucket = byDimension.get(corrDim);
        String label = "[" + corrDim + "_" + bucket.size() + "]";
        bucket.add(toCorrelationCluster(label, members, corrDim, relation));
    }

    if (noiseAccumulator != null && noiseAccumulator.size() > 0) {
        DBIDs noiseIds = noiseAccumulator.getIDs();
        byDimension.get(dimensionality).add(toCorrelationCluster("[noise]", noiseIds, dimensionality, relation));
    }

    // Delete dimensionalities not found: drop empty buckets from the top end.
    int last = dimensionality;
    while (last > 0 && byDimension.get(last).isEmpty()) {
        byDimension.remove(last);
        last--;
    }
    return byDimension;
}

/**
 * Builds a single correlation cluster for the given ids: runs the configured
 * PCA on them, keeps the eigenpairs selected by a
 * {@link FirstNEigenPairFilter} constructed with {@code retained}, and
 * attaches the centroid of the ids.
 *
 * @param name the name of the new cluster
 * @param ids the members of the new cluster
 * @param retained the argument passed to the first-N eigenpair filter
 * @param relation the database containing the objects
 * @return the new correlation cluster
 */
private Cluster<CorrelationModel> toCorrelationCluster(String name, DBIDs ids, int retained, Relation<V> relation) {
    EigenPairFilter firstN = new FirstNEigenPairFilter(retained);
    SortedEigenPairs pairs = settings.pca.processIds(ids, relation).getEigenPairs();
    int strongCount = firstN.filter(pairs.eigenValues());
    PCAFilteredResult filtered = new PCAFilteredResult(pairs, strongCount, 1., 0.);
    double[] center = Centroid.make(relation, ids).getArrayRef();
    return new Cluster<>(name, ids, new CorrelationModel(filtered, center));
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PercentageEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 2 with EigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter in project elki by elki-project.

the class CASH method runDerivator.

/**
 * Runs the dependency derivator on the specified interval and fills
 * {@code ids} with the ids of the interval plus every object of the relation
 * whose distance to the derived model is at most 0.25. The distance is the
 * value returned by {@code mahalanobisDistance} for the model's similarity
 * matrix, the object's column vector and the model's centroid.
 *
 * @param relation the database containing the parameterization functions
 * @param dim the dimensionality of the database
 * @param interval the interval to build the model
 * @param ids the set receiving the assigned ids
 * @return the strong eigenvectors of the derived model, cut down by
 *         {@code getMatrix} to the bounds (0, row count, 0, dim - 1)
 */
private double[][] runDerivator(Relation<ParameterizationFunction> relation, int dim, CASHInterval interval, ModifiableDBIDs ids) {
    // Fixed distance threshold for assigning objects to the model.
    final double threshold = .25;

    Database intervalDB = buildDerivatorDB(relation, interval);
    PCARunner pcaRunner = new PCARunner(new StandardCovarianceMatrixBuilder());
    EigenPairFilter firstN = new FirstNEigenPairFilter(dim - 1);
    DependencyDerivator<DoubleVector> derivator = new DependencyDerivator<>(null, FormatUtil.NF4, pcaRunner, firstN, 0, false);
    CorrelationAnalysisSolution<DoubleVector> solution = derivator.run(intervalDB);

    double[][] similarity = solution.getSimilarityMatrix();
    double[] center = solution.getCentroid();

    // The objects of the interval itself always belong to the model.
    ids.addDBIDs(interval.getIDs());

    // Search for nearby vectors in the original database.
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        double[] vec = relation.get(it).getColumnVector();
        if (mahalanobisDistance(similarity, vec, center) <= threshold) {
            ids.add(it);
        }
        it.advance();
    }

    double[][] strong = solution.getStrongEigenvectors();
    return getMatrix(strong, 0, strong.length, 0, dim - 1);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DependencyDerivator(de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Database(de.lmu.ifi.dbs.elki.database.Database) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) StandardCovarianceMatrixBuilder(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder)

Example 3 with EigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter in project elki by elki-project.

the class CASH method runDerivator.

/**
 * Runs the dependency derivator on the objects with the given ids and returns
 * the normalized linear equation system of the resulting model.
 *
 * @param relation the database containing the parameterization functions
 * @param dimensionality the dimensionality of the subspace; passed to the
 *        first-N eigenpair filter used by the derivator
 * @param ids the ids to build the model
 * @return the normalized linear equation system of the derived model
 * @throws IllegalStateException if normalization fails with a
 *         {@link NonNumericFeaturesException}; the original exception is
 *         attached as the cause
 */
private LinearEquationSystem runDerivator(Relation<ParameterizationFunction> relation, int dimensionality, DBIDs ids) {
    try {
        // build database for derivator
        Database derivatorDB = buildDerivatorDB(relation, ids);
        PCARunner pca = new PCARunner(new StandardCovarianceMatrixBuilder());
        EigenPairFilter filter = new FirstNEigenPairFilter(dimensionality);
        DependencyDerivator<DoubleVector> derivator = new DependencyDerivator<>(null, FormatUtil.NF4, pca, filter, 0, false);
        CorrelationAnalysisSolution<DoubleVector> model = derivator.run(derivatorDB);
        return model.getNormalizedLinearEquationSystem(null);
    } catch (NonNumericFeaturesException e) {
        // Chain the original exception so its stack trace is not lost.
        throw new IllegalStateException("Error during normalization: " + e, e);
    }
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DependencyDerivator(de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) NonNumericFeaturesException(de.lmu.ifi.dbs.elki.datasource.filter.normalization.NonNumericFeaturesException) Database(de.lmu.ifi.dbs.elki.database.Database) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) LinearEquationSystem(de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem) StandardCovarianceMatrixBuilder(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder)

Example 4 with EigenPairFilter

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter in project elki by elki-project.

the class ERiCNeighborPredicate method instantiate.

/**
 * Full instantiation interface. For every object of the relation, runs the
 * configured PCA on the result of a k-nearest-neighbor query (Euclidean
 * distance, {@code settings.k} neighbors), applies the configured eigenpair
 * filter, and stores the filtered PCA result for that object. The elapsed
 * preprocessing time is reported through the logger's statistics.
 *
 * @param database Database
 * @param relation Relation
 * @return Instance
 */
public Instance instantiate(Database database, Relation<V> relation) {
    DistanceQuery<V> distQuery = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<V> neighborQuery = database.getKNNQuery(distQuery, settings.k);
    WritableDataStore<PCAFilteredResult> localModels = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
    PCARunner runner = settings.pca;
    EigenPairFilter eigenFilter = settings.filter;

    Duration elapsed = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
    // Progress is only tracked when verbose logging is enabled.
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress(this.getClass().getName(), relation.size(), LOG);
    }

    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        DoubleDBIDList neighbors = neighborQuery.getKNNForDBID(it, settings.k);
        PCAResult localPca = runner.processQueryResult(neighbors, relation);
        PCAFilteredResult filtered = new PCAFilteredResult(localPca.getEigenPairs(), eigenFilter.filter(localPca.getEigenvalues()), 1., 0.);
        localModels.put(it, filtered);
        LOG.incrementProcessed(prog);
        it.advance();
    }

    LOG.ensureCompleted(prog);
    LOG.statistics(elapsed.end());
    return new Instance(relation.getDBIDs(), localModels, relation);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Aggregations

EigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter)4 PCARunner (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner)3 FirstNEigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter)3 DependencyDerivator (de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator)2 Database (de.lmu.ifi.dbs.elki.database.Database)2 ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)2 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)2 PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)2 StandardCovarianceMatrixBuilder (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder)2 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)1 CorrelationModel (de.lmu.ifi.dbs.elki.data.model.CorrelationModel)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)1 NonNumericFeaturesException (de.lmu.ifi.dbs.elki.datasource.filter.normalization.NonNumericFeaturesException)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)1 LinearEquationSystem (de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem)1 PCAResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult)1