Search in sources:

Example 1 with PCAFilteredResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult in project elki by elki-project.

the class ERiC method extractCorrelationClusters.

/**
 * Converts the clusters of the preceding density-based clustering step into
 * correlation clusters and groups them by their correlation dimensionality.
 * Each resulting cluster carries a {@link CorrelationModel} built from a PCA
 * of its members and their centroid.
 *
 * All clusters that are flagged as noise, or whose correlation dimensionality
 * is not below {@code dimensionality}, are pooled into a single cluster named
 * {@code "[noise]"} that is stored at index {@code dimensionality}. The
 * clusters of {@code dbscanResult} themselves are not modified.
 *
 * @param dbscanResult the clustering whose clusters are to be converted
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate, used to look up the correlation
 *        dimensionality of a cluster
 * @return a list indexed by correlation dimensionality, each entry holding
 *         the clusters of that dimensionality; trailing empty entries are
 *         removed (the entry at index 0 is always kept)
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
    // One (initially empty) bucket per correlation dimensionality 0..dimensionality.
    List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
    for (int i = 0; i <= dimensionality; i++) {
        clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
    }
    // IDs of all noise objects over all partitions. The first noise partition
    // is referenced as-is; a private merged set is only allocated once a
    // second partition shows up, so the input clusters are never mutated and
    // the accumulated set is not re-copied for every further partition.
    DBIDs noiseIds = null;
    ModifiableDBIDs mergedNoise = null;
    for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
        DBIDs group = clus.getIDs();
        int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
        if (dim < dimensionality) {
            // Proper correlation cluster: file it under its dimensionality.
            List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
            Cluster<CorrelationModel> correlationCluster = new Cluster<>("[" + dim + "_" + correlationClusters.size() + "]", group, buildCorrelationModel(group, relation, dim));
            correlationClusters.add(correlationCluster);
        } else if (noiseIds == null) {
            // First partition containing noise.
            noiseIds = group;
        } else {
            if (mergedNoise == null) {
                mergedNoise = DBIDUtil.newHashSet(noiseIds);
                noiseIds = mergedNoise;
            }
            mergedNoise.addDBIDs(group);
        }
    }
    if (noiseIds != null && noiseIds.size() > 0) {
        Cluster<CorrelationModel> noiseCluster = new Cluster<>("[noise]", noiseIds, buildCorrelationModel(noiseIds, relation, dimensionality));
        clusterMap.get(dimensionality).add(noiseCluster);
    }
    // Delete dimensionalities not found: strip empty buckets from the end.
    for (int i = dimensionality; i > 0; i--) {
        if (!clusterMap.get(i).isEmpty()) {
            break;
        }
        clusterMap.remove(i);
    }
    return clusterMap;
}

/**
 * Builds the correlation model of a set of objects: runs the configured PCA
 * on the objects, selects the strong eigenpairs with a
 * {@link FirstNEigenPairFilter} created for {@code dim}, and combines the
 * filtered PCA result with the centroid of the objects.
 *
 * @param ids the objects forming the cluster
 * @param relation the database containing the objects
 * @param dim the value the eigenpair filter is created with
 * @return the correlation model of the given objects
 */
private CorrelationModel buildCorrelationModel(DBIDs ids, Relation<V> relation, int dim) {
    EigenPairFilter filter = new FirstNEigenPairFilter(dim);
    SortedEigenPairs epairs = settings.pca.processIds(ids, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[] centroid = Centroid.make(relation, ids).getArrayRef();
    return new CorrelationModel(pcares, centroid);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PercentageEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 2 with PCAFilteredResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult in project elki by elki-project.

the class DependencyDerivator method generateModel.

/**
 * Runs the PCA on the given set of IDs and derives the correlation analysis
 * solution for the given centroid. When weak eigenvectors exist, a linear
 * equation system is set up from the transposed weak eigenvectors and their
 * product with the centroid, and solved by total pivot search; otherwise the
 * solution is created without an equation system.
 *
 * @param relation the database
 * @param ids the set of ids
 * @param centroid the centroid
 * @return the correlation analysis solution describing the dependencies
 */
public CorrelationAnalysisSolution<V> generateModel(Relation<V> relation, DBIDs ids, double[] centroid) {
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("PCA...");
    }
    SortedEigenPairs epairs = pca.processIds(ids, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[][] weakEigenvectors = pcares.getWeakEigenvectors();
    double[][] strongEigenvectors = pcares.getStrongEigenvectors();
    // TODO: what if we don't have any weak eigenvectors?
    if (weakEigenvectors[0].length == 0) {
        // No weak eigenvectors: there is no equation system to solve.
        return new CorrelationAnalysisSolution<>(null, relation, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
    }
    double[][] transposedWeakEigenvectors = transpose(weakEigenvectors);
    if (LOG.isDebugging()) {
        StringBuilder msg = new StringBuilder(1000);
        formatTo(msg.append("Strong Eigenvectors:\n"), strongEigenvectors, " [", "]\n", ", ", nf);
        formatTo(msg.append("\nTransposed weak Eigenvectors:\n"), transposedWeakEigenvectors, " [", "]\n", ", ", nf);
        formatTo(msg.append("\nEigenvalues:\n"), pcares.getEigenvalues(), ", ", nf);
        LOG.debugFine(msg.toString());
    }
    // Right-hand side of the equation system.
    double[] b = times(transposedWeakEigenvectors, centroid);
    if (LOG.isDebugging()) {
        StringBuilder msg = new StringBuilder(1000);
        formatTo(msg.append("Centroid:\n"), centroid, ", ", nf);
        formatTo(msg.append("\ntEV * Centroid\n"), b, ", ", nf);
        LOG.debugFine(msg.toString());
    }
    if (LOG.isDebuggingFiner()) {
        // The augmented matrix [transposedWeakEigenvectors | b] is only needed
        // for this log message, so it is built only when the message is emitted.
        // +1 == + B[0].length
        double[][] gaussJordan = new double[transposedWeakEigenvectors.length][transposedWeakEigenvectors[0].length + 1];
        setMatrix(gaussJordan, 0, transposedWeakEigenvectors.length, 0, transposedWeakEigenvectors[0].length, transposedWeakEigenvectors);
        setCol(gaussJordan, transposedWeakEigenvectors[0].length, b);
        LOG.debugFiner("Gauss-Jordan-Elimination of " + format(gaussJordan, " [", "]\n", ", ", nf));
    }
    // The solver gets its own copy of the coefficient matrix.
    LinearEquationSystem lq = new LinearEquationSystem(copy(transposedWeakEigenvectors), b);
    lq.solveByTotalPivotSearch();
    CorrelationAnalysisSolution<V> sol = new CorrelationAnalysisSolution<>(lq, relation, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
    if (LOG.isDebuggingFine()) {
        LOG.debugFine(new StringBuilder().append("Solution:\n").append("Standard deviation ").append(sol.getStandardDeviation()).append(lq.equationsToString(nf.getMaximumFractionDigits())).toString());
    }
    return sol;
}
Also used : SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) LinearEquationSystem(de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem)

Example 3 with PCAFilteredResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult in project elki by elki-project.

the class FourCNeighborPredicate method computeLocalModel.

/**
 * Computes the local model of a query object: a PCA over its neighbors
 * yields the correlation dimensionality and a similarity matrix, and every
 * neighbor whose weighted distance to the query object (under that matrix)
 * does not exceed epsilon is kept.
 *
 * @param id Query object
 * @param neighbors Neighbors of the query object
 * @param relation Data relation
 * @return Model holding the correlation dimensionality and the kept neighbors
 */
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
    // Statistics: size of the neighborhood handed in.
    mvSize.put(neighbors.size());

    SortedEigenPairs eigenPairs = pca.processIds(neighbors, relation).getEigenPairs();
    final int correlationDim = filter.filter(eigenPairs.eigenValues());
    PCAFilteredResult filtered = new PCAFilteredResult(eigenPairs, correlationDim, settings.kappa, 1.);
    double[][] similarity = filtered.similarityMatrix();
    double[] queryVector = relation.get(id).toArray();

    // Compare squared values, so that no square root is needed below.
    final double epsilonSquared = settings.epsilon * settings.epsilon;

    HashSetModifiableDBIDs kept = DBIDUtil.newHashSet(neighbors.size());
    DBIDIter cursor = neighbors.iter();
    while (cursor.valid()) {
        // Weighted / projected distance between neighbor and query object.
        double[] delta = minusEquals(relation.get(cursor).toArray(), queryVector);
        if (transposeTimesTimes(delta, similarity, delta) <= epsilonSquared) {
            kept.add(cursor);
        }
        cursor.advance();
    }

    // Statistics: the kept-neighbor count is recorded only when the
    // correlation dimensionality does not exceed lambda.
    if (correlationDim <= settings.lambda) {
        mvSize2.put(kept.size());
    }
    mvCorDim.put(correlationDim);
    return new PreDeConModel(correlationDim, kept);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) PreDeConModel(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate.PreDeConModel) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with PCAFilteredResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult in project elki by elki-project.

the class COPACNeighborPredicate method computeLocalModel.

/**
 * COPAC model computation: determines the local correlation dimensionality
 * of the query object from a PCA of its k nearest neighbors and collects all
 * objects whose weighted distance to the query object (under the similarity
 * matrix of the filtered PCA result) is at most epsilon.
 *
 * @param id Query object
 * @param knnneighbors k nearest neighbors
 * @param relation Data relation
 * @return COPAC object model; it has no neighbors when the local
 *         dimensionality equals the data dimensionality
 */
protected COPACModel computeLocalModel(DBIDRef id, DoubleDBIDList knnneighbors, Relation<V> relation) {
    SortedEigenPairs epairs = settings.pca.processIds(knnneighbors, relation).getEigenPairs();
    int pdim = settings.filter.filter(epairs.eigenValues());
    double[] vecP = relation.get(id).toArray();
    if (pdim == vecP.length) {
        // Full dimensional - noise! Decided before the filtered PCA result and
        // its similarity matrix are built, as neither is needed on this path.
        return new COPACModel(pdim, DBIDUtil.EMPTYDBIDS);
    }
    double[][] mat = new PCAFilteredResult(epairs, pdim, 1., 0.).similarityMatrix();
    // Check which neighbors survive.
    // NOTE(review): the scan covers every object of the relation, not only
    // knnneighbors - presumably intended, confirm against the algorithm.
    HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet();
    for (DBIDIter neighbor = relation.iterDBIDs(); neighbor.valid(); neighbor.advance()) {
        double[] diff = minusEquals(relation.get(neighbor).toArray(), vecP);
        double cdistP = transposeTimesTimes(diff, mat, diff);
        if (cdistP <= epsilonsq) {
            survivors.add(neighbor);
        }
    }
    return new COPACModel(pdim, survivors);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with PCAFilteredResult

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult in project elki by elki-project.

the class ERiCNeighborPredicate method instantiate.

/**
 * Full instantiation interface. For every object of the relation, a PCA is
 * run on its k nearest neighbors (Euclidean distance) and the filtered
 * result is stored; the returned instance is built on that storage.
 *
 * @param database Database
 * @param relation Relation
 * @return Instance
 */
public Instance instantiate(Database database, Relation<V> relation) {
    DistanceQuery<V> distanceQuery = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<V> knnQuery = database.getKNNQuery(distanceQuery, settings.k);
    WritableDataStore<PCAFilteredResult> results = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
    PCARunner runner = settings.pca;
    EigenPairFilter eigenFilter = settings.filter;

    // Timing covers only the per-object PCA loop below.
    Duration timer = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress(this.getClass().getName(), relation.size(), LOG);
    }

    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        DoubleDBIDList knn = knnQuery.getKNNForDBID(it, settings.k);
        PCAResult local = runner.processQueryResult(knn, relation);
        int numStrong = eigenFilter.filter(local.getEigenvalues());
        results.put(it, new PCAFilteredResult(local.getEigenPairs(), numStrong, 1., 0.));
        LOG.incrementProcessed(prog);
        it.advance();
    }

    LOG.ensureCompleted(prog);
    LOG.statistics(timer.end());
    return new Instance(relation.getDBIDs(), results, relation);
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) PCARunner(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) PCAResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Aggregations

PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult): 6; SortedEigenPairs (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs): 5; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 4; DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList): 2; HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs): 2; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 2; EigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter): 2; PreDeConModel (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate.PreDeConModel): 1; Cluster (de.lmu.ifi.dbs.elki.data.Cluster): 1; CorrelationModel (de.lmu.ifi.dbs.elki.data.model.CorrelationModel): 1; Model (de.lmu.ifi.dbs.elki.data.model.Model): 1; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 1; ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs): 1; Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration): 1; LinearEquationSystem (de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem): 1; PCAResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult): 1; PCARunner (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner): 1; FirstNEigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter): 1; PercentageEigenPairFilter (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter): 1; EmptyDataException (de.lmu.ifi.dbs.elki.utilities.exceptions.EmptyDataException): 1