Search in sources:

Example 1 with SortedEigenPairs

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.

In class ERiC, method extractCorrelationClusters.

/**
 * Extracts the correlation clusters and noise from the copac result and
 * returns a list, indexed by correlation dimension, of the clusters found in
 * that correlation dimension. Each cluster carries a {@link CorrelationModel}
 * built from a PCA of its members and from their centroid.
 * <p>
 * Every partition that is flagged as noise, or whose correlation dimension is
 * not below {@code dimensionality}, is merged into a single "[noise]" cluster
 * stored at index {@code dimensionality}. The clusters of
 * {@code dbscanResult} are only read; merged noise is collected in a local
 * set instead of being written back into the input clustering.
 * <p>
 * Trailing empty entries (highest dimensions first, never index 0) are
 * removed from the result before it is returned.
 *
 * @param dbscanResult the clustering whose clusters are regrouped by
 *        correlation dimension
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate
 * @return a list of clusters for each dimensionality
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
    // result: one (initially empty) list per correlation dimension
    List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
    for (int i = 0; i <= dimensionality; i++) {
        clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
    }
    // IDs of all noise objects over all partitions. The first noise partition
    // is referenced as-is; a private copy is only made once a second one
    // has to be merged in, so the input clusters are never modified.
    DBIDs noiseIds = null;
    ModifiableDBIDs mergedNoise = null;
    // iterate over the partitions
    for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
        DBIDs group = clus.getIDs();
        int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
        if (dim < dimensionality) {
            // get cluster list for this dimension.
            List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
            // The name encodes the dimension and the running index within it.
            String name = "[" + dim + "_" + correlationClusters.size() + "]";
            correlationClusters.add(makeCorrelationCluster(name, group, dim, relation));
        } else if (noiseIds == null) {
            // first partition containing noise
            noiseIds = group;
        } else {
            if (mergedNoise == null) {
                mergedNoise = DBIDUtil.newHashSet(noiseIds);
                noiseIds = mergedNoise;
            }
            mergedNoise.addDBIDs(group);
        }
    }
    if (noiseIds != null && noiseIds.size() > 0) {
        // Noise is filed under the full dimensionality.
        clusterMap.get(dimensionality).add(makeCorrelationCluster("[noise]", noiseIds, dimensionality, relation));
    }
    // Delete dimensionalities not found.
    for (int i = dimensionality; i > 0; i--) {
        if (!clusterMap.get(i).isEmpty()) {
            break;
        }
        clusterMap.remove(i);
    }
    return clusterMap;
}

/**
 * Builds a cluster with a correlation model for the given objects: runs the
 * configured PCA on them, keeps the first {@code dim} eigenpairs as strong
 * eigenpairs and attaches the centroid of the objects.
 *
 * @param name the cluster name
 * @param ids the members of the cluster
 * @param dim the number of eigenpairs passed to the first-n filter
 * @param relation the database containing the objects
 * @return the new cluster
 */
private Cluster<CorrelationModel> makeCorrelationCluster(String name, DBIDs ids, int dim, Relation<V> relation) {
    EigenPairFilter filter = new FirstNEigenPairFilter(dim);
    SortedEigenPairs epairs = settings.pca.processIds(ids, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[] centroid = Centroid.make(relation, ids).getArrayRef();
    Cluster<CorrelationModel> correlationCluster = new Cluster<>(name, ids, new CorrelationModel(pcares, centroid));
    return correlationCluster;
}
Also used : EigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.EigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) PercentageEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.PercentageEigenPairFilter) FirstNEigenPairFilter(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) CorrelationModel(de.lmu.ifi.dbs.elki.data.model.CorrelationModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) ArrayList(java.util.ArrayList) List(java.util.List) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)

Example 2 with SortedEigenPairs

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.

In class DependencyDerivator, method generateModel.

/**
 * Runs the pca on the given set of IDs and for the given centroid, and
 * derives the correlation analysis solution from the filtered result.
 * <p>
 * If the weak eigenvector matrix has no columns, the solution is created
 * without a linear equation system ({@code null}). Otherwise the transposed
 * weak eigenvectors and their product with the centroid form a linear
 * equation system, which is solved by total pivot search.
 *
 * @param relation the database
 * @param ids the set of ids
 * @param centroid the centroid
 * @return the solution holding the (possibly absent) equation system, the
 *         strong and weak eigenvectors, the similarity matrix and the
 *         centroid
 */
public CorrelationAnalysisSolution<V> generateModel(Relation<V> relation, DBIDs ids, double[] centroid) {
    CorrelationAnalysisSolution<V> sol;
    if (LOG.isDebuggingFine()) {
        LOG.debugFine("PCA...");
    }
    SortedEigenPairs epairs = pca.processIds(ids, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[][] weakEigenvectors = pcares.getWeakEigenvectors();
    double[][] strongEigenvectors = pcares.getStrongEigenvectors();
    // TODO: what if we don't have any weak eigenvectors?
    if (weakEigenvectors[0].length == 0) {
        sol = new CorrelationAnalysisSolution<>(null, relation, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
    } else {
        double[][] transposedWeakEigenvectors = transpose(weakEigenvectors);
        if (LOG.isDebugging()) {
            StringBuilder msg = new StringBuilder(1000);
            formatTo(msg.append("Strong Eigenvectors:\n"), strongEigenvectors, " [", "]\n", ", ", nf);
            formatTo(msg.append("\nTransposed weak Eigenvectors:\n"), transposedWeakEigenvectors, " [", "]\n", ", ", nf);
            formatTo(msg.append("\nEigenvalues:\n"), pcares.getEigenvalues(), ", ", nf);
            LOG.debugFine(msg.toString());
        }
        // Right-hand side of the equation system.
        double[] b = times(transposedWeakEigenvectors, centroid);
        if (LOG.isDebugging()) {
            StringBuilder msg = new StringBuilder(1000);
            formatTo(msg.append("Centroid:\n"), centroid, ", ", nf);
            formatTo(msg.append("\ntEV * Centroid\n"), b, ", ", nf);
            LOG.debugFine(msg.toString());
        }
        if (LOG.isDebuggingFiner()) {
            // The augmented matrix [transposedWeakEigenvectors | b] is only
            // needed for this log message, so it is built only when the
            // message is actually emitted.
            // +1 == + B[0].length
            double[][] gaussJordan = new double[transposedWeakEigenvectors.length][transposedWeakEigenvectors[0].length + 1];
            setMatrix(gaussJordan, 0, transposedWeakEigenvectors.length, 0, transposedWeakEigenvectors[0].length, transposedWeakEigenvectors);
            setCol(gaussJordan, transposedWeakEigenvectors[0].length, b);
            LOG.debugFiner("Gauss-Jordan-Elimination of " + format(gaussJordan, " [", "]\n", ", ", nf));
        }
        // The equation system is given its own copy of the coefficients.
        LinearEquationSystem lq = new LinearEquationSystem(copy(transposedWeakEigenvectors), b);
        lq.solveByTotalPivotSearch();
        sol = new CorrelationAnalysisSolution<>(lq, relation, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
        if (LOG.isDebuggingFine()) {
            LOG.debugFine(new StringBuilder().append("Solution:\n").append("Standard deviation ").append(sol.getStandardDeviation()).append(lq.equationsToString(nf.getMaximumFractionDigits())).toString());
        }
    }
    return sol;
}
Also used : SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) LinearEquationSystem(de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem)

Example 3 with SortedEigenPairs

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.

In class FourCNeighborPredicate, method computeLocalModel.

/**
 * Computes the local model of an object from its neighborhood.
 * <p>
 * A PCA is run on the neighbors, the filter determines the correlation
 * dimension, and the resulting similarity matrix (built with
 * {@code settings.kappa} and {@code 1.}) is used to keep only those neighbors
 * whose quadratic-form distance to the object is at most
 * {@code settings.epsilon} squared. The neighborhood size and the correlation
 * dimension are always recorded in the statistics; the number of kept
 * neighbors only when the correlation dimension does not exceed
 * {@code settings.lambda}.
 *
 * @param id the object to compute the model for
 * @param neighbors the neighbors of the object
 * @param relation the data relation
 * @return the model consisting of the correlation dimension and the kept
 *         neighbors
 */
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
    final int neighborCount = neighbors.size();
    mvSize.put(neighborCount);

    final SortedEigenPairs eigenPairs = pca.processIds(neighbors, relation).getEigenPairs();
    final int correlationDim = filter.filter(eigenPairs.eigenValues());
    final PCAFilteredResult filtered = new PCAFilteredResult(eigenPairs, correlationDim, settings.kappa, 1.);
    final double[][] similarity = filtered.similarityMatrix();
    final double[] center = relation.get(id).toArray();
    // Compare squared values, so no square root is needed in the loop.
    final double epsilonSquared = settings.epsilon * settings.epsilon;

    final HashSetModifiableDBIDs retained = DBIDUtil.newHashSet(neighborCount);
    DBIDIter it = neighbors.iter();
    while (it.valid()) {
        // Weighted / projected distance of this neighbor to the object.
        final double[] delta = minusEquals(relation.get(it).toArray(), center);
        if (transposeTimesTimes(delta, similarity, delta) <= epsilonSquared) {
            retained.add(it);
        }
        it.advance();
    }

    if (correlationDim <= settings.lambda) {
        mvSize2.put(retained.size());
    }
    mvCorDim.put(correlationDim);
    return new PreDeConModel(correlationDim, retained);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) PreDeConModel(de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate.PreDeConModel) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with SortedEigenPairs

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.

In class COPACNeighborPredicate, method computeLocalModel.

/**
 * COPAC model computation.
 * <p>
 * Runs the configured PCA on the k nearest neighbors and lets the configured
 * filter determine the local dimensionality. If it equals the length of the
 * query vector, the model has an empty neighbor set. Otherwise every object
 * of the relation whose quadratic-form distance to the query object (under
 * the similarity matrix of the filtered PCA result) is at most
 * {@code epsilonsq} is collected.
 *
 * @param id Query object
 * @param knnneighbors k nearest neighbors
 * @param relation Data relation
 * @return COPAC object model
 */
protected COPACModel computeLocalModel(DBIDRef id, DoubleDBIDList knnneighbors, Relation<V> relation) {
    final SortedEigenPairs eigenPairs = settings.pca.processIds(knnneighbors, relation).getEigenPairs();
    final int localDim = settings.filter.filter(eigenPairs.eigenValues());
    final PCAFilteredResult filtered = new PCAFilteredResult(eigenPairs, localDim, 1., 0.);
    final double[][] similarity = filtered.similarityMatrix();
    final double[] query = relation.get(id).toArray();

    if (localDim != query.length) {
        // Collect the objects of the relation that survive the distance test.
        final HashSetModifiableDBIDs kept = DBIDUtil.newHashSet();
        DBIDIter candidate = relation.iterDBIDs();
        while (candidate.valid()) {
            final double[] delta = minusEquals(relation.get(candidate).toArray(), query);
            if (transposeTimesTimes(delta, similarity, delta) <= epsilonsq) {
                kept.add(candidate);
            }
            candidate.advance();
        }
        return new COPACModel(localDim, kept);
    }
    // Full dimensional - noise!
    return new COPACModel(localDim, DBIDUtil.EMPTYDBIDS);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with SortedEigenPairs

use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.

In class AbstractFilteredPCAIndex, method initialize.

/**
 * Computes and stores a filtered local PCA result for every object of the
 * relation.
 * <p>
 * Does nothing if the storage already exists. For each object, the objects
 * returned by {@code objectsForPCA} are fed into the PCA, the filter
 * determines the number of strong eigenpairs, and the filtered result is put
 * into the storage. Progress and total runtime are logged when the logger is
 * verbose.
 *
 * @throws EmptyDataException if the relation is {@code null} or empty
 */
@Override
public void initialize() {
    if (relation == null || relation.size() <= 0) {
        throw new EmptyDataException();
    }
    // Storage already exists: keep it instead of computing everything again.
    if (storage != null) {
        return;
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
    // nanoTime is monotonic, so the elapsed time cannot be distorted by
    // wall-clock adjustments the way currentTimeMillis differences can.
    long start = System.nanoTime();
    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Performing local PCA", relation.size(), getLogger()) : null;
    // TODO: use a bulk operation?
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        DoubleDBIDList objects = objectsForPCA(iditer);
        SortedEigenPairs epairs = pca.processIds(objects, relation).getEigenPairs();
        int numstrong = filter.filter(epairs.eigenValues());
        PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
        storage.put(iditer, pcares);
        getLogger().incrementProcessed(progress);
    }
    getLogger().ensureCompleted(progress);
    long end = System.nanoTime();
    if (getLogger().isVerbose()) {
        // Reported in milliseconds, as before.
        long elapsedTime = (end - start) / 1000000L;
        getLogger().verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
    }
}
Also used : EmptyDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.EmptyDataException) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) PCAFilteredResult(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

SortedEigenPairs (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs)7 PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)5 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)3 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)2 ArrayList (java.util.ArrayList)2 PreDeConModel (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate.PreDeConModel)1 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)1 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)1 CorrelationModel (de.lmu.ifi.dbs.elki.data.model.CorrelationModel)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid)1 CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)1 EigenvalueDecomposition (de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition)1 LUDecomposition (de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition)1 LinearEquationSystem (de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem)1 EigenPair (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.EigenPair)1