use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.
the class ERiC method extractCorrelationClusters.
/**
 * Converts the clusters of the given DBSCAN-style clustering into correlation
 * clusters, grouped by correlation dimension. Each non-noise cluster gets a
 * {@link CorrelationModel} built from a PCA of its members and its centroid.
 * All noise partitions (and clusters whose dimension is not below
 * {@code dimensionality}) are merged into a single "[noise]" cluster stored
 * at index {@code dimensionality}.
 *
 * The clusters of {@code dbscanResult} are only read; noise IDs are merged
 * into a separate set so that the input clustering is left unmodified.
 *
 * @param dbscanResult the clustering whose clusters are converted
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate, used to look up the correlation dimension of a
 *        cluster
 * @return a list indexed by correlation dimension, each entry holding the
 *         clusters of that dimension; trailing empty entries (except index 0)
 *         are removed
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
  // One (initially empty) cluster list per correlation dimension 0..dimensionality.
  List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
  for (int i = 0; i <= dimensionality; i++) {
    clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
  }
  // IDs of all noise objects over all partitions. Kept separate from the
  // input clusters so that merging never writes back into dbscanResult.
  DBIDs noiseIds = null;
  for (Cluster<Model> clus : dbscanResult.getAllClusters()) {
    DBIDs group = clus.getIDs();
    int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
    if (dim < dimensionality) {
      EigenPairFilter filter = new FirstNEigenPairFilter(dim);
      // Cluster list for this correlation dimension.
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
      SortedEigenPairs epairs = settings.pca.processIds(group, relation).getEigenPairs();
      int numstrong = filter.filter(epairs.eigenValues());
      PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
      double[] centroid = Centroid.make(relation, group).getArrayRef();
      // The name encodes the dimension and the running index within it.
      Cluster<CorrelationModel> correlationCluster = new Cluster<>("[" + dim + "_" + correlationClusters.size() + "]", group, new CorrelationModel(pcares, centroid));
      correlationClusters.add(correlationCluster);
    } else {
      // Partition containing noise (or full-dimensional objects).
      if (noiseIds == null) {
        noiseIds = group;
      } else {
        // Merge into a fresh set instead of mutating a cluster of the input.
        ModifiableDBIDs merged = DBIDUtil.newHashSet(noiseIds);
        merged.addDBIDs(group);
        noiseIds = merged;
      }
    }
  }
  if (noiseIds != null && noiseIds.size() > 0) {
    // The noise cluster is stored under the full dimensionality.
    List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dimensionality);
    EigenPairFilter filter = new FirstNEigenPairFilter(dimensionality);
    SortedEigenPairs epairs = settings.pca.processIds(noiseIds, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    double[] centroid = Centroid.make(relation, noiseIds).getArrayRef();
    Cluster<CorrelationModel> correlationCluster = new Cluster<>("[noise]", noiseIds, new CorrelationModel(pcares, centroid));
    correlationClusters.add(correlationCluster);
  }
  // Delete dimensionalities not found: strip empty lists from the high end,
  // stopping at the first non-empty one (index 0 is always kept).
  for (int i = dimensionality; i > 0; i--) {
    if (!clusterMap.get(i).isEmpty()) {
      break;
    }
    clusterMap.remove(i);
  }
  return clusterMap;
}
use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.
the class DependencyDerivator method generateModel.
/**
 * Runs the PCA on the given set of IDs and derives a correlation analysis
 * solution for the given centroid.
 *
 * If the filtered PCA result has no weak eigenvectors, the solution is built
 * without a linear equation system ({@code null}). Otherwise an equation
 * system is set up from the transposed weak eigenvectors and their product
 * with the centroid, and solved by total pivot search.
 *
 * @param relation the database
 * @param ids the set of ids
 * @param centroid the centroid
 * @return the correlation analysis solution describing the dependencies
 */
public CorrelationAnalysisSolution<V> generateModel(Relation<V> relation, DBIDs ids, double[] centroid) {
  CorrelationAnalysisSolution<V> sol;
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("PCA...");
  }
  SortedEigenPairs epairs = pca.processIds(ids, relation).getEigenPairs();
  int numstrong = filter.filter(epairs.eigenValues());
  PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
  double[][] weakEigenvectors = pcares.getWeakEigenvectors();
  double[][] strongEigenvectors = pcares.getStrongEigenvectors();
  // TODO: what if we don't have any weak eigenvectors?
  if (weakEigenvectors[0].length == 0) {
    // No weak eigenvectors: there is no equation system to solve.
    sol = new CorrelationAnalysisSolution<>(null, relation, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
  } else {
    double[][] transposedWeakEigenvectors = transpose(weakEigenvectors);
    if (LOG.isDebugging()) {
      StringBuilder msg = new StringBuilder(1000);
      formatTo(msg.append("Strong Eigenvectors:\n"), strongEigenvectors, " [", "]\n", ", ", nf);
      formatTo(msg.append("\nTransposed weak Eigenvectors:\n"), transposedWeakEigenvectors, " [", "]\n", ", ", nf);
      formatTo(msg.append("\nEigenvalues:\n"), pcares.getEigenvalues(), ", ", nf);
      LOG.debugFine(msg.toString());
    }
    // Right-hand side of the equation system.
    double[] b = times(transposedWeakEigenvectors, centroid);
    if (LOG.isDebugging()) {
      StringBuilder msg = new StringBuilder(1000);
      formatTo(msg.append("Centroid:\n"), centroid, ", ", nf);
      formatTo(msg.append("\ntEV * Centroid\n"), b, ", ", nf);
      LOG.debugFine(msg.toString());
    }
    if (LOG.isDebuggingFiner()) {
      // The augmented matrix [tEV | b] is only needed for this log message,
      // so it is built only when the message is actually emitted.
      // +1 == + B[0].length
      double[][] gaussJordan = new double[transposedWeakEigenvectors.length][transposedWeakEigenvectors[0].length + 1];
      setMatrix(gaussJordan, 0, transposedWeakEigenvectors.length, 0, transposedWeakEigenvectors[0].length, transposedWeakEigenvectors);
      setCol(gaussJordan, transposedWeakEigenvectors[0].length, b);
      LOG.debugFiner("Gauss-Jordan-Elimination of " + format(gaussJordan, " [", "]\n", ", ", nf));
    }
    // The equation system gets its own copy of the coefficient matrix.
    LinearEquationSystem lq = new LinearEquationSystem(copy(transposedWeakEigenvectors), b);
    lq.solveByTotalPivotSearch();
    sol = new CorrelationAnalysisSolution<>(lq, relation, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
    if (LOG.isDebuggingFine()) {
      LOG.debugFine(//
      new StringBuilder().append("Solution:\n").append("Standard deviation ").append(//
      sol.getStandardDeviation()).append(lq.equationsToString(nf.getMaximumFractionDigits())).toString());
    }
  }
  return sol;
}
use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.
the class FourCNeighborPredicate method computeLocalModel.
/**
 * Builds the local model for one query object: runs the PCA on its
 * neighbors, filters the eigenpairs to obtain the correlation dimension, and
 * keeps those neighbors whose quadratic-form distance to the query object
 * (using the similarity matrix of the filtered PCA result) does not exceed
 * the squared epsilon. Neighborhood size, survivor count and correlation
 * dimension are recorded in the statistics trackers along the way.
 *
 * @param id Query object
 * @param neighbors Neighbors of the query object
 * @param relation Data relation
 * @return model holding the correlation dimension and the surviving neighbors
 */
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
  final int neighborCount = neighbors.size();
  mvSize.put(neighborCount);

  // Local PCA on the neighborhood, then filter to get the correlation dimension.
  SortedEigenPairs eigenPairs = pca.processIds(neighbors, relation).getEigenPairs();
  final int cordim = filter.filter(eigenPairs.eigenValues());
  double[][] similarity = new PCAFilteredResult(eigenPairs, cordim, settings.kappa, 1.).similarityMatrix();

  double[] query = relation.get(id).toArray();
  // Compare against the squared threshold, so that no square root is needed.
  final double epsilonSquared = settings.epsilon * settings.epsilon;

  HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet(neighborCount);
  DBIDIter it = neighbors.iter();
  while (it.valid()) {
    // Weighted / projected distance of this neighbor to the query object.
    double[] delta = minusEquals(relation.get(it).toArray(), query);
    double weighted = transposeTimesTimes(delta, similarity, delta);
    if (weighted <= epsilonSquared) {
      survivors.add(it);
    }
    it.advance();
  }

  // Survivor statistics are only tracked up to the lambda threshold.
  if (cordim <= settings.lambda) {
    mvSize2.put(survivors.size());
  }
  mvCorDim.put(cordim);
  return new PreDeConModel(cordim, survivors);
}
use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.
the class COPACNeighborPredicate method computeLocalModel.
/**
 * COPAC model computation.
 *
 * Runs the PCA on the k nearest neighbors of the query object and filters the
 * eigenpairs to obtain the local dimension. If that dimension equals the
 * length of the query vector, the object is treated as noise and gets an
 * empty neighbor set. Otherwise every object of the relation whose
 * quadratic-form distance to the query object (using the similarity matrix
 * of the filtered PCA result) is at most {@code epsilonsq} is kept.
 *
 * @param id Query object
 * @param knnneighbors k nearest neighbors (used for the PCA only)
 * @param relation Data relation
 * @return COPAC object model
 */
protected COPACModel computeLocalModel(DBIDRef id, DoubleDBIDList knnneighbors, Relation<V> relation) {
  SortedEigenPairs epairs = settings.pca.processIds(knnneighbors, relation).getEigenPairs();
  int pdim = settings.filter.filter(epairs.eigenValues());
  double[] vecP = relation.get(id).toArray();
  if (pdim == vecP.length) {
    // Full dimensional - noise! Return before building the filtered PCA
    // result, which would not be used in this case.
    return new COPACModel(pdim, DBIDUtil.EMPTYDBIDS);
  }
  PCAFilteredResult pcares = new PCAFilteredResult(epairs, pdim, 1., 0.);
  double[][] mat = pcares.similarityMatrix();
  // Check which objects survive. Note that the candidates are all objects
  // of the relation, not only the k nearest neighbors.
  HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet();
  for (DBIDIter neighbor = relation.iterDBIDs(); neighbor.valid(); neighbor.advance()) {
    double[] diff = minusEquals(relation.get(neighbor).toArray(), vecP);
    double cdistP = transposeTimesTimes(diff, mat, diff);
    if (cdistP <= epsilonsq) {
      survivors.add(neighbor);
    }
  }
  return new COPACModel(pdim, survivors);
}
use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs in project elki by elki-project.
the class AbstractFilteredPCAIndex method initialize.
/**
 * Computes and stores a filtered local PCA result for every object of the
 * relation. Does nothing if the storage already exists.
 *
 * @throws EmptyDataException if the relation is missing or empty
 */
@Override
public void initialize() {
  if (relation == null || relation.size() <= 0) {
    throw new EmptyDataException();
  }
  // Already initialized: keep the existing results.
  // (Original note: "recomputed for the partitions!")
  if (storage != null) {
    return;
  }
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
  // Use the monotonic clock for elapsed time; wall-clock time may jump.
  long startNanos = System.nanoTime();
  FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Performing local PCA", relation.size(), getLogger()) : null;
  // TODO: use a bulk operation?
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DoubleDBIDList objects = objectsForPCA(iditer);
    SortedEigenPairs epairs = pca.processIds(objects, relation).getEigenPairs();
    int numstrong = filter.filter(epairs.eigenValues());
    PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
    storage.put(iditer, pcares);
    getLogger().incrementProcessed(progress);
  }
  getLogger().ensureCompleted(progress);
  long endNanos = System.nanoTime();
  if (getLogger().isVerbose()) {
    // Report in milliseconds, as before.
    long elapsedTime = (endNanos - startNanos) / 1000000L;
    getLogger().verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
  }
}
Aggregations