use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter in project elki by elki-project.
the class ERiC method extractCorrelationClusters.
/**
 * Converts the clusters of the density-based pre-clustering into correlation
 * clusters, grouped by their correlation dimensionality.
 * <p>
 * Every non-noise cluster whose correlation dimensionality (as reported by
 * {@code npred}) is below {@code dimensionality} is wrapped into a cluster
 * with a PCA-based {@link CorrelationModel} and stored in the list at the
 * index of its correlation dimensionality. The objects of all other clusters
 * (noise clusters and clusters of full dimensionality) are merged into a
 * single cluster named {@code "[noise]"}, stored at index
 * {@code dimensionality}. Finally, empty lists at the end of the result are
 * removed; the list at index 0 is always kept.
 * <p>
 * The clusters contained in {@code dbscanResult} are only read, never
 * modified.
 *
 * @param dbscanResult the pre-clustering whose clusters are converted
 * @param relation the database containing the objects
 * @param dimensionality the dimensionality of the feature space
 * @param npred ERiC predicate, used to obtain the correlation dimensionality
 *        of each non-noise cluster
 * @return a list of clusters for each dimensionality
 */
private List<List<Cluster<CorrelationModel>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> relation, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
  // One (initially empty) cluster list per dimensionality 0..dimensionality.
  List<List<Cluster<CorrelationModel>>> clusterMap = new ArrayList<>();
  for(int i = 0; i <= dimensionality; i++) {
    clusterMap.add(new ArrayList<Cluster<CorrelationModel>>());
  }

  // IDs of all noise objects seen so far. While only one noise partition has
  // been seen this is that partition's own DBIDs object; the hash set is only
  // created once a second partition has to be merged in, so that neither the
  // input clusters are modified nor the accumulated set is copied repeatedly.
  DBIDs noiseIds = null;
  ModifiableDBIDs mergedNoise = null;

  for(Cluster<Model> clus : dbscanResult.getAllClusters()) {
    DBIDs group = clus.getIDs();
    int dim = clus.isNoise() ? dimensionality : npred.dimensionality(group.iter());
    if(dim < dimensionality) {
      // Regular correlation cluster: file it under its dimensionality.
      List<Cluster<CorrelationModel>> correlationClusters = clusterMap.get(dim);
      String name = "[" + dim + "_" + correlationClusters.size() + "]";
      correlationClusters.add(buildCorrelationCluster(name, group, dim, relation));
      continue;
    }
    // Partition containing noise.
    if(noiseIds == null) {
      noiseIds = group;
      continue;
    }
    if(mergedNoise == null) {
      mergedNoise = DBIDUtil.newHashSet(noiseIds);
      noiseIds = mergedNoise;
    }
    mergedNoise.addDBIDs(group);
  }

  if(noiseIds != null && noiseIds.size() > 0) {
    clusterMap.get(dimensionality).add(buildCorrelationCluster("[noise]", noiseIds, dimensionality, relation));
  }

  // Delete dimensionalities not found (only trailing ones, index 0 stays).
  for(int i = dimensionality; i > 0; i--) {
    if(!clusterMap.get(i).isEmpty()) {
      break;
    }
    clusterMap.remove(i);
  }
  return clusterMap;
}

/**
 * Builds a correlation cluster for the given objects: runs the configured
 * PCA on them, determines the number of strong eigenpairs with a
 * {@link FirstNEigenPairFilter} configured with {@code dim}, and attaches the
 * resulting {@link CorrelationModel} together with the centroid of the
 * objects.
 *
 * @param name the name of the new cluster
 * @param ids the objects of the new cluster
 * @param dim the value the eigenpair filter is configured with
 * @param relation the database containing the objects
 * @return the new correlation cluster
 */
private Cluster<CorrelationModel> buildCorrelationCluster(String name, DBIDs ids, int dim, Relation<V> relation) {
  EigenPairFilter filter = new FirstNEigenPairFilter(dim);
  SortedEigenPairs epairs = settings.pca.processIds(ids, relation).getEigenPairs();
  int numstrong = filter.filter(epairs.eigenValues());
  PCAFilteredResult pcares = new PCAFilteredResult(epairs, numstrong, 1., 0.);
  double[] centroid = Centroid.make(relation, ids).getArrayRef();
  Cluster<CorrelationModel> correlationCluster = new Cluster<>(name, ids, new CorrelationModel(pcares, centroid));
  return correlationCluster;
}
use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter in project elki by elki-project.
the class CASH method runDerivator.
/**
 * Runs the dependency derivator on a database built from the given interval
 * and collects the objects that lie close to the derived model.
 * <p>
 * The ids of the interval are added to {@code ids} first. Afterwards every
 * object of {@code relation} whose {@code mahalanobisDistance} to the model
 * centroid (using the model's similarity matrix) is at most {@code 0.25} is
 * added as well.
 *
 * @param relation the database containing the parameterization functions
 * @param dim the dimensionality of the database
 * @param interval the interval to build the model
 * @param ids the set the ids are assigned to
 * @return the part of the model's strong eigenvector matrix selected by
 *         {@code getMatrix(basis, 0, basis.length, 0, dim - 1)}
 */
private double[][] runDerivator(Relation<ParameterizationFunction> relation, int dim, CASHInterval interval, ModifiableDBIDs ids) {
  // Maximum distance to the model for an object to be assigned to it.
  final double maxDistance = .25;

  // Derive the model from the objects of the interval.
  Database intervalDB = buildDerivatorDB(relation, interval);
  PCARunner pcaRunner = new PCARunner(new StandardCovarianceMatrixBuilder());
  EigenPairFilter firstN = new FirstNEigenPairFilter(dim - 1);
  DependencyDerivator<DoubleVector> dependencyDerivator = new DependencyDerivator<>(null, FormatUtil.NF4, pcaRunner, firstN, 0, false);
  CorrelationAnalysisSolution<DoubleVector> solution = dependencyDerivator.run(intervalDB);
  double[][] similarity = solution.getSimilarityMatrix();
  double[] center = solution.getCentroid();

  // The interval's own objects always belong to the model.
  ids.addDBIDs(interval.getIDs());

  // Scan the original database for further objects near the model.
  DBIDIter it = relation.iterDBIDs();
  while(it.valid()) {
    double[] vec = relation.get(it).getColumnVector();
    if(mahalanobisDistance(similarity, vec, center) <= maxDistance) {
      ids.add(it);
    }
    it.advance();
  }

  double[][] strong = solution.getStrongEigenvectors();
  int rows = strong.length;
  return getMatrix(strong, 0, rows, 0, dim - 1);
}
use of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter in project elki by elki-project.
the class CASH method runDerivator.
/**
 * Runs the dependency derivator on a database built from the given objects
 * and returns the linear equation system of the derived model.
 *
 * @param relation the database containing the parameterization functions
 * @param dimensionality the dimensionality of the subspace; the derivator's
 *        {@link FirstNEigenPairFilter} is configured with this value
 * @param ids the ids to build the model
 * @return the normalized linear equation system of the derived model
 * @throws IllegalStateException if obtaining the normalized linear equation
 *         system fails with a {@link NonNumericFeaturesException}; the
 *         original exception is attached as cause
 */
private LinearEquationSystem runDerivator(Relation<ParameterizationFunction> relation, int dimensionality, DBIDs ids) {
  try {
    // build database for derivator
    Database derivatorDB = buildDerivatorDB(relation, ids);
    PCARunner pca = new PCARunner(new StandardCovarianceMatrixBuilder());
    EigenPairFilter filter = new FirstNEigenPairFilter(dimensionality);
    DependencyDerivator<DoubleVector> derivator = new DependencyDerivator<>(null, FormatUtil.NF4, pca, filter, 0, false);
    CorrelationAnalysisSolution<DoubleVector> model = derivator.run(derivatorDB);
    return model.getNormalizedLinearEquationSystem(null);
  }
  catch(NonNumericFeaturesException e) {
    // Keep the original exception as cause so its stack trace is not lost.
    throw new IllegalStateException("Error during normalization: " + e, e);
  }
}
Aggregations